diff --git a/Engine/lib/bullet/BulletLicense.txt b/Engine/lib/bullet/BulletLicense.txt
index c3ec68c21..2e5680a8d 100644
--- a/Engine/lib/bullet/BulletLicense.txt
+++ b/Engine/lib/bullet/BulletLicense.txt
@@ -1,5 +1,5 @@
 /*
-Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+Copyright (c) 2003-2010 Erwin Coumans  http://continuousphysics.com/Bullet/
 
 This software is provided 'as-is', without any express or implied warranty.
 In no event will the authors be held liable for any damages arising from the use of this software.
@@ -13,5 +13,6 @@ subject to the following restrictions:
 */
 
 
-Free for commercial use, but please mail bullet@erwincoumans.com to report projects, and join the forum at
-www.continuousphysics.com/Bullet/phpBB2
+Free for commercial use, please report projects in the forum at http://www.bulletphysics.org
+
+In case you want to display a Bullet logo in your software: you can download the Bullet logo in various vector formats and high resolution at the download section in http://bullet.googlecode.com
diff --git a/Engine/lib/bullet/Bullet_Faq.pdf b/Engine/lib/bullet/Bullet_Faq.pdf
deleted file mode 100644
index c4e8da94f..000000000
Binary files a/Engine/lib/bullet/Bullet_Faq.pdf and /dev/null differ
diff --git a/Engine/lib/bullet/CMakeLists.txt b/Engine/lib/bullet/CMakeLists.txt
index 1acafce20..594c908c6 100644
--- a/Engine/lib/bullet/CMakeLists.txt
+++ b/Engine/lib/bullet/CMakeLists.txt
@@ -1,15 +1,302 @@
-cmake_minimum_required(VERSION 2.4)
+cmake_minimum_required(VERSION 2.4.3)
+set(CMAKE_ALLOW_LOOSE_LOOP_CONSTRUCTS true)
+
+#this line has to appear before 'PROJECT' in order to be able to disable incremental linking
+SET(MSVC_INCREMENTAL_DEFAULT ON)
 
 PROJECT(BULLET_PHYSICS)
-SET(BULLET_VERSION 2.75)
+SET(BULLET_VERSION 2.81)
+
+IF(COMMAND cmake_policy)
+   cmake_policy(SET CMP0003 NEW)
+ENDIF(COMMAND cmake_policy)
+
 
 IF (NOT CMAKE_BUILD_TYPE)
 # SET(CMAKE_BUILD_TYPE "Debug")
  SET(CMAKE_BUILD_TYPE "Release")
 ENDIF (NOT CMAKE_BUILD_TYPE) 
 
-# string (REPLACE "/D_WINDOWS" "" CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
-remove_definitions(-D_WINDOWS )
+SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -D_DEBUG")
+#MESSAGE("CMAKE_CXX_FLAGS_DEBUG="+${CMAKE_CXX_FLAGS_DEBUG})
+
+OPTION(USE_DOUBLE_PRECISION "Use double precision"	OFF)
+OPTION(USE_GRAPHICAL_BENCHMARK "Use Graphical Benchmark" ON)
+
+
+OPTION(USE_MSVC_RUNTIME_LIBRARY_DLL "Use MSVC Runtime Library DLL (/MD or /MDd)" OFF)
+OPTION(USE_MSVC_INCREMENTAL_LINKING "Use MSVC Incremental Linking" OFF)
+
+OPTION(USE_CUSTOM_VECTOR_MATH "Use custom vectormath library" OFF)
+
+IF (USE_CUSTOM_VECTOR_MATH)
+	ADD_DEFINITIONS(-DUSE_SYSTEM_VECTORMATH)
+	IF(WIN32)
+	SET (VECTOR_MATH_INCLUDE ${BULLET_PHYSICS_SOURCE_DIR}/src/vectormath/sse CACHE PATH "Vector Math library include path.")
+	ELSE(WIN32)
+	SET (VECTOR_MATH_INCLUDE ${BULLET_PHYSICS_SOURCE_DIR}/src/vectormath/scalar CACHE PATH "Vector Math library include path.")
+	ENDIF(WIN32)
+ENDIF(USE_CUSTOM_VECTOR_MATH)
+
+
+IF (APPLE OR MSVC)
+	OPTION(BUILD_MULTITHREADING "Use BulletMultiThreading" ON)
+ELSE()
+	OPTION(BUILD_MULTITHREADING "Use BulletMultiThreading" OFF)
+ENDIF()
+
+IF (BUILD_MULTITHREADING)
+	OPTION(USE_MULTITHREADED_BENCHMARK "Use Multithreaded Benchmark" OFF)
+	IF (USE_MULTITHREADED_BENCHMARK)
+		ADD_DEFINITIONS(-DUSE_PARALLEL_SOLVER_BENCHMARK -DUSE_PARALLEL_DISPATCHER_BENCHMARK)
+	ENDIF(USE_MULTITHREADED_BENCHMARK)
+
+	IF (MSVC OR APPLE)
+		OPTION(BUILD_MINICL_OPENCL_DEMOS "Build OpenCL demos for MiniCL (Generic CPU)"  ON)
+	ELSE()
+		OPTION(BUILD_MINICL_OPENCL_DEMOS "Build OpenCL demos for MiniCL (Generic CPU)" OFF)
+	ENDIF(MSVC OR APPLE)
+
+	IF(MSVC)
+		FIND_PATH(DIRECTX_SDK_BASE_DIR Include/D3D11.h PATH  $ENV{DXSDK_DIR} )
+		IF(DIRECTX_SDK_BASE_DIR)
+			OPTION(USE_DX11 "Use DirectX 11"	ON)
+		ELSE()
+			OPTION(USE_DX11 "Use DirectX 11"	OFF)
+		ENDIF()
+	
+		FIND_PATH(AMD_OPENCL_BASE_DIR include/CL/cl.h PATH  $ENV{ATISTREAMSDKROOT} $ENV{AMDAPPSDKROOT} )
+		IF(AMD_OPENCL_BASE_DIR)
+			#AMD adds an extra slash at the end of the ATISTREAMSDKROOT variable
+			SET(AMD_OPENCL_INCLUDES ${AMD_OPENCL_BASE_DIR}/include )
+			MESSAGE("AMD OPENCL SDK FOUND")
+			IF (CMAKE_CL_64)
+				SET(CMAKE_ATISTREAMSDK_LIBPATH 		${AMD_OPENCL_BASE_DIR}/lib/x86_64 )
+			ELSE(CMAKE_CL_64)
+				SET(CMAKE_ATISTREAMSDK_LIBPATH		${AMD_OPENCL_BASE_DIR}/lib/x86 )
+			ENDIF(CMAKE_CL_64)
+			SET(CMAKE_ATISTREAMSDK_LIBRARY		${CMAKE_ATISTREAMSDK_LIBPATH}/OpenCL.lib )
+			OPTION(BUILD_AMD_OPENCL_DEMOS "Build OpenCL demos for AMD (GPU or CPU)"	ON)
+			IF (CMAKE_CL_64)
+				SET(CMAK_GLEW_LIBRARY
+					${BULLET_PHYSICS_SOURCE_DIR}/Glut/glew64s.lib		)
+			ELSE(CMAKE_CL_64)
+				SET(CMAK_GLEW_LIBRARY		${BULLET_PHYSICS_SOURCE_DIR}/Glut/glew32s.lib		)
+			ENDIF(CMAKE_CL_64)
+		ELSE()
+			OPTION(BUILD_AMD_OPENCL_DEMOS "Build OpenCL demos for AMD (GPU or CPU)"	OFF)
+		ENDIF()
+		
+		FIND_PATH(INTEL_OPENCL_BASE_DIR include/CL/cl.h PATH  $ENV{INTELOCLSDKROOT} )
+		IF(INTEL_OPENCL_BASE_DIR)
+			SET(INTEL_OPENCL_INCLUDES ${INTEL_OPENCL_BASE_DIR}/include )
+			MESSAGE("INTEL OPENCL SDK FOUND")
+			MESSAGE(${INTEL_OPENCL_INCLUDES})
+			IF (CMAKE_CL_64)
+				SET(CMAKE_INTELOCLSDK_LIBPATH 		${INTEL_OPENCL_BASE_DIR}/lib/x64 )
+			ELSE(CMAKE_CL_64)
+				SET(CMAKE_INTELOCLSDK_LIBPATH		${INTEL_OPENCL_BASE_DIR}/lib/x86 )
+			ENDIF(CMAKE_CL_64)
+			SET(INTEL_OPENCL_LIBRARIES ${CMAKE_INTELOCLSDK_LIBPATH}/OpenCL.lib)
+			OPTION(BUILD_INTEL_OPENCL_DEMOS "Build OpenCL demos for Intel (CPU)"	ON)
+		ELSE()
+			OPTION(BUILD_INTEL_OPENCL_DEMOS "Build OpenCL demos for Intel (CPU)"	OFF)
+		ENDIF()
+		
+		FIND_PATH(NVIDIA_OPENCL_BASE_DIR include/CL/cl.h PATH  $ENV{CUDA_PATH} )
+		IF(NVIDIA_OPENCL_BASE_DIR)
+			SET(NVIDIA_OPENCL_INCLUDES ${NVIDIA_OPENCL_BASE_DIR}/include )
+			MESSAGE("NVIDIA OPENCL SDK FOUND")
+			MESSAGE(${NVIDIA_OPENCL_INCLUDES})
+			IF (CMAKE_CL_64)
+				SET(CMAKE_NVSDKCOMPUTE_LIBPATH		${NVIDIA_OPENCL_BASE_DIR}/lib/x64 )
+			ELSE(CMAKE_CL_64)
+				SET(CMAKE_NVSDKCOMPUTE_LIBPATH		${NVIDIA_OPENCL_BASE_DIR}/lib/Win32	)
+			ENDIF(CMAKE_CL_64)
+		SET(NVIDIA_OPENCL_LIBRARIES		${CMAKE_NVSDKCOMPUTE_LIBPATH}/OpenCL.lib)
+		
+			OPTION(BUILD_NVIDIA_OPENCL_DEMOS "Build OpenCL demos for NVidia (GPU)"	ON)
+		ELSE()
+			OPTION(BUILD_NVIDIA_OPENCL_DEMOS "Build OpenCL demos for NVidia (GPU)"	OFF)
+		ENDIF()
+	ELSE(MSVC)
+		FIND_PATH(AMD_OPENCL_BASE_DIR include/CL/cl.h PATH  $ENV{ATISTREAMSDKROOT} $ENV{AMDAPPSDKROOT} )
+		IF(AMD_OPENCL_BASE_DIR)
+			#AMD adds an extra slash at the end of the ATISTREAMSDKROOT variable
+			SET(AMD_OPENCL_INCLUDES ${AMD_OPENCL_BASE_DIR}/include )
+			MESSAGE("AMD OPENCL SDK FOUND")
+			MESSAGE(${AMD_OPENCL_INCLUDES})
+			IF (CMAKE_CL_64)
+				SET(CMAKE_ATISTREAMSDK_LIBPATH 		${AMD_OPENCL_BASE_DIR}/lib/x86_64 )
+			ELSE(CMAKE_CL_64)
+				SET(CMAKE_ATISTREAMSDK_LIBPATH		${AMD_OPENCL_BASE_DIR}/lib/x86 )
+			ENDIF(CMAKE_CL_64)
+			OPTION(BUILD_AMD_OPENCL_DEMOS "Build OpenCL demos for AMD (GPU or CPU)"	ON)
+			SET(CMAKE_ATISTREAMSDK_LIBRARY		OpenCL )
+		ELSE()
+			OPTION(BUILD_AMD_OPENCL_DEMOS "Build OpenCL demos for AMD (GPU or CPU)"	OFF)
+		ENDIF(AMD_OPENCL_BASE_DIR)
+		
+    FIND_PATH(INTEL_OPENCL_INCLUDES CL/cl.h)
+    FIND_PATH(INTEL_OPENCL_ICD_CFG intelocl64.icd /etc/OpenCL/vendors)
+    FIND_LIBRARY(INTEL_OPENCL_LIBRARIES OpenCL PATH /usr/lib64)
+    IF (INTEL_OPENCL_INCLUDES AND INTEL_OPENCL_LIBRARIES AND INTEL_OPENCL_ICD_CFG)
+            MESSAGE("INTEL OPENCL SDK FOUND")
+            MESSAGE(${INTEL_OPENCL_LIBRARIES})
+            OPTION(BUILD_INTEL_OPENCL_DEMOS "Build OpenCL demos for Intel (CPU)"        ON)
+    ELSE ()
+            MESSAGE("INTEL OPENCL NOT FOUND")
+            OPTION(BUILD_INTEL_OPENCL_DEMOS "Build OpenCL demos for Intel (CPU)"        OFF)
+    ENDIF ()
+
+
+		FIND_PATH(NVIDIA_OPENCL_INCLUDES CL/cl.h)
+    FIND_PATH(NVIDIA_OPENCL_ICD_CFG nvidia.icd /etc/OpenCL/vendors)
+    FIND_LIBRARY(NVIDIA_OPENCL_LIBRARIES OpenCL PATH /usr/lib64 /usr/local/lib)
+    IF (NVIDIA_OPENCL_INCLUDES AND NVIDIA_OPENCL_LIBRARIES AND NVIDIA_OPENCL_ICD_CFG)
+                MESSAGE("NVidia OPENCL FOUND")
+			MESSAGE(${NVIDIA_OPENCL_LIBRARIES})
+			OPTION(BUILD_NVIDIA_OPENCL_DEMOS "Build OpenCL demos for NVidia (GPU)"	ON)
+		ELSE ()
+	                MESSAGE("NVidia OPENCL NOT FOUND")
+			OPTION(BUILD_NVIDIA_OPENCL_DEMOS "Build OpenCL demos for NVidia (GPU)"	OFF)
+		ENDIF ()
+	ENDIF(MSVC)
+
+ELSE(BUILD_MULTITHREADING)
+# 	SET(BUILD_NVIDIA_OPENCL_DEMOS OFF CACHE BOOL "Build OpenCL demos for NVidia" FORCE)
+# 	SET(BUILD_AMD_OPENCL_DEMOS OFF CACHE BOOL "Build OpenCL demos for AMD" FORCE)
+# 	SET(BUILD_INTEL_OPENCL_DEMOS OFF CACHE BOOL "Build OpenCL demos for Intel (CPU)" FORCE)
+# 	SET(BUILD_MINICL_OPENCL_DEMOS  OFF CACHE BOOL "Build OpenCL demos for MiniCL (Generic CPU)" FORCE)
+# 	SET(USE_DX11  OFF CACHE BOOL "Use DirectX 11" FORCE)
+# 	SET(USE_MULTITHREADED_BENCHMARK  OFF CACHE BOOL "Use Multithreaded Benchmark" FORCE)
+ENDIF(BUILD_MULTITHREADING)
+
+
+
+
+#SET(CMAKE_EXE_LINKER_FLAGS_INIT    "/STACK:10000000 /INCREMENTAL:NO")	
+#SET(CMAKE_EXE_LINKER_FLAGS    "/STACK:10000000 /INCREMENTAL:NO")	
+
+#MESSAGE("MSVC_INCREMENTAL_YES_FLAG"+${MSVC_INCREMENTAL_YES_FLAG})
+
+
+IF(MSVC)
+	IF (NOT USE_MSVC_INCREMENTAL_LINKING)
+		# Switch incremental linking off for Debug and RelWithDebInfo executables.
+		SET( MSVC_INCREMENTAL_YES_FLAG "/INCREMENTAL:NO")
+
+		STRING(REPLACE "INCREMENTAL:YES" "INCREMENTAL:NO" replacementFlags "${CMAKE_EXE_LINKER_FLAGS_DEBUG}")
+		SET(CMAKE_EXE_LINKER_FLAGS_DEBUG "/INCREMENTAL:NO ${replacementFlags}" )
+		MESSAGE("CMAKE_EXE_LINKER_FLAGS_DEBUG=${CMAKE_EXE_LINKER_FLAGS_DEBUG}")
+
+		# The flag variables are quoted when handed to STRING(REPLACE): an unquoted
+		# empty value expands to no argument at all, and STRING(REPLACE) then stops
+		# the configure step because it is left without an input string.
+		# CMAKE_EXE_LINKER_FLAGS and CMAKE_EXTRA_LINK_FLAGS are not modified here;
+		# only the two per-configuration variables are rewritten.
+
+		STRING(REPLACE "INCREMENTAL:YES" "INCREMENTAL:NO" replacementFlags3 "${CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO}")
+		# Prepend the switch explicitly as well, mirroring the Debug handling.
+		SET(CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO "/INCREMENTAL:NO ${replacementFlags3}" )
+
+	ENDIF (NOT USE_MSVC_INCREMENTAL_LINKING)
+
+	IF (NOT USE_MSVC_RUNTIME_LIBRARY_DLL)
+		#We statically link to reduce dependencies
+		FOREACH(flag_var CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO)
+			# "/MD" is a prefix of "/MDd", so this one substitution rewrites both
+			# the release (/MD -> /MT) and the debug (/MDd -> /MTd) runtime switch;
+			# a separate "/MDd" pass could never match once it has run.
+			IF(${flag_var} MATCHES "/MD")
+				STRING(REGEX REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}")
+			ENDIF(${flag_var} MATCHES "/MD")
+		ENDFOREACH(flag_var)
+	ENDIF (NOT USE_MSVC_RUNTIME_LIBRARY_DLL)
+
+	OPTION(USE_MSVC_SSE "Use MSVC /arch:sse option"	ON)
+	IF (USE_MSVC_SSE)
+		ADD_DEFINITIONS(/arch:SSE)
+	ENDIF()
+	OPTION(USE_MSVC_FAST_FLOATINGPOINT "Use MSVC /fp:fast option"	ON)
+	IF (USE_MSVC_FAST_FLOATINGPOINT)
+		ADD_DEFINITIONS(/fp:fast)
+	ENDIF()
+ENDIF(MSVC)
+
+
+
+IF (WIN32)
+OPTION(INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES "Create MSVC projectfiles that can be distributed" OFF)
+
+IF (INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES)
+	SET (LIBRARY_OUTPUT_PATH ${BULLET_PHYSICS_SOURCE_DIR}/lib CACHE PATH "Single output directory for building all libraries.")
+	SET( CMAKE_RUNTIME_OUTPUT_DIRECTORY ${BULLET_PHYSICS_SOURCE_DIR})
+	SET( CMAKE_RUNTIME_OUTPUT_DIRECTORY_DEBUG ${BULLET_PHYSICS_SOURCE_DIR})
+	SET( CMAKE_RUNTIME_OUTPUT_DIRECTORY_RELEASE ${BULLET_PHYSICS_SOURCE_DIR})
+	SET( CMAKE_RUNTIME_OUTPUT_DIRECTORY_MINSIZEREL ${BULLET_PHYSICS_SOURCE_DIR})
+	SET( CMAKE_RUNTIME_OUTPUT_DIRECTORY_RELWITHDEBINFO ${BULLET_PHYSICS_SOURCE_DIR})
+ELSE()
+	SET (LIBRARY_OUTPUT_PATH ${CMAKE_BINARY_DIR}/lib CACHE PATH "Single output directory for building all libraries.")
+ENDIF()
+
+
+
+OPTION(INTERNAL_CREATE_MSVC_RELATIVE_PATH_PROJECTFILES "Create MSVC projectfiles with relative paths" OFF)
+OPTION(INTERNAL_ADD_POSTFIX_EXECUTABLE_NAMES "Add MSVC postfix for executable names (_Debug)" OFF)
+
+SET(CMAKE_DEBUG_POSTFIX "_Debug" CACHE STRING "Adds a postfix for debug-built libraries.")
+SET(CMAKE_MINSIZEREL_POSTFIX "_MinsizeRel" CACHE STRING "Adds a postfix for MinsizeRelease-built libraries.")
+SET(CMAKE_RELWITHDEBINFO_POSTFIX "_RelWithDebugInfo" CACHE STRING "Adds a postfix for ReleaseWithDebug-built libraries.")
+
+
+
+
+
+IF (INTERNAL_CREATE_MSVC_RELATIVE_PATH_PROJECTFILES)
+SET(CMAKE_SUPPRESS_REGENERATION  1)
+SET(CMAKE_USE_RELATIVE_PATHS 1)
+ENDIF(INTERNAL_CREATE_MSVC_RELATIVE_PATH_PROJECTFILES)
+
+ENDIF (WIN32)
+
+
+OPTION(BUILD_CPU_DEMOS "Build original Bullet CPU demos"        ON)
+
+
+
+OPTION(INTERNAL_UPDATE_SERIALIZATION_STRUCTURES "Internal update serialization structures" OFF)
+IF (INTERNAL_UPDATE_SERIALIZATION_STRUCTURES)
+ADD_DEFINITIONS( -DBT_INTERNAL_UPDATE_SERIALIZATION_STRUCTURES)
+ENDIF (INTERNAL_UPDATE_SERIALIZATION_STRUCTURES)
+
+IF (USE_DOUBLE_PRECISION)
+ADD_DEFINITIONS( -DBT_USE_DOUBLE_PRECISION)
+SET( BULLET_DOUBLE_DEF "-DBT_USE_DOUBLE_PRECISION")
+ENDIF (USE_DOUBLE_PRECISION)
+
+IF(USE_GRAPHICAL_BENCHMARK)
+ADD_DEFINITIONS( -DUSE_GRAPHICAL_BENCHMARK)
+ENDIF (USE_GRAPHICAL_BENCHMARK)
+
+IF (WIN32)
+OPTION(USE_GLUT "Use Glut"	ON)
+ADD_DEFINITIONS( -D_IRR_STATIC_LIB_ )
+ADD_DEFINITIONS( -D_CRT_SECURE_NO_WARNINGS )
+ADD_DEFINITIONS( -D_CRT_SECURE_NO_DEPRECATE )
+ADD_DEFINITIONS( -D_SCL_SECURE_NO_WARNINGS )
+
+IF (USE_GLUT AND MSVC)
+        # Strip /D_WINDOWS from the compiler flags and the directory definitions.
+        # CMAKE_CXX_FLAGS is quoted: an unquoted empty value would vanish from the
+        # argument list and string(REPLACE) would fail for lack of an input string.
+        string (REPLACE "/D_WINDOWS" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
+        remove_definitions(-D_WINDOWS )
+ENDIF()
+ELSE(WIN32)
+OPTION(USE_GLUT "Use Glut"	ON)
+ENDIF(WIN32)
+
    
 IF(COMMAND cmake_policy)
    cmake_policy(SET CMP0003 NEW)
@@ -21,52 +308,113 @@ ENDIF(COMMAND cmake_policy)
 
 FIND_PACKAGE(OpenGL)
 IF (OPENGL_FOUND)
-MESSAGE("OPENGL FOUND")
-MESSAGE(${OPENGL_LIBRARIES})
+	MESSAGE("OPENGL FOUND")
+	MESSAGE(${OPENGL_LIBRARIES})
 ELSE (OPENGL_FOUND)
-MESSAGE("OPENGL NOT FOUND")
-SET(OPENGL_gl_LIBRARY opengl32)
-SET(OPENGL_glu_LIBRARY glu32)
+	MESSAGE("OPENGL NOT FOUND")
+	SET(OPENGL_gl_LIBRARY opengl32)
+	SET(OPENGL_glu_LIBRARY glu32)
 ENDIF (OPENGL_FOUND)
 
 # ADD_DEFINITIONS(-DBT_USE_FREEGLUT)
 
 FIND_PACKAGE(GLU)
 
-FIND_PACKAGE(GLUT)
-IF (GLUT_FOUND)
-MESSAGE("GLUT FOUND")
-MESSAGE(${GLUT_glut_LIBRARY})
-ELSE (GLUT_FOUND)
+IF (USE_GLUT)
+	FIND_PACKAGE(GLUT)
+	IF (GLUT_FOUND)
+		MESSAGE("GLUT FOUND")
+		MESSAGE(${GLUT_glut_LIBRARY})
+	ELSE (GLUT_FOUND)
+		# No system GLUT: pick one toolchain fallback, and report failure only when none applies.
+		IF (MINGW)
+			MESSAGE ("GLUT NOT FOUND, trying to use MINGW glut32")
+			SET(GLUT_glut_LIBRARY glut32)
+			#TODO add better GLUT detection for MinGW
+			SET(GLUT_FOUND TRUE)
+		ELSEIF (MSVC)
+			SET(GLUT_FOUND TRUE)
+			IF (CMAKE_CL_64)
+				message("Win64 using Glut/glut64.lib")
+				SET(GLUT_glut_LIBRARY ${BULLET_PHYSICS_SOURCE_DIR}/Glut/glut64.lib)
+			ELSE(CMAKE_CL_64)
+				message("Win32 using Glut/glut32.lib")
+				SET(GLUT_glut_LIBRARY ${BULLET_PHYSICS_SOURCE_DIR}/Glut/glut32.lib)
+			ENDIF (CMAKE_CL_64)
+		 	INCLUDE_DIRECTORIES(${BULLET_PHYSICS_SOURCE_DIR}/Glut)
+		ELSE()
+			MESSAGE("GLUT NOT FOUND")
+		ENDIF ()
+	ENDIF (GLUT_FOUND)
 
-IF (MINGW)
-MESSAGE ("GLUT NOT FOUND not found, trying to use MINGW glut32")
-SET(GLUT_glut_LIBRARY glut32)
-ENDIF (MINGW)
-
-IF (MSVC)
-MESSAGE ("GLUT NOT FOUND, trying to use Bullet/Glut/glut32.lib for MSVC")
-SET(GLUT_glut_LIBRARY ${BULLET_PHYSICS_SOURCE_DIR}/Glut/glut32.lib)
-ENDIF (MSVC)
-ENDIF (GLUT_FOUND)
-
-
-IF (WIN32)
-  INCLUDE_DIRECTORIES(${BULLET_PHYSICS_SOURCE_DIR}/Glut)
-ELSE (WIN32)
-  # This is the lines for linux.  This should always work if everything is installed and working fine.
-  INCLUDE_DIRECTORIES(/usr/include /usr/local/include ${GLUT_INCLUDE_DIR}) 
-ENDIF (WIN32)
+	IF(NOT WIN32)	
+		# This is added for linux. This should always work if everything is installed and working fine.
+		INCLUDE_DIRECTORIES(/usr/include /usr/local/include) 
+	ENDIF()
+ENDIF(USE_GLUT)
 
 
 OPTION(BUILD_DEMOS "Set when you want to build the demos" ON)
 IF(BUILD_DEMOS)
-  SUBDIRS(Demos)
+	IF(EXISTS ${BULLET_PHYSICS_SOURCE_DIR}/Demos AND IS_DIRECTORY ${BULLET_PHYSICS_SOURCE_DIR}/Demos)
+		SUBDIRS(Demos)
+	ENDIF()
 ENDIF(BUILD_DEMOS)
 
+#  "Demos_ps3")
+IF (MSVC)
+	IF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES)
+		IF(EXISTS ${BULLET_PHYSICS_SOURCE_DIR}/Demos_ps3 AND IS_DIRECTORY ${BULLET_PHYSICS_SOURCE_DIR}/Demos_ps3)
+		MESSAGE("Demos_ps3 found")
+			SUBDIRS(Demos_ps3)
+		ENDIF()
+	ENDIF()
+ENDIF(MSVC)
+
+
 OPTION(BUILD_EXTRAS "Set when you want to build the extras" ON)
 IF(BUILD_EXTRAS)
   SUBDIRS(Extras)
 ENDIF(BUILD_EXTRAS)
 
+#Maya Dynamica plugin is moved to http://dynamica.googlecode.com
+
 SUBDIRS(src)
+
+IF("${CMAKE_GENERATOR}" MATCHES "Unix Makefiles")
+	OPTION(INSTALL_LIBS "Set when you want to install libraries" ON)
+ELSE()
+	IF(APPLE AND FRAMEWORK)
+		OPTION(INSTALL_LIBS "Set when you want to install libraries" ON)
+	ELSE()
+#by default, don't enable the 'INSTALL' option for Xcode and MSVC projectfiles
+		OPTION(INSTALL_LIBS "Set when you want to install libraries" OFF)
+	ENDIF()
+ENDIF()
+
+IF(INSTALL_LIBS)
+	SET (LIB_SUFFIX "" CACHE STRING "Define suffix of directory name (32/64)" )
+	SET (LIB_DESTINATION "${CMAKE_INSTALL_PREFIX}/lib${LIB_SUFFIX}" CACHE STRING "Library directory name")
+	## the following are directories where stuff will be installed to
+	SET(INCLUDE_INSTALL_DIR      "${CMAKE_INSTALL_PREFIX}/include/bullet/" CACHE PATH "The subdirectory to the header prefix")
+	SET(PKGCONFIG_INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX}/lib${LIB_SUFFIX}/pkgconfig/" CACHE STRING "Base directory for pkgconfig files")
+	IF(NOT WIN32)
+	  CONFIGURE_FILE(${CMAKE_CURRENT_SOURCE_DIR}/bullet.pc.cmake ${CMAKE_CURRENT_BINARY_DIR}/bullet.pc @ONLY)
+  	INSTALL(
+	    FILES
+    	${CMAKE_CURRENT_BINARY_DIR}/bullet.pc
+    	DESTINATION
+    	${PKGCONFIG_INSTALL_PREFIX})
+	ENDIF(NOT WIN32)
+ENDIF(INSTALL_LIBS)
+
+#INSTALL of other files requires CMake 2.6
+IF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5)
+	OPTION(INSTALL_EXTRA_LIBS "Set when you want extra libraries installed" OFF)
+ENDIF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5)
+
+OPTION(BUILD_UNIT_TESTS "Build Unit Tests"	OFF)
+
+IF (BUILD_UNIT_TESTS)
+	SUBDIRS(UnitTests)
+ENDIF()
diff --git a/Engine/lib/bullet/ChangeLog b/Engine/lib/bullet/ChangeLog
index 8d154dc09..4a0ccf492 100644
--- a/Engine/lib/bullet/ChangeLog
+++ b/Engine/lib/bullet/ChangeLog
@@ -1,11 +1,57 @@
 Bullet Continuous Collision Detection and Physics Library
 Primary author and maintainer: Erwin Coumans
 
-Please see http://code.google.com/p/bullet/source/list for more complete log in Subversion
+This ChangeLog is incomplete, for an up-to-date list of all fixed issues see http://bullet.googlecode.com
+using http://tinyurl.com/yabmjjj
+
+2012 September 10
+	- Bullet 2.81 release preparation
+	
+2011 September 15
+	- Bullet 2.79 release, revision 2433 (mainly a bugfix release)
+	- Revert a change in 2.78 related to speculative contacts (it has undesired side effects)
+	- Use HACD Hierarchical Approximate Convex Decomposition (thanks to Khaled Mammou and Sujeon Kim)
+	- Add Intel cmake-build support for OpenCL accelerated cloth/particle
+	- add premake4 build system support to autogenerate visual studio project files that can be shipped (see msvc folder)
+	- preliminary build support for Google NativeClient, using premake4 (see msvc folder)
+			
+	
+2011 April 8
+	- Bullet 2.78 release 2383
+	- Added FractureDemo
+	- Added Separating Axis Test and Polyhedral Clipping support (See InternalEdgeDemo)
+	- Added speculative contacts as CCD response method (See CcdPhysicsDemo)
+	- OpenCL and DirectCompute cloth has basic support for capsule collision
+
+2010 September 7
+	- autotools now uses CamelCase naming for libraries just like cmake:
+	libbulletdynamics -> libBulletDynamics, libbulletmath -> libLinearMath
+
+2010 July 21
+	- Preparing for Bullet 2.77 release, around revision r2135
+	- Added an OpenCL particle demo, running on NVidia, AMD and MiniCL
+	Thanks to NVidia for the original particle demo from their OpenCL SDK
+	- Added GPU deformable object solvers for OpenCL and DirectCompute, and a DirectX 11 cloth demo
+	Thanks to AMD
+	- Create a separate library for MiniCL, 
+	MiniCL is a rudimentary OpenCL wrapper that allows to compile OpenCL kernels for multi-core CPU, using Win32 Threads or Posix
+	- Moved vectormath into Bullet/src, and added a SSE implementation
+	- Added a btParallelConstraintSolver, mainly for PlayStation 3 Cell SPUs (although it runs fine on CPU too)
+
+2010 March 6
+	- Dynamica Maya plugin (and COLLADA support) is moved to http://dynamica.googlecode.com
+
+2010 February
+	- Bullet 2.76 release, revision 2010
+	- support for the .bullet binary file format
+	- btInternalEdgeUtility to adjust unwanted collisions against internal triangle edges
+	- Improved Maya Dynamica plugin with better constraint authoring and .bullet file export
+
 
 2009 September 17
-	- Minor update to Bullet 2.75 release, revision 1770
+	- Minor update to Bullet 2.75 release, revision 1776
 	- Support for btConvex2dShape, check out Bullet/Demos/Box2dDemo
+	- Fixes in build systems
 	- Minor fix in btGjkPairDetector
 	- Initialize world transform for btCollisionShape in constructor
 
diff --git a/Engine/lib/bullet/Doxyfile b/Engine/lib/bullet/Doxyfile
index 2f9a6b27e..d483fe4b2 100644
--- a/Engine/lib/bullet/Doxyfile
+++ b/Engine/lib/bullet/Doxyfile
@@ -13,6 +13,9 @@
 # General configuration options
 #---------------------------------------------------------------------------
 
+
+
+
 # The PROJECT_NAME tag is a single word (or a sequence of words surrounded 
 # by quotes) that should identify the project. 
 PROJECT_NAME           = "Bullet Collision Detection & Physics Library"
@@ -399,7 +402,9 @@ HTML_ALIGN_MEMBERS     = YES
 
 GENERATE_HTMLHELP      = YES
 
-HHC_LOCATION = "C:\Program Files\HTML Help Workshop\hhc.exe"
+# HHC_LOCATION = "C:\Program Files\HTML Help Workshop\hhc.exe"
+HHC_LOCATION = "C:\Program Files (x86)\HTML Help Workshop\hhc.exe"
+
 
 HTML_FILE_EXTENSION = .html
 HTML_HEADER = 
@@ -589,7 +594,7 @@ MACRO_EXPANSION        = YES
 # then the macro expansion is limited to the macros specified with the 
 # PREDEFINED and EXPAND_AS_PREDEFINED tags. 
 
-EXPAND_ONLY_PREDEF     = NO
+EXPAND_ONLY_PREDEF     = YES
 
 # If the SEARCH_INCLUDES tag is set to YES (the default) the includes files 
 # in the INCLUDE_PATH (see below) will be search if a #include is found. 
@@ -615,7 +620,14 @@ INCLUDE_FILE_PATTERNS  =
 # or name=definition (no spaces). If the definition and the = are 
 # omitted =1 is assumed. 
 
-PREDEFINED             = 
+PREDEFINED		=	"ATTRIBUTE_ALIGNED128(x)=x" \
+					"ATTRIBUTE_ALIGNED16(x)=x" \
+					"SIMD_FORCE_INLINE=inline" \
+					"VECTORMATH_FORCE_INLINE=inline" \
+					"USE_WIN32_THREADING=1"\
+					"USE_PTHREADS=1"\
+					"_WIN32=1"
+					
 
 # If the MACRO_EXPANSION and EXPAND_PREDEF_ONLY tags are set to YES then 
 # this tag can be used to specify a list of macro names that should be expanded. 
@@ -718,6 +730,11 @@ MAX_DOT_GRAPH_HEIGHT   = 1024
 
 GENERATE_LEGEND        = YES
 
+
+# delete intermediate dot files?
+
+DOT_CLEANUP = YES
+
 #---------------------------------------------------------------------------
 # Configuration::addtions related to the search engine   
 #---------------------------------------------------------------------------
diff --git a/Engine/lib/bullet/INSTALL b/Engine/lib/bullet/INSTALL
index 0752e2816..0f42fb52e 100644
--- a/Engine/lib/bullet/INSTALL
+++ b/Engine/lib/bullet/INSTALL
@@ -1,24 +1,22 @@
 Bullet Collision Detection and Physics Library
 
+See also http://bulletphysics.org/mediawiki-1.5.8/index.php/Creating_a_project_from_scratch
+
 ** Windows Compilation **
 
-Under Windows, projectfiles for Visual Studio version 6,7,7.1 and 8 are 
-available in msvc/<version>.  For example, for Visual Studio 2005, open 
-msvc/8/wksbullet.sln
+	Open the Microsoft Visual Studio solution in msvc/20xx/BULLET_PHYSICS.sln
 
-The ColladaDemo and ConvexDecomposition demo needs to be able to locate the 
-data files (jenga.dae and file.obj) in the current directory. Make sure Visual 
-Studio points to the right folder (..\..).
-
-Alternatively use CMake to autogenerate a build system for Windows:
+Alternatively, use CMake to autogenerate a build system for Windows:
 	
 	- Download/install CMake from www.cmake.org or package manager
+	- Use cmake-gui, or use the cmake command line as follows:
 	- List available build systems by running 'cmake' in the Bullet root folder
+	- Use cmake-gui 
 	- Create a build system using the -G option for example:
 	
 	cmake . -G "Visual Studio 9 2008" or
 	cmake . -G "Visual Studio 9 2008 Win64"
-		 
+
 
 ** Linux Compilation **
 
@@ -26,6 +24,11 @@ Alternatively use CMake to autogenerate a build system for Windows:
     CMake is like autoconf in that it will create build scripts which are then 
     used for the actual compilation
 
+	- List available build systems by running 'cmake' in the Bullet root folder
+	- Create a build system using the -G option for example:
+
+	cmake . -G "Unix Makefiles"
+
   - There are some options for cmake builds:
       BUILD_SHARED_LIBS: default 'OFF', set to 'ON' to build .so libraries
       BUILD_EXTRAS: default 'ON', compiles additional libraries in 'Extras'
@@ -33,6 +36,8 @@ Alternatively use CMake to autogenerate a build system for Windows:
       CMAKE_INSTALL_PREFIX: default '/usr/local', the installation path.
       CMAKE_INSTALL_RPATH: if you install outside a standard ld search path,
         then you should set this to the installation lib path.
+      CMAKE_BUILD_TYPE: default 'Release', can include debug symbols with
+        either 'Debug' or 'RelWithDebInfo'.
     Other options may be discovered by 'cmake --help-variable-list' and
     'cmake --help-variable OPTION'
 
@@ -41,7 +46,7 @@ Alternatively use CMake to autogenerate a build system for Windows:
     also produce Eclipse or KDevelop project files.  See 'cmake --help' to see 
     what "generators" are available in your environment, selected via '-G'.
         For example:
-        cmake -DBUILD_SHARED_LIBS=ON
+        cmake -DBUILD_SHARED_LIBS=ON -DCMAKE_BUILD_TYPE=RelWithDebInfo
 
   - Assuming using the default Makefile output from cmake, run 'make' to 
     build, and then 'make install' if you wish to install.
@@ -53,6 +58,12 @@ Alternatively use CMake to autogenerate a build system for Windows:
     CMake is like autoconf in that it will create build scripts which are then 
     used for the actual compilation
 
+	- List available build systems by running 'cmake' in the Bullet root folder
+	- Create a build system using the -G option for example:
+
+  cmake . -G Xcode
+  cmake . -G "Unix Makefiles"
+
   - There are some options for cmake builds:
       BUILD_SHARED_LIBS: default 'OFF', set to 'ON' to build .dylib libraries
       BUILD_EXTRAS: default 'ON', compiles additional libraries in 'Extras'
@@ -60,6 +71,10 @@ Alternatively use CMake to autogenerate a build system for Windows:
       CMAKE_INSTALL_PREFIX: default '/usr/local', the installation path.
       CMAKE_INSTALL_NAME_DIR: if you install outside a standard ld search 
         path, then you should set this to the installation lib/framework path. 
+      CMAKE_OSX_ARCHITECTURES: defaults to the native architecture, but can be
+        set to a semicolon separated list for fat binaries, e.g. ppc;i386;x86_64
+      CMAKE_BUILD_TYPE: default 'Release', can include debug symbols with
+        either 'Debug' or 'RelWithDebInfo'.
 
     To build framework bundles:
       FRAMEWORK: default 'OFF', also requires 'BUILD_SHARED_LIBS' set ON
@@ -78,23 +93,19 @@ Alternatively use CMake to autogenerate a build system for Windows:
         For example:
         cmake -DBUILD_SHARED_LIBS=ON -DFRAMEWORK=ON \
               -DCMAKE_INSTALL_PREFIX=/Library/Frameworks \
-              -DCMAKE_INSTALL_NAME_DIR=/Library/Frameworks
+              -DCMAKE_INSTALL_NAME_DIR=/Library/Frameworks \
+              -DCMAKE_OSX_ARCHITECTURES='ppc;i386;x86_64' \
+              -DCMAKE_BUILD_TYPE=RelWithDebInfo
 
   - Assuming using the default Makefile output from cmake, run 'make' to build 
     and then 'make install'.
 
 
-** Alternative Mac OS X and Linux via 'jam' or autoconf/make **
+** Alternative Mac OS X and Linux via autoconf/make **
   - at the command line:
     ./autogen.sh
     ./configure
-  - 'jam' or 'make' depending on preference
-  - If jam is not available for your system, you can compile it, jam sources 
-    are included with the Bullet sources in jam-2.5
-      - compiling jam:
-        cd jam-2.5
-        make
-        sudo make install
+    make
 
 
-** For more help, visit http://www.bulletphysics.com **
+** For more help, visit http://www.bulletphysics.org **
diff --git a/Engine/lib/bullet/Jamfile.in b/Engine/lib/bullet/Jamfile.in
deleted file mode 100644
index eb49df511..000000000
--- a/Engine/lib/bullet/Jamfile.in
+++ /dev/null
@@ -1,66 +0,0 @@
-TOP ?= "@top_srcdir@" ;
-BUILDTOP ?= "@top_builddir@" ;
-
-SubDir TOP ;
-
-IncludeDir ;
-IncludeDir src ;
-
-IncludeDir $(BUILDTOP) : : literal transient ;
-
-CleanDir clean :
-    out ;
-Clean distclean :
-    aclocal.m4
-    config.h
-    config.h.in~
-    config.log
-    config.status
-    config.status.lineno
-    config.cache
-    configure.lineno
-    Jamconfig
-    Jamfile ;
-CleanDir distclean :
-    autom4te.cache ;
-Depends distclean : clean ;
-
-Clean maintainerclean :
-  config.h.in
-  configure ;
-Depends maintainerclean : distclean ;
-
-Help distclean : "Remove built targets and configuration" ;
-Help maintainerclean :
-    "Remove built targets, configuration, and generated files." ;
-
-ApplicationIconDefault win32 : all : bullet_ico.ico : $(TOP) msvc ;
-
-MsvcGenSubDir TOP msvc : common ;
-MsvcGenSubDir TOP msvc 6 : 6 ;
-MsvcGenSubDir TOP msvc 7 : 7 ;
-MsvcGenSubDir TOP msvc 71 : 71 ;
-MsvcGenSubDir TOP msvc sn71 : sn71 ;
-MsvcGenSubDir TOP msvc 8 : 8 ;
-MsvcGenSubDir TOP msvc xenon8 : xenon8 ;
-MsvcGenTemplateDir TOP mk msvcgen ;
-MsvcGenWorkspace bullet : : "grp.+_(?!bullet$)" ;
-MsvcGenWorkspace bullet_corelib : libbulletcollision libbulletdynamics libbulletmath libbulletmultithreaded : "grp.+_(?!bullet_corelib$)" ;
-
-# Set project-specific compiler and linker options for msvcgen.
-MsvcGenConfig GL.AVAILABLE : yes ;
-MsvcGenConfig GL.LFLAGS :  ;
-MsvcGenConfig GL.LIBS : opengl32.lib ;
-MsvcGenConfig GLUT.AVAILABLE : yes ;
-MsvcGenConfig GLUT.CFLAGS : ;
-MsvcGenConfig GLUT.LFLAGS : ;
-MsvcGenConfig GLUT.INCDIRS : "../../Glut" ;
-MsvcGenConfig GLUT.LIBDIRS : "../../Glut" ;
-MsvcGenConfig GLUT.LIBS : glut32.lib ;
-MsvcGenConfig GLEW.LIBS : glew32.lib ;
-
-SubInclude TOP src ;
-SubInclude TOP Extras ;
-SubInclude TOP Demos ;
-
-Depends install_config : [ DoInstall bullet.pc : $(libdir)/pkgconfig ] ;
diff --git a/Engine/lib/bullet/Jamrules b/Engine/lib/bullet/Jamrules
deleted file mode 100644
index e486c3345..000000000
--- a/Engine/lib/bullet/Jamrules
+++ /dev/null
@@ -1,21 +0,0 @@
-if ! $(BUILDTOP)
-{
-BUILDTOP = . ;
-}
-
-# Include configuration.
-JAMCONFIG ?= $(BUILDTOP)/Jamconfig ;
-include $(JAMCONFIG) ;
-
-# Set up compiler flags.
-# Unfortunately, we can not use FDefines here since Boost Jam does not have it,
-# and we have not yet included mk/jam/build.jam which provides an emulation
-# layer for Boost.  We can not include build.jam earlier because these flags
-# need to be defined before build.jam is included.  :-(
-COMPILER.CFLAGS += -Wall -Wno-unknown-pragmas ;
-COMPILER.CFLAGS.optimize += -O3 -fomit-frame-pointer -ffast-math ;
-COMPILER.CFLAGS.debug += -g3 ;
-COMPILER.CFLAGS.profile += -gp -O3 ;
-
-# Include CS build rules
-include $(TOP)/mk/jam/build.jam ;
diff --git a/Engine/lib/bullet/LICENSE b/Engine/lib/bullet/LICENSE
deleted file mode 100644
index ba24a53c2..000000000
--- a/Engine/lib/bullet/LICENSE
+++ /dev/null
@@ -1,19 +0,0 @@
-/*
-Bullet Continuous Collision Detection and Physics Library
-Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose,
-including commercial applications, and to alter it and redistribute it freely,
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-*/
-
-All files in the Bullet/src folder are under this Zlib license.
-Optional Extras/GIMPACT and Extras/GIMPACTBullet is also under ZLib license. Other optional external libraries in  Extras/Demos have own license,see respective files.
-
-This means Bullet can freely be used in any software, including commercial and console software. A Playstation 3 optimized version is available through Sony.
diff --git a/Engine/lib/bullet/Makefile.am b/Engine/lib/bullet/Makefile.am
index b243b33a3..a9b97a8e6 100644
--- a/Engine/lib/bullet/Makefile.am
+++ b/Engine/lib/bullet/Makefile.am
@@ -1,7 +1,7 @@
 if CONDITIONAL_BUILD_DEMOS
 SUBDIRS=src Extras Demos
 else
-SUBDIRS=src Extras
+SUBDIRS=src 
 endif
 pkgconfigdir = $(libdir)/pkgconfig
 pkgconfig_DATA = bullet.pc
diff --git a/Engine/lib/bullet/NEWS b/Engine/lib/bullet/NEWS
index d976ae431..dec9f0fd9 100644
--- a/Engine/lib/bullet/NEWS
+++ b/Engine/lib/bullet/NEWS
@@ -1,4 +1,5 @@
 
-For news, visit the Bullet Physics Forum at
-http://www.continuousphysics.com/Bullet/phpBB2/viewforum.php?f=9
+For news, visit the Bullet Physics forums at
+http://www.bulletphysics.org and http://bullet.googlecode.com
+
 
diff --git a/Engine/lib/bullet/README b/Engine/lib/bullet/README
index 8f3705388..1eda762c0 100644
--- a/Engine/lib/bullet/README
+++ b/Engine/lib/bullet/README
@@ -1,7 +1,6 @@
 
 Bullet is a 3D Collision Detection and Rigid Body Dynamics Library for games and animation.
 Free for commercial use, including Playstation 3, open source under the ZLib License.
-Discrete and continuous collision detection, integrated into Blender 3D, and COLLADA 1.4 Physics import. 
 
 See the Bullet_User_Manual.pdf for more info and visit the Bullet Physics Forum at
-http://bulletphysics.com
+http://bulletphysics.org
diff --git a/Engine/lib/bullet/VERSION b/Engine/lib/bullet/VERSION
index 8f5752a10..00ddb02a1 100644
--- a/Engine/lib/bullet/VERSION
+++ b/Engine/lib/bullet/VERSION
@@ -1 +1 @@
-2.75
+2.81
diff --git a/Engine/lib/bullet/acinclude.m4 b/Engine/lib/bullet/acinclude.m4
index 6adb73841..0505895ce 100644
--- a/Engine/lib/bullet/acinclude.m4
+++ b/Engine/lib/bullet/acinclude.m4
@@ -1416,7 +1416,6 @@ AC_DEFUN([CS_CHECK_TEMPLATE_TOOLKIT2],
 #-----------------------------------------------------------------------------
 AC_DEFUN([CS_PROG_CC],[
     CFLAGS="$CFLAGS" # Filter undesired flags
-    AC_PROG_CC
     AS_IF([test -n "$CC"],[
 	CS_EMIT_BUILD_PROPERTY([CMD.CC], [$CC])
 	CS_EMIT_BUILD_PROPERTY([COMPILER.CFLAGS], [$CPPFLAGS $CFLAGS], [+])
@@ -1429,7 +1428,6 @@ AC_DEFUN([CS_PROG_CC],[
 
 AC_DEFUN([CS_PROG_CXX],[
     CXXFLAGS="$CXXFLAGS" # Filter undesired flags
-    AC_PROG_CXX
     AS_IF([test -n "$CXX"],[
 	CS_EMIT_BUILD_PROPERTY([CMD.C++], [$CXX])
 
diff --git a/Engine/lib/bullet/bullet.pc.in b/Engine/lib/bullet/bullet.pc.in
index 3b86d0aec..ffcd4f367 100644
--- a/Engine/lib/bullet/bullet.pc.in
+++ b/Engine/lib/bullet/bullet.pc.in
@@ -7,5 +7,5 @@ Name: bullet
 Description: Bullet Continuous Collision Detection and Physics Library
 Requires:
 Version: @PACKAGE_VERSION@
-Libs: -L${libdir} -lbulletdynamics -lbulletcollision -lbulletmath
+Libs: -L${libdir} -lBulletSoftBody -lBulletDynamics -lBulletCollision -lLinearMath
 Cflags: -I${includedir}/bullet
diff --git a/Engine/lib/bullet/config.h.in b/Engine/lib/bullet/config.h.in
index 2f10be9b4..11b564d03 100644
--- a/Engine/lib/bullet/config.h.in
+++ b/Engine/lib/bullet/config.h.in
@@ -1,5 +1,8 @@
 /* config.h.in.  Generated from configure.ac by autoheader.  */
 
+/* Define if building universal (internal helper macro) */
+#undef AC_APPLE_UNIVERSAL_BUILD
+
 /* Architecture is PowerPC */
 #undef ARCH_PPC
 
@@ -9,27 +12,27 @@
 /* Architecture is x86-64 */
 #undef ARCH_X86_64
 
-/* Define when compiling for MacOS/X */
-#undef CS_PLATFORM_MACOSX
-
-/* Define when compiling for Unix and Unix-like (i.e. MacOS/X) */
-#undef CS_PLATFORM_UNIX
-
-/* Define when compiling for Win32 */
-#undef CS_PLATFORM_WIN32
+/* Use the Apple OpenGL framework. */
+#undef HAVE_APPLE_OPENGL_FRAMEWORK
 
 /* Define to 1 if you have the <dlfcn.h> header file. */
 #undef HAVE_DLFCN_H
 
+/* Define to 1 if you have the <GL/glext.h> header file. */
+#undef HAVE_GL_GLEXT_H
+
+/* Define to 1 if you have the <GL/glut.h> header file. */
+#undef HAVE_GL_GLUT_H
+
+/* Define to 1 if you have the <GL/glu.h> header file. */
+#undef HAVE_GL_GLU_H
+
+/* Define to 1 if you have the <GL/gl.h> header file. */
+#undef HAVE_GL_GL_H
+
 /* Define to 1 if you have the <inttypes.h> header file. */
 #undef HAVE_INTTYPES_H
 
-/* Define to 1 if you have the `mx' library (-lmx). */
-#undef HAVE_LIBMX
-
-/* Define to 1 if you have the `nsl' library (-lnsl). */
-#undef HAVE_LIBNSL
-
 /* Define to 1 if you have the <memory.h> header file. */
 #undef HAVE_MEMORY_H
 
@@ -51,19 +54,16 @@
 /* Define to 1 if you have the <sys/types.h> header file. */
 #undef HAVE_SYS_TYPES_H
 
-/* Whether the int32 type is available */
-#undef HAVE_TYPE_INT32
-
 /* Define to 1 if you have the <unistd.h> header file. */
 #undef HAVE_UNISTD_H
 
-/* Define to 1 if you have the <windows.h> header file. */
-#undef HAVE_WINDOWS_H
-
 /* Define to the sub-directory in which libtool stores uninstalled libraries.
    */
 #undef LT_OBJDIR
 
+/* Define to 1 if your C compiler doesn't accept -c and -o together. */
+#undef NO_MINUS_C_MINUS_O
+
 /* Name of package */
 #undef PACKAGE
 
@@ -79,6 +79,9 @@
 /* Define to the one symbol short name of this package. */
 #undef PACKAGE_TARNAME
 
+/* Define to the home page for this package. */
+#undef PACKAGE_URL
+
 /* Define to the version of this package. */
 #undef PACKAGE_VERSION
 
@@ -97,12 +100,14 @@
 /* Version number of package */
 #undef VERSION
 
-/* Define to 1 if your processor stores words with the most significant byte
-   first (like Motorola and SPARC, unlike Intel and VAX). */
-#undef WORDS_BIGENDIAN
-
-/* Define to 1 if the X Window System is missing or not being used. */
-#undef X_DISPLAY_MISSING
-
-/* Avoid problem caused by missing <Carbon/CarbonSound.h> */
-#undef __CARBONSOUND__
+/* Define WORDS_BIGENDIAN to 1 if your processor stores words with the most
+   significant byte first (like Motorola and SPARC, unlike Intel). */
+#if defined AC_APPLE_UNIVERSAL_BUILD
+# if defined __BIG_ENDIAN__
+#  define WORDS_BIGENDIAN 1
+# endif
+#else
+# ifndef WORDS_BIGENDIAN
+#  undef WORDS_BIGENDIAN
+# endif
+#endif
diff --git a/Engine/lib/bullet/configure.ac b/Engine/lib/bullet/configure.ac
index 4b3cb5b59..1d793612a 100644
--- a/Engine/lib/bullet/configure.ac
+++ b/Engine/lib/bullet/configure.ac
@@ -9,16 +9,12 @@ AC_PREREQ([2.54])
 #----------------------------------------------------------------------------
 AC_INIT(
     [bullet],
-    [2.75],
+    [2.81],
     [bullet@erwincoumans.com])
 AC_CANONICAL_HOST
-CS_PACKAGEINFO(
-    [Bullet Continuous Collision Detection and Physics Library],
-    [Copyright (c) 2005-2008  Erwin Coumans],
-    [http://www.bulletphysics.com])
 AC_CONFIG_SRCDIR([configure.ac])
 AM_INIT_AUTOMAKE
-AC_PROG_CC
+AM_PROG_CC_C_O
 AC_PROG_CXX
 AC_PROG_LIBTOOL
 
@@ -34,6 +30,7 @@ case "$host" in
                 PLATFORM_STRING="Linux"
                 ;;
         *-*-darwin*)
+		dnl Darwin host: the lines below define PLATFORM_APPLE and select the Apple GL frameworks.
                 AC_DEFINE(PLATFORM_APPLE, 1, [Platform is Apple])
                 opengl_LIBS="-framework AGL -framework OpenGL -framework GLUT"
                 PLATFORM_STRING="Apple"
@@ -56,6 +53,7 @@ case "$host" in
                 ARCH_STRING="X86-64"
                 ;;
         ppc-* | powerpc-*)
+		dnl PowerPC host: the lines below define ARCH_PPC and add no architecture specific flags.
                 AC_DEFINE(ARCH_PPC, 1, [Architecture is PowerPC])
                 ARCH_SPECIFIC_CFLAGS=""
                 ARCH_STRING="PowerPC"
@@ -71,63 +69,6 @@ AC_C_BIGENDIAN
 # Setup for the configuration header.
 #----------------------------------------------------------------------------
 AC_CONFIG_HEADERS([config.h])
-
-#----------------------------------------------------------------------------
-# Check for tools.
-#----------------------------------------------------------------------------
-CS_PROG_CC
-AS_IF([test -z "$CC"],
-    [AC_MSG_ERROR([Could not find a usable C compiler.])])
-CS_PROG_CXX
-AS_IF([test -z "$CXX"],
-    [AC_MSG_ERROR([Could not find a usable C++ compiler.])])
-CS_PROG_LINK
-
-CS_CHECK_COMMON_TOOLS_LINK
-CS_CHECK_COMMON_TOOLS_BASIC
-CS_CHECK_COMMON_TOOLS_DOC_DOXYGEN
-
-CS_CHECK_PROGS([PERL], [perl5 perl])
-CS_EMIT_BUILD_PROPERTY([PERL], [$PERL])
-
-CS_CHECK_TEMPLATE_TOOLKIT2([emit])
-
-#----------------------------------------------------------------------------
-# Check if C++ exceptions can be disabled.
-#----------------------------------------------------------------------------
-CS_EMIT_BUILD_FLAGS([how to disable C++ exceptions],
-    [cs_cv_prog_cxx_disable_exceptions], [CS_CREATE_TUPLE([-fno-exceptions])],
-    [C++], [COMPILER.C++FLAGS.EXCEPTIONS.DISABLE], [],
-    [CS_EMIT_BUILD_PROPERTY([COMPILER.C++FLAGS],
-	[$cs_cv_prog_cxx_disable_exceptions], [+])])
-
-
-#----------------------------------------------------------------------------
-# Determine system type
-#----------------------------------------------------------------------------
-CS_CHECK_HOST
-
-#----------------------------------------------------------------------------
-# Check for syntax problems / header files
-#----------------------------------------------------------------------------
-# Nothing yet.
-
-#----------------------------------------------------------------------------
-# Check for GLUT.
-#----------------------------------------------------------------------------
-AS_IF([test $cs_host_family = windows],
-    [# Tack the GLUT that comes with bullet onto compiler & linker flags.
-    _AC_SRCDIRS(["."])
-    glut_cflags="-I$ac_top_srcdir/Glut"
-    glut_lflags="-L$ac_top_srcdir/Glut"
-    CFLAGS="$CFLAGS $glut_cflags"
-    LDFLAGS="$LDFLAGS $glut_lflags"
-    CS_EMIT_BUILD_PROPERTY([COMPILER.CFLAGS], [$glut_cflags], [+])
-    CS_EMIT_BUILD_PROPERTY([COMPILER.LFLAGS], [$glut_lflags], [+])
-    ])
-CS_CHECK_GLUT
-
-
 #----------------------------------------------------------------------------
 # Package configuration switches.
 #----------------------------------------------------------------------------
@@ -142,10 +83,63 @@ AM_CONDITIONAL([CONDITIONAL_BUILD_MULTITHREADED], [test "$build_multithreaded" =
 
 AC_ARG_ENABLE([demos],
     [AS_HELP_STRING([--disable-demos],
-	    [disable Bullet demos])],
+            [disable Bullet demos])],
     [],
     [enable_demos=yes])
 AM_CONDITIONAL([CONDITIONAL_BUILD_DEMOS], [false])
+
+dnl Check for OpenGL and GLUT
+dnl Darwin links the system frameworks; other hosts probe the GL headers and
+dnl libraries and turn the demos off when GL, GLU or the headers are missing.
+case "$host" in
+        *-*-darwin*)
+                  AC_DEFINE([HAVE_APPLE_OPENGL_FRAMEWORK], [1],
+                            [Use the Apple OpenGL framework.])
+                  GL_LIBS="-framework GLUT -framework OpenGL -framework Carbon -framework AGL"
+                  have_glut=yes
+                  have_glu=yes
+                  have_gl=yes
+                ;;
+        *)
+                     have_gl_headers=yes
+                      AC_CHECK_HEADERS(GL/gl.h GL/glu.h GL/glext.h GL/glut.h, ,
+                            [have_gl_headers=no],
+                           [[#ifdef WIN32
+                             #include <windows.h>
+                             #endif
+                             #if HAVE_GL_GL_H
+                             #include <GL/gl.h>
+                             #endif
+                                 #if HAVE_GL_GLU_H
+                             #include <GL/glu.h>
+                             #endif
+                           ]])
+                      have_gl=no
+                      have_glu=no
+                      have_glut=no
+                      TEMP_LDFLAGS="$LDFLAGS"
+                      AC_CHECK_LIB(GL, main, [GL_LIBS="-lGL"; have_gl=yes])
+                      AC_CHECK_LIB(GLU, main, [GL_LIBS="-lGLU $GL_LIBS"; have_glu=yes], , -lGL)
+                      AC_CHECK_LIB(GLUT, main, [GL_LIBS="-lGLUT -lGLU  $GL_LIBS"; have_glut=yes], ,-lGLUT)
+                      AC_CHECK_LIB(opengl32, main, [GL_LIBS="-lopengl32"; have_gl=yes])
+                      AC_CHECK_LIB(glu32, main, [GL_LIBS="-lglu32 $GL_LIBS"; have_glu=yes], , -lopengl32)
+                      LDFLAGS="$TEMP_LDFLAGS"
+                      if test $have_gl = no -o $have_glu = no -o $have_gl_headers = no; then
+                        if test x$enable_demos = xyes; then
+                          AC_MSG_WARN([Demos and Extras will not be built because OpenGL and GLUT doesn't seem to work. See `config.log' for details.])
+                        fi
+                        enable_demos=no
+                      else
+                      AC_MSG_NOTICE([Found OpenGL])
+                      fi
+                ;;
+esac
+
+
+
+AC_SUBST(GL_LIBS)
+
+
 if test "x$enable_demos" != xno; then
     AC_MSG_NOTICE([Building Bullet demos])
     AM_CONDITIONAL([CONDITIONAL_BUILD_DEMOS],[true])
@@ -162,13 +156,6 @@ AC_MSG_CHECKING([build mode])
 AS_IF([test $enable_debug = yes], [build_mode=debug], [build_mode=optimize])
 AC_MSG_RESULT([$build_mode])
 
-CS_EMIT_BUILD_PROPERTY([MODE], [$build_mode])
-
-#-----------------------------------------------------------------------------
-# Emit install paths and package information.
-#-----------------------------------------------------------------------------
-CS_OUTPUT_INSTALLDIRS
-CS_EMIT_PACKAGEINFO
 
 
 CFLAGS="$ARCH_SPECIFIC_CFLAGS $CFLAGS"
@@ -176,17 +163,10 @@ CXXFLAGS="$ARCH_SPECIFIC_CFLAGS $CXXFLAGS $CFLAGS"
 #----------------------------------------------------------------------------
 # Emit generated files.
 #----------------------------------------------------------------------------
-CS_JAMCONFIG_OUTPUT([Jamconfig])
-AC_CONFIG_FILES([bullet.pc Jamfile Makefile Demos/Makefile Demos/SoftDemo/Makefile Demos/AllBulletDemos/Makefile Demos/MultiThreadedDemo/Makefile Demos/ColladaDemo/Makefile Demos/OpenGL/Makefile Demos/BasicDemo/Makefile Demos/CcdPhysicsDemo/Makefile Demos/VehicleDemo/Makefile Demos/TerrainDemo/Makefile src/Makefile Extras/Makefile])
+AC_CONFIG_FILES([bullet.pc Makefile Demos/Makefile Demos/SoftDemo/Makefile Demos/AllBulletDemos/Makefile Demos/MultiThreadedDemo/Makefile  Demos/OpenGL/Makefile Demos/BasicDemo/Makefile Demos/CcdPhysicsDemo/Makefile Demos/VehicleDemo/Makefile Demos/TerrainDemo/Makefile src/Makefile Extras/Makefile])
 AC_OUTPUT
 
 AC_MSG_NOTICE([
-You can type 'make' or 'jam' to build Bullet.
-Alternatively, you can use cmake or use the wksbullet.sln visual studio x solutions in the msvc/x folder.
-
-CMake home:http://cmake.org
-Jam home: http://www.perforce.com/jam/jam.html
-Jam source: ftp://ftp.perforce.com/jam/
 
 Please type 'make' to build Bullet
 ])
diff --git a/Engine/lib/bullet/src/Bullet-C-Api.h b/Engine/lib/bullet/src/Bullet-C-Api.h
index f309aba28..f27a17d51 100644
--- a/Engine/lib/bullet/src/Bullet-C-Api.h
+++ b/Engine/lib/bullet/src/Bullet-C-Api.h
@@ -65,7 +65,7 @@ extern "C" {
 	Create and Delete a Physics SDK	
 */
 
-	extern	plPhysicsSdkHandle	plNewBulletSdk(); //this could be also another sdk, like ODE, PhysX etc.
+	extern	plPhysicsSdkHandle	plNewBulletSdk(void); //this could be also another sdk, like ODE, PhysX etc.
 	extern	void		plDeletePhysicsSdk(plPhysicsSdkHandle	physicsSdk);
 
 /** Collision World, not strictly necessary, you can also just create a Dynamics World with Rigid Bodies which internally manages the Collision World with Collision Objects */
@@ -116,16 +116,16 @@ extern "C" {
 	extern  plCollisionShapeHandle plNewCapsuleShape(plReal radius, plReal height);	
 	extern  plCollisionShapeHandle plNewConeShape(plReal radius, plReal height);
 	extern  plCollisionShapeHandle plNewCylinderShape(plReal radius, plReal height);
-	extern	plCollisionShapeHandle plNewCompoundShape();
+	extern	plCollisionShapeHandle plNewCompoundShape(void);
 	extern	void	plAddChildShape(plCollisionShapeHandle compoundShape,plCollisionShapeHandle childShape, plVector3 childPos,plQuaternion childOrn);
 
 	extern  void plDeleteShape(plCollisionShapeHandle shape);
 
 	/* Convex Meshes */
-	extern  plCollisionShapeHandle plNewConvexHullShape();
+	extern  plCollisionShapeHandle plNewConvexHullShape(void);
 	extern  void		plAddVertex(plCollisionShapeHandle convexHull, plReal x,plReal y,plReal z);
 /* Concave static triangle meshes */
-	extern  plMeshInterfaceHandle		   plNewMeshInterface();
+	extern  plMeshInterfaceHandle		   plNewMeshInterface(void);
 	extern  void		plAddTriangle(plMeshInterfaceHandle meshHandle, plVector3 v0,plVector3 v1,plVector3 v2);
 	extern  plCollisionShapeHandle plNewStaticTriangleMeshShape(plMeshInterfaceHandle);
 
diff --git a/Engine/lib/bullet/src/BulletCollision/BroadphaseCollision/btAxisSweep3.h b/Engine/lib/bullet/src/BulletCollision/BroadphaseCollision/btAxisSweep3.h
index cad21b4ca..cd6e1a892 100644
--- a/Engine/lib/bullet/src/BulletCollision/BroadphaseCollision/btAxisSweep3.h
+++ b/Engine/lib/bullet/src/BulletCollision/BroadphaseCollision/btAxisSweep3.h
@@ -16,8 +16,8 @@
 //
 // 3. This notice may not be removed or altered from any source distribution.
 
-#ifndef AXIS_SWEEP_3_H
-#define AXIS_SWEEP_3_H
+#ifndef BT_AXIS_SWEEP_3_H
+#define BT_AXIS_SWEEP_3_H
 
 #include "LinearMath/btVector3.h"
 #include "btOverlappingPairCache.h"
@@ -150,6 +150,8 @@ public:
 	virtual void  getAabb(btBroadphaseProxy* proxy,btVector3& aabbMin, btVector3& aabbMax ) const;
 	
 	virtual void	rayTest(const btVector3& rayFrom,const btVector3& rayTo, btBroadphaseRayCallback& rayCallback, const btVector3& aabbMin=btVector3(0,0,0), const btVector3& aabbMax = btVector3(0,0,0));
+	virtual void	aabbTest(const btVector3& aabbMin, const btVector3& aabbMax, btBroadphaseAabbCallback& callback);
+
 	
 	void quantize(BP_FP_INT_TYPE* out, const btVector3& point, int isMax) const;
 	///unQuantize should be conservative: aabbMin/aabbMax should be larger then 'getAabb' result
@@ -285,6 +287,31 @@ void	btAxisSweep3Internal<BP_FP_INT_TYPE>::rayTest(const btVector3& rayFrom,cons
 	}
 }
 
+///aabbTest forwards the query to m_raycastAccelerator when one is set; otherwise it tests the stored AABB of the handles on one axis against [aabbMin,aabbMax] and passes overlapping handles to callback.process
+template <typename BP_FP_INT_TYPE>
+void	btAxisSweep3Internal<BP_FP_INT_TYPE>::aabbTest(const btVector3& aabbMin, const btVector3& aabbMax, btBroadphaseAabbCallback& callback)
+{
+	if (m_raycastAccelerator)
+	{
+		m_raycastAccelerator->aabbTest(aabbMin,aabbMax,callback);
+	} else
+	{
+		//no accelerator: scan the edge list of a single axis (axis 0 is hard-coded)
+		BP_FP_INT_TYPE axis = 0;
+		//only max edges are examined; the value returned by callback.process is ignored
+		for (BP_FP_INT_TYPE i=1;i<m_numHandles*2+1;i++)
+		{
+			if (m_pEdges[axis][i].IsMax())
+			{
+				Handle* handle = getHandle(m_pEdges[axis][i].m_handle);
+				if (TestAabbAgainstAabb2(aabbMin,aabbMax,handle->m_aabbMin,handle->m_aabbMax))
+				{
+					callback.process(handle);
+				}
+			}
+		}
+	}
+}
 
 
 template <typename BP_FP_INT_TYPE>
@@ -588,7 +615,7 @@ void btAxisSweep3Internal<BP_FP_INT_TYPE>::removeHandle(BP_FP_INT_TYPE handle,bt
 }
 
 template <typename BP_FP_INT_TYPE>
-void btAxisSweep3Internal<BP_FP_INT_TYPE>::resetPool(btDispatcher* dispatcher)
+void btAxisSweep3Internal<BP_FP_INT_TYPE>::resetPool(btDispatcher* /*dispatcher*/)
 {
 	if (m_numHandles == 0)
 	{
diff --git a/Engine/lib/bullet/src/BulletCollision/BroadphaseCollision/btBroadphaseInterface.h b/Engine/lib/bullet/src/BulletCollision/BroadphaseCollision/btBroadphaseInterface.h
index b7bbaf512..f1bf00594 100644
--- a/Engine/lib/bullet/src/BulletCollision/BroadphaseCollision/btBroadphaseInterface.h
+++ b/Engine/lib/bullet/src/BulletCollision/BroadphaseCollision/btBroadphaseInterface.h
@@ -13,8 +13,8 @@ subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-#ifndef		BROADPHASE_INTERFACE_H
-#define 	BROADPHASE_INTERFACE_H
+#ifndef		BT_BROADPHASE_INTERFACE_H
+#define 	BT_BROADPHASE_INTERFACE_H
 
 
 
@@ -26,7 +26,14 @@ class btOverlappingPairCache;
 
 
 
-struct	btBroadphaseRayCallback
+///btBroadphaseAabbCallback is the callback interface handed to broadphase queries such as aabbTest: process is called with each reported proxy (the meaning of the returned bool is not defined here)
+struct	btBroadphaseAabbCallback
+{
+	virtual ~btBroadphaseAabbCallback() {}
+	virtual bool	process(const btBroadphaseProxy* proxy) = 0;
+};
+
+struct	btBroadphaseRayCallback : public btBroadphaseAabbCallback
 {
 	///added some cached data to accelerate ray-AABB tests
 	btVector3		m_rayDirectionInverse;
@@ -34,7 +41,6 @@ struct	btBroadphaseRayCallback
 	btScalar		m_lambda_max;
 
 	virtual ~btBroadphaseRayCallback() {}
-	virtual bool	process(const btBroadphaseProxy* proxy) = 0;
 };
 
 #include "LinearMath/btVector3.h"
@@ -54,6 +60,8 @@ public:
 
 	virtual void	rayTest(const btVector3& rayFrom,const btVector3& rayTo, btBroadphaseRayCallback& rayCallback, const btVector3& aabbMin=btVector3(0,0,0), const btVector3& aabbMax = btVector3(0,0,0)) = 0;
 
+	virtual void	aabbTest(const btVector3& aabbMin, const btVector3& aabbMax, btBroadphaseAabbCallback& callback) = 0;
+
 	///calculateOverlappingPairs is optional: incremental algorithms (sweep and prune) might do it during the set aabb
 	virtual void	calculateOverlappingPairs(btDispatcher* dispatcher)=0;
 
@@ -65,10 +73,10 @@ public:
 	virtual void getBroadphaseAabb(btVector3& aabbMin,btVector3& aabbMax) const =0;
 
 	///reset broadphase internal structures, to ensure determinism/reproducability
-	virtual void resetPool(btDispatcher* dispatcher) {};
+	virtual void resetPool(btDispatcher* dispatcher) { (void) dispatcher; };
 
 	virtual void	printStats() = 0;
 
 };
 
-#endif //BROADPHASE_INTERFACE_H
+#endif //BT_BROADPHASE_INTERFACE_H
diff --git a/Engine/lib/bullet/src/BulletCollision/BroadphaseCollision/btBroadphaseProxy.h b/Engine/lib/bullet/src/BulletCollision/BroadphaseCollision/btBroadphaseProxy.h
index 5e7584dbc..bb58b8289 100644
--- a/Engine/lib/bullet/src/BulletCollision/BroadphaseCollision/btBroadphaseProxy.h
+++ b/Engine/lib/bullet/src/BulletCollision/BroadphaseCollision/btBroadphaseProxy.h
@@ -13,8 +13,8 @@ subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-#ifndef BROADPHASE_PROXY_H
-#define BROADPHASE_PROXY_H
+#ifndef BT_BROADPHASE_PROXY_H
+#define BT_BROADPHASE_PROXY_H
 
 #include "LinearMath/btScalar.h" //for SIMD_FORCE_INLINE
 #include "LinearMath/btVector3.h"
@@ -141,6 +141,11 @@ BT_DECLARE_ALIGNED_ALLOCATOR();
 		return (proxyType < CONCAVE_SHAPES_START_HERE);
 	}
 
+	static SIMD_FORCE_INLINE bool	isNonMoving(int proxyType)
+	{
+		return (isConcave(proxyType) && !(proxyType==GIMPACT_SHAPE_PROXYTYPE));
+	}
+
 	static SIMD_FORCE_INLINE bool	isConcave(int proxyType)
 	{
 		return ((proxyType > CONCAVE_SHAPES_START_HERE) &&
@@ -150,6 +155,12 @@ BT_DECLARE_ALIGNED_ALLOCATOR();
 	{
 		return (proxyType == COMPOUND_SHAPE_PROXYTYPE);
 	}
+
+	static SIMD_FORCE_INLINE bool	isSoftBody(int proxyType)
+	{
+		return (proxyType == SOFTBODY_SHAPE_PROXYTYPE);
+	}
+
 	static SIMD_FORCE_INLINE bool isInfinite(int proxyType)
 	{
 		return (proxyType == STATIC_PLANE_PROXYTYPE);
@@ -235,7 +246,7 @@ class btBroadphasePairSortPredicate
 {
 	public:
 
-		bool operator() ( const btBroadphasePair& a, const btBroadphasePair& b )
+		bool operator() ( const btBroadphasePair& a, const btBroadphasePair& b ) const
 		{
 			const int uidA0 = a.m_pProxy0 ? a.m_pProxy0->m_uniqueId : -1;
 			const int uidB0 = b.m_pProxy0 ? b.m_pProxy0->m_uniqueId : -1;
@@ -255,5 +266,5 @@ SIMD_FORCE_INLINE bool operator==(const btBroadphasePair& a, const btBroadphaseP
 }
 
 
-#endif //BROADPHASE_PROXY_H
+#endif //BT_BROADPHASE_PROXY_H
 
diff --git a/Engine/lib/bullet/src/BulletCollision/BroadphaseCollision/btCollisionAlgorithm.h b/Engine/lib/bullet/src/BulletCollision/BroadphaseCollision/btCollisionAlgorithm.h
index 1618ad9fd..405656236 100644
--- a/Engine/lib/bullet/src/BulletCollision/BroadphaseCollision/btCollisionAlgorithm.h
+++ b/Engine/lib/bullet/src/BulletCollision/BroadphaseCollision/btCollisionAlgorithm.h
@@ -13,8 +13,8 @@ subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-#ifndef COLLISION_ALGORITHM_H
-#define COLLISION_ALGORITHM_H
+#ifndef BT_COLLISION_ALGORITHM_H
+#define BT_COLLISION_ALGORITHM_H
 
 #include "LinearMath/btScalar.h"
 #include "LinearMath/btAlignedObjectArray.h"
@@ -23,6 +23,7 @@ struct btBroadphaseProxy;
 class btDispatcher;
 class btManifoldResult;
 class btCollisionObject;
+struct btCollisionObjectWrapper;
 struct btDispatcherInfo;
 class	btPersistentManifold;
 
@@ -44,7 +45,7 @@ struct btCollisionAlgorithmConstructionInfo
 	btDispatcher*	m_dispatcher1;
 	btPersistentManifold*	m_manifold;
 
-	int	getDispatcherId();
+//	int	getDispatcherId();
 
 };
 
@@ -59,7 +60,7 @@ protected:
 	btDispatcher*	m_dispatcher;
 
 protected:
-	int	getDispatcherId();
+//	int	getDispatcherId();
 	
 public:
 
@@ -69,7 +70,7 @@ public:
 
 	virtual ~btCollisionAlgorithm() {};
 
-	virtual void processCollision (btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut) = 0;
+	virtual void processCollision (const btCollisionObjectWrapper* body0Wrap,const btCollisionObjectWrapper* body1Wrap,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut) = 0;
 
 	virtual btScalar calculateTimeOfImpact(btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut) = 0;
 
@@ -77,4 +78,4 @@ public:
 };
 
 
-#endif //COLLISION_ALGORITHM_H
+#endif //BT_COLLISION_ALGORITHM_H
diff --git a/Engine/lib/bullet/src/BulletCollision/BroadphaseCollision/btDbvt.cpp b/Engine/lib/bullet/src/BulletCollision/BroadphaseCollision/btDbvt.cpp
index ff32ec1d4..95443af50 100644
--- a/Engine/lib/bullet/src/BulletCollision/BroadphaseCollision/btDbvt.cpp
+++ b/Engine/lib/bullet/src/BulletCollision/BroadphaseCollision/btDbvt.cpp
@@ -61,7 +61,7 @@ static void						getmaxdepth(const btDbvtNode* node,int depth,int& maxdepth)
 	if(node->isinternal())
 	{
 		getmaxdepth(node->childs[0],depth+1,maxdepth);
-		getmaxdepth(node->childs[0],depth+1,maxdepth);
+		getmaxdepth(node->childs[1],depth+1,maxdepth);
 	} else maxdepth=btMax(maxdepth,depth);
 }
 
diff --git a/Engine/lib/bullet/src/BulletCollision/BroadphaseCollision/btDbvt.h b/Engine/lib/bullet/src/BulletCollision/BroadphaseCollision/btDbvt.h
index d733fedf2..b64936844 100644
--- a/Engine/lib/bullet/src/BulletCollision/BroadphaseCollision/btDbvt.h
+++ b/Engine/lib/bullet/src/BulletCollision/BroadphaseCollision/btDbvt.h
@@ -32,7 +32,7 @@ subject to the following restrictions:
 #define DBVT_IMPL_SSE			1	// SSE
 
 // Template implementation of ICollide
-#ifdef WIN32
+#ifdef _WIN32
 #if (defined (_MSC_VER) && _MSC_VER >= 1400)
 #define	DBVT_USE_TEMPLATE		1
 #else
@@ -57,7 +57,7 @@ subject to the following restrictions:
 // Specific methods implementation
 
 //SSE gives errors on a MSVC 7.1
-#if defined (BT_USE_SSE) && defined (WIN32)
+#if defined (BT_USE_SSE) //&& defined (_WIN32)
 #define DBVT_SELECT_IMPL		DBVT_IMPL_SSE
 #define DBVT_MERGE_IMPL			DBVT_IMPL_SSE
 #define DBVT_INT0_IMPL			DBVT_IMPL_SSE
@@ -92,7 +92,7 @@ subject to the following restrictions:
 #endif
 
 #if DBVT_USE_MEMMOVE
-#ifndef __CELLOS_LV2__
+#if !defined( __CELLOS_LV2__) && !defined(__MWERKS__)
 #include <memory.h>
 #endif
 #include <string.h>
@@ -160,6 +160,10 @@ struct	btDbvtAabbMm
 		btDbvtAabbMm& r);
 	DBVT_INLINE friend bool			NotEqual(	const btDbvtAabbMm& a,
 		const btDbvtAabbMm& b);
+    
+    DBVT_INLINE btVector3&	tMins()	{ return(mi); }
+	DBVT_INLINE btVector3&	tMaxs()	{ return(mx); }
+    
 private:
 	DBVT_INLINE void				AddSpan(const btVector3& d,btScalar& smi,btScalar& smx) const;
 private:
@@ -259,6 +263,7 @@ struct	btDbvt
 
 	
 	btAlignedObjectArray<sStkNN>	m_stkStack;
+	mutable btAlignedObjectArray<const btDbvtNode*>	m_rayTestStack;
 
 
 	// Methods
@@ -319,7 +324,7 @@ struct	btDbvt
 	DBVT_PREFIX
 		void		collideTV(	const btDbvtNode* root,
 		const btDbvtVolume& volume,
-		DBVT_IPOLICY);
+		DBVT_IPOLICY) const;
 	///rayTest is a re-entrant ray test, and can be called in parallel as long as the btAlignedAlloc is thread-safe (uses locking etc)
 	///rayTest is slower than rayTestInternal, because it builds a local stack, using memory allocations, and it recomputes signs/rayDirectionInverses each time
 	DBVT_PREFIX
@@ -518,7 +523,11 @@ DBVT_INLINE bool		Intersect(	const btDbvtAabbMm& a,
 #if	DBVT_INT0_IMPL == DBVT_IMPL_SSE
 	const __m128	rt(_mm_or_ps(	_mm_cmplt_ps(_mm_load_ps(b.mx),_mm_load_ps(a.mi)),
 		_mm_cmplt_ps(_mm_load_ps(a.mx),_mm_load_ps(b.mi))));
+#if defined (_WIN32)
 	const __int32*	pu((const __int32*)&rt);
+#else
+    const int*	pu((const int*)&rt);
+#endif
 	return((pu[0]|pu[1]|pu[2])==0);
 #else
 	return(	(a.mi.x()<=b.mx.x())&&
@@ -567,7 +576,12 @@ DBVT_INLINE int			Select(	const btDbvtAabbMm& o,
 							   const btDbvtAabbMm& b)
 {
 #if	DBVT_SELECT_IMPL == DBVT_IMPL_SSE
+    
+#if defined (_WIN32)
 	static ATTRIBUTE_ALIGNED16(const unsigned __int32)	mask[]={0x7fffffff,0x7fffffff,0x7fffffff,0x7fffffff};
+#else
+    static ATTRIBUTE_ALIGNED16(const unsigned int)	mask[]={0x7fffffff,0x7fffffff,0x7fffffff,0x00000000 /*0x7fffffff*/};
+#endif
 	///@todo: the intrinsic version is 11% slower
 #if DBVT_USE_INTRINSIC_SSE
 
@@ -907,7 +921,7 @@ inline void		btDbvt::collideTT(	const btDbvtNode* root0,
 DBVT_PREFIX
 inline void		btDbvt::collideTV(	const btDbvtNode* root,
 								  const btDbvtVolume& vol,
-								  DBVT_IPOLICY)
+								  DBVT_IPOLICY) const
 {
 	DBVT_CHECKTYPE
 		if(root)
@@ -947,6 +961,7 @@ inline void		btDbvt::rayTestInternal(	const btDbvtNode* root,
 								const btVector3& aabbMax,
 								DBVT_IPOLICY) const
 {
+        (void) rayTo;
 	DBVT_CHECKTYPE
 	if(root)
 	{
@@ -954,15 +969,15 @@ inline void		btDbvt::rayTestInternal(	const btDbvtNode* root,
 
 		int								depth=1;
 		int								treshold=DOUBLE_STACKSIZE-2;
-		btAlignedObjectArray<const btDbvtNode*>	stack;
+		btAlignedObjectArray<const btDbvtNode*>&	stack = m_rayTestStack;
 		stack.resize(DOUBLE_STACKSIZE);
 		stack[0]=root;
 		btVector3 bounds[2];
 		do	
 		{
 			const btDbvtNode*	node=stack[--depth];
-			bounds[0] = node->volume.Mins()+aabbMin;
-			bounds[1] = node->volume.Maxs()+aabbMax;
+			bounds[0] = node->volume.Mins()-aabbMax;
+			bounds[1] = node->volume.Maxs()-aabbMin;
 			btScalar tmin=1.f,lambda_min=0.f;
 			unsigned int result1=false;
 			result1 = btRayAabb2(rayFrom,rayDirectionInverse,signs,bounds,tmin,lambda_min,lambda_max);
diff --git a/Engine/lib/bullet/src/BulletCollision/BroadphaseCollision/btDbvtBroadphase.cpp b/Engine/lib/bullet/src/BulletCollision/BroadphaseCollision/btDbvtBroadphase.cpp
index 1a349a30b..75cfac643 100644
--- a/Engine/lib/bullet/src/BulletCollision/BroadphaseCollision/btDbvtBroadphase.cpp
+++ b/Engine/lib/bullet/src/BulletCollision/BroadphaseCollision/btDbvtBroadphase.cpp
@@ -251,6 +251,33 @@ void	btDbvtBroadphase::rayTest(const btVector3& rayFrom,const btVector3& rayTo,
 }
 
 
+///BroadphaseAabbTester adapts a btBroadphaseAabbCallback to btDbvt::ICollide: each leaf passed to Process is forwarded as its btDbvtProxy
+struct	BroadphaseAabbTester : btDbvt::ICollide
+{
+	btBroadphaseAabbCallback& m_aabbCallback;
+	BroadphaseAabbTester(btBroadphaseAabbCallback& orgCallback)
+		:m_aabbCallback(orgCallback)
+	{
+	}
+	void					Process(const btDbvtNode* leaf)
+	{
+		btDbvtProxy*	proxy=(btDbvtProxy*)leaf->data;
+		m_aabbCallback.process(proxy);
+	}
+};	
+
+///aabbTest builds a volume from aabbMin/aabbMax and runs collideTV on both trees (m_sets[0] and m_sets[1]), reporting through aabbCallback
+void	btDbvtBroadphase::aabbTest(const btVector3& aabbMin,const btVector3& aabbMax,btBroadphaseAabbCallback& aabbCallback)
+{
+	BroadphaseAabbTester callback(aabbCallback);
+
+	const ATTRIBUTE_ALIGNED16(btDbvtVolume)	bounds=btDbvtVolume::FromMM(aabbMin,aabbMax);
+		//process all children that overlap with the given AABB bounds
+	m_sets[0].collideTV(m_sets[0].m_root,bounds,callback);
+	m_sets[1].collideTV(m_sets[1].m_root,bounds,callback);
+
+}
+
 //
 void							btDbvtBroadphase::setAabb(		btBroadphaseProxy* absproxy,
 														  const btVector3& aabbMin,
@@ -318,6 +345,47 @@ void							btDbvtBroadphase::setAabb(		btBroadphaseProxy* absproxy,
 	}
 }
 
+
+// setAabbForceUpdate: refits the proxy's tree leaf to the new AABB unconditionally and re-runs collision for it
+void							btDbvtBroadphase::setAabbForceUpdate(		btBroadphaseProxy* absproxy,
+														  const btVector3& aabbMin,
+														  const btVector3& aabbMax,
+														  btDispatcher* /*dispatcher*/)
+{
+	btDbvtProxy*						proxy=(btDbvtProxy*)absproxy;
+	ATTRIBUTE_ALIGNED16(btDbvtVolume)	aabb=btDbvtVolume::FromMM(aabbMin,aabbMax);
+	bool	docollide=false;	// set to true on both branches below, so the collide step always runs
+	if(proxy->stage==STAGECOUNT)
+	{/* fixed -> dynamic set	*/ 
+		m_sets[1].remove(proxy->leaf);
+		proxy->leaf=m_sets[0].insert(aabb,proxy);
+		docollide=true;
+	}
+	else
+	{/* dynamic set				*/ 
+		++m_updates_call;
+		/* Teleporting			*/ 
+		m_sets[0].update(proxy->leaf,aabb);
+		++m_updates_done;
+		docollide=true;
+	}
+	listremove(proxy,m_stageRoots[proxy->stage]);	// unlink from the list of the stage the proxy was in
+	proxy->m_aabbMin = aabbMin;
+	proxy->m_aabbMax = aabbMax;
+	proxy->stage	=	m_stageCurrent;
+	listappend(proxy,m_stageRoots[m_stageCurrent]);	// relink into the current stage's list
+	if(docollide)
+	{
+		m_needcleanup=true;
+		if(!m_deferedcollide)	// not deferred: test the updated leaf against both trees right away
+		{
+			btDbvtTreeCollider	collider(this);
+			m_sets[1].collideTTpersistentStack(m_sets[1].m_root,proxy->leaf,collider);
+			m_sets[0].collideTTpersistentStack(m_sets[0].m_root,proxy->leaf,collider);
+		}
+	}	
+}
+
 //
 void							btDbvtBroadphase::calculateOverlappingPairs(btDispatcher* dispatcher)
 {
diff --git a/Engine/lib/bullet/src/BulletCollision/BroadphaseCollision/btDbvtBroadphase.h b/Engine/lib/bullet/src/BulletCollision/BroadphaseCollision/btDbvtBroadphase.h
index 5615535c2..18b64ad0e 100644
--- a/Engine/lib/bullet/src/BulletCollision/BroadphaseCollision/btDbvtBroadphase.h
+++ b/Engine/lib/bullet/src/BulletCollision/BroadphaseCollision/btDbvtBroadphase.h
@@ -102,20 +102,27 @@ struct	btDbvtBroadphase : btBroadphaseInterface
 	~btDbvtBroadphase();
 	void							collide(btDispatcher* dispatcher);
 	void							optimize();
-	/* btBroadphaseInterface Implementation	*/ 
+	
+	/* btBroadphaseInterface Implementation	*/
 	btBroadphaseProxy*				createProxy(const btVector3& aabbMin,const btVector3& aabbMax,int shapeType,void* userPtr,short int collisionFilterGroup,short int collisionFilterMask,btDispatcher* dispatcher,void* multiSapProxy);
-	void							destroyProxy(btBroadphaseProxy* proxy,btDispatcher* dispatcher);
-	void							setAabb(btBroadphaseProxy* proxy,const btVector3& aabbMin,const btVector3& aabbMax,btDispatcher* dispatcher);
-	virtual void	rayTest(const btVector3& rayFrom,const btVector3& rayTo, btBroadphaseRayCallback& rayCallback, const btVector3& aabbMin=btVector3(0,0,0), const btVector3& aabbMax = btVector3(0,0,0));
+	virtual void					destroyProxy(btBroadphaseProxy* proxy,btDispatcher* dispatcher);
+	virtual void					setAabb(btBroadphaseProxy* proxy,const btVector3& aabbMin,const btVector3& aabbMax,btDispatcher* dispatcher);
+	virtual void					rayTest(const btVector3& rayFrom,const btVector3& rayTo, btBroadphaseRayCallback& rayCallback, const btVector3& aabbMin=btVector3(0,0,0), const btVector3& aabbMax = btVector3(0,0,0));
+	virtual void					aabbTest(const btVector3& aabbMin, const btVector3& aabbMax, btBroadphaseAabbCallback& callback);
 
-	virtual void	getAabb(btBroadphaseProxy* proxy,btVector3& aabbMin, btVector3& aabbMax ) const;
-	void							calculateOverlappingPairs(btDispatcher* dispatcher);
-	btOverlappingPairCache*			getOverlappingPairCache();
-	const btOverlappingPairCache*	getOverlappingPairCache() const;
-	void							getBroadphaseAabb(btVector3& aabbMin,btVector3& aabbMax) const;
-	void							printStats();
-	static void						benchmark(btBroadphaseInterface*);
+	virtual void					getAabb(btBroadphaseProxy* proxy,btVector3& aabbMin, btVector3& aabbMax ) const;
+	virtual	void					calculateOverlappingPairs(btDispatcher* dispatcher);
+	virtual	btOverlappingPairCache*	getOverlappingPairCache();
+	virtual	const btOverlappingPairCache*	getOverlappingPairCache() const;
+	virtual	void					getBroadphaseAabb(btVector3& aabbMin,btVector3& aabbMax) const;
+	virtual	void					printStats();
 
+
+	///reset broadphase internal structures, to ensure determinism/reproducibility
+	virtual void resetPool(btDispatcher* dispatcher);
+
+	void	performDeferredRemoval(btDispatcher* dispatcher);
+	
 	void	setVelocityPrediction(btScalar prediction)
 	{
 		m_prediction = prediction;
@@ -124,11 +131,15 @@ struct	btDbvtBroadphase : btBroadphaseInterface
 	{
 		return m_prediction;
 	}
-	
-	void	performDeferredRemoval(btDispatcher* dispatcher);
 
-	///reset broadphase internal structures, to ensure determinism/reproducability
-	virtual void resetPool(btDispatcher* dispatcher);
+	///this setAabbForceUpdate is similar to setAabb but always forces the aabb update. 
+	///it is not part of the btBroadphaseInterface but specific to btDbvtBroadphase.
+	///it bypasses certain optimizations that prevent aabb updates (when the aabb shrinks), see
+	///http://code.google.com/p/bullet/issues/detail?id=223
+	void							setAabbForceUpdate(		btBroadphaseProxy* absproxy,const btVector3& aabbMin,const btVector3& aabbMax,btDispatcher* /*dispatcher*/);
+
+	static void						benchmark(btBroadphaseInterface*);
+
 
 };
 
diff --git a/Engine/lib/bullet/src/BulletCollision/BroadphaseCollision/btDispatcher.h b/Engine/lib/bullet/src/BulletCollision/BroadphaseCollision/btDispatcher.h
index 699c66b82..1ebb37797 100644
--- a/Engine/lib/bullet/src/BulletCollision/BroadphaseCollision/btDispatcher.h
+++ b/Engine/lib/bullet/src/BulletCollision/BroadphaseCollision/btDispatcher.h
@@ -13,9 +13,8 @@ subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-#ifndef _DISPATCHER_H
-#define _DISPATCHER_H
-
+#ifndef BT_DISPATCHER_H
+#define BT_DISPATCHER_H
 #include "LinearMath/btScalar.h"
 
 class btCollisionAlgorithm;
@@ -23,10 +22,11 @@ struct btBroadphaseProxy;
 class btRigidBody;
 class	btCollisionObject;
 class btOverlappingPairCache;
-
+struct btCollisionObjectWrapper;
 
 class btPersistentManifold;
 class btStackAlloc;
+class btPoolAllocator;
 
 struct btDispatcherInfo
 {
@@ -40,7 +40,7 @@ struct btDispatcherInfo
 		m_stepCount(0),
 		m_dispatchFunc(DISPATCH_DISCRETE),
 		m_timeOfImpact(btScalar(1.)),
-		m_useContinuous(false),
+		m_useContinuous(true),
 		m_debugDraw(0),
 		m_enableSatConvex(false),
 		m_enableSPU(true),
@@ -76,17 +76,17 @@ class btDispatcher
 public:
 	virtual ~btDispatcher() ;
 
-	virtual btCollisionAlgorithm* findAlgorithm(btCollisionObject* body0,btCollisionObject* body1,btPersistentManifold* sharedManifold=0) = 0;
+	virtual btCollisionAlgorithm* findAlgorithm(const btCollisionObjectWrapper* body0Wrap,const btCollisionObjectWrapper* body1Wrap,btPersistentManifold* sharedManifold=0) = 0;
 
-	virtual btPersistentManifold*	getNewManifold(void* body0,void* body1)=0;
+	virtual btPersistentManifold*	getNewManifold(const btCollisionObject* b0,const btCollisionObject* b1)=0;
 
 	virtual void releaseManifold(btPersistentManifold* manifold)=0;
 
 	virtual void clearManifold(btPersistentManifold* manifold)=0;
 
-	virtual bool	needsCollision(btCollisionObject* body0,btCollisionObject* body1) = 0;
+	virtual bool	needsCollision(const btCollisionObject* body0,const btCollisionObject* body1) = 0;
 
-	virtual bool	needsResponse(btCollisionObject* body0,btCollisionObject* body1)=0;
+	virtual bool	needsResponse(const btCollisionObject* body0,const btCollisionObject* body1)=0;
 
 	virtual void	dispatchAllCollisionPairs(btOverlappingPairCache* pairCache,const btDispatcherInfo& dispatchInfo,btDispatcher* dispatcher)  =0;
 
@@ -96,6 +96,10 @@ public:
 
 	virtual	btPersistentManifold**	getInternalManifoldPointer() = 0;
 
+	virtual	btPoolAllocator*	getInternalManifoldPool() = 0;
+
+	virtual	const btPoolAllocator*	getInternalManifoldPool() const = 0;
+
 	virtual	void* allocateCollisionAlgorithm(int size)  = 0;
 
 	virtual	void freeCollisionAlgorithm(void* ptr) = 0;
@@ -103,4 +107,4 @@ public:
 };
 
 
-#endif //_DISPATCHER_H
+#endif //BT_DISPATCHER_H
diff --git a/Engine/lib/bullet/src/BulletCollision/BroadphaseCollision/btMultiSapBroadphase.cpp b/Engine/lib/bullet/src/BulletCollision/BroadphaseCollision/btMultiSapBroadphase.cpp
index 6712f528e..81369fe9b 100644
--- a/Engine/lib/bullet/src/BulletCollision/BroadphaseCollision/btMultiSapBroadphase.cpp
+++ b/Engine/lib/bullet/src/BulletCollision/BroadphaseCollision/btMultiSapBroadphase.cpp
@@ -341,7 +341,7 @@ class btMultiSapBroadphasePairSortPredicate
 {
 	public:
 
-		bool operator() ( const btBroadphasePair& a1, const btBroadphasePair& b1 )
+		bool operator() ( const btBroadphasePair& a1, const btBroadphasePair& b1 ) const
 		{
 				btMultiSapBroadphase::btMultiSapProxy* aProxy0 = a1.m_pProxy0 ? (btMultiSapBroadphase::btMultiSapProxy*)a1.m_pProxy0->m_multiSapParentProxy : 0;
 				btMultiSapBroadphase::btMultiSapProxy* aProxy1 = a1.m_pProxy1 ? (btMultiSapBroadphase::btMultiSapProxy*)a1.m_pProxy1->m_multiSapParentProxy : 0;
diff --git a/Engine/lib/bullet/src/BulletCollision/BroadphaseCollision/btOverlappingPairCache.cpp b/Engine/lib/bullet/src/BulletCollision/BroadphaseCollision/btOverlappingPairCache.cpp
index b209bcb9a..041bbe05a 100644
--- a/Engine/lib/bullet/src/BulletCollision/BroadphaseCollision/btOverlappingPairCache.cpp
+++ b/Engine/lib/bullet/src/BulletCollision/BroadphaseCollision/btOverlappingPairCache.cpp
@@ -240,7 +240,7 @@ btBroadphasePair* btHashedOverlappingPairCache::internalAddPair(btBroadphaseProx
 		}*/
 	int count = m_overlappingPairArray.size();
 	int oldCapacity = m_overlappingPairArray.capacity();
-	void* mem = &m_overlappingPairArray.expand();
+	void* mem = &m_overlappingPairArray.expandNonInitializing();
 
 	//this is where we add an actual pair, so also call the 'ghost'
 	if (m_ghostPairCallback)
@@ -467,7 +467,7 @@ btBroadphasePair*	btSortedOverlappingPairCache::addOverlappingPair(btBroadphaseP
 	if (!needsBroadphaseCollision(proxy0,proxy1))
 		return 0;
 	
-	void* mem = &m_overlappingPairArray.expand();
+	void* mem = &m_overlappingPairArray.expandNonInitializing();
 	btBroadphasePair* pair = new (mem) btBroadphasePair(*proxy0,*proxy1);
 	
 	gOverlappingPairs++;
diff --git a/Engine/lib/bullet/src/BulletCollision/BroadphaseCollision/btOverlappingPairCache.h b/Engine/lib/bullet/src/BulletCollision/BroadphaseCollision/btOverlappingPairCache.h
index eda45c47b..7a3806c1d 100644
--- a/Engine/lib/bullet/src/BulletCollision/BroadphaseCollision/btOverlappingPairCache.h
+++ b/Engine/lib/bullet/src/BulletCollision/BroadphaseCollision/btOverlappingPairCache.h
@@ -13,8 +13,8 @@ subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-#ifndef OVERLAPPING_PAIR_CACHE_H
-#define OVERLAPPING_PAIR_CACHE_H
+#ifndef BT_OVERLAPPING_PAIR_CACHE_H
+#define BT_OVERLAPPING_PAIR_CACHE_H
 
 
 #include "btBroadphaseInterface.h"
@@ -457,12 +457,13 @@ public:
 	
 	virtual void	sortOverlappingPairs(btDispatcher* dispatcher)
 	{
+        (void) dispatcher;
 	}
 
 
 };
 
 
-#endif //OVERLAPPING_PAIR_CACHE_H
+#endif //BT_OVERLAPPING_PAIR_CACHE_H
 
 
diff --git a/Engine/lib/bullet/src/BulletCollision/BroadphaseCollision/btQuantizedBvh.cpp b/Engine/lib/bullet/src/BulletCollision/BroadphaseCollision/btQuantizedBvh.cpp
index 41ff80d15..c911435a9 100644
--- a/Engine/lib/bullet/src/BulletCollision/BroadphaseCollision/btQuantizedBvh.cpp
+++ b/Engine/lib/bullet/src/BulletCollision/BroadphaseCollision/btQuantizedBvh.cpp
@@ -17,6 +17,7 @@ subject to the following restrictions:
 
 #include "LinearMath/btAabbUtil2.h"
 #include "LinearMath/btIDebugDraw.h"
+#include "LinearMath/btSerializer.h"
 
 #define RAYAABB2
 
@@ -78,10 +79,10 @@ void btQuantizedBvh::buildInternal()
 #ifdef DEBUG_PATCH_COLORS
 btVector3 color[4]=
 {
-	btVector3(255,0,0),
-	btVector3(0,255,0),
-	btVector3(0,0,255),
-	btVector3(0,255,255)
+	btVector3(1,0,0),
+	btVector3(0,1,0),
+	btVector3(0,0,1),
+	btVector3(0,1,1)
 };
 #endif //DEBUG_PATCH_COLORS
 
@@ -493,8 +494,8 @@ void	btQuantizedBvh::walkStacklessTreeAgainstRay(btNodeOverlapCallback* nodeCall
 		bounds[0] = rootNode->m_aabbMinOrg;
 		bounds[1] = rootNode->m_aabbMaxOrg;
 		/* Add box cast extents */
-		bounds[0] += aabbMin;
-		bounds[1] += aabbMax;
+		bounds[0] -= aabbMax;
+		bounds[1] -= aabbMin;
 
 		aabbOverlap = TestAabbAgainstAabb2(rayAabbMin,rayAabbMax,rootNode->m_aabbMinOrg,rootNode->m_aabbMaxOrg);
 		//perhaps profile if it is worth doing the aabbOverlap test first
@@ -617,8 +618,8 @@ void	btQuantizedBvh::walkStacklessQuantizedTreeAgainstRay(btNodeOverlapCallback*
 			bounds[0] = unQuantize(rootNode->m_quantizedAabbMin);
 			bounds[1] = unQuantize(rootNode->m_quantizedAabbMax);
 			/* Add box cast extents */
-			bounds[0] += aabbMin;
-			bounds[1] += aabbMax;
+			bounds[0] -= aabbMax;
+			bounds[1] -= aabbMin;
 			btVector3 normal;
 #if 0
 			bool ra2 = btRayAabb2 (raySource, rayDirection, sign, bounds, param, 0.0, lambda_max);
@@ -830,7 +831,7 @@ unsigned int btQuantizedBvh::getAlignmentSerializationPadding()
 	return 0;//BVH_ALIGNMENT_BLOCKS * BVH_ALIGNMENT;
 }
 
-unsigned btQuantizedBvh::calculateSerializeBufferSize()
+unsigned btQuantizedBvh::calculateSerializeBufferSize() const
 {
 	unsigned baseSize = sizeof(btQuantizedBvh) + getAlignmentSerializationPadding();
 	baseSize += sizeof(btBvhSubtreeInfo) * m_subtreeHeaderCount;
@@ -841,7 +842,7 @@ unsigned btQuantizedBvh::calculateSerializeBufferSize()
 	return baseSize + m_curNodeIndex * sizeof(btOptimizedBvhNode);
 }
 
-bool btQuantizedBvh::serialize(void *o_alignedDataBuffer, unsigned /*i_dataBufferSize */, bool i_swapEndian)
+bool btQuantizedBvh::serialize(void *o_alignedDataBuffer, unsigned /*i_dataBufferSize */, bool i_swapEndian) const
 {
 	btAssert(m_subtreeHeaderCount == m_SubtreeHeaders.size());
 	m_subtreeHeaderCount = m_SubtreeHeaders.size();
@@ -1143,6 +1144,232 @@ m_bulletVersion(BT_BULLET_VERSION)
 
 }
 
+void btQuantizedBvh::deSerializeFloat(struct btQuantizedBvhFloatData& quantizedBvhFloatData)
+{	// Restores this BVH's bounds, flags and node/subtree arrays from the single-precision serialized struct.
+	m_bvhAabbMax.deSerializeFloat(quantizedBvhFloatData.m_bvhAabbMax);
+	m_bvhAabbMin.deSerializeFloat(quantizedBvhFloatData.m_bvhAabbMin);
+	m_bvhQuantization.deSerializeFloat(quantizedBvhFloatData.m_bvhQuantization);
+
+	m_curNodeIndex = quantizedBvhFloatData.m_curNodeIndex;
+	m_useQuantization = quantizedBvhFloatData.m_useQuantization!=0;	// stored as int; any non-zero value means true
+	// unquantized nodes: resize to the stored count, then copy element by element from the serialized array
+	{
+		int numElem = quantizedBvhFloatData.m_numContiguousLeafNodes;
+		m_contiguousNodes.resize(numElem);
+
+		if (numElem)
+		{
+			btOptimizedBvhNodeFloatData* memPtr = quantizedBvhFloatData.m_contiguousNodesPtr;
+
+			for (int i=0;i<numElem;i++,memPtr++)
+			{
+				m_contiguousNodes[i].m_aabbMaxOrg.deSerializeFloat(memPtr->m_aabbMaxOrg);
+				m_contiguousNodes[i].m_aabbMinOrg.deSerializeFloat(memPtr->m_aabbMinOrg);
+				m_contiguousNodes[i].m_escapeIndex = memPtr->m_escapeIndex;
+				m_contiguousNodes[i].m_subPart = memPtr->m_subPart;
+				m_contiguousNodes[i].m_triangleIndex = memPtr->m_triangleIndex;
+			}
+		}
+	}
+	// quantized nodes: same scheme, copying the escape/triangle index and the 3-component quantized bounds
+	{
+		int numElem = quantizedBvhFloatData.m_numQuantizedContiguousNodes;
+		m_quantizedContiguousNodes.resize(numElem);
+		
+		if (numElem)
+		{
+			btQuantizedBvhNodeData* memPtr = quantizedBvhFloatData.m_quantizedContiguousNodesPtr;
+			for (int i=0;i<numElem;i++,memPtr++)
+			{
+				m_quantizedContiguousNodes[i].m_escapeIndexOrTriangleIndex = memPtr->m_escapeIndexOrTriangleIndex;
+				m_quantizedContiguousNodes[i].m_quantizedAabbMax[0] = memPtr->m_quantizedAabbMax[0];
+				m_quantizedContiguousNodes[i].m_quantizedAabbMax[1] = memPtr->m_quantizedAabbMax[1];
+				m_quantizedContiguousNodes[i].m_quantizedAabbMax[2] = memPtr->m_quantizedAabbMax[2];
+				m_quantizedContiguousNodes[i].m_quantizedAabbMin[0] = memPtr->m_quantizedAabbMin[0];
+				m_quantizedContiguousNodes[i].m_quantizedAabbMin[1] = memPtr->m_quantizedAabbMin[1];
+				m_quantizedContiguousNodes[i].m_quantizedAabbMin[2] = memPtr->m_quantizedAabbMin[2];
+			}
+		}
+	}
+	// NOTE(review): m_subtreeHeaderCount is not assigned anywhere in this function - confirm that is intended
+	m_traversalMode = btTraversalMode(quantizedBvhFloatData.m_traversalMode);
+	// subtree headers: resize to the stored count, then copy bounds, root node index and size per entry
+	{
+		int numElem = quantizedBvhFloatData.m_numSubtreeHeaders;
+		m_SubtreeHeaders.resize(numElem);
+		if (numElem)
+		{
+			btBvhSubtreeInfoData* memPtr = quantizedBvhFloatData.m_subTreeInfoPtr;
+			for (int i=0;i<numElem;i++,memPtr++)
+			{
+				m_SubtreeHeaders[i].m_quantizedAabbMax[0] = memPtr->m_quantizedAabbMax[0] ;
+				m_SubtreeHeaders[i].m_quantizedAabbMax[1] = memPtr->m_quantizedAabbMax[1];
+				m_SubtreeHeaders[i].m_quantizedAabbMax[2] = memPtr->m_quantizedAabbMax[2];
+				m_SubtreeHeaders[i].m_quantizedAabbMin[0] = memPtr->m_quantizedAabbMin[0];
+				m_SubtreeHeaders[i].m_quantizedAabbMin[1] = memPtr->m_quantizedAabbMin[1];
+				m_SubtreeHeaders[i].m_quantizedAabbMin[2] = memPtr->m_quantizedAabbMin[2];
+				m_SubtreeHeaders[i].m_rootNodeIndex = memPtr->m_rootNodeIndex;
+				m_SubtreeHeaders[i].m_subtreeSize = memPtr->m_subtreeSize;
+			}
+		}
+	}
+}
+
+void btQuantizedBvh::deSerializeDouble(struct btQuantizedBvhDoubleData& quantizedBvhDoubleData)
+{	// Restores this BVH's bounds, flags and node/subtree arrays from the double-precision serialized struct.
+	m_bvhAabbMax.deSerializeDouble(quantizedBvhDoubleData.m_bvhAabbMax);
+	m_bvhAabbMin.deSerializeDouble(quantizedBvhDoubleData.m_bvhAabbMin);
+	m_bvhQuantization.deSerializeDouble(quantizedBvhDoubleData.m_bvhQuantization);
+
+	m_curNodeIndex = quantizedBvhDoubleData.m_curNodeIndex;
+	m_useQuantization = quantizedBvhDoubleData.m_useQuantization!=0;	// stored as int; any non-zero value means true
+	// unquantized nodes: resize to the stored count, then copy element by element from the serialized array
+	{
+		int numElem = quantizedBvhDoubleData.m_numContiguousLeafNodes;
+		m_contiguousNodes.resize(numElem);
+
+		if (numElem)
+		{
+			btOptimizedBvhNodeDoubleData* memPtr = quantizedBvhDoubleData.m_contiguousNodesPtr;
+
+			for (int i=0;i<numElem;i++,memPtr++)
+			{
+				m_contiguousNodes[i].m_aabbMaxOrg.deSerializeDouble(memPtr->m_aabbMaxOrg);
+				m_contiguousNodes[i].m_aabbMinOrg.deSerializeDouble(memPtr->m_aabbMinOrg);
+				m_contiguousNodes[i].m_escapeIndex = memPtr->m_escapeIndex;
+				m_contiguousNodes[i].m_subPart = memPtr->m_subPart;
+				m_contiguousNodes[i].m_triangleIndex = memPtr->m_triangleIndex;
+			}
+		}
+	}
+	// quantized nodes: same scheme, copying the escape/triangle index and the 3-component quantized bounds
+	{
+		int numElem = quantizedBvhDoubleData.m_numQuantizedContiguousNodes;
+		m_quantizedContiguousNodes.resize(numElem);
+		
+		if (numElem)
+		{
+			btQuantizedBvhNodeData* memPtr = quantizedBvhDoubleData.m_quantizedContiguousNodesPtr;
+			for (int i=0;i<numElem;i++,memPtr++)
+			{
+				m_quantizedContiguousNodes[i].m_escapeIndexOrTriangleIndex = memPtr->m_escapeIndexOrTriangleIndex;
+				m_quantizedContiguousNodes[i].m_quantizedAabbMax[0] = memPtr->m_quantizedAabbMax[0];
+				m_quantizedContiguousNodes[i].m_quantizedAabbMax[1] = memPtr->m_quantizedAabbMax[1];
+				m_quantizedContiguousNodes[i].m_quantizedAabbMax[2] = memPtr->m_quantizedAabbMax[2];
+				m_quantizedContiguousNodes[i].m_quantizedAabbMin[0] = memPtr->m_quantizedAabbMin[0];
+				m_quantizedContiguousNodes[i].m_quantizedAabbMin[1] = memPtr->m_quantizedAabbMin[1];
+				m_quantizedContiguousNodes[i].m_quantizedAabbMin[2] = memPtr->m_quantizedAabbMin[2];
+			}
+		}
+	}
+	// NOTE(review): m_subtreeHeaderCount is not assigned anywhere in this function - confirm that is intended
+	m_traversalMode = btTraversalMode(quantizedBvhDoubleData.m_traversalMode);
+	// subtree headers: resize to the stored count, then copy bounds, root node index and size per entry
+	{
+		int numElem = quantizedBvhDoubleData.m_numSubtreeHeaders;
+		m_SubtreeHeaders.resize(numElem);
+		if (numElem)
+		{
+			btBvhSubtreeInfoData* memPtr = quantizedBvhDoubleData.m_subTreeInfoPtr;
+			for (int i=0;i<numElem;i++,memPtr++)
+			{
+				m_SubtreeHeaders[i].m_quantizedAabbMax[0] = memPtr->m_quantizedAabbMax[0] ;
+				m_SubtreeHeaders[i].m_quantizedAabbMax[1] = memPtr->m_quantizedAabbMax[1];
+				m_SubtreeHeaders[i].m_quantizedAabbMax[2] = memPtr->m_quantizedAabbMax[2];
+				m_SubtreeHeaders[i].m_quantizedAabbMin[0] = memPtr->m_quantizedAabbMin[0];
+				m_SubtreeHeaders[i].m_quantizedAabbMin[1] = memPtr->m_quantizedAabbMin[1];
+				m_SubtreeHeaders[i].m_quantizedAabbMin[2] = memPtr->m_quantizedAabbMin[2];
+				m_SubtreeHeaders[i].m_rootNodeIndex = memPtr->m_rootNodeIndex;
+				m_SubtreeHeaders[i].m_subtreeSize = memPtr->m_subtreeSize;
+			}
+		}
+	}
+
+}
+
+
+
+///fills the dataBuffer and returns the struct name (btQuantizedBvhDataName); this implementation has no path that returns 0
+const char*	btQuantizedBvh::serialize(void* dataBuffer, btSerializer* serializer) const
+{
+	btQuantizedBvhData* quantizedData = (btQuantizedBvhData*)dataBuffer;
+	// dataBuffer is written as a btQuantizedBvhData, a macro for the float or double struct (see btQuantizedBvh.h)
+	m_bvhAabbMax.serialize(quantizedData->m_bvhAabbMax);
+	m_bvhAabbMin.serialize(quantizedData->m_bvhAabbMin);
+	m_bvhQuantization.serialize(quantizedData->m_bvhQuantization);
+
+	quantizedData->m_curNodeIndex = m_curNodeIndex;
+	quantizedData->m_useQuantization = m_useQuantization;
+	// unquantized nodes: record count and a serializer-unique pointer, then emit the array as its own chunk
+	quantizedData->m_numContiguousLeafNodes = m_contiguousNodes.size();
+	quantizedData->m_contiguousNodesPtr = (btOptimizedBvhNodeData*) (m_contiguousNodes.size() ? serializer->getUniquePointer((void*)&m_contiguousNodes[0]) : 0);
+	if (quantizedData->m_contiguousNodesPtr)	// pointer is 0 for an empty array, in which case no chunk is written
+	{
+		int sz = sizeof(btOptimizedBvhNodeData);
+		int numElem = m_contiguousNodes.size();
+		btChunk* chunk = serializer->allocate(sz,numElem);
+		btOptimizedBvhNodeData* memPtr = (btOptimizedBvhNodeData*)chunk->m_oldPtr;	// elements are written through chunk->m_oldPtr
+		for (int i=0;i<numElem;i++,memPtr++)
+		{
+			m_contiguousNodes[i].m_aabbMaxOrg.serialize(memPtr->m_aabbMaxOrg);
+			m_contiguousNodes[i].m_aabbMinOrg.serialize(memPtr->m_aabbMinOrg);
+			memPtr->m_escapeIndex = m_contiguousNodes[i].m_escapeIndex;
+			memPtr->m_subPart = m_contiguousNodes[i].m_subPart;
+			memPtr->m_triangleIndex = m_contiguousNodes[i].m_triangleIndex;
+		}
+		serializer->finalizeChunk(chunk,"btOptimizedBvhNodeData",BT_ARRAY_CODE,(void*)&m_contiguousNodes[0]);
+	}
+	// quantized nodes: same scheme as above
+	quantizedData->m_numQuantizedContiguousNodes = m_quantizedContiguousNodes.size();
+//	printf("quantizedData->m_numQuantizedContiguousNodes=%d\n",quantizedData->m_numQuantizedContiguousNodes);
+	quantizedData->m_quantizedContiguousNodesPtr =(btQuantizedBvhNodeData*) (m_quantizedContiguousNodes.size() ? serializer->getUniquePointer((void*)&m_quantizedContiguousNodes[0]) : 0);
+	if (quantizedData->m_quantizedContiguousNodesPtr)
+	{
+		int sz = sizeof(btQuantizedBvhNodeData);
+		int numElem = m_quantizedContiguousNodes.size();
+		btChunk* chunk = serializer->allocate(sz,numElem);
+		btQuantizedBvhNodeData* memPtr = (btQuantizedBvhNodeData*)chunk->m_oldPtr;
+		for (int i=0;i<numElem;i++,memPtr++)
+		{
+			memPtr->m_escapeIndexOrTriangleIndex = m_quantizedContiguousNodes[i].m_escapeIndexOrTriangleIndex;
+			memPtr->m_quantizedAabbMax[0] = m_quantizedContiguousNodes[i].m_quantizedAabbMax[0];
+			memPtr->m_quantizedAabbMax[1] = m_quantizedContiguousNodes[i].m_quantizedAabbMax[1];
+			memPtr->m_quantizedAabbMax[2] = m_quantizedContiguousNodes[i].m_quantizedAabbMax[2];
+			memPtr->m_quantizedAabbMin[0] = m_quantizedContiguousNodes[i].m_quantizedAabbMin[0];
+			memPtr->m_quantizedAabbMin[1] = m_quantizedContiguousNodes[i].m_quantizedAabbMin[1];
+			memPtr->m_quantizedAabbMin[2] = m_quantizedContiguousNodes[i].m_quantizedAabbMin[2];
+		}
+		serializer->finalizeChunk(chunk,"btQuantizedBvhNodeData",BT_ARRAY_CODE,(void*)&m_quantizedContiguousNodes[0]);
+	}
+
+	quantizedData->m_traversalMode = int(m_traversalMode);
+	quantizedData->m_numSubtreeHeaders = m_SubtreeHeaders.size();
+	// subtree headers: same scheme, one btBvhSubtreeInfoData per m_SubtreeHeaders entry
+	quantizedData->m_subTreeInfoPtr = (btBvhSubtreeInfoData*) (m_SubtreeHeaders.size() ? serializer->getUniquePointer((void*)&m_SubtreeHeaders[0]) : 0);
+	if (quantizedData->m_subTreeInfoPtr)
+	{
+		int sz = sizeof(btBvhSubtreeInfoData);
+		int numElem = m_SubtreeHeaders.size();
+		btChunk* chunk = serializer->allocate(sz,numElem);
+		btBvhSubtreeInfoData* memPtr = (btBvhSubtreeInfoData*)chunk->m_oldPtr;
+		for (int i=0;i<numElem;i++,memPtr++)
+		{
+			memPtr->m_quantizedAabbMax[0] = m_SubtreeHeaders[i].m_quantizedAabbMax[0];
+			memPtr->m_quantizedAabbMax[1] = m_SubtreeHeaders[i].m_quantizedAabbMax[1];
+			memPtr->m_quantizedAabbMax[2] = m_SubtreeHeaders[i].m_quantizedAabbMax[2];
+			memPtr->m_quantizedAabbMin[0] = m_SubtreeHeaders[i].m_quantizedAabbMin[0];
+			memPtr->m_quantizedAabbMin[1] = m_SubtreeHeaders[i].m_quantizedAabbMin[1];
+			memPtr->m_quantizedAabbMin[2] = m_SubtreeHeaders[i].m_quantizedAabbMin[2];
+
+			memPtr->m_rootNodeIndex = m_SubtreeHeaders[i].m_rootNodeIndex;
+			memPtr->m_subtreeSize = m_SubtreeHeaders[i].m_subtreeSize;
+		}
+		serializer->finalizeChunk(chunk,"btBvhSubtreeInfoData",BT_ARRAY_CODE,(void*)&m_SubtreeHeaders[0]);
+	}
+	return btQuantizedBvhDataName;
+}
+
+
 
 
 
diff --git a/Engine/lib/bullet/src/BulletCollision/BroadphaseCollision/btQuantizedBvh.h b/Engine/lib/bullet/src/BulletCollision/BroadphaseCollision/btQuantizedBvh.h
index ced457b60..78382da79 100644
--- a/Engine/lib/bullet/src/BulletCollision/BroadphaseCollision/btQuantizedBvh.h
+++ b/Engine/lib/bullet/src/BulletCollision/BroadphaseCollision/btQuantizedBvh.h
@@ -13,8 +13,10 @@ subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-#ifndef QUANTIZED_BVH_H
-#define QUANTIZED_BVH_H
+#ifndef BT_QUANTIZED_BVH_H
+#define BT_QUANTIZED_BVH_H
+
+class btSerializer;
 
 //#define DEBUG_CHECK_DEQUANTIZATION 1
 #ifdef DEBUG_CHECK_DEQUANTIZATION
@@ -29,6 +31,17 @@ subject to the following restrictions:
 #include "LinearMath/btVector3.h"
 #include "LinearMath/btAlignedAllocator.h"
 
+#ifdef BT_USE_DOUBLE_PRECISION
+#define btQuantizedBvhData btQuantizedBvhDoubleData
+#define btOptimizedBvhNodeData btOptimizedBvhNodeDoubleData
+#define btQuantizedBvhDataName "btQuantizedBvhDoubleData"
+#else
+#define btQuantizedBvhData btQuantizedBvhFloatData
+#define btOptimizedBvhNodeData btOptimizedBvhNodeFloatData
+#define btQuantizedBvhDataName "btQuantizedBvhFloatData"
+#endif
+
+
 
 //http://msdn.microsoft.com/library/default.asp?url=/library/en-us/vclang/html/vclrf__m128.asp
 
@@ -65,8 +78,10 @@ ATTRIBUTE_ALIGNED16	(struct) btQuantizedBvhNode
 	int	getTriangleIndex() const
 	{
 		btAssert(isLeafNode());
+		unsigned int x=0;
+		unsigned int y = (~(x&0))<<(31-MAX_NUM_PARTS_IN_BITS);
 		// Get only the lower bits where the triangle index is stored
-		return (m_escapeIndexOrTriangleIndex&~((~0)<<(31-MAX_NUM_PARTS_IN_BITS)));
+		return (m_escapeIndexOrTriangleIndex&~(y));
 	}
 	int	getPartId() const
 	{
@@ -94,9 +109,9 @@ ATTRIBUTE_ALIGNED16 (struct) btOptimizedBvhNode
 	//for child nodes
 	int	m_subPart;
 	int	m_triangleIndex;
-	int	m_padding[5];//bad, due to alignment
-
 
+//pad the size to 64 bytes
+	char	m_padding[20];
 };
 
 
@@ -190,7 +205,7 @@ protected:
 	BvhSubtreeInfoArray		m_SubtreeHeaders;
 
 	//This is only used for serialization so we don't have to add serialization directly to btAlignedObjectArray
-	int m_subtreeHeaderCount;
+	mutable int m_subtreeHeaderCount;
 
 	
 
@@ -443,17 +458,32 @@ public:
 		return m_SubtreeHeaders;
 	}
 
+////////////////////////////////////////////////////////////////////
 
 	/////Calculate space needed to store BVH for serialization
-	unsigned calculateSerializeBufferSize();
+	unsigned calculateSerializeBufferSize() const;
 
 	/// Data buffer MUST be 16 byte aligned
-	virtual bool serialize(void *o_alignedDataBuffer, unsigned i_dataBufferSize, bool i_swapEndian);
+	virtual bool serialize(void *o_alignedDataBuffer, unsigned i_dataBufferSize, bool i_swapEndian) const;
 
 	///deSerializeInPlace loads and initializes a BVH from a buffer in memory 'in place'
 	static btQuantizedBvh *deSerializeInPlace(void *i_alignedDataBuffer, unsigned int i_dataBufferSize, bool i_swapEndian);
 
 	static unsigned int getAlignmentSerializationPadding();
+//////////////////////////////////////////////////////////////////////
+
+	
+	virtual	int	calculateSerializeBufferSizeNew() const;
+
+	///fills the dataBuffer and returns the struct name (and 0 on failure)
+	virtual	const char*	serialize(void* dataBuffer, btSerializer* serializer) const;
+
+	virtual	void deSerializeFloat(struct btQuantizedBvhFloatData& quantizedBvhFloatData);
+
+	virtual	void deSerializeDouble(struct btQuantizedBvhDoubleData& quantizedBvhDoubleData);
+
+
+////////////////////////////////////////////////////////////////////
 
 	SIMD_FORCE_INLINE bool isQuantized()
 	{
@@ -470,4 +500,82 @@ private:
 ;
 
 
-#endif //QUANTIZED_BVH_H
+struct	btBvhSubtreeInfoData	// serialized form of one btQuantizedBvh subtree header (m_SubtreeHeaders entry)
+{
+	int			m_rootNodeIndex;
+	int			m_subtreeSize;
+	unsigned short m_quantizedAabbMin[3];	// quantized bounds, one value per axis
+	unsigned short m_quantizedAabbMax[3];
+};
+
+struct btOptimizedBvhNodeFloatData	// serialized form of one unquantized BVH node, single-precision bounds
+{
+	btVector3FloatData	m_aabbMinOrg;
+	btVector3FloatData	m_aabbMaxOrg;
+	int	m_escapeIndex;
+	int	m_subPart;
+	int	m_triangleIndex;
+	char m_pad[4];	// presumably padding; not read or written by btQuantizedBvh::serialize/deSerializeFloat
+};
+
+struct btOptimizedBvhNodeDoubleData	// serialized form of one unquantized BVH node, double-precision bounds
+{
+	btVector3DoubleData	m_aabbMinOrg;
+	btVector3DoubleData	m_aabbMaxOrg;
+	int	m_escapeIndex;
+	int	m_subPart;
+	int	m_triangleIndex;
+	char	m_pad[4];	// presumably padding; not read or written by btQuantizedBvh::serialize/deSerializeDouble
+};
+
+
+struct btQuantizedBvhNodeData	// serialized form of one quantized BVH node; shared by the float and double top-level structs
+{
+	unsigned short m_quantizedAabbMin[3];	// quantized bounds, one value per axis
+	unsigned short m_quantizedAabbMax[3];
+	int	m_escapeIndexOrTriangleIndex;
+};
+
+struct	btQuantizedBvhFloatData	// top-level serialized btQuantizedBvh, single precision (selected as btQuantizedBvhData without BT_USE_DOUBLE_PRECISION)
+{
+	btVector3FloatData			m_bvhAabbMin;
+	btVector3FloatData			m_bvhAabbMax;
+	btVector3FloatData			m_bvhQuantization;
+	int					m_curNodeIndex;
+	int					m_useQuantization;	// bool stored as int; deSerializeFloat treats non-zero as true
+	int					m_numContiguousLeafNodes;	// element count of m_contiguousNodesPtr
+	int					m_numQuantizedContiguousNodes;	// element count of m_quantizedContiguousNodesPtr
+	btOptimizedBvhNodeFloatData	*m_contiguousNodesPtr;	// 0 when the array is empty
+	btQuantizedBvhNodeData		*m_quantizedContiguousNodesPtr;	// 0 when the array is empty
+	btBvhSubtreeInfoData	*m_subTreeInfoPtr;	// 0 when the array is empty
+	int					m_traversalMode;	// btTraversalMode stored as int
+	int					m_numSubtreeHeaders;	// element count of m_subTreeInfoPtr
+	
+};
+
+struct	btQuantizedBvhDoubleData	// top-level serialized btQuantizedBvh, double precision (selected as btQuantizedBvhData with BT_USE_DOUBLE_PRECISION)
+{
+	btVector3DoubleData			m_bvhAabbMin;
+	btVector3DoubleData			m_bvhAabbMax;
+	btVector3DoubleData			m_bvhQuantization;
+	int							m_curNodeIndex;
+	int							m_useQuantization;	// bool stored as int; deSerializeDouble treats non-zero as true
+	int							m_numContiguousLeafNodes;	// element count of m_contiguousNodesPtr
+	int							m_numQuantizedContiguousNodes;	// element count of m_quantizedContiguousNodesPtr
+	btOptimizedBvhNodeDoubleData	*m_contiguousNodesPtr;	// 0 when the array is empty
+	btQuantizedBvhNodeData			*m_quantizedContiguousNodesPtr;	// 0 when the array is empty
+	// note: member order differs from btQuantizedBvhFloatData - here m_subTreeInfoPtr follows the two ints below
+	int							m_traversalMode;	// btTraversalMode stored as int
+	int							m_numSubtreeHeaders;	// element count of m_subTreeInfoPtr
+	btBvhSubtreeInfoData		*m_subTreeInfoPtr;	// 0 when the array is empty
+};
+
+
+SIMD_FORCE_INLINE	int	btQuantizedBvh::calculateSerializeBufferSizeNew() const
+{
+	return sizeof(btQuantizedBvhData);	// top-level struct only; serialize() writes the node and subtree arrays as separate chunks
+}
+
+
+
+#endif //BT_QUANTIZED_BVH_H
diff --git a/Engine/lib/bullet/src/BulletCollision/BroadphaseCollision/btSimpleBroadphase.cpp b/Engine/lib/bullet/src/BulletCollision/BroadphaseCollision/btSimpleBroadphase.cpp
index caed63db0..752fcd0fe 100644
--- a/Engine/lib/bullet/src/BulletCollision/BroadphaseCollision/btSimpleBroadphase.cpp
+++ b/Engine/lib/bullet/src/BulletCollision/BroadphaseCollision/btSimpleBroadphase.cpp
@@ -20,6 +20,8 @@ subject to the following restrictions:
 #include "LinearMath/btVector3.h"
 #include "LinearMath/btTransform.h"
 #include "LinearMath/btMatrix3x3.h"
+#include "LinearMath/btAabbUtil2.h"
+
 #include <new>
 
 extern int gOverlappingPairs;
@@ -166,6 +168,23 @@ void	btSimpleBroadphase::rayTest(const btVector3& rayFrom,const btVector3& rayTo
 }
 
 
+void	btSimpleBroadphase::aabbTest(const btVector3& aabbMin, const btVector3& aabbMax, btBroadphaseAabbCallback& callback)
+{	// AABB query by linear scan: calls callback.process() for every handle that passes the box test below
+	for (int i=0; i <= m_LastHandleIndex; i++)	// inclusive upper bound: m_LastHandleIndex itself is visited
+	{
+		btSimpleBroadphaseProxy* proxy = &m_pHandles[i];
+		if(!proxy->m_clientObject)	// no client object attached (presumably an unused slot): skip
+		{
+			continue;
+		}
+		if (TestAabbAgainstAabb2(aabbMin,aabbMax,proxy->m_aabbMin,proxy->m_aabbMax))	// query box vs. the proxy's stored AABB
+		{
+			callback.process(proxy);
+		}
+	}
+}
+
+
 
 	
 
diff --git a/Engine/lib/bullet/src/BulletCollision/BroadphaseCollision/btSimpleBroadphase.h b/Engine/lib/bullet/src/BulletCollision/BroadphaseCollision/btSimpleBroadphase.h
index deffb0a7a..7cb3c40a0 100644
--- a/Engine/lib/bullet/src/BulletCollision/BroadphaseCollision/btSimpleBroadphase.h
+++ b/Engine/lib/bullet/src/BulletCollision/BroadphaseCollision/btSimpleBroadphase.h
@@ -13,8 +13,8 @@ subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-#ifndef SIMPLE_BROADPHASE_H
-#define SIMPLE_BROADPHASE_H
+#ifndef BT_SIMPLE_BROADPHASE_H
+#define BT_SIMPLE_BROADPHASE_H
 
 
 #include "btOverlappingPairCache.h"
@@ -136,6 +136,7 @@ public:
 	virtual void	getAabb(btBroadphaseProxy* proxy,btVector3& aabbMin, btVector3& aabbMax ) const;
 
 	virtual void	rayTest(const btVector3& rayFrom,const btVector3& rayTo, btBroadphaseRayCallback& rayCallback, const btVector3& aabbMin=btVector3(0,0,0),const btVector3& aabbMax=btVector3(0,0,0));
+	virtual void	aabbTest(const btVector3& aabbMin, const btVector3& aabbMax, btBroadphaseAabbCallback& callback);
 		
 	btOverlappingPairCache*	getOverlappingPairCache()
 	{
@@ -166,5 +167,5 @@ public:
 
 
 
-#endif //SIMPLE_BROADPHASE_H
+#endif //BT_SIMPLE_BROADPHASE_H
 
diff --git a/Engine/lib/bullet/src/BulletCollision/CMakeLists.txt b/Engine/lib/bullet/src/BulletCollision/CMakeLists.txt
index 0e2438b15..e740e98ae 100644
--- a/Engine/lib/bullet/src/BulletCollision/CMakeLists.txt
+++ b/Engine/lib/bullet/src/BulletCollision/CMakeLists.txt
@@ -1,4 +1,4 @@
-INCLUDE_DIRECTORIES( ${BULLET_PHYSICS_SOURCE_DIR}/src } )
+INCLUDE_DIRECTORIES( ${BULLET_PHYSICS_SOURCE_DIR}/src  )
 
 SET(BulletCollision_SRCS
 	BroadphaseCollision/btAxisSweep3.cpp
@@ -26,6 +26,8 @@ SET(BulletCollision_SRCS
 	CollisionDispatch/btDefaultCollisionConfiguration.cpp
 	CollisionDispatch/btEmptyCollisionAlgorithm.cpp
 	CollisionDispatch/btGhostObject.cpp
+	CollisionDispatch/btInternalEdgeUtility.cpp
+	CollisionDispatch/btInternalEdgeUtility.h
 	CollisionDispatch/btManifoldResult.cpp
 	CollisionDispatch/btSimulationIslandManager.cpp
 	CollisionDispatch/btSphereBoxCollisionAlgorithm.cpp
@@ -44,6 +46,7 @@ SET(BulletCollision_SRCS
 	CollisionShapes/btConvexHullShape.cpp
 	CollisionShapes/btConvexInternalShape.cpp
 	CollisionShapes/btConvexPointCloudShape.cpp
+	CollisionShapes/btConvexPolyhedron.cpp
 	CollisionShapes/btConvexShape.cpp
 	CollisionShapes/btConvex2dShape.cpp
 	CollisionShapes/btConvexTriangleMeshShape.cpp
@@ -90,6 +93,7 @@ SET(BulletCollision_SRCS
 	NarrowPhaseCollision/btRaycastCallback.cpp
 	NarrowPhaseCollision/btSubSimplexConvexCast.cpp
 	NarrowPhaseCollision/btVoronoiSimplexSolver.cpp
+	NarrowPhaseCollision/btPolyhedralContactClipping.cpp
 )
 
 SET(Root_HDRS
@@ -148,6 +152,7 @@ SET(CollisionShapes_HDRS
 	CollisionShapes/btConvexHullShape.h
 	CollisionShapes/btConvexInternalShape.h
 	CollisionShapes/btConvexPointCloudShape.h
+	CollisionShapes/btConvexPolyhedron.h
 	CollisionShapes/btConvexShape.h
 	CollisionShapes/btConvex2dShape.h
 	CollisionShapes/btConvexTriangleMeshShape.h
@@ -170,6 +175,7 @@ SET(CollisionShapes_HDRS
 	CollisionShapes/btTriangleCallback.h
 	CollisionShapes/btTriangleIndexVertexArray.h
 	CollisionShapes/btTriangleIndexVertexMaterialArray.h
+	CollisionShapes/btTriangleInfoMap.h
 	CollisionShapes/btTriangleMesh.h
 	CollisionShapes/btTriangleMeshShape.h
 	CollisionShapes/btTriangleShape.h
@@ -221,6 +227,7 @@ SET(NarrowPhaseCollision_HDRS
 	NarrowPhaseCollision/btSimplexSolverInterface.h
 	NarrowPhaseCollision/btSubSimplexConvexCast.h
 	NarrowPhaseCollision/btVoronoiSimplexSolver.h
+	NarrowPhaseCollision/btPolyhedralContactClipping.h
 )
 
 SET(BulletCollision_HDRS
@@ -241,27 +248,32 @@ IF (BUILD_SHARED_LIBS)
 ENDIF (BUILD_SHARED_LIBS)
 
 
-
-
-#INSTALL of other files requires CMake 2.6
-IF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5)
-	IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
-		INSTALL(TARGETS BulletCollision DESTINATION .)
-	ELSE (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
-	INSTALL(TARGETS BulletCollision DESTINATION lib)
-        INSTALL(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} DESTINATION include FILES_MATCHING PATTERN "*.h")
-	ENDIF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
-ENDIF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5)
-
-IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
-	SET_TARGET_PROPERTIES(BulletCollision PROPERTIES FRAMEWORK true)
-
-	SET_TARGET_PROPERTIES(BulletCollision PROPERTIES PUBLIC_HEADER "${Root_HDRS}")
-	# Have to list out sub-directories manually:
-	SET_PROPERTY(SOURCE ${BroadphaseCollision_HDRS} PROPERTY MACOSX_PACKAGE_LOCATION Headers/BroadphaseCollision)
-	SET_PROPERTY(SOURCE ${CollisionDispatch_HDRS} PROPERTY MACOSX_PACKAGE_LOCATION Headers/CollisionDispatch)
-	SET_PROPERTY(SOURCE ${CollisionShapes_HDRS} PROPERTY MACOSX_PACKAGE_LOCATION Headers/CollisionShapes)
-	SET_PROPERTY(SOURCE ${Gimpact_HDRS} PROPERTY MACOSX_PACKAGE_LOCATION Headers/Gimpact)
-	SET_PROPERTY(SOURCE ${NarrowPhaseCollision_HDRS} PROPERTY MACOSX_PACKAGE_LOCATION Headers/NarrowPhaseCollision)
-
-ENDIF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
+IF (INSTALL_LIBS)	# install rules and the framework setup below are only generated when INSTALL_LIBS is set
+	IF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES)	# ...and not when INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES is set
+		#INSTALL of other files requires CMake 2.6; NOTE(review): the guard below compares "major.minor" numerically -- VERSION_GREATER may be the more robust form, confirm against the oldest CMake this tree must support
+		IF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5)
+			IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
+				INSTALL(TARGETS BulletCollision DESTINATION .)	# framework build: target goes to '.' (relative to the install prefix)
+			ELSE (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
+				INSTALL(TARGETS BulletCollision DESTINATION lib${LIB_SUFFIX})	# LIB_SUFFIX is not set in the visible part of this file -- presumably provided by the top-level CMakeLists; confirm
+				INSTALL(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+DESTINATION ${INCLUDE_INSTALL_DIR} FILES_MATCHING PATTERN "*.h" PATTERN ".svn" EXCLUDE PATTERN "CMakeFiles" EXCLUDE)	# installs only the *.h files of this directory tree, leaving out .svn and CMakeFiles; INCLUDE_INSTALL_DIR is not set in the visible part of this file -- confirm where it is defined
+				INSTALL(FILES ../btBulletCollisionCommon.h
+DESTINATION ${INCLUDE_INSTALL_DIR}/BulletCollision)	# this header lives one directory up (../) but is installed into the BulletCollision include folder
+			ENDIF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
+		ENDIF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5)
+		# Apple shared-library framework build: mark the target as a framework and lay out its headers.
+		IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
+			SET_TARGET_PROPERTIES(BulletCollision PROPERTIES FRAMEWORK true)
+		
+			SET_TARGET_PROPERTIES(BulletCollision PROPERTIES PUBLIC_HEADER "${Root_HDRS}")	# the Root_HDRS list becomes the target's PUBLIC_HEADER set
+			# Have to list out sub-directories manually: each header group below is placed under Headers/<group> via MACOSX_PACKAGE_LOCATION
+			SET_PROPERTY(SOURCE ${BroadphaseCollision_HDRS} PROPERTY MACOSX_PACKAGE_LOCATION Headers/BroadphaseCollision)
+			SET_PROPERTY(SOURCE ${CollisionDispatch_HDRS} PROPERTY MACOSX_PACKAGE_LOCATION Headers/CollisionDispatch)
+			SET_PROPERTY(SOURCE ${CollisionShapes_HDRS} PROPERTY MACOSX_PACKAGE_LOCATION Headers/CollisionShapes)
+			SET_PROPERTY(SOURCE ${Gimpact_HDRS} PROPERTY MACOSX_PACKAGE_LOCATION Headers/Gimpact)
+			SET_PROPERTY(SOURCE ${NarrowPhaseCollision_HDRS} PROPERTY MACOSX_PACKAGE_LOCATION Headers/NarrowPhaseCollision)
+		
+		ENDIF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
+	ENDIF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES)
+ENDIF (INSTALL_LIBS)
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/SphereTriangleDetector.cpp b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/SphereTriangleDetector.cpp
index f76755fbb..634017809 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/SphereTriangleDetector.cpp
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/SphereTriangleDetector.cpp
@@ -57,8 +57,6 @@ void	SphereTriangleDetector::getClosestPoints(const ClosestPointInput& input,Res
 
 }
 
-#define MAX_OVERLAP btScalar(0.)
-
 
 
 // See also geometrictools.com
@@ -93,48 +91,39 @@ bool SphereTriangleDetector::facecontains(const btVector3 &p,const btVector3* ve
 	return pointInTriangle(vertices, lnormal, &lp);
 }
 
-///combined discrete/continuous sphere-triangle
 bool SphereTriangleDetector::collide(const btVector3& sphereCenter,btVector3 &point, btVector3& resultNormal, btScalar& depth, btScalar &timeOfImpact, btScalar contactBreakingThreshold)
 {
 
 	const btVector3* vertices = &m_triangle->getVertexPtr(0);
-	const btVector3& c = sphereCenter;
-	btScalar r = m_sphere->getRadius();
-
-	btVector3 delta (0,0,0);
+	
+	btScalar radius = m_sphere->getRadius();
+	btScalar radiusWithThreshold = radius + contactBreakingThreshold;
 
 	btVector3 normal = (vertices[1]-vertices[0]).cross(vertices[2]-vertices[0]);
 	normal.normalize();
-	btVector3 p1ToCentre = c - vertices[0];
+	btVector3 p1ToCentre = sphereCenter - vertices[0];
 	btScalar distanceFromPlane = p1ToCentre.dot(normal);
 
 	if (distanceFromPlane < btScalar(0.))
 	{
 		//triangle facing the other way
-	
 		distanceFromPlane *= btScalar(-1.);
 		normal *= btScalar(-1.);
 	}
 
-	btScalar contactMargin = contactBreakingThreshold;
-	bool isInsideContactPlane = distanceFromPlane < r + contactMargin;
-	bool isInsideShellPlane = distanceFromPlane < r;
+	bool isInsideContactPlane = distanceFromPlane < radiusWithThreshold;
 	
-	btScalar deltaDotNormal = delta.dot(normal);
-	if (!isInsideShellPlane && deltaDotNormal >= btScalar(0.0))
-		return false;
-
 	// Check for contact / intersection
 	bool hasContact = false;
 	btVector3 contactPoint;
 	if (isInsideContactPlane) {
-		if (facecontains(c,vertices,normal)) {
+		if (facecontains(sphereCenter,vertices,normal)) {
 			// Inside the contact wedge - touches a point on the shell plane
 			hasContact = true;
-			contactPoint = c - normal*distanceFromPlane;
+			contactPoint = sphereCenter - normal*distanceFromPlane;
 		} else {
 			// Could be inside one of the contact capsules
-			btScalar contactCapsuleRadiusSqr = (r + contactMargin) * (r + contactMargin);
+			btScalar contactCapsuleRadiusSqr = radiusWithThreshold*radiusWithThreshold;
 			btVector3 nearestOnEdge;
 			for (int i = 0; i < m_triangle->getNumEdges(); i++) {
 				
@@ -143,7 +132,7 @@ bool SphereTriangleDetector::collide(const btVector3& sphereCenter,btVector3 &po
 				
 				m_triangle->getEdge(i,pa,pb);
 
-				btScalar distanceSqr = SegmentSqrDistance(pa,pb,c, nearestOnEdge);
+				btScalar distanceSqr = SegmentSqrDistance(pa,pb,sphereCenter, nearestOnEdge);
 				if (distanceSqr < contactCapsuleRadiusSqr) {
 					// Yep, we're inside a capsule
 					hasContact = true;
@@ -155,24 +144,26 @@ bool SphereTriangleDetector::collide(const btVector3& sphereCenter,btVector3 &po
 	}
 
 	if (hasContact) {
-		btVector3 contactToCentre = c - contactPoint;
+		btVector3 contactToCentre = sphereCenter - contactPoint;
 		btScalar distanceSqr = contactToCentre.length2();
-		if (distanceSqr < (r - MAX_OVERLAP)*(r - MAX_OVERLAP)) {
-			btScalar distance = btSqrt(distanceSqr);
-			resultNormal = contactToCentre;
-			resultNormal.normalize();
-			point = contactPoint;
-			depth = -(r-distance);
+
+		if (distanceSqr < radiusWithThreshold*radiusWithThreshold)
+		{
+			if (distanceSqr>SIMD_EPSILON)
+			{
+				btScalar distance = btSqrt(distanceSqr);
+				resultNormal = contactToCentre;
+				resultNormal.normalize();
+				point = contactPoint;
+				depth = -(radius-distance);
+			} else
+			{
+				resultNormal = normal;
+				point = contactPoint;
+				depth = -radius;
+			}
 			return true;
 		}
-
-		if (delta.dot(contactToCentre) >= btScalar(0.0)) 
-			return false;
-		
-		// Moving towards the contact point -> collision
-		point = contactPoint;
-		timeOfImpact = btScalar(0.0);
-		return true;
 	}
 	
 	return false;
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/SphereTriangleDetector.h b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/SphereTriangleDetector.h
index 981bd54e7..22953af43 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/SphereTriangleDetector.h
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/SphereTriangleDetector.h
@@ -13,8 +13,8 @@ subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-#ifndef SPHERE_TRIANGLE_DETECTOR_H
-#define SPHERE_TRIANGLE_DETECTOR_H
+#ifndef BT_SPHERE_TRIANGLE_DETECTOR_H
+#define BT_SPHERE_TRIANGLE_DETECTOR_H
 
 #include "BulletCollision/NarrowPhaseCollision/btDiscreteCollisionDetectorInterface.h"
 
@@ -34,9 +34,11 @@ struct SphereTriangleDetector : public btDiscreteCollisionDetectorInterface
 
 	virtual ~SphereTriangleDetector() {};
 
+	bool collide(const btVector3& sphereCenter,btVector3 &point, btVector3& resultNormal, btScalar& depth, btScalar &timeOfImpact, btScalar	contactBreakingThreshold);
+
 private:
 
-	bool collide(const btVector3& sphereCenter,btVector3 &point, btVector3& resultNormal, btScalar& depth, btScalar &timeOfImpact, btScalar	contactBreakingThreshold);
+	
 	bool pointInTriangle(const btVector3 vertices[], const btVector3 &normal, btVector3 *p );
 	bool facecontains(const btVector3 &p,const btVector3* vertices,btVector3& normal);
 
@@ -45,5 +47,5 @@ private:
 	btScalar	m_contactBreakingThreshold;
 	
 };
-#endif //SPHERE_TRIANGLE_DETECTOR_H
+#endif //BT_SPHERE_TRIANGLE_DETECTOR_H
 
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btActivatingCollisionAlgorithm.cpp b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btActivatingCollisionAlgorithm.cpp
index 7e5da6c58..57f146493 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btActivatingCollisionAlgorithm.cpp
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btActivatingCollisionAlgorithm.cpp
@@ -24,7 +24,7 @@ btActivatingCollisionAlgorithm::btActivatingCollisionAlgorithm (const btCollisio
 //m_colObj1(0)
 {
 }
-btActivatingCollisionAlgorithm::btActivatingCollisionAlgorithm (const btCollisionAlgorithmConstructionInfo& ci, btCollisionObject* colObj0,btCollisionObject* colObj1)
+btActivatingCollisionAlgorithm::btActivatingCollisionAlgorithm (const btCollisionAlgorithmConstructionInfo& ci, const btCollisionObjectWrapper* ,const btCollisionObjectWrapper* )
 :btCollisionAlgorithm(ci)
 //,
 //m_colObj0(0),
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btActivatingCollisionAlgorithm.h b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btActivatingCollisionAlgorithm.h
index 25fe08894..489812b96 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btActivatingCollisionAlgorithm.h
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btActivatingCollisionAlgorithm.h
@@ -28,7 +28,7 @@ public:
 
 	btActivatingCollisionAlgorithm (const btCollisionAlgorithmConstructionInfo& ci);
 
-	btActivatingCollisionAlgorithm (const btCollisionAlgorithmConstructionInfo& ci, btCollisionObject* colObj0,btCollisionObject* colObj1);
+	btActivatingCollisionAlgorithm (const btCollisionAlgorithmConstructionInfo& ci, const btCollisionObjectWrapper* body0Wrap,const btCollisionObjectWrapper* body1Wrap);
 
 	virtual ~btActivatingCollisionAlgorithm();
 
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btBox2dBox2dCollisionAlgorithm.cpp b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btBox2dBox2dCollisionAlgorithm.cpp
index 0dddedeca..2c3627782 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btBox2dBox2dCollisionAlgorithm.cpp
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btBox2dBox2dCollisionAlgorithm.cpp
@@ -22,17 +22,18 @@ subject to the following restrictions:
 #include "BulletCollision/CollisionDispatch/btCollisionObject.h"
 #include "BulletCollision/CollisionDispatch/btBoxBoxDetector.h"
 #include "BulletCollision/CollisionShapes/btBox2dShape.h"
+#include "BulletCollision/CollisionDispatch/btCollisionObjectWrapper.h"
 
 #define USE_PERSISTENT_CONTACTS 1
 
-btBox2dBox2dCollisionAlgorithm::btBox2dBox2dCollisionAlgorithm(btPersistentManifold* mf,const btCollisionAlgorithmConstructionInfo& ci,btCollisionObject* obj0,btCollisionObject* obj1)
-: btActivatingCollisionAlgorithm(ci,obj0,obj1),
+btBox2dBox2dCollisionAlgorithm::btBox2dBox2dCollisionAlgorithm(btPersistentManifold* mf,const btCollisionAlgorithmConstructionInfo& ci,const btCollisionObjectWrapper* obj0Wrap,const btCollisionObjectWrapper* obj1Wrap)
+: btActivatingCollisionAlgorithm(ci,obj0Wrap,obj1Wrap),	// the base class now receives the wrappers instead of raw btCollisionObject pointers
 m_ownManifold(false),
 m_manifoldPtr(mf)
 {
-	if (!m_manifoldPtr && m_dispatcher->needsCollision(obj0,obj1))
+	if (!m_manifoldPtr && m_dispatcher->needsCollision(obj0Wrap->getCollisionObject(),obj1Wrap->getCollisionObject()))	// only when no manifold was passed in and the dispatcher reports that the pair needs collision
 	{
-		m_manifoldPtr = m_dispatcher->getNewManifold(obj0,obj1);
+		m_manifoldPtr = m_dispatcher->getNewManifold(obj0Wrap->getCollisionObject(),obj1Wrap->getCollisionObject());	// the dispatcher API is still called with the unwrapped collision objects; m_ownManifold records that the manifold was obtained here
 		m_ownManifold = true;
 	}
 }
@@ -52,19 +53,18 @@ btBox2dBox2dCollisionAlgorithm::~btBox2dBox2dCollisionAlgorithm()
 void b2CollidePolygons(btManifoldResult* manifold,  const btBox2dShape* polyA, const btTransform& xfA, const btBox2dShape* polyB, const btTransform& xfB);
 
 //#include <stdio.h>
-void btBox2dBox2dCollisionAlgorithm::processCollision (btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut)
+void btBox2dBox2dCollisionAlgorithm::processCollision (const btCollisionObjectWrapper* body0Wrap,const btCollisionObjectWrapper* body1Wrap,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut)
 {
 	if (!m_manifoldPtr)
 		return;
 
-	btCollisionObject*	col0 = body0;
-	btCollisionObject*	col1 = body1;
-	btBox2dShape* box0 = (btBox2dShape*)col0->getCollisionShape();
-	btBox2dShape* box1 = (btBox2dShape*)col1->getCollisionShape();
+	
+	const btBox2dShape* box0 = (const btBox2dShape*)body0Wrap->getCollisionShape();
+	const btBox2dShape* box1 = (const btBox2dShape*)body1Wrap->getCollisionShape();
 
 	resultOut->setPersistentManifold(m_manifoldPtr);
 
-	b2CollidePolygons(resultOut,box0,col0->getWorldTransform(),box1,col1->getWorldTransform());
+	b2CollidePolygons(resultOut,box0,body0Wrap->getWorldTransform(),box1,body1Wrap->getWorldTransform());
 
 	//  refreshContactPoints is only necessary when using persistent contact points. otherwise all points are newly added
 	if (m_ownManifold)
@@ -135,14 +135,13 @@ static int ClipSegmentToLine(ClipVertex vOut[2], ClipVertex vIn[2],
 static btScalar EdgeSeparation(const btBox2dShape* poly1, const btTransform& xf1, int edge1,
 							  const btBox2dShape* poly2, const btTransform& xf2)
 {
-	int count1 = poly1->getVertexCount();
 	const btVector3* vertices1 = poly1->getVertices();
 	const btVector3* normals1 = poly1->getNormals();
 
 	int count2 = poly2->getVertexCount();
 	const btVector3* vertices2 = poly2->getVertices();
 
-	btAssert(0 <= edge1 && edge1 < count1);
+	btAssert(0 <= edge1 && edge1 < poly1->getVertexCount());
 
 	// Convert normal from poly1's frame into poly2's frame.
 	btVector3 normal1World = b2Mul(xf1.getBasis(), normals1[edge1]);
@@ -152,15 +151,8 @@ static btScalar EdgeSeparation(const btBox2dShape* poly1, const btTransform& xf1
 	int index = 0;
 	btScalar minDot = BT_LARGE_FLOAT;
 
-	for (int i = 0; i < count2; ++i)
-	{
-		btScalar dot = b2Dot(vertices2[i], normal1);
-		if (dot < minDot)
-		{
-			minDot = dot;
-			index = i;
-		}
-	}
+    if( count2 > 0 )
+        index = (int) normal1.minDot( vertices2, count2, minDot);
 
 	btVector3 v1 = b2Mul(xf1, vertices1[edge1]);
 	btVector3 v2 = b2Mul(xf2, vertices2[index]);
@@ -182,16 +174,9 @@ static btScalar FindMaxSeparation(int* edgeIndex,
 
 	// Find edge normal on poly1 that has the largest projection onto d.
 	int edge = 0;
-	btScalar maxDot = -BT_LARGE_FLOAT;
-	for (int i = 0; i < count1; ++i)
-	{
-		btScalar dot = b2Dot(normals1[i], dLocal1);
-		if (dot > maxDot)
-		{
-			maxDot = dot;
-			edge = i;
-		}
-	}
+    btScalar maxDot;
+    if( count1 > 0 )
+        edge = (int) dLocal1.maxDot( normals1, count1, maxDot);
 
 	// Get the separation for the edge normal.
 	btScalar s = EdgeSeparation(poly1, xf1, edge, poly2, xf2);
@@ -271,14 +256,13 @@ static void FindIncidentEdge(ClipVertex c[2],
 							 const btBox2dShape* poly1, const btTransform& xf1, int edge1,
 							 const btBox2dShape* poly2, const btTransform& xf2)
 {
-	int count1 = poly1->getVertexCount();
 	const btVector3* normals1 = poly1->getNormals();
 
 	int count2 = poly2->getVertexCount();
 	const btVector3* vertices2 = poly2->getVertices();
 	const btVector3* normals2 = poly2->getNormals();
 
-	btAssert(0 <= edge1 && edge1 < count1);
+	btAssert(0 <= edge1 && edge1 < poly1->getVertexCount());
 
 	// Get the normal of the reference edge in poly2's frame.
 	btVector3 normal1 = b2MulT(xf2.getBasis(), b2Mul(xf1.getBasis(), normals1[edge1]));
@@ -370,7 +354,7 @@ void b2CollidePolygons(btManifoldResult* manifold,
 	btVector3 v11 = vertices1[edge1];
 	btVector3 v12 = edge1 + 1 < count1 ? vertices1[edge1+1] : vertices1[0];
 
-	btVector3 dv = v12 - v11;
+	//btVector3 dv = v12 - v11;
 	btVector3 sideNormal = b2Mul(xf1.getBasis(), v12 - v11);
 	sideNormal.normalize();
 	btVector3 frontNormal = btCrossS(sideNormal, 1.0f);
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btBox2dBox2dCollisionAlgorithm.h b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btBox2dBox2dCollisionAlgorithm.h
index 213421752..6ea6e89bd 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btBox2dBox2dCollisionAlgorithm.h
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btBox2dBox2dCollisionAlgorithm.h
@@ -13,8 +13,8 @@ subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-#ifndef BOX_2D_BOX_2D__COLLISION_ALGORITHM_H
-#define BOX_2D_BOX_2D__COLLISION_ALGORITHM_H
+#ifndef BT_BOX_2D_BOX_2D__COLLISION_ALGORITHM_H
+#define BT_BOX_2D_BOX_2D__COLLISION_ALGORITHM_H
 
 #include "BulletCollision/CollisionDispatch/btActivatingCollisionAlgorithm.h"
 #include "BulletCollision/BroadphaseCollision/btBroadphaseProxy.h"
@@ -33,11 +33,11 @@ public:
 	btBox2dBox2dCollisionAlgorithm(const btCollisionAlgorithmConstructionInfo& ci)
 		: btActivatingCollisionAlgorithm(ci) {}
 
-	virtual void processCollision (btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
+	virtual void processCollision (const btCollisionObjectWrapper* body0Wrap,const btCollisionObjectWrapper* body1Wrap,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
 
 	virtual btScalar calculateTimeOfImpact(btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
 
-	btBox2dBox2dCollisionAlgorithm(btPersistentManifold* mf,const btCollisionAlgorithmConstructionInfo& ci,btCollisionObject* body0,btCollisionObject* body1);
+	btBox2dBox2dCollisionAlgorithm(btPersistentManifold* mf,const btCollisionAlgorithmConstructionInfo& ci,const btCollisionObjectWrapper* body0Wrap,const btCollisionObjectWrapper* body1Wrap);
 
 	virtual ~btBox2dBox2dCollisionAlgorithm();
 
@@ -52,15 +52,15 @@ public:
 
 	struct CreateFunc :public 	btCollisionAlgorithmCreateFunc
 	{
-		virtual	btCollisionAlgorithm* CreateCollisionAlgorithm(btCollisionAlgorithmConstructionInfo& ci, btCollisionObject* body0,btCollisionObject* body1)
+		virtual	btCollisionAlgorithm* CreateCollisionAlgorithm(btCollisionAlgorithmConstructionInfo& ci, const btCollisionObjectWrapper* body0Wrap,const btCollisionObjectWrapper* body1Wrap)
 		{
 			int bbsize = sizeof(btBox2dBox2dCollisionAlgorithm);
 			void* ptr = ci.m_dispatcher1->allocateCollisionAlgorithm(bbsize);
-			return new(ptr) btBox2dBox2dCollisionAlgorithm(0,ci,body0,body1);
+			return new(ptr) btBox2dBox2dCollisionAlgorithm(0,ci,body0Wrap,body1Wrap);	// placement-new into the storage obtained from the dispatcher; 0 is passed as the manifold argument
 		}
 	};
 
 };
 
-#endif //BOX_2D_BOX_2D__COLLISION_ALGORITHM_H
+#endif //BT_BOX_2D_BOX_2D__COLLISION_ALGORITHM_H
 
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btBoxBoxCollisionAlgorithm.cpp b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btBoxBoxCollisionAlgorithm.cpp
index 496288534..ac68968f5 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btBoxBoxCollisionAlgorithm.cpp
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btBoxBoxCollisionAlgorithm.cpp
@@ -18,17 +18,17 @@ subject to the following restrictions:
 #include "BulletCollision/CollisionShapes/btBoxShape.h"
 #include "BulletCollision/CollisionDispatch/btCollisionObject.h"
 #include "btBoxBoxDetector.h"
-
+#include "BulletCollision/CollisionDispatch/btCollisionObjectWrapper.h"
 #define USE_PERSISTENT_CONTACTS 1
 
-btBoxBoxCollisionAlgorithm::btBoxBoxCollisionAlgorithm(btPersistentManifold* mf,const btCollisionAlgorithmConstructionInfo& ci,btCollisionObject* obj0,btCollisionObject* obj1)
-: btActivatingCollisionAlgorithm(ci,obj0,obj1),
+btBoxBoxCollisionAlgorithm::btBoxBoxCollisionAlgorithm(btPersistentManifold* mf,const btCollisionAlgorithmConstructionInfo& ci,const btCollisionObjectWrapper* body0Wrap,const btCollisionObjectWrapper* body1Wrap)
+: btActivatingCollisionAlgorithm(ci,body0Wrap,body1Wrap),	// the base class now receives the wrappers instead of raw btCollisionObject pointers
 m_ownManifold(false),
 m_manifoldPtr(mf)
 {
-	if (!m_manifoldPtr && m_dispatcher->needsCollision(obj0,obj1))
+	if (!m_manifoldPtr && m_dispatcher->needsCollision(body0Wrap->getCollisionObject(),body1Wrap->getCollisionObject()))	// only when no manifold was passed in and the dispatcher reports that the pair needs collision
 	{
-		m_manifoldPtr = m_dispatcher->getNewManifold(obj0,obj1);
+		m_manifoldPtr = m_dispatcher->getNewManifold(body0Wrap->getCollisionObject(),body1Wrap->getCollisionObject());	// the dispatcher API is still called with the unwrapped collision objects; m_ownManifold records that the manifold was obtained here
 		m_ownManifold = true;
 	}
 }
@@ -42,15 +42,14 @@ btBoxBoxCollisionAlgorithm::~btBoxBoxCollisionAlgorithm()
 	}
 }
 
-void btBoxBoxCollisionAlgorithm::processCollision (btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut)
+void btBoxBoxCollisionAlgorithm::processCollision (const btCollisionObjectWrapper* body0Wrap,const btCollisionObjectWrapper* body1Wrap,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut)
 {
 	if (!m_manifoldPtr)
 		return;
 
-	btCollisionObject*	col0 = body0;
-	btCollisionObject*	col1 = body1;
-	btBoxShape* box0 = (btBoxShape*)col0->getCollisionShape();
-	btBoxShape* box1 = (btBoxShape*)col1->getCollisionShape();
+	
+	const btBoxShape* box0 = (btBoxShape*)body0Wrap->getCollisionShape();
+	const btBoxShape* box1 = (btBoxShape*)body1Wrap->getCollisionShape();
 
 
 
@@ -62,8 +61,8 @@ void btBoxBoxCollisionAlgorithm::processCollision (btCollisionObject* body0,btCo
 
 	btDiscreteCollisionDetectorInterface::ClosestPointInput input;
 	input.m_maximumDistanceSquared = BT_LARGE_FLOAT;
-	input.m_transformA = body0->getWorldTransform();
-	input.m_transformB = body1->getWorldTransform();
+	input.m_transformA = body0Wrap->getWorldTransform();
+	input.m_transformB = body1Wrap->getWorldTransform();
 
 	btBoxBoxDetector detector(box0,box1);
 	detector.getClosestPoints(input,*resultOut,dispatchInfo.m_debugDraw);
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btBoxBoxCollisionAlgorithm.h b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btBoxBoxCollisionAlgorithm.h
index e7d2cc25c..59808df5a 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btBoxBoxCollisionAlgorithm.h
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btBoxBoxCollisionAlgorithm.h
@@ -13,8 +13,8 @@ subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-#ifndef BOX_BOX__COLLISION_ALGORITHM_H
-#define BOX_BOX__COLLISION_ALGORITHM_H
+#ifndef BT_BOX_BOX__COLLISION_ALGORITHM_H
+#define BT_BOX_BOX__COLLISION_ALGORITHM_H
 
 #include "btActivatingCollisionAlgorithm.h"
 #include "BulletCollision/BroadphaseCollision/btBroadphaseProxy.h"
@@ -33,11 +33,11 @@ public:
 	btBoxBoxCollisionAlgorithm(const btCollisionAlgorithmConstructionInfo& ci)
 		: btActivatingCollisionAlgorithm(ci) {}
 
-	virtual void processCollision (btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
+	virtual void processCollision (const btCollisionObjectWrapper* body0Wrap,const btCollisionObjectWrapper* body1Wrap,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
 
 	virtual btScalar calculateTimeOfImpact(btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
 
-	btBoxBoxCollisionAlgorithm(btPersistentManifold* mf,const btCollisionAlgorithmConstructionInfo& ci,btCollisionObject* body0,btCollisionObject* body1);
+	btBoxBoxCollisionAlgorithm(btPersistentManifold* mf,const btCollisionAlgorithmConstructionInfo& ci,const btCollisionObjectWrapper* body0Wrap,const btCollisionObjectWrapper* body1Wrap);
 
 	virtual ~btBoxBoxCollisionAlgorithm();
 
@@ -52,15 +52,15 @@ public:
 
 	struct CreateFunc :public 	btCollisionAlgorithmCreateFunc
 	{
-		virtual	btCollisionAlgorithm* CreateCollisionAlgorithm(btCollisionAlgorithmConstructionInfo& ci, btCollisionObject* body0,btCollisionObject* body1)
+		virtual	btCollisionAlgorithm* CreateCollisionAlgorithm(btCollisionAlgorithmConstructionInfo& ci, const btCollisionObjectWrapper* body0Wrap,const btCollisionObjectWrapper* body1Wrap)
 		{
 			int bbsize = sizeof(btBoxBoxCollisionAlgorithm);
 			void* ptr = ci.m_dispatcher1->allocateCollisionAlgorithm(bbsize);
-			return new(ptr) btBoxBoxCollisionAlgorithm(0,ci,body0,body1);
+			return new(ptr) btBoxBoxCollisionAlgorithm(0,ci,body0Wrap,body1Wrap);	// placement-new into the storage obtained from the dispatcher; 0 is passed as the manifold argument
 		}
 	};
 
 };
 
-#endif //BOX_BOX__COLLISION_ALGORITHM_H
+#endif //BT_BOX_BOX__COLLISION_ALGORITHM_H
 
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btBoxBoxDetector.cpp b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btBoxBoxDetector.cpp
index c802bea2b..7043bde34 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btBoxBoxDetector.cpp
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btBoxBoxDetector.cpp
@@ -1,4 +1,3 @@
-
 /*
  * Box-Box collision detection re-distributed under the ZLib license with permission from Russell L. Smith
  * Original version is from Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.
@@ -25,7 +24,7 @@ subject to the following restrictions:
 #include <float.h>
 #include <string.h>
 
-btBoxBoxDetector::btBoxBoxDetector(btBoxShape* box1,btBoxShape* box2)
+btBoxBoxDetector::btBoxBoxDetector(const btBoxShape* box1,const btBoxShape* box2)
 : m_box1(box1),
 m_box2(box2)
 {
@@ -333,9 +332,9 @@ int dBoxBox2 (const btVector3& p1, const dMatrix3 R1,
 #undef TST
 #define TST(expr1,expr2,n1,n2,n3,cc) \
   s2 = btFabs(expr1) - (expr2); \
-  if (s2 > 0) return 0; \
+  if (s2 > SIMD_EPSILON) return 0; \
   l = btSqrt((n1)*(n1) + (n2)*(n2) + (n3)*(n3)); \
-  if (l > 0) { \
+  if (l > SIMD_EPSILON) { \
     s2 /= l; \
     if (s2*fudge_factor > s) { \
       s = s2; \
@@ -346,6 +345,20 @@ int dBoxBox2 (const btVector3& p1, const dMatrix3 R1,
     } \
   }
 
+  btScalar fudge2 (1.0e-5f);
+
+  Q11 += fudge2;
+  Q12 += fudge2;
+  Q13 += fudge2;
+
+  Q21 += fudge2;
+  Q22 += fudge2;
+  Q23 += fudge2;
+
+  Q31 += fudge2;
+  Q32 += fudge2;
+  Q33 += fudge2;
+
   // separating axis = u1 x (v1,v2,v3)
   TST(pp[2]*R21-pp[1]*R31,(A[1]*Q31+A[2]*Q21+B[1]*Q13+B[2]*Q12),0,-R31,R21,7);
   TST(pp[2]*R22-pp[1]*R32,(A[1]*Q32+A[2]*Q22+B[0]*Q13+B[2]*Q11),0,-R32,R22,8);
@@ -424,6 +437,7 @@ int dBoxBox2 (const btVector3& p1, const dMatrix3 R1,
 		output.addContactPoint(-normal,pointInWorld,-*depth);
 #else
 		output.addContactPoint(-normal,pb,-*depth);
+
 #endif //
 		*return_code = code;
 	}
@@ -593,21 +607,30 @@ int dBoxBox2 (const btVector3& p1, const dMatrix3 R1,
   if (maxc < 1) maxc = 1;
 
   if (cnum <= maxc) {
+
+	  if (code<4) 
+	  {
     // we have less contacts than we need, so we use them all
-    for (j=0; j < cnum; j++) {
-
-		//AddContactPoint...
-
-		//dContactGeom *con = CONTACT(contact,skip*j);
-      //for (i=0; i<3; i++) con->pos[i] = point[j*3+i] + pa[i];
-      //con->depth = dep[j];
-
+    for (j=0; j < cnum; j++) 
+	{
 		btVector3 pointInWorld;
 		for (i=0; i<3; i++) 
 			pointInWorld[i] = point[j*3+i] + pa[i];
 		output.addContactPoint(-normal,pointInWorld,-dep[j]);
 
     }
+	  } else
+	  {
+		  // we have less contacts than we need, so we use them all
+		for (j=0; j < cnum; j++) 
+		{
+			btVector3 pointInWorld;
+			for (i=0; i<3; i++) 
+				pointInWorld[i] = point[j*3+i] + pa[i]-normal[i]*dep[j];
+				//pointInWorld[i] = point[j*3+i] + pa[i];
+			output.addContactPoint(-normal,pointInWorld,-dep[j]);
+		}
+	  }
   }
   else {
     // we have more contacts than are wanted, some of them must be culled.
@@ -632,7 +655,13 @@ int dBoxBox2 (const btVector3& p1, const dMatrix3 R1,
 		btVector3 posInWorld;
 		for (i=0; i<3; i++) 
 			posInWorld[i] = point[iret[j]*3+i] + pa[i];
-		output.addContactPoint(-normal,posInWorld,-dep[iret[j]]);
+		if (code<4) 
+	   {
+			output.addContactPoint(-normal,posInWorld,-dep[iret[j]]);
+		} else
+		{
+			output.addContactPoint(-normal,posInWorld-normal*dep[iret[j]],-dep[iret[j]]);
+		}
     }
     cnum = maxc;
   }
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btBoxBoxDetector.h b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btBoxBoxDetector.h
index 605294d47..392437770 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btBoxBoxDetector.h
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btBoxBoxDetector.h
@@ -16,8 +16,8 @@ subject to the following restrictions:
 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
 3. This notice may not be removed or altered from any source distribution.
 */
-#ifndef BOX_BOX_DETECTOR_H
-#define BOX_BOX_DETECTOR_H
+#ifndef BT_BOX_BOX_DETECTOR_H
+#define BT_BOX_BOX_DETECTOR_H
 
 
 class btBoxShape;
@@ -28,12 +28,12 @@ class btBoxShape;
 /// re-distributed under the Zlib license with permission from Russell L. Smith
 struct btBoxBoxDetector : public btDiscreteCollisionDetectorInterface
 {
-	btBoxShape* m_box1;
-	btBoxShape* m_box2;
+	const btBoxShape* m_box1;
+	const btBoxShape* m_box2;
 
 public:
 
-	btBoxBoxDetector(btBoxShape* box1,btBoxShape* box2);
+	btBoxBoxDetector(const btBoxShape* box1,const btBoxShape* box2);
 
 	virtual ~btBoxBoxDetector() {};
 
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btCollisionConfiguration.h b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btCollisionConfiguration.h
index 1db51a36d..f63e0923b 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btCollisionConfiguration.h
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btCollisionConfiguration.h
@@ -15,6 +15,7 @@ subject to the following restrictions:
 
 #ifndef BT_COLLISION_CONFIGURATION
 #define BT_COLLISION_CONFIGURATION
+
 struct btCollisionAlgorithmCreateFunc;
 
 class btStackAlloc;
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btCollisionCreateFunc.h b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btCollisionCreateFunc.h
index a6da5f61a..62ee66c4e 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btCollisionCreateFunc.h
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btCollisionCreateFunc.h
@@ -13,13 +13,13 @@ subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-#ifndef COLLISION_CREATE_FUNC
-#define COLLISION_CREATE_FUNC
+#ifndef BT_COLLISION_CREATE_FUNC
+#define BT_COLLISION_CREATE_FUNC
 
 #include "LinearMath/btAlignedObjectArray.h"
 class btCollisionAlgorithm;
 class btCollisionObject;
-
+struct btCollisionObjectWrapper;
 struct btCollisionAlgorithmConstructionInfo;
 
 ///Used by the btCollisionDispatcher to register and create instances for btCollisionAlgorithm
@@ -33,13 +33,13 @@ struct btCollisionAlgorithmCreateFunc
 	}
 	virtual ~btCollisionAlgorithmCreateFunc(){};
 
-	virtual	btCollisionAlgorithm* CreateCollisionAlgorithm(btCollisionAlgorithmConstructionInfo& , btCollisionObject* body0,btCollisionObject* body1)
+	virtual	btCollisionAlgorithm* CreateCollisionAlgorithm(btCollisionAlgorithmConstructionInfo& , const btCollisionObjectWrapper* body0Wrap,const btCollisionObjectWrapper* body1Wrap)
 	{
 		
-		(void)body0;
-		(void)body1;
+		(void)body0Wrap;
+		(void)body1Wrap;
 		return 0;
 	}
 };
-#endif //COLLISION_CREATE_FUNC
+#endif //BT_COLLISION_CREATE_FUNC
 
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btCollisionDispatcher.cpp b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btCollisionDispatcher.cpp
index e6ff2130a..d1ef1ffc9 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btCollisionDispatcher.cpp
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btCollisionDispatcher.cpp
@@ -25,6 +25,7 @@ subject to the following restrictions:
 #include "BulletCollision/BroadphaseCollision/btOverlappingPairCache.h"
 #include "LinearMath/btPoolAllocator.h"
 #include "BulletCollision/CollisionDispatch/btCollisionConfiguration.h"
+#include "BulletCollision/CollisionDispatch/btCollisionObjectWrapper.h"
 
 int gNumManifold = 0;
 
@@ -34,9 +35,7 @@ int gNumManifold = 0;
 
 
 btCollisionDispatcher::btCollisionDispatcher (btCollisionConfiguration* collisionConfiguration): 
-	m_count(0),
-	m_useIslands(true),
-	m_staticWarningReported(false),
+m_dispatcherFlags(btCollisionDispatcher::CD_USE_RELATIVE_CONTACT_BREAKING_THRESHOLD),
 	m_collisionConfiguration(collisionConfiguration)
 {
 	int i;
@@ -69,31 +68,39 @@ btCollisionDispatcher::~btCollisionDispatcher()
 {
 }
 
-btPersistentManifold*	btCollisionDispatcher::getNewManifold(void* b0,void* b1) 
+btPersistentManifold*	btCollisionDispatcher::getNewManifold(const btCollisionObject* body0,const btCollisionObject* body1) 
 { 
 	gNumManifold++;
 	
 	//btAssert(gNumManifold < 65535);
 	
 
-	btCollisionObject* body0 = (btCollisionObject*)b0;
-	btCollisionObject* body1 = (btCollisionObject*)b1;
 
-	//test for Bullet 2.74: use a relative contact breaking threshold without clamping against 'gContactBreakingThreshold'
-	//btScalar contactBreakingThreshold = btMin(gContactBreakingThreshold,btMin(body0->getCollisionShape()->getContactBreakingThreshold(),body1->getCollisionShape()->getContactBreakingThreshold()));
-	btScalar contactBreakingThreshold = btMin(body0->getCollisionShape()->getContactBreakingThreshold(),body1->getCollisionShape()->getContactBreakingThreshold());
+	//optional relative contact breaking threshold, turned on by default (use setDispatcherFlags to switch off feature for improved performance)
+	
+	btScalar contactBreakingThreshold =  (m_dispatcherFlags & btCollisionDispatcher::CD_USE_RELATIVE_CONTACT_BREAKING_THRESHOLD) ? 
+		btMin(body0->getCollisionShape()->getContactBreakingThreshold(gContactBreakingThreshold) , body1->getCollisionShape()->getContactBreakingThreshold(gContactBreakingThreshold))
+		: gContactBreakingThreshold ;
 
 	btScalar contactProcessingThreshold = btMin(body0->getContactProcessingThreshold(),body1->getContactProcessingThreshold());
 		
-	void* mem = 0;
+ 	void* mem = 0;
 	
 	if (m_persistentManifoldPoolAllocator->getFreeCount())
 	{
 		mem = m_persistentManifoldPoolAllocator->allocate(sizeof(btPersistentManifold));
 	} else
 	{
-		mem = btAlignedAlloc(sizeof(btPersistentManifold),16);
-
+		//the manifold pool is exhausted: by default we fall back to dynamically allocating memory. If CD_DISABLE_CONTACTPOOL_DYNAMIC_ALLOCATION is set (contiguous contact pool required) we assert and return 0 instead.
+		if ((m_dispatcherFlags&CD_DISABLE_CONTACTPOOL_DYNAMIC_ALLOCATION)==0)
+		{
+			mem = btAlignedAlloc(sizeof(btPersistentManifold),16);
+		} else
+		{
+			btAssert(0);
+			//make sure to increase the m_defaultMaxPersistentManifoldPoolSize in the btDefaultCollisionConstructionInfo/btDefaultCollisionConfiguration
+			return 0;
+		}
 	}
 	btPersistentManifold* manifold = new(mem) btPersistentManifold (body0,body1,0,contactBreakingThreshold,contactProcessingThreshold);
 	manifold->m_index1a = m_manifoldsPtr.size();
@@ -135,14 +142,14 @@ void btCollisionDispatcher::releaseManifold(btPersistentManifold* manifold)
 
 	
 
-btCollisionAlgorithm* btCollisionDispatcher::findAlgorithm(btCollisionObject* body0,btCollisionObject* body1,btPersistentManifold* sharedManifold)
+btCollisionAlgorithm* btCollisionDispatcher::findAlgorithm(const btCollisionObjectWrapper* body0Wrap,const btCollisionObjectWrapper* body1Wrap,btPersistentManifold* sharedManifold)
 {
 	
 	btCollisionAlgorithmConstructionInfo ci;
 
 	ci.m_dispatcher1 = this;
 	ci.m_manifold = sharedManifold;
-	btCollisionAlgorithm* algo = m_doubleDispatch[body0->getCollisionShape()->getShapeType()][body1->getCollisionShape()->getShapeType()]->CreateCollisionAlgorithm(ci,body0,body1);
+	btCollisionAlgorithm* algo = m_doubleDispatch[body0Wrap->getCollisionShape()->getShapeType()][body1Wrap->getCollisionShape()->getShapeType()]->CreateCollisionAlgorithm(ci,body0Wrap,body1Wrap);
 
 	return algo;
 }
@@ -150,7 +157,7 @@ btCollisionAlgorithm* btCollisionDispatcher::findAlgorithm(btCollisionObject* bo
 
 
 
-bool	btCollisionDispatcher::needsResponse(btCollisionObject* body0,btCollisionObject* body1)
+bool	btCollisionDispatcher::needsResponse(const btCollisionObject* body0,const btCollisionObject* body1)
 {
 	//here you can do filtering
 	bool hasResponse = 
@@ -161,7 +168,7 @@ bool	btCollisionDispatcher::needsResponse(btCollisionObject* body0,btCollisionOb
 	return hasResponse;
 }
 
-bool	btCollisionDispatcher::needsCollision(btCollisionObject* body0,btCollisionObject* body1)
+bool	btCollisionDispatcher::needsCollision(const btCollisionObject* body0,const btCollisionObject* body1)
 {
 	btAssert(body0);
 	btAssert(body1);
@@ -169,13 +176,12 @@ bool	btCollisionDispatcher::needsCollision(btCollisionObject* body0,btCollisionO
 	bool needsCollision = true;
 
 #ifdef BT_DEBUG
-	if (!m_staticWarningReported)
+	if (!(m_dispatcherFlags & btCollisionDispatcher::CD_STATIC_STATIC_REPORTED))
 	{
 		//broadphase filtering already deals with this
-		if ((body0->isStaticObject() || body0->isKinematicObject()) &&
-			(body1->isStaticObject() || body1->isKinematicObject()))
+		if (body0->isStaticOrKinematicObject() && body1->isStaticOrKinematicObject())
 		{
-			m_staticWarningReported = true;
+			m_dispatcherFlags |= btCollisionDispatcher::CD_STATIC_STATIC_REPORTED;
 			printf("warning btCollisionDispatcher::needsCollision: static-static collision!\n");
 		}
 	}
@@ -252,20 +258,25 @@ void btCollisionDispatcher::defaultNearCallback(btBroadphasePair& collisionPair,
 
 		if (dispatcher.needsCollision(colObj0,colObj1))
 		{
+			btCollisionObjectWrapper obj0Wrap(0,colObj0->getCollisionShape(),colObj0,colObj0->getWorldTransform());
+			btCollisionObjectWrapper obj1Wrap(0,colObj1->getCollisionShape(),colObj1,colObj1->getWorldTransform());
+
+
 			//dispatcher will keep algorithms persistent in the collision pair
 			if (!collisionPair.m_algorithm)
 			{
-				collisionPair.m_algorithm = dispatcher.findAlgorithm(colObj0,colObj1);
+				collisionPair.m_algorithm = dispatcher.findAlgorithm(&obj0Wrap,&obj1Wrap);
 			}
 
 			if (collisionPair.m_algorithm)
 			{
-				btManifoldResult contactPointResult(colObj0,colObj1);
+				btManifoldResult contactPointResult(&obj0Wrap,&obj1Wrap);
 				
 				if (dispatchInfo.m_dispatchFunc == 		btDispatcherInfo::DISPATCH_DISCRETE)
 				{
 					//discrete collision detection query
-					collisionPair.m_algorithm->processCollision(colObj0,colObj1,dispatchInfo,&contactPointResult);
+					
+					collisionPair.m_algorithm->processCollision(&obj0Wrap,&obj1Wrap,dispatchInfo,&contactPointResult);
 				} else
 				{
 					//continuous collision detection query, time of impact (toi)
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btCollisionDispatcher.h b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btCollisionDispatcher.h
index a9c9cd414..92696ee54 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btCollisionDispatcher.h
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btCollisionDispatcher.h
@@ -13,8 +13,8 @@ subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-#ifndef COLLISION__DISPATCHER_H
-#define COLLISION__DISPATCHER_H
+#ifndef BT_COLLISION__DISPATCHER_H
+#define BT_COLLISION__DISPATCHER_H
 
 #include "BulletCollision/BroadphaseCollision/btDispatcher.h"
 #include "BulletCollision/NarrowPhaseCollision/btPersistentManifold.h"
@@ -42,14 +42,13 @@ typedef void (*btNearCallback)(btBroadphasePair& collisionPair, btCollisionDispa
 ///Time of Impact, Closest Points and Penetration Depth.
 class btCollisionDispatcher : public btDispatcher
 {
-	int m_count;
-	
+
+protected:
+
+	int		m_dispatcherFlags;
+
 	btAlignedObjectArray<btPersistentManifold*>	m_manifoldsPtr;
 
-	bool m_useIslands;
-
-	bool	m_staticWarningReported;
-	
 	btManifoldResult	m_defaultManifoldResult;
 
 	btNearCallback		m_nearCallback;
@@ -59,13 +58,29 @@ class btCollisionDispatcher : public btDispatcher
 	btPoolAllocator*	m_persistentManifoldPoolAllocator;
 
 	btCollisionAlgorithmCreateFunc* m_doubleDispatch[MAX_BROADPHASE_COLLISION_TYPES][MAX_BROADPHASE_COLLISION_TYPES];
-	
 
 	btCollisionConfiguration*	m_collisionConfiguration;
 
 
 public:
 
+	enum DispatcherFlags
+	{
+		CD_STATIC_STATIC_REPORTED = 1,
+		CD_USE_RELATIVE_CONTACT_BREAKING_THRESHOLD = 2,
+		CD_DISABLE_CONTACTPOOL_DYNAMIC_ALLOCATION = 4
+	};
+
+	int	getDispatcherFlags() const
+	{
+		return m_dispatcherFlags;
+	}
+
+	void	setDispatcherFlags(int flags)
+	{
+		m_dispatcherFlags = flags;
+	}
+
 	///registerCollisionCreateFunc allows registration of custom/alternative collision create functions
 	void	registerCollisionCreateFunc(int proxyType0,int proxyType1, btCollisionAlgorithmCreateFunc* createFunc);
 
@@ -76,7 +91,7 @@ public:
 
 	btPersistentManifold**	getInternalManifoldPointer()
 	{
-		return &m_manifoldsPtr[0];
+		return m_manifoldsPtr.size()? &m_manifoldsPtr[0] : 0;
 	}
 
 	 btPersistentManifold* getManifoldByIndexInternal(int index)
@@ -93,19 +108,18 @@ public:
 
 	virtual ~btCollisionDispatcher();
 
-	virtual btPersistentManifold*	getNewManifold(void* b0,void* b1);
+	virtual btPersistentManifold*	getNewManifold(const btCollisionObject* b0,const btCollisionObject* b1);
 	
 	virtual void releaseManifold(btPersistentManifold* manifold);
 
 
 	virtual void clearManifold(btPersistentManifold* manifold);
 
-			
-	btCollisionAlgorithm* findAlgorithm(btCollisionObject* body0,btCollisionObject* body1,btPersistentManifold* sharedManifold = 0);
+	btCollisionAlgorithm* findAlgorithm(const btCollisionObjectWrapper* body0Wrap,const btCollisionObjectWrapper* body1Wrap,btPersistentManifold* sharedManifold = 0);
 		
-	virtual bool	needsCollision(btCollisionObject* body0,btCollisionObject* body1);
+	virtual bool	needsCollision(const btCollisionObject* body0,const btCollisionObject* body1);
 	
-	virtual bool	needsResponse(btCollisionObject* body0,btCollisionObject* body1);
+	virtual bool	needsResponse(const btCollisionObject* body0,const btCollisionObject* body1);
 	
 	virtual void	dispatchAllCollisionPairs(btOverlappingPairCache* pairCache,const btDispatcherInfo& dispatchInfo,btDispatcher* dispatcher) ;
 
@@ -141,7 +155,17 @@ public:
 		m_collisionConfiguration = config;
 	}
 
+	virtual	btPoolAllocator*	getInternalManifoldPool()
+	{
+		return m_persistentManifoldPoolAllocator;
+	}
+
+	virtual	const btPoolAllocator*	getInternalManifoldPool() const
+	{
+		return m_persistentManifoldPoolAllocator;
+	}
+
 };
 
-#endif //COLLISION__DISPATCHER_H
+#endif //BT_COLLISION__DISPATCHER_H
 
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btCollisionObject.cpp b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btCollisionObject.cpp
index c04e70c53..cf8ed59a5 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btCollisionObject.cpp
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btCollisionObject.cpp
@@ -15,6 +15,7 @@ subject to the following restrictions:
 
 
 #include "btCollisionObject.h"
+#include "LinearMath/btSerializer.h"
 
 btCollisionObject::btCollisionObject()
 	:	m_anisotropicFriction(1.f,1.f,1.f),
@@ -22,6 +23,7 @@ btCollisionObject::btCollisionObject()
 	m_contactProcessingThreshold(BT_LARGE_FLOAT),
 		m_broadphaseHandle(0),
 		m_collisionShape(0),
+		m_extensionPointer(0),
 		m_rootCollisionShape(0),
 		m_collisionFlags(btCollisionObject::CF_STATIC_OBJECT),
 		m_islandTag1(-1),
@@ -29,9 +31,10 @@ btCollisionObject::btCollisionObject()
 		m_activationState1(1),
 		m_deactivationTime(btScalar(0.)),
 		m_friction(btScalar(0.5)),
 		m_restitution(btScalar(0.)),
+		m_rollingFriction(0.0f), //listed after m_restitution to match the member declaration order
-		m_userObjectPointer(0),
 		m_internalType(CO_COLLISION_OBJECT),
+		m_userObjectPointer(0),
 		m_hitFraction(btScalar(1.)),
 		m_ccdSweptSphereRadius(btScalar(0.)),
 		m_ccdMotionThreshold(btScalar(0.)),
@@ -44,18 +47,18 @@ btCollisionObject::~btCollisionObject()
 {
 }
 
-void btCollisionObject::setActivationState(int newState) 
+void btCollisionObject::setActivationState(int newState) const
 { 
 	if ( (m_activationState1 != DISABLE_DEACTIVATION) && (m_activationState1 != DISABLE_SIMULATION))
 		m_activationState1 = newState;
 }
 
-void btCollisionObject::forceActivationState(int newState)
+void btCollisionObject::forceActivationState(int newState) const
 {
 	m_activationState1 = newState;
 }
 
-void btCollisionObject::activate(bool forceActivation)
+void btCollisionObject::activate(bool forceActivation) const
 {
 	if (forceActivation || !(m_collisionFlags & (CF_STATIC_OBJECT|CF_KINEMATIC_OBJECT)))
 	{
@@ -64,5 +67,50 @@ void btCollisionObject::activate(bool forceActivation)
 	}
 }
 
+const char* btCollisionObject::serialize(void* dataBuffer, btSerializer* serializer) const
+{
+
+	btCollisionObjectData* dataOut = (btCollisionObjectData*)dataBuffer;
+
+	m_worldTransform.serialize(dataOut->m_worldTransform);
+	m_interpolationWorldTransform.serialize(dataOut->m_interpolationWorldTransform);
+	m_interpolationLinearVelocity.serialize(dataOut->m_interpolationLinearVelocity);
+	m_interpolationAngularVelocity.serialize(dataOut->m_interpolationAngularVelocity);
+	m_anisotropicFriction.serialize(dataOut->m_anisotropicFriction);
+	dataOut->m_hasAnisotropicFriction = m_hasAnisotropicFriction;
+	dataOut->m_contactProcessingThreshold = m_contactProcessingThreshold;
+	dataOut->m_broadphaseHandle = 0;
+	dataOut->m_collisionShape = serializer->getUniquePointer(m_collisionShape);
+	dataOut->m_rootCollisionShape = 0;//@todo
+	dataOut->m_collisionFlags = m_collisionFlags;
+	dataOut->m_islandTag1 = m_islandTag1;
+	dataOut->m_companionId = m_companionId;
+	dataOut->m_activationState1 = m_activationState1;
+	dataOut->m_deactivationTime = m_deactivationTime;
+	dataOut->m_friction = m_friction;
+	dataOut->m_rollingFriction = m_rollingFriction;
+	dataOut->m_restitution = m_restitution;
+	dataOut->m_internalType = m_internalType;
+	
+	char* name = (char*) serializer->findNameForPointer(this);
+	dataOut->m_name = (char*)serializer->getUniquePointer(name);
+	if (dataOut->m_name)
+	{
+		serializer->serializeName(name);
+	}
+	dataOut->m_hitFraction = m_hitFraction;
+	dataOut->m_ccdSweptSphereRadius = m_ccdSweptSphereRadius;
+	dataOut->m_ccdMotionThreshold = m_ccdMotionThreshold;
+	dataOut->m_checkCollideWith = m_checkCollideWith;
+
+	return btCollisionObjectDataName;
+}
 
 
+void btCollisionObject::serializeSingleObject(class btSerializer* serializer) const
+{
+	int len = calculateSerializeBufferSize();
+	btChunk* chunk = serializer->allocate(len,1);
+	const char* structType = serialize(chunk->m_oldPtr, serializer);
+	serializer->finalizeChunk(chunk,structType,BT_COLLISIONOBJECT_CODE,(void*)this);
+}
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btCollisionObject.h b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btCollisionObject.h
index c4968c9b7..2f17967fe 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btCollisionObject.h
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btCollisionObject.h
@@ -13,8 +13,8 @@ subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-#ifndef COLLISION_OBJECT_H
-#define COLLISION_OBJECT_H
+#ifndef BT_COLLISION_OBJECT_H
+#define BT_COLLISION_OBJECT_H
 
 #include "LinearMath/btTransform.h"
 
@@ -27,13 +27,21 @@ subject to the following restrictions:
 
 struct	btBroadphaseProxy;
 class	btCollisionShape;
+struct btCollisionShapeData;
 #include "LinearMath/btMotionState.h"
 #include "LinearMath/btAlignedAllocator.h"
 #include "LinearMath/btAlignedObjectArray.h"
 
-
 typedef btAlignedObjectArray<class btCollisionObject*> btCollisionObjectArray;
 
+#ifdef BT_USE_DOUBLE_PRECISION
+#define btCollisionObjectData btCollisionObjectDoubleData
+#define btCollisionObjectDataName "btCollisionObjectDoubleData"
+#else
+#define btCollisionObjectData btCollisionObjectFloatData
+#define btCollisionObjectDataName "btCollisionObjectFloatData"
+#endif
+
 
 /// btCollisionObject can be used to manage collision detection objects. 
 /// btCollisionObject maintains all information that is needed for a collision detection: Shape, Transform and AABB proxy.
@@ -53,12 +61,14 @@ protected:
 	btVector3	m_interpolationLinearVelocity;
 	btVector3	m_interpolationAngularVelocity;
 	
-	btVector3		m_anisotropicFriction;
-	bool				m_hasAnisotropicFriction;
-	btScalar		m_contactProcessingThreshold;	
+	btVector3	m_anisotropicFriction;
+	int			m_hasAnisotropicFriction;
+	btScalar	m_contactProcessingThreshold;	
 
 	btBroadphaseProxy*		m_broadphaseHandle;
 	btCollisionShape*		m_collisionShape;
+	///m_extensionPointer is used by some internal low-level Bullet extensions.
+	void*					m_extensionPointer;
 	
 	///m_rootCollisionShape is temporarily used to store the original collision shape
 	///The m_collisionShape might be temporarily replaced by a child collision shape during collision detection purposes
@@ -70,19 +80,20 @@ protected:
 	int				m_islandTag1;
 	int				m_companionId;
 
-	int				m_activationState1;
-	btScalar			m_deactivationTime;
+	mutable int				m_activationState1;
+	mutable btScalar			m_deactivationTime;
 
 	btScalar		m_friction;
 	btScalar		m_restitution;
-
-	///users can point to their objects, m_userPointer is not used by Bullet, see setUserPointer/getUserPointer
-	void*			m_userObjectPointer;
+	btScalar		m_rollingFriction;
 
 	///m_internalType is reserved to distinguish Bullet's btCollisionObject, btRigidBody, btSoftBody, btGhostObject etc.
 	///do not assign your own m_internalType unless you write a new dynamics object class.
 	int				m_internalType;
 
+	///users can point to their objects, m_userObjectPointer is not used by Bullet, see setUserPointer/getUserPointer
+	void*			m_userObjectPointer;
+
 	///time of impact calculation
 	btScalar		m_hitFraction; 
 	
@@ -93,11 +104,9 @@ protected:
 	btScalar		m_ccdMotionThreshold;
 	
 	/// If some object should have elaborate collision filtering by sub-classes
-	bool			m_checkCollideWith;
+	int			m_checkCollideWith;
 
-	char	m_pad[7];
-
-	virtual bool	checkCollideWithOverride(btCollisionObject* /* co */)
+	virtual bool	checkCollideWithOverride(const btCollisionObject* /* co */) const
 	{
 		return true;
 	}
@@ -112,18 +121,28 @@ public:
 		CF_KINEMATIC_OBJECT= 2,
 		CF_NO_CONTACT_RESPONSE = 4,
 		CF_CUSTOM_MATERIAL_CALLBACK = 8,//this allows per-triangle material (friction/restitution)
-		CF_CHARACTER_OBJECT = 16
+		CF_CHARACTER_OBJECT = 16,
+		CF_DISABLE_VISUALIZE_OBJECT = 32, //disable debug drawing
+		CF_DISABLE_SPU_COLLISION_PROCESSING = 64//disable parallel/SPU processing
 	};
 
 	enum	CollisionObjectTypes
 	{
 		CO_COLLISION_OBJECT =1,
-		CO_RIGID_BODY,
+		CO_RIGID_BODY=2,
 		///CO_GHOST_OBJECT keeps track of all objects overlapping its AABB and that pass its collision filter
 		///It is useful for collision sensors, explosion objects, character controller etc.
-		CO_GHOST_OBJECT,
-		CO_SOFT_BODY,
-		CO_HF_FLUID
+		CO_GHOST_OBJECT=4,
+		CO_SOFT_BODY=8,
+		CO_HF_FLUID=16,
+		CO_USER_TYPE=32
+	};
+
+	enum AnisotropicFrictionFlags
+	{
+		CF_ANISOTROPIC_FRICTION_DISABLED=0,
+		CF_ANISOTROPIC_FRICTION = 1,
+		CF_ANISOTROPIC_ROLLING_FRICTION = 2
 	};
 
 	SIMD_FORCE_INLINE bool mergesSimulationIslands() const
@@ -136,14 +155,15 @@ public:
 	{
 		return m_anisotropicFriction;
 	}
-	void	setAnisotropicFriction(const btVector3& anisotropicFriction)
+	void	setAnisotropicFriction(const btVector3& anisotropicFriction, int frictionMode = CF_ANISOTROPIC_FRICTION)
 	{
 		m_anisotropicFriction = anisotropicFriction;
-		m_hasAnisotropicFriction = (anisotropicFriction[0]!=1.f) || (anisotropicFriction[1]!=1.f) || (anisotropicFriction[2]!=1.f);
+		bool isNonUnity = (anisotropicFriction[0]!=1.f) || (anisotropicFriction[1]!=1.f) || (anisotropicFriction[2]!=1.f); //true when any component differs from 1
+		m_hasAnisotropicFriction = isNonUnity?frictionMode : 0; //a friction vector of all ones stores 0 (CF_ANISOTROPIC_FRICTION_DISABLED)
 	}
-	bool	hasAnisotropicFriction() const
+	bool	hasAnisotropicFriction(int frictionMode = CF_ANISOTROPIC_FRICTION) const
 	{
-		return m_hasAnisotropicFriction;
+		return (m_hasAnisotropicFriction&frictionMode)!=0;
 	}
 
 	///the constraint solver can discard solving contacts, if the distance is above this threshold. 0 by default.
@@ -196,26 +216,26 @@ public:
 		return m_collisionShape;
 	}
 
-	SIMD_FORCE_INLINE const btCollisionShape*	getRootCollisionShape() const
-	{
-		return m_rootCollisionShape;
-	}
+	
 
-	SIMD_FORCE_INLINE btCollisionShape*	getRootCollisionShape()
-	{
-		return m_rootCollisionShape;
-	}
+	
 
-	///Avoid using this internal API call
-	///internalSetTemporaryCollisionShape is used to temporary replace the actual collision shape by a child collision shape.
-	void	internalSetTemporaryCollisionShape(btCollisionShape* collisionShape)
+	///Avoid using this internal API call, the extension pointer is used by some Bullet extensions. 
+	///If you need to store your own user pointer, use 'setUserPointer/getUserPointer' instead.
+	void*		internalGetExtensionPointer() const
 	{
-		m_collisionShape = collisionShape;
+		return m_extensionPointer;
+	}
+	///Avoid using this internal API call, the extension pointer is used by some Bullet extensions
+	///If you need to store your own user pointer, use 'setUserPointer/getUserPointer' instead.
+	void	internalSetExtensionPointer(void* pointer)
+	{
+		m_extensionPointer = pointer;
 	}
 
 	SIMD_FORCE_INLINE	int	getActivationState() const { return m_activationState1;}
 	
-	void setActivationState(int newState);
+	void setActivationState(int newState) const;
 
 	void	setDeactivationTime(btScalar time)
 	{
@@ -226,9 +246,9 @@ public:
 		return m_deactivationTime;
 	}
 
-	void forceActivationState(int newState);
+	void forceActivationState(int newState) const;
 
-	void	activate(bool forceActivation = false);
+	void	activate(bool forceActivation = false) const;
 
 	SIMD_FORCE_INLINE bool isActive() const
 	{
@@ -252,6 +272,16 @@ public:
 		return m_friction;
 	}
 
+	void	setRollingFriction(btScalar frict)
+	{
+		m_rollingFriction = frict;
+	}
+	btScalar	getRollingFriction() const
+	{
+		return m_rollingFriction;
+	}
+
+
 	///reserved for Bullet internal usage
 	int	getInternalType() const
 	{
@@ -409,13 +439,96 @@ public:
 	}
 
 
-	inline bool checkCollideWith(btCollisionObject* co)
+	inline bool checkCollideWith(const btCollisionObject* co) const
 	{
 		if (m_checkCollideWith)
 			return checkCollideWithOverride(co);
 
 		return true;
 	}
+
+	virtual	int	calculateSerializeBufferSize()	const;
+
+	///fills the dataBuffer and returns the struct name (and 0 on failure)
+	virtual	const char*	serialize(void* dataBuffer, class btSerializer* serializer) const;
+
+	virtual void serializeSingleObject(class btSerializer* serializer) const;
+
 };
 
-#endif //COLLISION_OBJECT_H
+///do not change these serialization structures: any change requires an updated sBulletDNAstr/sBulletDNAstr64
+struct	btCollisionObjectDoubleData
+{
+	void					*m_broadphaseHandle;
+	void					*m_collisionShape;
+	btCollisionShapeData	*m_rootCollisionShape;
+	char					*m_name;
+
+	btTransformDoubleData	m_worldTransform;
+	btTransformDoubleData	m_interpolationWorldTransform;
+	btVector3DoubleData		m_interpolationLinearVelocity;
+	btVector3DoubleData		m_interpolationAngularVelocity;
+	btVector3DoubleData		m_anisotropicFriction;
+	double					m_contactProcessingThreshold;	
+	double					m_deactivationTime;
+	double					m_friction;
+	double					m_rollingFriction;
+	double					m_restitution;
+	double					m_hitFraction; 
+	double					m_ccdSweptSphereRadius;
+	double					m_ccdMotionThreshold;
+
+	int						m_hasAnisotropicFriction;
+	int						m_collisionFlags;
+	int						m_islandTag1;
+	int						m_companionId;
+	int						m_activationState1;
+	int						m_internalType;
+	int						m_checkCollideWith;
+
+	char	m_padding[4];
+};
+
+///do not change these serialization structures: any change requires an updated sBulletDNAstr/sBulletDNAstr64
+struct	btCollisionObjectFloatData
+{
+	void					*m_broadphaseHandle;
+	void					*m_collisionShape;
+	btCollisionShapeData	*m_rootCollisionShape;
+	char					*m_name;
+
+	btTransformFloatData	m_worldTransform;
+	btTransformFloatData	m_interpolationWorldTransform;
+	btVector3FloatData		m_interpolationLinearVelocity;
+	btVector3FloatData		m_interpolationAngularVelocity;
+	btVector3FloatData		m_anisotropicFriction;
+	float					m_contactProcessingThreshold;	
+	float					m_deactivationTime;
+	float					m_friction;
+	float					m_rollingFriction;
+
+	float					m_restitution;
+	float					m_hitFraction; 
+	float					m_ccdSweptSphereRadius;
+	float					m_ccdMotionThreshold;
+
+	int						m_hasAnisotropicFriction;
+	int						m_collisionFlags;
+	int						m_islandTag1;
+	int						m_companionId;
+	int						m_activationState1;
+	int						m_internalType;
+	int						m_checkCollideWith;
+	char					m_padding[4];
+};
+
+
+
+SIMD_FORCE_INLINE	int	btCollisionObject::calculateSerializeBufferSize() const
+{
+	return sizeof(btCollisionObjectData);
+}
+
+
+
+#endif //BT_COLLISION_OBJECT_H
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btCollisionObjectWrapper.h b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btCollisionObjectWrapper.h
new file mode 100644
index 000000000..2b8ffeaa2
--- /dev/null
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btCollisionObjectWrapper.h
@@ -0,0 +1,40 @@
+#ifndef BT_COLLISION_OBJECT_WRAPPER_H
+#define BT_COLLISION_OBJECT_WRAPPER_H
+
+///btCollisionObjectWrapper is an internal data structure.
+///Most users can ignore this and use btCollisionObject and btCollisionShape instead
+class btCollisionShape;
+class btCollisionObject;
+class btTransform;
+#include "LinearMath/btScalar.h" // for SIMD_FORCE_INLINE definition
+
+#define BT_DECLARE_STACK_ONLY_OBJECT \
+	private: \
+		void* operator new(size_t size); \
+		void operator delete(void*);
+
+struct btCollisionObjectWrapper;
+struct btCollisionObjectWrapper
+{
+BT_DECLARE_STACK_ONLY_OBJECT
+
+private:
+	btCollisionObjectWrapper(const btCollisionObjectWrapper&); // not implemented. Not allowed.
+	btCollisionObjectWrapper* operator=(const btCollisionObjectWrapper&);
+
+public:
+	const btCollisionObjectWrapper* m_parent;
+	const btCollisionShape* m_shape;
+	const btCollisionObject* m_collisionObject;
+	const btTransform& m_worldTransform;
+
+	btCollisionObjectWrapper(const btCollisionObjectWrapper* parent, const btCollisionShape* shape, const btCollisionObject* collisionObject, const btTransform& worldTransform)
+	: m_parent(parent), m_shape(shape), m_collisionObject(collisionObject), m_worldTransform(worldTransform)
+	{}
+
+	SIMD_FORCE_INLINE const btTransform& getWorldTransform() const { return m_worldTransform; }
+	SIMD_FORCE_INLINE const btCollisionObject* getCollisionObject() const { return m_collisionObject; }
+	SIMD_FORCE_INLINE const btCollisionShape* getCollisionShape() const { return m_shape; }
+};
+
+#endif //BT_COLLISION_OBJECT_WRAPPER_H
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btCollisionWorld.cpp b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btCollisionWorld.cpp
index c5d5646ea..91f4c6200 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btCollisionWorld.cpp
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btCollisionWorld.cpp
@@ -26,11 +26,18 @@ subject to the following restrictions:
 #include "BulletCollision/NarrowPhaseCollision/btSubSimplexConvexCast.h"
 #include "BulletCollision/NarrowPhaseCollision/btGjkConvexCast.h"
 #include "BulletCollision/NarrowPhaseCollision/btContinuousConvexCollision.h"
-
+#include "BulletCollision/BroadphaseCollision/btCollisionAlgorithm.h"
 #include "BulletCollision/BroadphaseCollision/btBroadphaseInterface.h"
+#include "BulletCollision/BroadphaseCollision/btDbvt.h"
 #include "LinearMath/btAabbUtil2.h"
 #include "LinearMath/btQuickprof.h"
 #include "LinearMath/btStackAlloc.h"
+#include "LinearMath/btSerializer.h"
+#include "BulletCollision/CollisionShapes/btConvexPolyhedron.h"
+#include "BulletCollision/CollisionDispatch/btCollisionObjectWrapper.h"
+
+//#define DISABLE_DBVT_COMPOUNDSHAPE_RAYCAST_ACCELERATION
+
 
 //#define USE_BRUTEFORCE_RAYBROADPHASE 1
 //RECALCULATE_AABB is slower, but benefit is that you don't need to call 'stepSimulation'  or 'updateAabbs' before using a rayTest
@@ -42,6 +49,24 @@ subject to the following restrictions:
 #include "BulletCollision/CollisionDispatch/btCollisionConfiguration.h"
 
 
+///for debug drawing
+
+//for debug rendering
+#include "BulletCollision/CollisionShapes/btBoxShape.h"
+#include "BulletCollision/CollisionShapes/btCapsuleShape.h"
+#include "BulletCollision/CollisionShapes/btCompoundShape.h"
+#include "BulletCollision/CollisionShapes/btConeShape.h"
+#include "BulletCollision/CollisionShapes/btConvexTriangleMeshShape.h"
+#include "BulletCollision/CollisionShapes/btCylinderShape.h"
+#include "BulletCollision/CollisionShapes/btMultiSphereShape.h"
+#include "BulletCollision/CollisionShapes/btPolyhedralConvexShape.h"
+#include "BulletCollision/CollisionShapes/btSphereShape.h"
+#include "BulletCollision/CollisionShapes/btTriangleCallback.h"
+#include "BulletCollision/CollisionShapes/btTriangleMeshShape.h"
+#include "BulletCollision/CollisionShapes/btStaticPlaneShape.h"
+
+
+
 btCollisionWorld::btCollisionWorld(btDispatcher* dispatcher,btBroadphaseInterface* pairCache, btCollisionConfiguration* collisionConfiguration)
 :m_dispatcher1(dispatcher),
 m_broadphasePairCache(pairCache),
@@ -92,27 +117,27 @@ void	btCollisionWorld::addCollisionObject(btCollisionObject* collisionObject,sho
 	btAssert(collisionObject);
 
 	//check that the object isn't already added
-		btAssert( m_collisionObjects.findLinearSearch(collisionObject)  == m_collisionObjects.size());
+	btAssert( m_collisionObjects.findLinearSearch(collisionObject)  == m_collisionObjects.size());
 
-		m_collisionObjects.push_back(collisionObject);
+	m_collisionObjects.push_back(collisionObject);
 
-		//calculate new AABB
-		btTransform trans = collisionObject->getWorldTransform();
+	//calculate new AABB
+	btTransform trans = collisionObject->getWorldTransform();
 
-		btVector3	minAabb;
-		btVector3	maxAabb;
-		collisionObject->getCollisionShape()->getAabb(trans,minAabb,maxAabb);
+	btVector3	minAabb;
+	btVector3	maxAabb;
+	collisionObject->getCollisionShape()->getAabb(trans,minAabb,maxAabb);
 
-		int type = collisionObject->getCollisionShape()->getShapeType();
-		collisionObject->setBroadphaseHandle( getBroadphase()->createProxy(
-			minAabb,
-			maxAabb,
-			type,
-			collisionObject,
-			collisionFilterGroup,
-			collisionFilterMask,
-			m_dispatcher1,0
-			))	;
+	int type = collisionObject->getCollisionShape()->getShapeType();
+	collisionObject->setBroadphaseHandle( getBroadphase()->createProxy(
+		minAabb,
+		maxAabb,
+		type,
+		collisionObject,
+		collisionFilterGroup,
+		collisionFilterMask,
+		m_dispatcher1,0
+		))	;
 
 
 
@@ -131,6 +156,16 @@ void	btCollisionWorld::updateSingleAabb(btCollisionObject* colObj)
 	minAabb -= contactThreshold;
 	maxAabb += contactThreshold;
 
+	if(getDispatchInfo().m_useContinuous && colObj->getInternalType()==btCollisionObject::CO_RIGID_BODY && !colObj->isStaticOrKinematicObject())
+	{
+		btVector3 minAabb2,maxAabb2;
+		colObj->getCollisionShape()->getAabb(colObj->getInterpolationWorldTransform(),minAabb2,maxAabb2);
+		minAabb2 -= contactThreshold;
+		maxAabb2 += contactThreshold;
+		minAabb.setMin(minAabb2);
+		maxAabb.setMax(maxAabb2);
+	}
+
 	btBroadphaseInterface* bp = (btBroadphaseInterface*)m_broadphasePairCache;
 
 	//moving objects should be moderately sized, probably something wrong if not
@@ -173,6 +208,11 @@ void	btCollisionWorld::updateAabbs()
 }
 
 
+void	btCollisionWorld::computeOverlappingPairs()	// asks the broadphase (m_broadphasePairCache) to recompute its overlapping pairs using m_dispatcher1
+{
+	BT_PROFILE("calculateOverlappingPairs");	// profile label names the broadphase call below, not this function
+	m_broadphasePairCache->calculateOverlappingPairs(m_dispatcher1);
+}
 
 void	btCollisionWorld::performDiscreteCollisionDetection()
 {
@@ -182,11 +222,7 @@ void	btCollisionWorld::performDiscreteCollisionDetection()
 
 	updateAabbs();
 
-	{
-		BT_PROFILE("calculateOverlappingPairs");
-		m_broadphasePairCache->calculateOverlappingPairs(m_dispatcher1);
-	}
-
+	computeOverlappingPairs();
 
 	btDispatcher* dispatcher = getDispatcher();
 	{
@@ -226,20 +262,29 @@ void	btCollisionWorld::removeCollisionObject(btCollisionObject* collisionObject)
 }
 
 
-
 void	btCollisionWorld::rayTestSingle(const btTransform& rayFromTrans,const btTransform& rayToTrans,
-					  btCollisionObject* collisionObject,
-					  const btCollisionShape* collisionShape,
-					  const btTransform& colObjWorldTransform,
-					  RayResultCallback& resultCallback)
+										btCollisionObject* collisionObject,
+										const btCollisionShape* collisionShape,
+										const btTransform& colObjWorldTransform,
+										RayResultCallback& resultCallback)
+{	// thin forwarder: bundles shape, object and transform into a wrapper and delegates to rayTestSingleInternal
+	btCollisionObjectWrapper colObWrap(0,collisionShape,collisionObject,colObjWorldTransform);	// first argument 0 = no parent wrapper
+	btCollisionWorld::rayTestSingleInternal(rayFromTrans,rayToTrans,&colObWrap,resultCallback);	// colObWrap only needs to live for the duration of this call
+}
+
+void	btCollisionWorld::rayTestSingleInternal(const btTransform& rayFromTrans,const btTransform& rayToTrans,
+										const btCollisionObjectWrapper* collisionObjectWrap,
+										RayResultCallback& resultCallback)
 {
 	btSphereShape pointShape(btScalar(0.0));
 	pointShape.setMargin(0.f);
 	const btConvexShape* castShape = &pointShape;
+	const btCollisionShape* collisionShape = collisionObjectWrap->getCollisionShape();
+	const btTransform& colObjWorldTransform = collisionObjectWrap->getWorldTransform();
 
 	if (collisionShape->isConvex())
 	{
-//		BT_PROFILE("rayTestConvex");
+		//		BT_PROFILE("rayTestConvex");
 		btConvexCast::CastResult castResult;
 		castResult.m_fraction = resultCallback.m_closestHitFraction;
 
@@ -268,10 +313,10 @@ void	btCollisionWorld::rayTestSingle(const btTransform& rayFromTrans,const btTra
 					castResult.m_normal.normalize();
 					btCollisionWorld::LocalRayResult localRayResult
 						(
-							collisionObject,
-							0,
-							castResult.m_normal,
-							castResult.m_fraction
+						collisionObjectWrap->getCollisionObject(),
+						0,
+						castResult.m_normal,
+						castResult.m_fraction
 						);
 
 					bool normalInWorldSpace = true;
@@ -283,7 +328,7 @@ void	btCollisionWorld::rayTestSingle(const btTransform& rayFromTrans,const btTra
 	} else {
 		if (collisionShape->isConcave())
 		{
-//			BT_PROFILE("rayTestConcave");
+			//			BT_PROFILE("rayTestConcave");
 			if (collisionShape->getShapeType()==TRIANGLE_MESH_SHAPE_PROXYTYPE)
 			{
 				///optimized version for btBvhTriangleMeshShape
@@ -296,21 +341,21 @@ void	btCollisionWorld::rayTestSingle(const btTransform& rayFromTrans,const btTra
 				struct BridgeTriangleRaycastCallback : public btTriangleRaycastCallback
 				{
 					btCollisionWorld::RayResultCallback* m_resultCallback;
-					btCollisionObject*	m_collisionObject;
+					const btCollisionObject*	m_collisionObject;
 					btTriangleMeshShape*	m_triangleMesh;
 
-               btTransform m_colObjWorldTransform;
+					btTransform m_colObjWorldTransform;
 
 					BridgeTriangleRaycastCallback( const btVector3& from,const btVector3& to,
-						btCollisionWorld::RayResultCallback* resultCallback, btCollisionObject* collisionObject,btTriangleMeshShape*	triangleMesh,const btTransform& colObjWorldTransform):
-                  //@BP Mod
-						btTriangleRaycastCallback(from,to, resultCallback->m_flags),
-							m_resultCallback(resultCallback),
-							m_collisionObject(collisionObject),
-							m_triangleMesh(triangleMesh),
-                     m_colObjWorldTransform(colObjWorldTransform)
-						{
-						}
+						btCollisionWorld::RayResultCallback* resultCallback, const btCollisionObject* collisionObject,btTriangleMeshShape*	triangleMesh,const btTransform& colObjWorldTransform):
+					//@BP Mod
+					btTriangleRaycastCallback(from,to, resultCallback->m_flags),
+						m_resultCallback(resultCallback),
+						m_collisionObject(collisionObject),
+						m_triangleMesh(triangleMesh),
+						m_colObjWorldTransform(colObjWorldTransform)
+					{
+					}
 
 
 					virtual btScalar reportHit(const btVector3& hitNormalLocal, btScalar hitFraction, int partId, int triangleIndex )
@@ -319,10 +364,10 @@ void	btCollisionWorld::rayTestSingle(const btTransform& rayFromTrans,const btTra
 						shapeInfo.m_shapePart = partId;
 						shapeInfo.m_triangleIndex = triangleIndex;
 
-                  btVector3 hitNormalWorld = m_colObjWorldTransform.getBasis() * hitNormalLocal;
+						btVector3 hitNormalWorld = m_colObjWorldTransform.getBasis() * hitNormalLocal;
 
 						btCollisionWorld::LocalRayResult rayResult
-						(m_collisionObject,
+							(m_collisionObject,
 							&shapeInfo,
 							hitNormalWorld,
 							hitFraction);
@@ -333,7 +378,7 @@ void	btCollisionWorld::rayTestSingle(const btTransform& rayFromTrans,const btTra
 
 				};
 
-				BridgeTriangleRaycastCallback rcb(rayFromLocal,rayToLocal,&resultCallback,collisionObject,triangleMesh,colObjWorldTransform);
+				BridgeTriangleRaycastCallback rcb(rayFromLocal,rayToLocal,&resultCallback,collisionObjectWrap->getCollisionObject(),triangleMesh,colObjWorldTransform);
 				rcb.m_hitFraction = resultCallback.m_closestHitFraction;
 				triangleMesh->performRaycast(&rcb,rayFromLocal,rayToLocal);
 			} else
@@ -351,21 +396,21 @@ void	btCollisionWorld::rayTestSingle(const btTransform& rayFromTrans,const btTra
 				struct BridgeTriangleRaycastCallback : public btTriangleRaycastCallback
 				{
 					btCollisionWorld::RayResultCallback* m_resultCallback;
-					btCollisionObject*	m_collisionObject;
+					const btCollisionObject*	m_collisionObject;
 					btConcaveShape*	m_triangleMesh;
 
-               btTransform m_colObjWorldTransform;
+					btTransform m_colObjWorldTransform;
 
 					BridgeTriangleRaycastCallback( const btVector3& from,const btVector3& to,
-						btCollisionWorld::RayResultCallback* resultCallback, btCollisionObject* collisionObject,btConcaveShape*	triangleMesh, const btTransform& colObjWorldTransform):
-                  //@BP Mod
-                  btTriangleRaycastCallback(from,to, resultCallback->m_flags),
-							m_resultCallback(resultCallback),
-							m_collisionObject(collisionObject),
-							m_triangleMesh(triangleMesh),
-                     m_colObjWorldTransform(colObjWorldTransform)
-						{
-						}
+						btCollisionWorld::RayResultCallback* resultCallback, const btCollisionObject* collisionObject,btConcaveShape*	triangleMesh, const btTransform& colObjWorldTransform):
+					//@BP Mod
+					btTriangleRaycastCallback(from,to, resultCallback->m_flags),
+						m_resultCallback(resultCallback),
+						m_collisionObject(collisionObject),
+						m_triangleMesh(triangleMesh),
+						m_colObjWorldTransform(colObjWorldTransform)
+					{
+					}
 
 
 					virtual btScalar reportHit(const btVector3& hitNormalLocal, btScalar hitFraction, int partId, int triangleIndex )
@@ -374,10 +419,10 @@ void	btCollisionWorld::rayTestSingle(const btTransform& rayFromTrans,const btTra
 						shapeInfo.m_shapePart = partId;
 						shapeInfo.m_triangleIndex = triangleIndex;
 
-                  btVector3 hitNormalWorld = m_colObjWorldTransform.getBasis() * hitNormalLocal;
+						btVector3 hitNormalWorld = m_colObjWorldTransform.getBasis() * hitNormalLocal;
 
 						btCollisionWorld::LocalRayResult rayResult
-						(m_collisionObject,
+							(m_collisionObject,
 							&shapeInfo,
 							hitNormalWorld,
 							hitFraction);
@@ -389,7 +434,7 @@ void	btCollisionWorld::rayTestSingle(const btTransform& rayFromTrans,const btTra
 				};
 
 
-				BridgeTriangleRaycastCallback	rcb(rayFromLocal,rayToLocal,&resultCallback,collisionObject,concaveShape, colObjWorldTransform);
+				BridgeTriangleRaycastCallback	rcb(rayFromLocal,rayToLocal,&resultCallback,collisionObjectWrap->getCollisionObject(),concaveShape, colObjWorldTransform);
 				rcb.m_hitFraction = resultCallback.m_closestHitFraction;
 
 				btVector3 rayAabbMinLocal = rayFromLocal;
@@ -400,27 +445,116 @@ void	btCollisionWorld::rayTestSingle(const btTransform& rayFromTrans,const btTra
 				concaveShape->processAllTriangles(&rcb,rayAabbMinLocal,rayAabbMaxLocal);
 			}
 		} else {
-//			BT_PROFILE("rayTestCompound");
-			///@todo: use AABB tree or other BVH acceleration structure, see btDbvt
+			//			BT_PROFILE("rayTestCompound");
 			if (collisionShape->isCompound())
 			{
-				const btCompoundShape* compoundShape = static_cast<const btCompoundShape*>(collisionShape);
-				int i=0;
-				for (i=0;i<compoundShape->getNumChildShapes();i++)
+				struct LocalInfoAdder2 : public RayResultCallback	// adapter: forwards ray hits to the user's callback, supplying shape info when the hit carries none
 				{
-					btTransform childTrans = compoundShape->getChildTransform(i);
-					const btCollisionShape* childCollisionShape = compoundShape->getChildShape(i);
-					btTransform childWorldTrans = colObjWorldTransform * childTrans;
-					// replace collision shape so that callback can determine the triangle
-					btCollisionShape* saveCollisionShape = collisionObject->getCollisionShape();
-					collisionObject->internalSetTemporaryCollisionShape((btCollisionShape*)childCollisionShape);
-					rayTestSingle(rayFromTrans,rayToTrans,
-						collisionObject,
-						childCollisionShape,
-						childWorldTrans,
-						resultCallback);
-					// restore
-					collisionObject->internalSetTemporaryCollisionShape(saveCollisionShape);
+					RayResultCallback* m_userCallback;	// callback supplied by the caller; every hit is forwarded to it
+					int m_i;	// value written to LocalShapeInfo::m_triangleIndex; the call site passes the compound child index
+					
+					LocalInfoAdder2 (int i, RayResultCallback *user)
+						: m_userCallback(user), m_i(i)
+					{ 
+						m_closestHitFraction = m_userCallback->m_closestHitFraction;	// start from the user's current closest fraction
+						m_flags = m_userCallback->m_flags;	// carry over the user's ray-test flags
+					}
+					virtual bool needsCollision(btBroadphaseProxy* p) const	// filtering decision is delegated unchanged to the user callback
+					{
+						return m_userCallback->needsCollision(p);
+					}
+
+					virtual btScalar addSingleResult (btCollisionWorld::LocalRayResult &r, bool b)	// returns whatever the user callback returns
+					{
+						btCollisionWorld::LocalShapeInfo shapeInfo;	// local storage; only pointed to while the user callback runs below
+						shapeInfo.m_shapePart = -1;
+						shapeInfo.m_triangleIndex = m_i;
+						if (r.m_localShapeInfo == NULL)	// keep shape info that the nested test already provided
+							r.m_localShapeInfo = &shapeInfo;
+
+						const btScalar result = m_userCallback->addSingleResult(r, b);
+						m_closestHitFraction = m_userCallback->m_closestHitFraction;	// keep this adapter's fraction in sync with the user callback
+						return result;
+					}
+				};
+				
+				struct RayTester : btDbvt::ICollide	// btDbvt visitor that ray-tests one compound child per processed leaf
+				{
+					const btCollisionObject* m_collisionObject;
+					const btCompoundShape* m_compoundShape;
+					const btTransform& m_colObjWorldTransform;	// reference member: the referenced transform must outlive this object
+					const btTransform& m_rayFromTrans;	// reference member, same lifetime requirement
+					const btTransform& m_rayToTrans;	// reference member, same lifetime requirement
+					RayResultCallback& m_resultCallback;	// user callback that ultimately receives the hits
+					
+					RayTester(const btCollisionObject* collisionObject,
+							const btCompoundShape* compoundShape,
+							const btTransform& colObjWorldTransform,
+							const btTransform& rayFromTrans,
+							const btTransform& rayToTrans,
+							RayResultCallback& resultCallback):
+						m_collisionObject(collisionObject),
+						m_compoundShape(compoundShape),
+						m_colObjWorldTransform(colObjWorldTransform),
+						m_rayFromTrans(rayFromTrans),
+						m_rayToTrans(rayToTrans),
+						m_resultCallback(resultCallback)
+					{
+						
+					}
+					
+					void ProcessLeaf(int i)	// ray-tests child shape i of the compound
+					{
+						const btCollisionShape* childCollisionShape = m_compoundShape->getChildShape(i);
+						const btTransform& childTrans = m_compoundShape->getChildTransform(i);
+						btTransform childWorldTrans = m_colObjWorldTransform * childTrans;	// child transform composed with the compound's world transform
+						
+						btCollisionObjectWrapper tmpOb(0,childCollisionShape,m_collisionObject,childWorldTrans);
+						// wrap the child shape and its world transform; the collision object itself is not modified
+
+						
+
+						LocalInfoAdder2 my_cb(i, &m_resultCallback);	// adapter passes i through as the shape-info index
+
+						rayTestSingleInternal(
+							m_rayFromTrans,
+							m_rayToTrans,
+							&tmpOb,
+							my_cb);
+						
+					}
+				
+					void Process(const btDbvtNode* leaf)	// tree-traversal entry point; leaf->dataAsInt is used as the child index
+					{
+						ProcessLeaf(leaf->dataAsInt);
+					}
+				};
+				
+				const btCompoundShape* compoundShape = static_cast<const btCompoundShape*>(collisionShape);
+				const btDbvt* dbvt = compoundShape->getDynamicAabbTree();
+
+
+				RayTester rayCB(
+					collisionObjectWrap->getCollisionObject(),
+					compoundShape,
+					colObjWorldTransform,
+					rayFromTrans,
+					rayToTrans,
+					resultCallback);
+#ifndef	DISABLE_DBVT_COMPOUNDSHAPE_RAYCAST_ACCELERATION
+				if (dbvt)
+				{
+					btVector3 localRayFrom = colObjWorldTransform.inverseTimes(rayFromTrans).getOrigin();
+					btVector3 localRayTo = colObjWorldTransform.inverseTimes(rayToTrans).getOrigin();
+					btDbvt::rayTest(dbvt->m_root, localRayFrom , localRayTo, rayCB);
+				}
+				else
+#endif //DISABLE_DBVT_COMPOUNDSHAPE_RAYCAST_ACCELERATION
+				{
+					for (int i = 0, n = compoundShape->getNumChildShapes(); i < n; ++i)
+					{
+						rayCB.ProcessLeaf(i);
+					}	
 				}
 			}
 		}
@@ -428,11 +562,22 @@ void	btCollisionWorld::rayTestSingle(const btTransform& rayFromTrans,const btTra
 }
 
 void	btCollisionWorld::objectQuerySingle(const btConvexShape* castShape,const btTransform& convexFromTrans,const btTransform& convexToTrans,
-					  btCollisionObject* collisionObject,
-					  const btCollisionShape* collisionShape,
-					  const btTransform& colObjWorldTransform,
-					  ConvexResultCallback& resultCallback, btScalar allowedPenetration)
+											btCollisionObject* collisionObject,
+											const btCollisionShape* collisionShape,
+											const btTransform& colObjWorldTransform,
+											ConvexResultCallback& resultCallback, btScalar allowedPenetration)
 {
+	btCollisionObjectWrapper tmpOb(0,collisionShape,collisionObject,colObjWorldTransform);	// first argument 0 = no parent wrapper; bundles shape, object and transform
+	btCollisionWorld::objectQuerySingleInternal(castShape,convexFromTrans,convexToTrans,&tmpOb,resultCallback,allowedPenetration);	// all work is delegated; tmpOb only needs to live for this call
+}
+
+void	btCollisionWorld::objectQuerySingleInternal(const btConvexShape* castShape,const btTransform& convexFromTrans,const btTransform& convexToTrans,
+											const btCollisionObjectWrapper* colObjWrap,
+											ConvexResultCallback& resultCallback, btScalar allowedPenetration)
+{
+	const btCollisionShape* collisionShape = colObjWrap->getCollisionShape();
+	const btTransform& colObjWorldTransform = colObjWrap->getWorldTransform();
+
 	if (collisionShape->isConvex())
 	{
 		//BT_PROFILE("convexSweepConvex");
@@ -443,15 +588,15 @@ void	btCollisionWorld::objectQuerySingle(const btConvexShape* castShape,const bt
 		btConvexShape* convexShape = (btConvexShape*) collisionShape;
 		btVoronoiSimplexSolver	simplexSolver;
 		btGjkEpaPenetrationDepthSolver	gjkEpaPenetrationSolver;
-		
+
 		btContinuousConvexCollision convexCaster1(castShape,convexShape,&simplexSolver,&gjkEpaPenetrationSolver);
 		//btGjkConvexCast convexCaster2(castShape,convexShape,&simplexSolver);
 		//btSubsimplexConvexCast convexCaster3(castShape,convexShape,&simplexSolver);
 
 		btConvexCast* castPtr = &convexCaster1;
-	
-	
-		
+
+
+
 		if (castPtr->calcTimeOfImpact(convexFromTrans,convexToTrans,colObjWorldTransform,colObjWorldTransform,castResult))
 		{
 			//add hit
@@ -461,13 +606,13 @@ void	btCollisionWorld::objectQuerySingle(const btConvexShape* castShape,const bt
 				{
 					castResult.m_normal.normalize();
 					btCollisionWorld::LocalConvexResult localConvexResult
-								(
-									collisionObject,
-									0,
-									castResult.m_normal,
-									castResult.m_hitPoint,
-									castResult.m_fraction
-								);
+						(
+						colObjWrap->getCollisionObject(),
+						0,
+						castResult.m_normal,
+						castResult.m_hitPoint,
+						castResult.m_fraction
+						);
 
 					bool normalInWorldSpace = true;
 					resultCallback.addSingleResult(localConvexResult, normalInWorldSpace);
@@ -492,17 +637,17 @@ void	btCollisionWorld::objectQuerySingle(const btConvexShape* castShape,const bt
 				struct BridgeTriangleConvexcastCallback : public btTriangleConvexcastCallback
 				{
 					btCollisionWorld::ConvexResultCallback* m_resultCallback;
-					btCollisionObject*	m_collisionObject;
+					const btCollisionObject*	m_collisionObject;
 					btTriangleMeshShape*	m_triangleMesh;
 
 					BridgeTriangleConvexcastCallback(const btConvexShape* castShape, const btTransform& from,const btTransform& to,
-						btCollisionWorld::ConvexResultCallback* resultCallback, btCollisionObject* collisionObject,btTriangleMeshShape*	triangleMesh, const btTransform& triangleToWorld):
-						btTriangleConvexcastCallback(castShape, from,to, triangleToWorld, triangleMesh->getMargin()),
-							m_resultCallback(resultCallback),
-							m_collisionObject(collisionObject),
-							m_triangleMesh(triangleMesh)
-						{
-						}
+						btCollisionWorld::ConvexResultCallback* resultCallback, const btCollisionObject* collisionObject,btTriangleMeshShape*	triangleMesh, const btTransform& triangleToWorld):
+					btTriangleConvexcastCallback(castShape, from,to, triangleToWorld, triangleMesh->getMargin()),
+						m_resultCallback(resultCallback),
+						m_collisionObject(collisionObject),
+						m_triangleMesh(triangleMesh)
+					{
+					}
 
 
 					virtual btScalar reportHit(const btVector3& hitNormalLocal, const btVector3& hitPointLocal, btScalar hitFraction, int partId, int triangleIndex )
@@ -514,7 +659,7 @@ void	btCollisionWorld::objectQuerySingle(const btConvexShape* castShape,const bt
 						{
 
 							btCollisionWorld::LocalConvexResult convexResult
-							(m_collisionObject,
+								(m_collisionObject,
 								&shapeInfo,
 								hitNormalLocal,
 								hitPointLocal,
@@ -530,30 +675,65 @@ void	btCollisionWorld::objectQuerySingle(const btConvexShape* castShape,const bt
 
 				};
 
-				BridgeTriangleConvexcastCallback tccb(castShape, convexFromTrans,convexToTrans,&resultCallback,collisionObject,triangleMesh, colObjWorldTransform);
+				BridgeTriangleConvexcastCallback tccb(castShape, convexFromTrans,convexToTrans,&resultCallback,colObjWrap->getCollisionObject(),triangleMesh, colObjWorldTransform);
 				tccb.m_hitFraction = resultCallback.m_closestHitFraction;
+				tccb.m_allowedPenetration = allowedPenetration;
 				btVector3 boxMinLocal, boxMaxLocal;
 				castShape->getAabb(rotationXform, boxMinLocal, boxMaxLocal);
 				triangleMesh->performConvexcast(&tccb,convexFromLocal,convexToLocal,boxMinLocal, boxMaxLocal);
 			} else
 			{
-				//BT_PROFILE("convexSweepConcave");
-				btConcaveShape* concaveShape = (btConcaveShape*)collisionShape;
-				btTransform worldTocollisionObject = colObjWorldTransform.inverse();
-				btVector3 convexFromLocal = worldTocollisionObject * convexFromTrans.getOrigin();
-				btVector3 convexToLocal = worldTocollisionObject * convexToTrans.getOrigin();
-				// rotation of box in local mesh space = MeshRotation^-1 * ConvexToRotation
-				btTransform rotationXform = btTransform(worldTocollisionObject.getBasis() * convexToTrans.getBasis());
-
-				//ConvexCast::CastResult
-				struct BridgeTriangleConvexcastCallback : public btTriangleConvexcastCallback
+				if (collisionShape->getShapeType()==STATIC_PLANE_PROXYTYPE)
 				{
-					btCollisionWorld::ConvexResultCallback* m_resultCallback;
-					btCollisionObject*	m_collisionObject;
-					btConcaveShape*	m_triangleMesh;
+					btConvexCast::CastResult castResult;
+					castResult.m_allowedPenetration = allowedPenetration;
+					castResult.m_fraction = resultCallback.m_closestHitFraction;
+					btStaticPlaneShape* planeShape = (btStaticPlaneShape*) collisionShape;
+					btContinuousConvexCollision convexCaster1(castShape,planeShape);
+					btConvexCast* castPtr = &convexCaster1;
 
-					BridgeTriangleConvexcastCallback(const btConvexShape* castShape, const btTransform& from,const btTransform& to,
-						btCollisionWorld::ConvexResultCallback* resultCallback, btCollisionObject* collisionObject,btConcaveShape*	triangleMesh, const btTransform& triangleToWorld):
+					if (castPtr->calcTimeOfImpact(convexFromTrans,convexToTrans,colObjWorldTransform,colObjWorldTransform,castResult))
+					{
+						//add hit
+						if (castResult.m_normal.length2() > btScalar(0.0001))
+						{
+							if (castResult.m_fraction < resultCallback.m_closestHitFraction)
+							{
+								castResult.m_normal.normalize();
+								btCollisionWorld::LocalConvexResult localConvexResult
+									(
+									colObjWrap->getCollisionObject(),
+									0,
+									castResult.m_normal,
+									castResult.m_hitPoint,
+									castResult.m_fraction
+									);
+
+								bool normalInWorldSpace = true;
+								resultCallback.addSingleResult(localConvexResult, normalInWorldSpace);
+							}
+						}
+					}
+
+				} else
+				{
+					//BT_PROFILE("convexSweepConcave");
+					btConcaveShape* concaveShape = (btConcaveShape*)collisionShape;
+					btTransform worldTocollisionObject = colObjWorldTransform.inverse();
+					btVector3 convexFromLocal = worldTocollisionObject * convexFromTrans.getOrigin();
+					btVector3 convexToLocal = worldTocollisionObject * convexToTrans.getOrigin();
+					// rotation of box in local mesh space = MeshRotation^-1 * ConvexToRotation
+					btTransform rotationXform = btTransform(worldTocollisionObject.getBasis() * convexToTrans.getBasis());
+
+					//ConvexCast::CastResult
+					struct BridgeTriangleConvexcastCallback : public btTriangleConvexcastCallback
+					{
+						btCollisionWorld::ConvexResultCallback* m_resultCallback;
+						const btCollisionObject*	m_collisionObject;
+						btConcaveShape*	m_triangleMesh;
+
+						BridgeTriangleConvexcastCallback(const btConvexShape* castShape, const btTransform& from,const btTransform& to,
+							btCollisionWorld::ConvexResultCallback* resultCallback, const btCollisionObject* collisionObject,btConcaveShape*	triangleMesh, const btTransform& triangleToWorld):
 						btTriangleConvexcastCallback(castShape, from,to, triangleToWorld, triangleMesh->getMargin()),
 							m_resultCallback(resultCallback),
 							m_collisionObject(collisionObject),
@@ -562,42 +742,44 @@ void	btCollisionWorld::objectQuerySingle(const btConvexShape* castShape,const bt
 						}
 
 
-					virtual btScalar reportHit(const btVector3& hitNormalLocal, const btVector3& hitPointLocal, btScalar hitFraction, int partId, int triangleIndex )
-					{
-						btCollisionWorld::LocalShapeInfo	shapeInfo;
-						shapeInfo.m_shapePart = partId;
-						shapeInfo.m_triangleIndex = triangleIndex;
-						if (hitFraction <= m_resultCallback->m_closestHitFraction)
+						virtual btScalar reportHit(const btVector3& hitNormalLocal, const btVector3& hitPointLocal, btScalar hitFraction, int partId, int triangleIndex )
 						{
+							btCollisionWorld::LocalShapeInfo	shapeInfo;
+							shapeInfo.m_shapePart = partId;
+							shapeInfo.m_triangleIndex = triangleIndex;
+							if (hitFraction <= m_resultCallback->m_closestHitFraction)
+							{
 
-							btCollisionWorld::LocalConvexResult convexResult
-							(m_collisionObject,
-								&shapeInfo,
-								hitNormalLocal,
-								hitPointLocal,
-								hitFraction);
+								btCollisionWorld::LocalConvexResult convexResult
+									(m_collisionObject,
+									&shapeInfo,
+									hitNormalLocal,
+									hitPointLocal,
+									hitFraction);
 
-							bool	normalInWorldSpace = false;
+								bool	normalInWorldSpace = false;
 
-							return m_resultCallback->addSingleResult(convexResult,normalInWorldSpace);
+								return m_resultCallback->addSingleResult(convexResult,normalInWorldSpace);
+							}
+							return hitFraction;
 						}
-						return hitFraction;
-					}
 
-				};
+					};
 
-				BridgeTriangleConvexcastCallback tccb(castShape, convexFromTrans,convexToTrans,&resultCallback,collisionObject,concaveShape, colObjWorldTransform);
-				tccb.m_hitFraction = resultCallback.m_closestHitFraction;
-				btVector3 boxMinLocal, boxMaxLocal;
-				castShape->getAabb(rotationXform, boxMinLocal, boxMaxLocal);
+					BridgeTriangleConvexcastCallback tccb(castShape, convexFromTrans,convexToTrans,&resultCallback,colObjWrap->getCollisionObject(),concaveShape, colObjWorldTransform);
+					tccb.m_hitFraction = resultCallback.m_closestHitFraction;
+					tccb.m_allowedPenetration = allowedPenetration;
+					btVector3 boxMinLocal, boxMaxLocal;
+					castShape->getAabb(rotationXform, boxMinLocal, boxMaxLocal);
 
-				btVector3 rayAabbMinLocal = convexFromLocal;
-				rayAabbMinLocal.setMin(convexToLocal);
-				btVector3 rayAabbMaxLocal = convexFromLocal;
-				rayAabbMaxLocal.setMax(convexToLocal);
-				rayAabbMinLocal += boxMinLocal;
-				rayAabbMaxLocal += boxMaxLocal;
-				concaveShape->processAllTriangles(&tccb,rayAabbMinLocal,rayAabbMaxLocal);
+					btVector3 rayAabbMinLocal = convexFromLocal;
+					rayAabbMinLocal.setMin(convexToLocal);
+					btVector3 rayAabbMaxLocal = convexFromLocal;
+					rayAabbMaxLocal.setMax(convexToLocal);
+					rayAabbMinLocal += boxMinLocal;
+					rayAabbMaxLocal += boxMaxLocal;
+					concaveShape->processAllTriangles(&tccb,rayAabbMinLocal,rayAabbMaxLocal);
+				}
 			}
 		} else {
 			///@todo : use AABB tree or other BVH acceleration structure!
@@ -611,16 +793,41 @@ void	btCollisionWorld::objectQuerySingle(const btConvexShape* castShape,const bt
 					btTransform childTrans = compoundShape->getChildTransform(i);
 					const btCollisionShape* childCollisionShape = compoundShape->getChildShape(i);
 					btTransform childWorldTrans = colObjWorldTransform * childTrans;
-					// replace collision shape so that callback can determine the triangle
-					btCollisionShape* saveCollisionShape = collisionObject->getCollisionShape();
-					collisionObject->internalSetTemporaryCollisionShape((btCollisionShape*)childCollisionShape);
-					objectQuerySingle(castShape, convexFromTrans,convexToTrans,
-						collisionObject,
-						childCollisionShape,
-						childWorldTrans,
-						resultCallback, allowedPenetration);
-					// restore
-					collisionObject->internalSetTemporaryCollisionShape(saveCollisionShape);
+					
+                    struct	LocalInfoAdder : public ConvexResultCallback {	// adapter: forwards convex-cast hits to the user's callback, supplying shape info when the hit carries none
+                            ConvexResultCallback* m_userCallback;	// callback supplied by the caller; every hit is forwarded to it
+							int m_i;	// value written to LocalShapeInfo::m_triangleIndex; the call site passes the compound child index
+
+                            LocalInfoAdder (int i, ConvexResultCallback *user)
+								: m_userCallback(user), m_i(i)
+							{
+								m_closestHitFraction = m_userCallback->m_closestHitFraction;	// start from the user's current closest fraction
+							}
+							virtual bool needsCollision(btBroadphaseProxy* p) const	// filtering decision is delegated unchanged to the user callback
+							{
+								return m_userCallback->needsCollision(p);
+							}
+                            virtual btScalar addSingleResult (btCollisionWorld::LocalConvexResult&	r,	bool b)	// returns whatever the user callback returns
+                            {
+                                    btCollisionWorld::LocalShapeInfo	shapeInfo;	// local storage; only pointed to while the user callback runs below
+                                    shapeInfo.m_shapePart = -1;
+                                    shapeInfo.m_triangleIndex = m_i;
+                                    if (r.m_localShapeInfo == NULL)	// keep shape info that the nested test already provided
+                                        r.m_localShapeInfo = &shapeInfo;
+									const btScalar result = m_userCallback->addSingleResult(r, b);
+									m_closestHitFraction = m_userCallback->m_closestHitFraction;	// keep this adapter's fraction in sync with the user callback
+									return result;
+                                    
+                            }
+                    };
+
+                    LocalInfoAdder my_cb(i, &resultCallback);
+					
+					btCollisionObjectWrapper tmpObj(colObjWrap,childCollisionShape,colObjWrap->getCollisionObject(),childWorldTrans);
+
+					objectQuerySingleInternal(castShape, convexFromTrans,convexToTrans,
+						&tmpObj,my_cb, allowedPenetration);
+					
 				}
 			}
 		}
@@ -641,10 +848,10 @@ struct btSingleRayCallback : public btBroadphaseRayCallback
 	btCollisionWorld::RayResultCallback&	m_resultCallback;
 
 	btSingleRayCallback(const btVector3& rayFromWorld,const btVector3& rayToWorld,const btCollisionWorld* world,btCollisionWorld::RayResultCallback& resultCallback)
-	:m_rayFromWorld(rayFromWorld),
-	m_rayToWorld(rayToWorld),
-	m_world(world),
-	m_resultCallback(resultCallback)
+		:m_rayFromWorld(rayFromWorld),
+		m_rayToWorld(rayToWorld),
+		m_world(world),
+		m_resultCallback(resultCallback)
 	{
 		m_rayFromTrans.setIdentity();
 		m_rayFromTrans.setOrigin(m_rayFromWorld);
@@ -666,7 +873,7 @@ struct btSingleRayCallback : public btBroadphaseRayCallback
 
 	}
 
-	
+
 
 	virtual bool	process(const btBroadphaseProxy* proxy)
 	{
@@ -697,9 +904,9 @@ struct btSingleRayCallback : public btBroadphaseRayCallback
 			{
 				m_world->rayTestSingle(m_rayFromTrans,m_rayToTrans,
 					collisionObject,
-						collisionObject->getCollisionShape(),
-						collisionObject->getWorldTransform(),
-						m_resultCallback);
+					collisionObject->getCollisionShape(),
+					collisionObject->getWorldTransform(),
+					m_resultCallback);
 			}
 		}
 		return true;
@@ -771,13 +978,13 @@ struct btSingleSweepCallback : public btBroadphaseRayCallback
 		if(m_resultCallback.needsCollision(collisionObject->getBroadphaseHandle())) {
 			//RigidcollisionObject* collisionObject = ctrl->GetRigidcollisionObject();
 			m_world->objectQuerySingle(m_castShape, m_convexFromTrans,m_convexToTrans,
-					collisionObject,
-						collisionObject->getCollisionShape(),
-						collisionObject->getWorldTransform(),
-						m_resultCallback,
-						m_allowedCcdPenetration);
+				collisionObject,
+				collisionObject->getCollisionShape(),
+				collisionObject->getWorldTransform(),
+				m_resultCallback,
+				m_allowedCcdPenetration);
 		}
-		
+
 		return true;
 	}
 };
@@ -792,7 +999,7 @@ void	btCollisionWorld::convexSweepTest(const btConvexShape* castShape, const btT
 	/// and for each object with ray-aabb overlap, perform an exact ray test
 	/// unfortunately the implementation for rayTest and convexSweepTest duplicated, albeit practically identical
 
-	
+
 
 	btTransform	convexFromTrans,convexToTrans;
 	convexFromTrans = convexFromWorld;
@@ -801,13 +1008,13 @@ void	btCollisionWorld::convexSweepTest(const btConvexShape* castShape, const btT
 	/* Compute AABB that encompasses angular movement */
 	{
 		btVector3 linVel, angVel;
-		btTransformUtil::calculateVelocity (convexFromTrans, convexToTrans, 1.0, linVel, angVel);
+		btTransformUtil::calculateVelocity (convexFromTrans, convexToTrans, 1.0f, linVel, angVel);
 		btVector3 zeroLinVel;
 		zeroLinVel.setValue(0,0,0);
 		btTransform R;
 		R.setIdentity ();
 		R.setRotation (convexFromTrans.getRotation());
-		castShape->calculateTemporalAabb (R, zeroLinVel, angVel, 1.0, castShapeAabbMin, castShapeAabbMax);
+		castShape->calculateTemporalAabb (R, zeroLinVel, angVel, 1.0f, castShapeAabbMin, castShapeAabbMax);
 	}
 
 #ifndef USE_BRUTEFORCE_RAYBROADPHASE
@@ -835,12 +1042,498 @@ void	btCollisionWorld::convexSweepTest(const btConvexShape* castShape, const btT
 			{
 				objectQuerySingle(castShape, convexFromTrans,convexToTrans,
 					collisionObject,
-						collisionObject->getCollisionShape(),
-						collisionObject->getWorldTransform(),
-						resultCallback,
-						allowedCcdPenetration);
+					collisionObject->getCollisionShape(),
+					collisionObject->getWorldTransform(),
+					resultCallback,
+					allowedCcdPenetration);
 			}
 		}
 	}
 #endif //USE_BRUTEFORCE_RAYBROADPHASE
 }
+
+
+///btBridgedManifoldResult is a btManifoldResult that turns every contact point added to it into a btManifoldPoint and reports it to a btCollisionWorld::ContactResultCallback
+struct btBridgedManifoldResult : public btManifoldResult
+{
+	///user callback that receives every contact point passed to addContactPoint
+	btCollisionWorld::ContactResultCallback&	m_resultCallback;
+	///forwards both object wrappers to btManifoldResult and keeps a reference to the user callback
+	btBridgedManifoldResult( const btCollisionObjectWrapper* obj0Wrap,const btCollisionObjectWrapper* obj1Wrap,btCollisionWorld::ContactResultCallback& resultCallback )
+		:btManifoldResult(obj0Wrap,obj1Wrap),
+		m_resultCallback(resultCallback)
+	{
+	}
+	///builds a btManifoldPoint from the world-space normal, point (stored as the position on B) and depth, then reports it to m_resultCallback
+	virtual void addContactPoint(const btVector3& normalOnBInWorld,const btVector3& pointInWorld,btScalar depth)
+	{
+		bool isSwapped = m_manifoldPtr->getBody0() != m_body0Wrap->getCollisionObject(); //true when the manifold's body0 is not the object wrapped by m_body0Wrap
+		btVector3 pointA = pointInWorld + normalOnBInWorld * depth; //stored below as the world position on A
+		btVector3 localA;
+		btVector3 localB;
+		if (isSwapped)
+		{
+			localA = m_body1Wrap->getCollisionObject()->getWorldTransform().invXform(pointA );
+			localB = m_body0Wrap->getCollisionObject()->getWorldTransform().invXform(pointInWorld);
+		} else
+		{
+			localA = m_body0Wrap->getCollisionObject()->getWorldTransform().invXform(pointA );
+			localB = m_body1Wrap->getCollisionObject()->getWorldTransform().invXform(pointInWorld);
+		}
+		//localA/localB are the two world points mapped through the inverse world transform of their respective object
+		btManifoldPoint newPt(localA,localB,normalOnBInWorld,depth);
+		newPt.m_positionWorldOnA = pointA;
+		newPt.m_positionWorldOnB = pointInWorld;
+		
+	   //BP mod, store contact triangles.
+		if (isSwapped)
+		{
+			newPt.m_partId0 = m_partId1;
+			newPt.m_partId1 = m_partId0;
+			newPt.m_index0  = m_index1;
+			newPt.m_index1  = m_index0;
+		} else
+		{
+			newPt.m_partId0 = m_partId0;
+			newPt.m_partId1 = m_partId1;
+			newPt.m_index0  = m_index0;
+			newPt.m_index1  = m_index1;
+		}
+
+		//experimental feature info, for per-triangle material etc.
+		const btCollisionObjectWrapper* obj0Wrap = isSwapped? m_body1Wrap : m_body0Wrap;
+		const btCollisionObjectWrapper* obj1Wrap = isSwapped? m_body0Wrap : m_body1Wrap;
+		m_resultCallback.addSingleResult(newPt,obj0Wrap,newPt.m_partId0,newPt.m_index0,obj1Wrap,newPt.m_partId1,newPt.m_index1);
+		//the value returned by addSingleResult is ignored here
+	}
+	
+};
+
+
+///btSingleContactCallback is the broadphase AABB callback used by btCollisionWorld::contactTest: for each proxy handed to process() it runs a discrete collision query between m_collisionObject and the proxy's object
+struct btSingleContactCallback : public btBroadphaseAabbCallback
+{
+	///m_collisionObject is the object being tested; m_resultCallback receives the contact points
+	btCollisionObject* m_collisionObject;
+	btCollisionWorld*	m_world;
+	btCollisionWorld::ContactResultCallback&	m_resultCallback;
+	
+	///stores the query object, the world and a reference to the user callback
+	btSingleContactCallback(btCollisionObject* collisionObject, btCollisionWorld* world,btCollisionWorld::ContactResultCallback& resultCallback)
+		:m_collisionObject(collisionObject),
+		m_world(world),
+		m_resultCallback(resultCallback)
+	{
+	}
+	///tests m_collisionObject against the object stored in proxy->m_clientObject; always returns true
+	virtual bool	process(const btBroadphaseProxy* proxy)
+	{
+		btCollisionObject*	collisionObject = (btCollisionObject*)proxy->m_clientObject;
+		if (collisionObject == m_collisionObject)
+			return true; //never test the query object against itself
+
+		//only perform the contact test if the callback's collision filter accepts this object
+		if(m_resultCallback.needsCollision(collisionObject->getBroadphaseHandle())) 
+		{
+			btCollisionObjectWrapper ob0(0,m_collisionObject->getCollisionShape(),m_collisionObject,m_collisionObject->getWorldTransform());
+			btCollisionObjectWrapper ob1(0,collisionObject->getCollisionShape(),collisionObject,collisionObject->getWorldTransform());
+
+			btCollisionAlgorithm* algorithm = m_world->getDispatcher()->findAlgorithm(&ob0,&ob1);
+			if (algorithm)
+			{
+				btBridgedManifoldResult contactPointResult(&ob0,&ob1, m_resultCallback);
+				//discrete collision detection query
+				
+				algorithm->processCollision(&ob0,&ob1, m_world->getDispatchInfo(),&contactPointResult);
+				//explicitly destroy the algorithm, then release it through the dispatcher that provided it
+				algorithm->~btCollisionAlgorithm();
+				m_world->getDispatcher()->freeCollisionAlgorithm(algorithm);
+			}
+		}
+		return true;
+	}
+};
+
+
+///contactTest performs a discrete collision test against all objects in the btCollisionWorld, and calls the resultCallback.
+///it reports one or more contact points for every overlapping object (including the one with deepest penetration)
+void	btCollisionWorld::contactTest( btCollisionObject* colObj, ContactResultCallback& resultCallback)
+{
+	btVector3 aabbMin,aabbMax;
+	colObj->getCollisionShape()->getAabb(colObj->getWorldTransform(),aabbMin,aabbMax); //AABB of colObj at its current world transform
+	btSingleContactCallback	contactCB(colObj,this,resultCallback);
+	//run the broadphase AABB query with contactCB as its callback
+	m_broadphasePairCache->aabbTest(aabbMin,aabbMax,contactCB);
+}
+
+
+///contactPairTest performs a discrete collision test between two collision objects and calls the resultCallback if overlap is detected.
+///it reports one or more contact points (including the one with deepest penetration)
+void	btCollisionWorld::contactPairTest(btCollisionObject* colObjA, btCollisionObject* colObjB, ContactResultCallback& resultCallback)
+{
+	btCollisionObjectWrapper obA(0,colObjA->getCollisionShape(),colObjA,colObjA->getWorldTransform());
+	btCollisionObjectWrapper obB(0,colObjB->getCollisionShape(),colObjB,colObjB->getWorldTransform());
+	//nothing is reported when the dispatcher returns no algorithm for this pair
+	btCollisionAlgorithm* algorithm = getDispatcher()->findAlgorithm(&obA,&obB);
+	if (algorithm)
+	{
+		btBridgedManifoldResult contactPointResult(&obA,&obB, resultCallback);
+		//discrete collision detection query
+		algorithm->processCollision(&obA,&obB, getDispatchInfo(),&contactPointResult);
+		//explicitly destroy the algorithm, then release it through the dispatcher that provided it
+		algorithm->~btCollisionAlgorithm();
+		getDispatcher()->freeCollisionAlgorithm(algorithm);
+	}
+
+}
+
+
+
+///DebugDrawcallback is a triangle callback that draws the three edges of every triangle it receives (and optionally the triangle normal) through a btIDebugDraw
+class DebugDrawcallback : public btTriangleCallback, public btInternalTriangleIndexCallback
+{
+	btIDebugDraw*	m_debugDrawer;
+	btVector3	m_color;
+	btTransform	m_worldTrans;
+	//m_worldTrans is applied to every incoming triangle vertex; m_color is used for the triangle edges
+public:
+	///stores the drawer, the transform applied to incoming vertices and the edge color
+	DebugDrawcallback(btIDebugDraw*	debugDrawer,const btTransform& worldTrans,const btVector3& color) :
+	  m_debugDrawer(debugDrawer),
+		  m_color(color),
+		  m_worldTrans(worldTrans)
+	  {
+	  }
+	  ///btInternalTriangleIndexCallback entry point; simply forwards to processTriangle
+	  virtual void internalProcessTriangleIndex(btVector3* triangle,int partId,int  triangleIndex)
+	  {
+		  processTriangle(triangle,partId,triangleIndex);
+	  }
+	  ///transforms the three vertices by m_worldTrans and draws the triangle outline; partId and triangleIndex are unused
+	  virtual void processTriangle(btVector3* triangle,int partId, int triangleIndex)
+	  {
+		  (void)partId;
+		  (void)triangleIndex;
+
+		  btVector3 wv0,wv1,wv2;
+		  wv0 = m_worldTrans*triangle[0];
+		  wv1 = m_worldTrans*triangle[1];
+		  wv2 = m_worldTrans*triangle[2];
+		  btVector3 center = (wv0+wv1+wv2)*btScalar(1./3.);
+          //with DBG_DrawNormals set, also draw the normalized cross product of two edges, starting at the triangle center
+          if (m_debugDrawer->getDebugMode() & btIDebugDraw::DBG_DrawNormals )
+          {
+		    btVector3 normal = (wv1-wv0).cross(wv2-wv0);
+		    normal.normalize();
+		    btVector3 normalColor(1,1,0);
+		    m_debugDrawer->drawLine(center,center+normal,normalColor);
+          }
+		  m_debugDrawer->drawLine(wv0,wv1,m_color);
+		  m_debugDrawer->drawLine(wv1,wv2,m_color);
+		  m_debugDrawer->drawLine(wv2,wv0,m_color);
+	  }
+};
+
+///debugDrawObject draws the given shape, placed at worldTransform, through getDebugDrawer(). getDebugDrawer() is dereferenced without a null check, so the caller must make sure a debug drawer is set.
+void btCollisionWorld::debugDrawObject(const btTransform& worldTransform, const btCollisionShape* shape, const btVector3& color)
+{
+	// Draw a small simplex at the center of the object
+	getDebugDrawer()->drawTransform(worldTransform,1);
+	// Compound shapes: recurse into every child with the child transform concatenated onto worldTransform
+	if (shape->getShapeType() == COMPOUND_SHAPE_PROXYTYPE)
+	{
+		const btCompoundShape* compoundShape = static_cast<const btCompoundShape*>(shape);
+		for (int i=compoundShape->getNumChildShapes()-1;i>=0;i--)
+		{
+			btTransform childTrans = compoundShape->getChildTransform(i);
+			const btCollisionShape* colShape = compoundShape->getChildShape(i);
+			debugDrawObject(worldTransform*childTrans,colShape,color);
+		}
+
+	} else
+	{
+        // Shape types with a dedicated btIDebugDraw primitive are handled by their own case; everything else goes to default
+        switch (shape->getShapeType())
+        {
+
+        case BOX_SHAPE_PROXYTYPE:
+            {
+                const btBoxShape* boxShape = static_cast<const btBoxShape*>(shape);
+                btVector3 halfExtents = boxShape->getHalfExtentsWithMargin();
+                getDebugDrawer()->drawBox(-halfExtents,halfExtents,worldTransform,color);
+                break;
+            }
+
+        case SPHERE_SHAPE_PROXYTYPE:
+            {
+                const btSphereShape* sphereShape = static_cast<const btSphereShape*>(shape);
+                btScalar radius = sphereShape->getMargin();//radius doesn't include the margin, so draw with margin
+
+                getDebugDrawer()->drawSphere(radius, worldTransform, color);
+                break;
+            }
+        case MULTI_SPHERE_SHAPE_PROXYTYPE:
+            {
+                const btMultiSphereShape* multiSphereShape = static_cast<const btMultiSphereShape*>(shape);
+
+                btTransform childTransform;
+                childTransform.setIdentity();
+                // draw every sphere at its own position, expressed relative to worldTransform
+                for (int i = multiSphereShape->getSphereCount()-1; i>=0;i--)
+                {
+                    childTransform.setOrigin(multiSphereShape->getSpherePosition(i));
+                    getDebugDrawer()->drawSphere(multiSphereShape->getSphereRadius(i), worldTransform*childTransform, color);
+                }
+
+                break;
+            }
+        case CAPSULE_SHAPE_PROXYTYPE:
+            {
+                const btCapsuleShape* capsuleShape = static_cast<const btCapsuleShape*>(shape);
+
+                btScalar radius = capsuleShape->getRadius();
+                btScalar halfHeight = capsuleShape->getHalfHeight();
+
+                int upAxis = capsuleShape->getUpAxis();
+                getDebugDrawer()->drawCapsule(radius, halfHeight, upAxis, worldTransform, color);
+                break;
+            }
+        case CONE_SHAPE_PROXYTYPE:
+            {
+                const btConeShape* coneShape = static_cast<const btConeShape*>(shape);
+                btScalar radius = coneShape->getRadius();//+coneShape->getMargin();
+                btScalar height = coneShape->getHeight();//+coneShape->getMargin();
+
+                int upAxis= coneShape->getConeUpIndex();
+                getDebugDrawer()->drawCone(radius, height, upAxis, worldTransform, color);
+                break;
+
+            }
+        case CYLINDER_SHAPE_PROXYTYPE:
+            {
+                const btCylinderShape* cylinder = static_cast<const btCylinderShape*>(shape);
+                int upAxis = cylinder->getUpAxis();
+                btScalar radius = cylinder->getRadius();
+                btScalar halfHeight = cylinder->getHalfExtentsWithMargin()[upAxis];
+                getDebugDrawer()->drawCylinder(radius, halfHeight, upAxis, worldTransform, color);
+                break;
+            }
+
+        case STATIC_PLANE_PROXYTYPE:
+            {
+                const btStaticPlaneShape* staticPlaneShape = static_cast<const btStaticPlaneShape*>(shape);
+                btScalar planeConst = staticPlaneShape->getPlaneConstant();
+                const btVector3& planeNormal = staticPlaneShape->getPlaneNormal();
+                getDebugDrawer()->drawPlane(planeNormal, planeConst,worldTransform, color);
+                break;
+
+            }
+        default:
+            {
+                // The three checks below are independent of each other; each one that matches draws the shape
+                /// for polyhedral shapes
+                if (shape->isPolyhedral())
+                {
+                    btPolyhedralConvexShape* polyshape = (btPolyhedralConvexShape*) shape;
+                    // prefer the face data of the convex polyhedron when present, otherwise fall back to the edge list
+                    int i;
+                    if (polyshape->getConvexPolyhedron())
+                    {
+                        const btConvexPolyhedron* poly = polyshape->getConvexPolyhedron();
+                        for (i=0;i<poly->m_faces.size();i++)
+                        {
+                            btVector3 centroid(0,0,0);
+                            int numVerts = poly->m_faces[i].m_indices.size();
+                            if (numVerts)
+                            {
+                                int lastV = poly->m_faces[i].m_indices[numVerts-1];
+                                for (int v=0;v<poly->m_faces[i].m_indices.size();v++)
+                                {
+                                    int curVert = poly->m_faces[i].m_indices[v];
+                                    centroid+=poly->m_vertices[curVert];
+                                    getDebugDrawer()->drawLine(worldTransform*poly->m_vertices[lastV],worldTransform*poly->m_vertices[curVert],color);
+                                    lastV = curVert;
+                                }
+                                centroid*= btScalar(1.f)/btScalar(numVerts); // average the summed vertices; kept inside the guard so an empty face never divides by zero
+                            }
+                            if (getDebugDrawer()->getDebugMode() & btIDebugDraw::DBG_DrawNormals)
+                            {
+                                btVector3 normalColor(1,1,0);
+                                btVector3 faceNormal(poly->m_faces[i].m_plane[0],poly->m_faces[i].m_plane[1],poly->m_faces[i].m_plane[2]);
+                                getDebugDrawer()->drawLine(worldTransform*centroid,worldTransform*(centroid+faceNormal),normalColor);
+                            }
+
+                        }
+
+
+                    } else
+                    {
+                        for (i=0;i<polyshape->getNumEdges();i++)
+                        {
+                            btVector3 a,b;
+                            polyshape->getEdge(i,a,b);
+                            btVector3 wa = worldTransform * a;
+                            btVector3 wb = worldTransform * b;
+                            getDebugDrawer()->drawLine(wa,wb,color);
+                        }
+                    }
+
+
+                }
+
+                if (shape->isConcave())
+                {
+                    btConcaveShape* concaveMesh = (btConcaveShape*) shape;
+
+                    ///@todo pass camera, for some culling? no -> we are not a graphics lib
+                    btVector3 aabbMax(btScalar(BT_LARGE_FLOAT),btScalar(BT_LARGE_FLOAT),btScalar(BT_LARGE_FLOAT));
+                    btVector3 aabbMin(btScalar(-BT_LARGE_FLOAT),btScalar(-BT_LARGE_FLOAT),btScalar(-BT_LARGE_FLOAT));
+                    // the query box spans +/-BT_LARGE_FLOAT on every axis and each reported triangle is drawn by drawCallback
+                    DebugDrawcallback drawCallback(getDebugDrawer(),worldTransform,color);
+                    concaveMesh->processAllTriangles(&drawCallback,aabbMin,aabbMax);
+
+                }
+
+                if (shape->getShapeType() == CONVEX_TRIANGLEMESH_SHAPE_PROXYTYPE)
+                {
+                    btConvexTriangleMeshShape* convexMesh = (btConvexTriangleMeshShape*) shape;
+                    //todo: pass camera for some culling			
+                    btVector3 aabbMax(btScalar(BT_LARGE_FLOAT),btScalar(BT_LARGE_FLOAT),btScalar(BT_LARGE_FLOAT));
+                    btVector3 aabbMin(btScalar(-BT_LARGE_FLOAT),btScalar(-BT_LARGE_FLOAT),btScalar(-BT_LARGE_FLOAT));
+                    //DebugDrawcallback drawCallback;
+                    DebugDrawcallback drawCallback(getDebugDrawer(),worldTransform,color);
+                    convexMesh->getMeshInterface()->InternalProcessAllTriangles(&drawCallback,aabbMin,aabbMax);
+                }
+
+
+
+            }
+
+		}
+	}
+}
+
+///debugDrawWorld draws contact points, object wireframes and object AABBs through the debug drawer, depending on which btIDebugDraw debug-mode flags are set; it does nothing without a debug drawer
+void	btCollisionWorld::debugDrawWorld()
+{
+	if (getDebugDrawer() && getDebugDrawer()->getDebugMode() & btIDebugDraw::DBG_DrawContactPoints)
+	{
+		int numManifolds = getDispatcher()->getNumManifolds();
+		btVector3 color(1,1,0);
+		for (int i=0;i<numManifolds;i++)
+		{
+			btPersistentManifold* contactManifold = getDispatcher()->getManifoldByIndexInternal(i);
+			//btCollisionObject* obA = static_cast<btCollisionObject*>(contactManifold->getBody0());
+			//btCollisionObject* obB = static_cast<btCollisionObject*>(contactManifold->getBody1());
+			//draw every contact point of this manifold at its world position on B
+			int numContacts = contactManifold->getNumContacts();
+			for (int j=0;j<numContacts;j++)
+			{
+				btManifoldPoint& cp = contactManifold->getContactPoint(j);
+				getDebugDrawer()->drawContactPoint(cp.m_positionWorldOnB,cp.m_normalWorldOnB,cp.getDistance(),cp.getLifeTime(),color);
+			}
+		}
+	}
+	//per-object drawing: wireframe and/or AABB
+	if (getDebugDrawer() && (getDebugDrawer()->getDebugMode() & (btIDebugDraw::DBG_DrawWireframe | btIDebugDraw::DBG_DrawAabb)))
+	{
+		int i;
+
+		for (  i=0;i<m_collisionObjects.size();i++)
+		{
+			btCollisionObject* colObj = m_collisionObjects[i];
+			if ((colObj->getCollisionFlags() & btCollisionObject::CF_DISABLE_VISUALIZE_OBJECT)==0) //objects carrying this flag are skipped entirely
+			{
+				if (getDebugDrawer() && (getDebugDrawer()->getDebugMode() & btIDebugDraw::DBG_DrawWireframe))
+				{
+					btVector3 color(btScalar(1.),btScalar(1.),btScalar(1.));
+					switch(colObj->getActivationState()) //the wireframe color depends on the object's activation state
+					{
+					case  ACTIVE_TAG:
+						color = btVector3(btScalar(1.),btScalar(1.),btScalar(1.)); break;
+					case ISLAND_SLEEPING:
+						color =  btVector3(btScalar(0.),btScalar(1.),btScalar(0.));break;
+					case WANTS_DEACTIVATION:
+						color = btVector3(btScalar(0.),btScalar(1.),btScalar(1.));break;
+					case DISABLE_DEACTIVATION:
+						color = btVector3(btScalar(1.),btScalar(0.),btScalar(0.));break;
+					case DISABLE_SIMULATION:
+						color = btVector3(btScalar(1.),btScalar(1.),btScalar(0.));break;
+					default:
+						{
+							color = btVector3(btScalar(1),btScalar(0.),btScalar(0.));
+						}
+					};
+
+					debugDrawObject(colObj->getWorldTransform(),colObj->getCollisionShape(),color);
+				}
+				if (m_debugDrawer && (m_debugDrawer->getDebugMode() & btIDebugDraw::DBG_DrawAabb))
+				{
+					btVector3 minAabb,maxAabb;
+					btVector3 colorvec(1,0,0);
+					colObj->getCollisionShape()->getAabb(colObj->getWorldTransform(), minAabb,maxAabb);
+					btVector3 contactThreshold(gContactBreakingThreshold,gContactBreakingThreshold,gContactBreakingThreshold);
+					minAabb -= contactThreshold; //grow the box by gContactBreakingThreshold on every axis
+					maxAabb += contactThreshold;
+
+					btVector3 minAabb2,maxAabb2;
+					//with continuous collision enabled, a rigid body that is neither static nor kinematic also gets the (equally grown) AABB at its interpolation transform merged in
+					if(getDispatchInfo().m_useContinuous && colObj->getInternalType()==btCollisionObject::CO_RIGID_BODY && !colObj->isStaticOrKinematicObject())
+					{
+						colObj->getCollisionShape()->getAabb(colObj->getInterpolationWorldTransform(),minAabb2,maxAabb2);
+						minAabb2 -= contactThreshold;
+						maxAabb2 += contactThreshold;
+						minAabb.setMin(minAabb2);
+						maxAabb.setMax(maxAabb2);
+					}
+
+					m_debugDrawer->drawAabb(minAabb,maxAabb,colorvec);
+				}
+			}
+
+		}
+	}
+}
+
+///serializeCollisionObjects passes the world's plain collision objects, and then the collision shape of every object in the world, to the serializer
+void	btCollisionWorld::serializeCollisionObjects(btSerializer* serializer)
+{
+	int i;
+	//serialize the collision objects whose internal type is CO_COLLISION_OBJECT; objects of any other internal type are skipped in this loop
+	for (i=0;i<m_collisionObjects.size();i++)
+	{
+		btCollisionObject* colObj = m_collisionObjects[i];
+		if (colObj->getInternalType() == btCollisionObject::CO_COLLISION_OBJECT)
+		{
+			colObj->serializeSingleObject(serializer);
+		}
+	}
+
+	///keep track of shapes already serialized
+	btHashMap<btHashPtr,btCollisionShape*>	serializedShapes;
+	//this loop visits every object regardless of internal type; a shape shared by several objects is serialized only once
+	for (i=0;i<m_collisionObjects.size();i++)
+	{
+		btCollisionObject* colObj = m_collisionObjects[i];
+		btCollisionShape* shape = colObj->getCollisionShape();
+
+		if (!serializedShapes.find(shape))
+		{
+			serializedShapes.insert(shape,shape);
+			shape->serializeSingleShape(serializer);
+		}
+	}
+
+}
+
+///serialize writes the world's collision objects and their shapes to the serializer, bracketed by startSerialization/finishSerialization
+void	btCollisionWorld::serialize(btSerializer* serializer)
+{
+
+	serializer->startSerialization();
+	//objects and shapes are written by serializeCollisionObjects
+	serializeCollisionObjects(serializer);
+	
+	serializer->finishSerialization();
+}
+
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btCollisionWorld.h b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btCollisionWorld.h
index 983037e69..9412242e8 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btCollisionWorld.h
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btCollisionWorld.h
@@ -22,6 +22,7 @@ subject to the following restrictions:
  *
  * Bullet is a Collision Detection and Rigid Body Dynamics Library. The Library is Open Source and free for commercial use, under the ZLib license ( http://opensource.org/licenses/zlib-license.php ).
  *
+ * The main documentation is Bullet_User_Manual.pdf, included in the source code distribution.
  * There is the Physics Forum for feedback and general Collision Detection and Physics discussions.
  * Please visit http://www.bulletphysics.com
  *
@@ -29,14 +30,16 @@ subject to the following restrictions:
  *
  * @subsection step1 Step 1: Download
  * You can download the Bullet Physics Library from the Google Code repository: http://code.google.com/p/bullet/downloads/list
+ *
  * @subsection step2 Step 2: Building
- * Bullet comes with autogenerated Project Files for Microsoft Visual Studio 6, 7, 7.1 and 8.
- * The main Workspace/Solution is located in Bullet/msvc/8/wksbullet.sln (replace 8 with your version).
- * 
- * Under other platforms, like Linux or Mac OS-X, Bullet can be build using either using make, cmake, http://www.cmake.org , or jam, http://www.perforce.com/jam/jam.html . cmake can autogenerate Xcode, KDevelop, MSVC and other build systems. just run cmake . in the root of Bullet.
- * So if you are not using MSVC or cmake, you can run ./autogen.sh ./configure to create both Makefile and Jamfile and then run make or jam.
- * Jam is a build system that can build the library, demos and also autogenerate the MSVC Project Files.
- * If you don't have jam installed, you can make jam from the included jam-2.5 sources, or download jam from ftp://ftp.perforce.com/jam
+ * Bullet's main build system for all platforms is cmake, which you can download from http://www.cmake.org
+ * cmake can autogenerate projectfiles for Microsoft Visual Studio, Apple Xcode, KDevelop and Unix Makefiles.
+ * The easiest is to run the CMake cmake-gui graphical user interface and choose the options and generate projectfiles.
+ * You can also use cmake in the command-line. Here are some examples for various platforms:
+ * cmake . -G "Visual Studio 9 2008"
+ * cmake . -G Xcode
+ * cmake . -G "Unix Makefiles"
+ * Although cmake is recommended, you can also use autotools for UNIX: ./autogen.sh ./configure to create a Makefile and then run make.
  * 
  * @subsection step3 Step 3: Testing demos
  * Try to run and experiment with BasicDemo executable as a starting point.
@@ -53,21 +56,21 @@ subject to the following restrictions:
  * Bullet has been designed in a modular way keeping dependencies to a minimum. The ConvexHullDistance demo demonstrates direct use of btGjkPairDetector.
  *
  * @section copyright Copyright
- * Copyright (C) 2005-2008 Erwin Coumans, some contributions Copyright Gino van den Bergen, Christer Ericson, Simon Hobbs, Ricardo Padrela, F Richter(res), Stephane Redon
- * Special thanks to all visitors of the Bullet Physics forum, and in particular above contributors, John McCutchan, Nathanael Presson, Dave Eberle, Dirk Gregorius, Erin Catto, Dave Eberle, Adam Moravanszky,
- * Pierre Terdiman, Kenny Erleben, Russell Smith, Oliver Strunk, Jan Paul van Waveren, Marten Svanfeldt.
+ * For up-to-date information and the copyright and contributors list, check out the Bullet_User_Manual.pdf
  * 
  */
  
  
 
-#ifndef COLLISION_WORLD_H
-#define COLLISION_WORLD_H
+#ifndef BT_COLLISION_WORLD_H
+#define BT_COLLISION_WORLD_H
 
 class btStackAlloc;
 class btCollisionShape;
 class btConvexShape;
 class btBroadphaseInterface;
+class btSerializer;
+
 #include "LinearMath/btVector3.h"
 #include "LinearMath/btTransform.h"
 #include "btCollisionObject.h"
@@ -98,6 +101,8 @@ protected:
 	///it is true by default, because it is error-prone (setting the position of static objects wouldn't update their AABB)
 	bool m_forceUpdateAllAabbs;
 
+	void	serializeCollisionObjects(btSerializer* serializer);
+
 public:
 
 	//this constructor doesn't own the dispatcher and paircache/broadphase
@@ -139,6 +144,11 @@ public:
 	void	updateSingleAabb(btCollisionObject* colObj);
 
 	virtual void	updateAabbs();
+
+	///the computeOverlappingPairs is usually already called by performDiscreteCollisionDetection (or stepSimulation)
+	///it can be useful to use if you perform ray tests without collision detection/simulation
+	virtual void	computeOverlappingPairs();
+
 	
 	virtual void	setDebugDrawer(btIDebugDraw*	debugDrawer)
 	{
@@ -150,6 +160,10 @@ public:
 		return m_debugDrawer;
 	}
 
+	virtual void	debugDrawWorld();
+
+	virtual void debugDrawObject(const btTransform& worldTransform, const btCollisionShape* shape, const btVector3& color);
+
 
 	///LocalShapeInfo gives extra information for complex shapes
 	///Currently, only btTriangleMeshShape is available, so it just contains triangleIndex and subpart
@@ -164,7 +178,7 @@ public:
 
 	struct	LocalRayResult
 	{
-		LocalRayResult(btCollisionObject*	collisionObject, 
+		LocalRayResult(const btCollisionObject*	collisionObject, 
 			LocalShapeInfo*	localShapeInfo,
 			const btVector3&		hitNormalLocal,
 			btScalar hitFraction)
@@ -175,7 +189,7 @@ public:
 		{
 		}
 
-		btCollisionObject*		m_collisionObject;
+		const btCollisionObject*		m_collisionObject;
 		LocalShapeInfo*			m_localShapeInfo;
 		btVector3				m_hitNormalLocal;
 		btScalar				m_hitFraction;
@@ -186,11 +200,11 @@ public:
 	struct	RayResultCallback
 	{
 		btScalar	m_closestHitFraction;
-		btCollisionObject*		m_collisionObject;
+		const btCollisionObject*		m_collisionObject;
 		short int	m_collisionFilterGroup;
 		short int	m_collisionFilterMask;
-      //@BP Mod - Custom flags, currently used to enable backface culling on tri-meshes, see btRaycastCallback
-      unsigned int m_flags;
+		//@BP Mod - Custom flags, currently used to enable backface culling on tri-meshes, see btRaycastCallback.h. Apply any of the EFlags defined there on m_flags here to invoke.
+		unsigned int m_flags;
 
 		virtual ~RayResultCallback()
 		{
@@ -205,8 +219,8 @@ public:
 			m_collisionObject(0),
 			m_collisionFilterGroup(btBroadphaseProxy::DefaultFilter),
 			m_collisionFilterMask(btBroadphaseProxy::AllFilter),
-         //@BP Mod
-         m_flags(0)
+			//@BP Mod
+			m_flags(0)
 		{
 		}
 
@@ -255,10 +269,49 @@ public:
 		}
 	};
 
+	struct	AllHitsRayResultCallback : public RayResultCallback
+	{
+		AllHitsRayResultCallback(const btVector3&	rayFromWorld,const btVector3&	rayToWorld)
+		:m_rayFromWorld(rayFromWorld),
+		m_rayToWorld(rayToWorld)
+		{
+		}
+		///every object reported to addSingleResult, in call order; the three arrays further down get one entry per call as well
+		btAlignedObjectArray<const btCollisionObject*>		m_collisionObjects;
+
+		btVector3	m_rayFromWorld;//used to calculate hitPointWorld from hitFraction
+		btVector3	m_rayToWorld;
+
+		btAlignedObjectArray<btVector3>	m_hitNormalWorld;
+		btAlignedObjectArray<btVector3>	m_hitPointWorld;
+		btAlignedObjectArray<btScalar> m_hitFractions;
+		///records the hit in all arrays and returns m_closestHitFraction, which this method never modifies
+		virtual	btScalar	addSingleResult(LocalRayResult& rayResult,bool normalInWorldSpace)
+		{
+			m_collisionObject = rayResult.m_collisionObject;
+			m_collisionObjects.push_back(rayResult.m_collisionObject);
+			btVector3 hitNormalWorld;
+			if (normalInWorldSpace)
+			{
+				hitNormalWorld = rayResult.m_hitNormalLocal;
+			} else
+			{
+				///need to transform normal into worldspace
+				hitNormalWorld = m_collisionObject->getWorldTransform().getBasis()*rayResult.m_hitNormalLocal;
+			}
+			m_hitNormalWorld.push_back(hitNormalWorld);
+			btVector3 hitPointWorld;
+			hitPointWorld.setInterpolate3(m_rayFromWorld,m_rayToWorld,rayResult.m_hitFraction); //hit point derived from the ray end points and the hit fraction
+			m_hitPointWorld.push_back(hitPointWorld);
+			m_hitFractions.push_back(rayResult.m_hitFraction);
+			return m_closestHitFraction;
+		}
+	};
+
 
 	struct LocalConvexResult
 	{
-		LocalConvexResult(btCollisionObject*	hitCollisionObject, 
+		LocalConvexResult(const btCollisionObject*	hitCollisionObject, 
 			LocalShapeInfo*	localShapeInfo,
 			const btVector3&		hitNormalLocal,
 			const btVector3&		hitPointLocal,
@@ -272,7 +325,7 @@ public:
 		{
 		}
 
-		btCollisionObject*		m_hitCollisionObject;
+		const btCollisionObject*		m_hitCollisionObject;
 		LocalShapeInfo*			m_localShapeInfo;
 		btVector3				m_hitNormalLocal;
 		btVector3				m_hitPointLocal;
@@ -328,7 +381,7 @@ public:
 
 		btVector3	m_hitNormalWorld;
 		btVector3	m_hitPointWorld;
-		btCollisionObject*	m_hitCollisionObject;
+		const btCollisionObject*	m_hitCollisionObject;
 		
 		virtual	btScalar	addSingleResult(LocalConvexResult& convexResult,bool normalInWorldSpace)
 		{
@@ -350,6 +403,34 @@ public:
 		}
 	};
 
+	///ContactResultCallback is used to report contact points
+	struct	ContactResultCallback
+	{
+		short int	m_collisionFilterGroup;
+		short int	m_collisionFilterMask;
+		///the filter group defaults to btBroadphaseProxy::DefaultFilter and the filter mask to btBroadphaseProxy::AllFilter
+		ContactResultCallback()
+			:m_collisionFilterGroup(btBroadphaseProxy::DefaultFilter),
+			m_collisionFilterMask(btBroadphaseProxy::AllFilter)
+		{
+		}
+
+		virtual ~ContactResultCallback()
+		{
+		}
+		///returns true only when the filter matches in both directions: the proxy's group against this mask, and this group against the proxy's mask
+		virtual bool needsCollision(btBroadphaseProxy* proxy0) const
+		{
+			bool collides = (proxy0->m_collisionFilterGroup & m_collisionFilterMask) != 0;
+			collides = collides && (m_collisionFilterGroup & proxy0->m_collisionFilterMask);
+			return collides;
+		}
+		///implemented by the user; receives each contact point with both object wrappers and their part ids and indices
+		virtual	btScalar	addSingleResult(btManifoldPoint& cp,	const btCollisionObjectWrapper* colObj0Wrap,int partId0,int index0,const btCollisionObjectWrapper* colObj1Wrap,int partId1,int index1) = 0;
+	};
+
+
+
 	int	getNumCollisionObjects() const
 	{
 		return int(m_collisionObjects.size());
@@ -357,12 +438,20 @@ public:
 
 	/// rayTest performs a raycast on all objects in the btCollisionWorld, and calls the resultCallback
 	/// This allows for several queries: first hit, all hits, any hit, dependent on the value returned by the callback.
-	void	rayTest(const btVector3& rayFromWorld, const btVector3& rayToWorld, RayResultCallback& resultCallback) const; 
+	virtual void rayTest(const btVector3& rayFromWorld, const btVector3& rayToWorld, RayResultCallback& resultCallback) const; 
 
-	// convexTest performs a swept convex cast on all objects in the btCollisionWorld, and calls the resultCallback
-	// This allows for several queries: first hit, all hits, any hit, dependent on the value return by the callback.
+	/// convexSweepTest performs a swept convex cast on all objects in the btCollisionWorld, and calls the resultCallback
+	/// This allows for several queries: first hit, all hits, any hit, dependent on the value returned by the callback.
 	void    convexSweepTest (const btConvexShape* castShape, const btTransform& from, const btTransform& to, ConvexResultCallback& resultCallback,  btScalar allowedCcdPenetration = btScalar(0.)) const;
 
+	///contactTest performs a discrete collision test between colObj against all objects in the btCollisionWorld, and calls the resultCallback.
+	///it reports one or more contact points for every overlapping object (including the one with deepest penetration)
+	void	contactTest(btCollisionObject* colObj, ContactResultCallback& resultCallback);
+
+	///contactPairTest performs a discrete collision test between two collision objects and calls the resultCallback if overlap is detected.
+	///it reports one or more contact points (including the one with deepest penetration)
+	void	contactPairTest(btCollisionObject* colObjA, btCollisionObject* colObjB, ContactResultCallback& resultCallback);
+
 
 	/// rayTestSingle performs a raycast call and calls the resultCallback. It is used internally by rayTest.
 	/// In a future implementation, we consider moving the ray test as a virtual method in btCollisionShape.
@@ -373,6 +462,10 @@ public:
 					  const btTransform& colObjWorldTransform,
 					  RayResultCallback& resultCallback);
 
+	static void	rayTestSingleInternal(const btTransform& rayFromTrans,const btTransform& rayToTrans,
+					  const btCollisionObjectWrapper* collisionObjectWrap,
+					  RayResultCallback& resultCallback);
+
 	/// objectQuerySingle performs a collision detection query and calls the resultCallback. It is used internally by rayTest.
 	static void	objectQuerySingle(const btConvexShape* castShape, const btTransform& rayFromTrans,const btTransform& rayToTrans,
 					  btCollisionObject* collisionObject,
@@ -380,6 +473,10 @@ public:
 					  const btTransform& colObjWorldTransform,
 					  ConvexResultCallback& resultCallback, btScalar	allowedPenetration);
 
+	static void	objectQuerySingleInternal(const btConvexShape* castShape,const btTransform& convexFromTrans,const btTransform& convexToTrans,
+											const btCollisionObjectWrapper* colObjWrap,
+											ConvexResultCallback& resultCallback, btScalar allowedPenetration);
+
 	virtual void	addCollisionObject(btCollisionObject* collisionObject,short int collisionFilterGroup=btBroadphaseProxy::DefaultFilter,short int collisionFilterMask=btBroadphaseProxy::AllFilter);
 
 	btCollisionObjectArray& getCollisionObjectArray()
@@ -416,7 +513,10 @@ public:
 		m_forceUpdateAllAabbs = forceUpdateAllAabbs;
 	}
 
+	///Preliminary serialization test for Bullet 2.76. Loading those files requires a separate parser (Bullet/Demos/SerializeDemo)
+	virtual	void	serialize(btSerializer* serializer);
+
 };
 
 
-#endif //COLLISION_WORLD_H
+#endif //BT_COLLISION_WORLD_H
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btCompoundCollisionAlgorithm.cpp b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btCompoundCollisionAlgorithm.cpp
index 250c6badc..290d67797 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btCompoundCollisionAlgorithm.cpp
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btCompoundCollisionAlgorithm.cpp
@@ -20,30 +20,32 @@ subject to the following restrictions:
 #include "LinearMath/btIDebugDraw.h"
 #include "LinearMath/btAabbUtil2.h"
 #include "btManifoldResult.h"
+#include "BulletCollision/CollisionDispatch/btCollisionObjectWrapper.h"
 
-btCompoundCollisionAlgorithm::btCompoundCollisionAlgorithm( const btCollisionAlgorithmConstructionInfo& ci,btCollisionObject* body0,btCollisionObject* body1,bool isSwapped)
-:btActivatingCollisionAlgorithm(ci,body0,body1),
+btCompoundCollisionAlgorithm::btCompoundCollisionAlgorithm( const btCollisionAlgorithmConstructionInfo& ci,const btCollisionObjectWrapper* body0Wrap,const btCollisionObjectWrapper* body1Wrap,bool isSwapped)
+:btActivatingCollisionAlgorithm(ci,body0Wrap,body1Wrap),
 m_isSwapped(isSwapped),
 m_sharedManifold(ci.m_manifold)
 {
 	m_ownsManifold = false;
 
-	btCollisionObject* colObj = m_isSwapped? body1 : body0;
-	btAssert (colObj->getCollisionShape()->isCompound());
+	const btCollisionObjectWrapper* colObjWrap = m_isSwapped? body1Wrap : body0Wrap;
+	btAssert (colObjWrap->getCollisionShape()->isCompound());
 	
-	btCompoundShape* compoundShape = static_cast<btCompoundShape*>(colObj->getCollisionShape());
+	const btCompoundShape* compoundShape = static_cast<const btCompoundShape*>(colObjWrap->getCollisionShape());
 	m_compoundShapeRevision = compoundShape->getUpdateRevision();
 	
-	preallocateChildAlgorithms(body0,body1);
+	
+	preallocateChildAlgorithms(body0Wrap,body1Wrap);
 }
 
-void	btCompoundCollisionAlgorithm::preallocateChildAlgorithms(btCollisionObject* body0,btCollisionObject* body1)
+void	btCompoundCollisionAlgorithm::preallocateChildAlgorithms(const btCollisionObjectWrapper* body0Wrap,const btCollisionObjectWrapper* body1Wrap)
 {
-	btCollisionObject* colObj = m_isSwapped? body1 : body0;
-	btCollisionObject* otherObj = m_isSwapped? body0 : body1;
-	btAssert (colObj->getCollisionShape()->isCompound());
+	const btCollisionObjectWrapper* colObjWrap = m_isSwapped? body1Wrap : body0Wrap;
+	const btCollisionObjectWrapper* otherObjWrap = m_isSwapped? body0Wrap : body1Wrap;
+	btAssert (colObjWrap->getCollisionShape()->isCompound());
 	
-	btCompoundShape* compoundShape = static_cast<btCompoundShape*>(colObj->getCollisionShape());
+	const btCompoundShape* compoundShape = static_cast<const btCompoundShape*>(colObjWrap->getCollisionShape());
 
 	int numChildren = compoundShape->getNumChildShapes();
 	int i;
@@ -56,11 +58,11 @@ void	btCompoundCollisionAlgorithm::preallocateChildAlgorithms(btCollisionObject*
 			m_childCollisionAlgorithms[i] = 0;
 		} else
 		{
-			btCollisionShape* tmpShape = colObj->getCollisionShape();
-			btCollisionShape* childShape = compoundShape->getChildShape(i);
-			colObj->internalSetTemporaryCollisionShape( childShape );
-			m_childCollisionAlgorithms[i] = m_dispatcher->findAlgorithm(colObj,otherObj,m_sharedManifold);
-			colObj->internalSetTemporaryCollisionShape( tmpShape );
+			
+			const btCollisionShape* childShape = compoundShape->getChildShape(i);
+
+			btCollisionObjectWrapper childWrap(colObjWrap,childShape,colObjWrap->getCollisionObject(),colObjWrap->getWorldTransform());//wrong child trans, but unused (hopefully)
+			m_childCollisionAlgorithms[i] = m_dispatcher->findAlgorithm(&childWrap,otherObjWrap,m_sharedManifold);
 		}
 	}
 }
@@ -92,19 +94,16 @@ struct	btCompoundLeafCallback : btDbvt::ICollide
 
 public:
 
-	btCollisionObject* m_compoundColObj;
-	btCollisionObject* m_otherObj;
+	const btCollisionObjectWrapper* m_compoundColObjWrap;
+	const btCollisionObjectWrapper* m_otherObjWrap;
 	btDispatcher* m_dispatcher;
 	const btDispatcherInfo& m_dispatchInfo;
 	btManifoldResult*	m_resultOut;
 	btCollisionAlgorithm**	m_childCollisionAlgorithms;
 	btPersistentManifold*	m_sharedManifold;
-
-
-
-
-	btCompoundLeafCallback (btCollisionObject* compoundObj,btCollisionObject* otherObj,btDispatcher* dispatcher,const btDispatcherInfo& dispatchInfo,btManifoldResult*	resultOut,btCollisionAlgorithm**	childCollisionAlgorithms,btPersistentManifold*	sharedManifold)
-		:m_compoundColObj(compoundObj),m_otherObj(otherObj),m_dispatcher(dispatcher),m_dispatchInfo(dispatchInfo),m_resultOut(resultOut),
+	
+	btCompoundLeafCallback (const btCollisionObjectWrapper* compoundObjWrap,const btCollisionObjectWrapper* otherObjWrap,btDispatcher* dispatcher,const btDispatcherInfo& dispatchInfo,btManifoldResult*	resultOut,btCollisionAlgorithm**	childCollisionAlgorithms,btPersistentManifold*	sharedManifold)
+		:m_compoundColObjWrap(compoundObjWrap),m_otherObjWrap(otherObjWrap),m_dispatcher(dispatcher),m_dispatchInfo(dispatchInfo),m_resultOut(resultOut),
 		m_childCollisionAlgorithms(childCollisionAlgorithms),
 		m_sharedManifold(sharedManifold)
 	{
@@ -112,72 +111,89 @@ public:
 	}
 
 
-	void	ProcessChildShape(btCollisionShape* childShape,int index)
+	void	ProcessChildShape(const btCollisionShape* childShape,int index)
 	{
-		
-		btCompoundShape* compoundShape = static_cast<btCompoundShape*>(m_compoundColObj->getCollisionShape());
+		btAssert(index>=0);
+		const btCompoundShape* compoundShape = static_cast<const btCompoundShape*>(m_compoundColObjWrap->getCollisionShape());
+		btAssert(index<compoundShape->getNumChildShapes());
 
 
 		//backup
-		btTransform	orgTrans = m_compoundColObj->getWorldTransform();
-		btTransform	orgInterpolationTrans = m_compoundColObj->getInterpolationWorldTransform();
+		btTransform	orgTrans = m_compoundColObjWrap->getWorldTransform();
+		btTransform	orgInterpolationTrans = m_compoundColObjWrap->getWorldTransform();
 		const btTransform& childTrans = compoundShape->getChildTransform(index);
 		btTransform	newChildWorldTrans = orgTrans*childTrans ;
 
 		//perform an AABB check first
 		btVector3 aabbMin0,aabbMax0,aabbMin1,aabbMax1;
 		childShape->getAabb(newChildWorldTrans,aabbMin0,aabbMax0);
-		m_otherObj->getCollisionShape()->getAabb(m_otherObj->getWorldTransform(),aabbMin1,aabbMax1);
+		m_otherObjWrap->getCollisionShape()->getAabb(m_otherObjWrap->getWorldTransform(),aabbMin1,aabbMax1);
 
 		if (TestAabbAgainstAabb2(aabbMin0,aabbMax0,aabbMin1,aabbMax1))
 		{
 
-			m_compoundColObj->setWorldTransform( newChildWorldTrans);
-			m_compoundColObj->setInterpolationWorldTransform(newChildWorldTrans);
+			btCollisionObjectWrapper compoundWrap(this->m_compoundColObjWrap,childShape,m_compoundColObjWrap->getCollisionObject(),newChildWorldTrans);
+
 
 			//the contactpoint is still projected back using the original inverted worldtrans
-			btCollisionShape* tmpShape = m_compoundColObj->getCollisionShape();
-			m_compoundColObj->internalSetTemporaryCollisionShape( childShape );
-
 			if (!m_childCollisionAlgorithms[index])
-				m_childCollisionAlgorithms[index] = m_dispatcher->findAlgorithm(m_compoundColObj,m_otherObj,m_sharedManifold);
+				m_childCollisionAlgorithms[index] = m_dispatcher->findAlgorithm(&compoundWrap,m_otherObjWrap,m_sharedManifold);
+
+			
+			const btCollisionObjectWrapper* tmpWrap = 0;
 
 			///detect swapping case
-			if (m_resultOut->getBody0Internal() == m_compoundColObj)
+			if (m_resultOut->getBody0Internal() == m_compoundColObjWrap->getCollisionObject())
 			{
+				tmpWrap = m_resultOut->getBody0Wrap();
+				m_resultOut->setBody0Wrap(&compoundWrap);
 				m_resultOut->setShapeIdentifiersA(-1,index);
 			} else
 			{
+				tmpWrap = m_resultOut->getBody1Wrap();
+				m_resultOut->setBody1Wrap(&compoundWrap);
 				m_resultOut->setShapeIdentifiersB(-1,index);
 			}
 
-			m_childCollisionAlgorithms[index]->processCollision(m_compoundColObj,m_otherObj,m_dispatchInfo,m_resultOut);
+
+			m_childCollisionAlgorithms[index]->processCollision(&compoundWrap,m_otherObjWrap,m_dispatchInfo,m_resultOut);
+
+#if 0
 			if (m_dispatchInfo.m_debugDraw && (m_dispatchInfo.m_debugDraw->getDebugMode() & btIDebugDraw::DBG_DrawAabb))
 			{
 				btVector3 worldAabbMin,worldAabbMax;
 				m_dispatchInfo.m_debugDraw->drawAabb(aabbMin0,aabbMax0,btVector3(1,1,1));
 				m_dispatchInfo.m_debugDraw->drawAabb(aabbMin1,aabbMax1,btVector3(1,1,1));
 			}
+#endif
+
+			if (m_resultOut->getBody0Internal() == m_compoundColObjWrap->getCollisionObject())
+			{
+				m_resultOut->setBody0Wrap(tmpWrap);
+			} else
+			{
+				m_resultOut->setBody1Wrap(tmpWrap);
+			}
 			
-			//revert back transform
-			m_compoundColObj->internalSetTemporaryCollisionShape( tmpShape);
-			m_compoundColObj->setWorldTransform(  orgTrans );
-			m_compoundColObj->setInterpolationWorldTransform(orgInterpolationTrans);
 		}
 	}
 	void		Process(const btDbvtNode* leaf)
 	{
 		int index = leaf->dataAsInt;
 
-		btCompoundShape* compoundShape = static_cast<btCompoundShape*>(m_compoundColObj->getCollisionShape());
-		btCollisionShape* childShape = compoundShape->getChildShape(index);
+		const btCompoundShape* compoundShape = static_cast<const btCompoundShape*>(m_compoundColObjWrap->getCollisionShape());
+		const btCollisionShape* childShape = compoundShape->getChildShape(index);
+
+#if 0
 		if (m_dispatchInfo.m_debugDraw && (m_dispatchInfo.m_debugDraw->getDebugMode() & btIDebugDraw::DBG_DrawAabb))
 		{
 			btVector3 worldAabbMin,worldAabbMax;
-			btTransform	orgTrans = m_compoundColObj->getWorldTransform();
+			btTransform	orgTrans = m_compoundColObjWrap->getWorldTransform();
 			btTransformAabb(leaf->volume.Mins(),leaf->volume.Maxs(),0.,orgTrans,worldAabbMin,worldAabbMax);
 			m_dispatchInfo.m_debugDraw->drawAabb(worldAabbMin,worldAabbMax,btVector3(1,0,0));
 		}
+#endif
+
 		ProcessChildShape(childShape,index);
 
 	}
@@ -188,15 +204,13 @@ public:
 
 
 
-void btCompoundCollisionAlgorithm::processCollision (btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut)
+void btCompoundCollisionAlgorithm::processCollision (const btCollisionObjectWrapper* body0Wrap,const btCollisionObjectWrapper* body1Wrap,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut)
 {
-	btCollisionObject* colObj = m_isSwapped? body1 : body0;
-	btCollisionObject* otherObj = m_isSwapped? body0 : body1;
+	const btCollisionObjectWrapper* colObjWrap = m_isSwapped? body1Wrap : body0Wrap;
+	const btCollisionObjectWrapper* otherObjWrap = m_isSwapped? body0Wrap : body1Wrap;
 
-	
-
-	btAssert (colObj->getCollisionShape()->isCompound());
-	btCompoundShape* compoundShape = static_cast<btCompoundShape*>(colObj->getCollisionShape());
+	btAssert (colObjWrap->getCollisionShape()->isCompound());
+	const btCompoundShape* compoundShape = static_cast<const btCompoundShape*>(colObjWrap->getCollisionShape());
 
 	///btCompoundShape might have changed:
 	////make sure the internal child collision algorithm caches are still valid
@@ -205,13 +219,13 @@ void btCompoundCollisionAlgorithm::processCollision (btCollisionObject* body0,bt
 		///clear and update all
 		removeChildAlgorithms();
 		
-		preallocateChildAlgorithms(body0,body1);
+		preallocateChildAlgorithms(body0Wrap,body1Wrap);
 	}
 
 
-	btDbvt* tree = compoundShape->getDynamicAabbTree();
+	const btDbvt* tree = compoundShape->getDynamicAabbTree();
 	//use a dynamic aabb tree to cull potential child-overlaps
-	btCompoundLeafCallback  callback(colObj,otherObj,m_dispatcher,dispatchInfo,resultOut,&m_childCollisionAlgorithms[0],m_sharedManifold);
+	btCompoundLeafCallback  callback(colObjWrap,otherObjWrap,m_dispatcher,dispatchInfo,resultOut,&m_childCollisionAlgorithms[0],m_sharedManifold);
 
 	///we need to refresh all contact manifolds
 	///note that we should actually recursively traverse all children, btCompoundShape can nested more then 1 level deep
@@ -233,7 +247,7 @@ void btCompoundCollisionAlgorithm::processCollision (btCollisionObject* body0,bt
 						resultOut->setPersistentManifold(0);//??necessary?
 					}
 				}
-				manifoldArray.clear();
+				manifoldArray.resize(0);
 			}
 		}
 	}
@@ -243,8 +257,8 @@ void btCompoundCollisionAlgorithm::processCollision (btCollisionObject* body0,bt
 
 		btVector3 localAabbMin,localAabbMax;
 		btTransform otherInCompoundSpace;
-		otherInCompoundSpace = colObj->getWorldTransform().inverse() * otherObj->getWorldTransform();
-		otherObj->getCollisionShape()->getAabb(otherInCompoundSpace,localAabbMin,localAabbMax);
+		otherInCompoundSpace = colObjWrap->getWorldTransform().inverse() * otherObjWrap->getWorldTransform();
+		otherObjWrap->getCollisionShape()->getAabb(otherInCompoundSpace,localAabbMin,localAabbMax);
 
 		const ATTRIBUTE_ALIGNED16(btDbvtVolume)	bounds=btDbvtVolume::FromMM(localAabbMin,localAabbMax);
 		//process all children, that overlap with  the given AABB bounds
@@ -266,22 +280,26 @@ void btCompoundCollisionAlgorithm::processCollision (btCollisionObject* body0,bt
 		int numChildren = m_childCollisionAlgorithms.size();
 		int i;
 		btManifoldArray	manifoldArray;
-
+        const btCollisionShape* childShape = 0;
+        btTransform	orgTrans;
+        btTransform	orgInterpolationTrans;
+        btTransform	newChildWorldTrans;
+        btVector3 aabbMin0,aabbMax0,aabbMin1,aabbMax1;        
+        
 		for (i=0;i<numChildren;i++)
 		{
 			if (m_childCollisionAlgorithms[i])
 			{
-				btCollisionShape* childShape = compoundShape->getChildShape(i);
+				childShape = compoundShape->getChildShape(i);
 			//if not longer overlapping, remove the algorithm
-				btTransform	orgTrans = colObj->getWorldTransform();
-				btTransform	orgInterpolationTrans = colObj->getInterpolationWorldTransform();
+                orgTrans = colObjWrap->getWorldTransform();
+                orgInterpolationTrans = colObjWrap->getWorldTransform();
 				const btTransform& childTrans = compoundShape->getChildTransform(i);
-				btTransform	newChildWorldTrans = orgTrans*childTrans ;
+                newChildWorldTrans = orgTrans*childTrans ;
 
 				//perform an AABB check first
-				btVector3 aabbMin0,aabbMax0,aabbMin1,aabbMax1;
 				childShape->getAabb(newChildWorldTrans,aabbMin0,aabbMax0);
-				otherObj->getCollisionShape()->getAabb(otherObj->getWorldTransform(),aabbMin1,aabbMax1);
+				otherObjWrap->getCollisionShape()->getAabb(otherObjWrap->getWorldTransform(),aabbMin1,aabbMax1);
 
 				if (!TestAabbAgainstAabb2(aabbMin0,aabbMax0,aabbMin1,aabbMax1))
 				{
@@ -289,19 +307,15 @@ void btCompoundCollisionAlgorithm::processCollision (btCollisionObject* body0,bt
 					m_dispatcher->freeCollisionAlgorithm(m_childCollisionAlgorithms[i]);
 					m_childCollisionAlgorithms[i] = 0;
 				}
-
 			}
-			
 		}
-
-		
-
 	}
 }
 
 btScalar	btCompoundCollisionAlgorithm::calculateTimeOfImpact(btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut)
 {
-
+	btAssert(0);
+	//needs to be fixed, using btCollisionObjectWrapper and NOT modifying internal data structures
 	btCollisionObject* colObj = m_isSwapped? body1 : body0;
 	btCollisionObject* otherObj = m_isSwapped? body0 : body1;
 
@@ -320,27 +334,28 @@ btScalar	btCompoundCollisionAlgorithm::calculateTimeOfImpact(btCollisionObject*
 
 	int numChildren = m_childCollisionAlgorithms.size();
 	int i;
+    btTransform	orgTrans;
+    btScalar frac;
 	for (i=0;i<numChildren;i++)
 	{
-		//temporarily exchange parent btCollisionShape with childShape, and recurse
-		btCollisionShape* childShape = compoundShape->getChildShape(i);
+		//btCollisionShape* childShape = compoundShape->getChildShape(i);
 
 		//backup
-		btTransform	orgTrans = colObj->getWorldTransform();
+        orgTrans = colObj->getWorldTransform();
 	
 		const btTransform& childTrans = compoundShape->getChildTransform(i);
 		//btTransform	newChildWorldTrans = orgTrans*childTrans ;
 		colObj->setWorldTransform( orgTrans*childTrans );
 
-		btCollisionShape* tmpShape = colObj->getCollisionShape();
-		colObj->internalSetTemporaryCollisionShape( childShape );
-		btScalar frac = m_childCollisionAlgorithms[i]->calculateTimeOfImpact(colObj,otherObj,dispatchInfo,resultOut);
+		//btCollisionShape* tmpShape = colObj->getCollisionShape();
+		//colObj->internalSetTemporaryCollisionShape( childShape );
+        frac = m_childCollisionAlgorithms[i]->calculateTimeOfImpact(colObj,otherObj,dispatchInfo,resultOut);
 		if (frac<hitFraction)
 		{
 			hitFraction = frac;
 		}
 		//revert back
-		colObj->internalSetTemporaryCollisionShape( tmpShape);
+		//colObj->internalSetTemporaryCollisionShape( tmpShape);
 		colObj->setWorldTransform( orgTrans);
 	}
 	return hitFraction;
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btCompoundCollisionAlgorithm.h b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btCompoundCollisionAlgorithm.h
index 255e0af66..b16fc5246 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btCompoundCollisionAlgorithm.h
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btCompoundCollisionAlgorithm.h
@@ -13,8 +13,8 @@ subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-#ifndef COMPOUND_COLLISION_ALGORITHM_H
-#define COMPOUND_COLLISION_ALGORITHM_H
+#ifndef BT_COMPOUND_COLLISION_ALGORITHM_H
+#define BT_COMPOUND_COLLISION_ALGORITHM_H
 
 #include "btActivatingCollisionAlgorithm.h"
 #include "BulletCollision/BroadphaseCollision/btDispatcher.h"
@@ -41,15 +41,15 @@ class btCompoundCollisionAlgorithm  : public btActivatingCollisionAlgorithm
 	
 	void	removeChildAlgorithms();
 	
-	void	preallocateChildAlgorithms(btCollisionObject* body0,btCollisionObject* body1);
+	void	preallocateChildAlgorithms(const btCollisionObjectWrapper* body0Wrap,const btCollisionObjectWrapper* body1Wrap);
 
 public:
 
-	btCompoundCollisionAlgorithm( const btCollisionAlgorithmConstructionInfo& ci,btCollisionObject* body0,btCollisionObject* body1,bool isSwapped);
+	btCompoundCollisionAlgorithm( const btCollisionAlgorithmConstructionInfo& ci,const btCollisionObjectWrapper* body0Wrap,const btCollisionObjectWrapper* body1Wrap,bool isSwapped);
 
 	virtual ~btCompoundCollisionAlgorithm();
 
-	virtual void processCollision (btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
+	virtual void processCollision (const btCollisionObjectWrapper* body0Wrap,const btCollisionObjectWrapper* body1Wrap,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
 
 	btScalar	calculateTimeOfImpact(btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
 
@@ -65,22 +65,22 @@ public:
 
 	struct CreateFunc :public 	btCollisionAlgorithmCreateFunc
 	{
-		virtual	btCollisionAlgorithm* CreateCollisionAlgorithm(btCollisionAlgorithmConstructionInfo& ci, btCollisionObject* body0,btCollisionObject* body1)
+		virtual	btCollisionAlgorithm* CreateCollisionAlgorithm(btCollisionAlgorithmConstructionInfo& ci, const btCollisionObjectWrapper* body0Wrap,const btCollisionObjectWrapper* body1Wrap)
 		{
 			void* mem = ci.m_dispatcher1->allocateCollisionAlgorithm(sizeof(btCompoundCollisionAlgorithm));
-			return new(mem) btCompoundCollisionAlgorithm(ci,body0,body1,false);
+			return new(mem) btCompoundCollisionAlgorithm(ci,body0Wrap,body1Wrap,false);
 		}
 	};
 
 	struct SwappedCreateFunc :public 	btCollisionAlgorithmCreateFunc
 	{
-		virtual	btCollisionAlgorithm* CreateCollisionAlgorithm(btCollisionAlgorithmConstructionInfo& ci, btCollisionObject* body0,btCollisionObject* body1)
+		virtual	btCollisionAlgorithm* CreateCollisionAlgorithm(btCollisionAlgorithmConstructionInfo& ci, const btCollisionObjectWrapper* body0Wrap,const btCollisionObjectWrapper* body1Wrap)
 		{
 			void* mem = ci.m_dispatcher1->allocateCollisionAlgorithm(sizeof(btCompoundCollisionAlgorithm));
-			return new(mem) btCompoundCollisionAlgorithm(ci,body0,body1,true);
+			return new(mem) btCompoundCollisionAlgorithm(ci,body0Wrap,body1Wrap,true);
 		}
 	};
 
 };
 
-#endif //COMPOUND_COLLISION_ALGORITHM_H
+#endif //BT_COMPOUND_COLLISION_ALGORITHM_H
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btConvex2dConvex2dAlgorithm.cpp b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btConvex2dConvex2dAlgorithm.cpp
index db7f884ac..3e1afede1 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btConvex2dConvex2dAlgorithm.cpp
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btConvex2dConvex2dAlgorithm.cpp
@@ -43,7 +43,7 @@ subject to the following restrictions:
 
 #include "BulletCollision/NarrowPhaseCollision/btGjkEpa2.h"
 #include "BulletCollision/NarrowPhaseCollision/btGjkEpaPenetrationDepthSolver.h"
-
+#include "BulletCollision/CollisionDispatch/btCollisionObjectWrapper.h"
 
 btConvex2dConvex2dAlgorithm::CreateFunc::CreateFunc(btSimplexSolverInterface*			simplexSolver, btConvexPenetrationDepthSolver* pdSolver)
 {
@@ -57,8 +57,8 @@ btConvex2dConvex2dAlgorithm::CreateFunc::~CreateFunc()
 { 
 }
 
-btConvex2dConvex2dAlgorithm::btConvex2dConvex2dAlgorithm(btPersistentManifold* mf,const btCollisionAlgorithmConstructionInfo& ci,btCollisionObject* body0,btCollisionObject* body1,btSimplexSolverInterface* simplexSolver, btConvexPenetrationDepthSolver* pdSolver,int numPerturbationIterations, int minimumPointsPerturbationThreshold)
-: btActivatingCollisionAlgorithm(ci,body0,body1),
+btConvex2dConvex2dAlgorithm::btConvex2dConvex2dAlgorithm(btPersistentManifold* mf,const btCollisionAlgorithmConstructionInfo& ci,const btCollisionObjectWrapper* body0Wrap,const btCollisionObjectWrapper* body1Wrap,btSimplexSolverInterface* simplexSolver, btConvexPenetrationDepthSolver* pdSolver,int numPerturbationIterations, int minimumPointsPerturbationThreshold)
+: btActivatingCollisionAlgorithm(ci,body0Wrap,body1Wrap),
 m_simplexSolver(simplexSolver),
 m_pdSolver(pdSolver),
 m_ownManifold (false),
@@ -67,8 +67,8 @@ m_lowLevelOfDetail(false),
  m_numPerturbationIterations(numPerturbationIterations),
 m_minimumPointsPerturbationThreshold(minimumPointsPerturbationThreshold)
 {
-	(void)body0;
-	(void)body1;
+	(void)body0Wrap;
+	(void)body1Wrap;
 }
 
 
@@ -96,13 +96,13 @@ extern btScalar gContactBreakingThreshold;
 //
 // Convex-Convex collision algorithm
 //
-void btConvex2dConvex2dAlgorithm ::processCollision (btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut)
+void btConvex2dConvex2dAlgorithm ::processCollision (const btCollisionObjectWrapper* body0Wrap,const btCollisionObjectWrapper* body1Wrap,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut)
 {
 
 	if (!m_manifoldPtr)
 	{
 		//swapped?
-		m_manifoldPtr = m_dispatcher->getNewManifold(body0,body1);
+		m_manifoldPtr = m_dispatcher->getNewManifold(body0Wrap->getCollisionObject(),body1Wrap->getCollisionObject());
 		m_ownManifold = true;
 	}
 	resultOut->setPersistentManifold(m_manifoldPtr);
@@ -111,8 +111,8 @@ void btConvex2dConvex2dAlgorithm ::processCollision (btCollisionObject* body0,bt
 	//resultOut->getPersistentManifold()->clearManifold();
 
 
-	btConvexShape* min0 = static_cast<btConvexShape*>(body0->getCollisionShape());
-	btConvexShape* min1 = static_cast<btConvexShape*>(body1->getCollisionShape());
+	const btConvexShape* min0 = static_cast<const btConvexShape*>(body0Wrap->getCollisionShape());
+	const btConvexShape* min1 = static_cast<const btConvexShape*>(body1Wrap->getCollisionShape());
 
 	btVector3  normalOnB;
 	btVector3  pointOnBWorld;
@@ -133,8 +133,8 @@ void btConvex2dConvex2dAlgorithm ::processCollision (btCollisionObject* body0,bt
 		}
 
 		input.m_stackAlloc = dispatchInfo.m_stackAllocator;
-		input.m_transformA = body0->getWorldTransform();
-		input.m_transformB = body1->getWorldTransform();
+		input.m_transformA = body0Wrap->getWorldTransform();
+		input.m_transformB = body1Wrap->getWorldTransform();
 
 		gjkPairDetector.getClosestPoints(input,*resultOut,dispatchInfo.m_debugDraw);
 
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btConvex2dConvex2dAlgorithm.h b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btConvex2dConvex2dAlgorithm.h
index 573840140..18d9385a1 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btConvex2dConvex2dAlgorithm.h
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btConvex2dConvex2dAlgorithm.h
@@ -13,8 +13,8 @@ subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-#ifndef CONVEX_2D_CONVEX_2D_ALGORITHM_H
-#define CONVEX_2D_CONVEX_2D_ALGORITHM_H
+#ifndef BT_CONVEX_2D_CONVEX_2D_ALGORITHM_H
+#define BT_CONVEX_2D_CONVEX_2D_ALGORITHM_H
 
 #include "BulletCollision/CollisionDispatch/btActivatingCollisionAlgorithm.h"
 #include "BulletCollision/NarrowPhaseCollision/btGjkPairDetector.h"
@@ -45,12 +45,12 @@ class btConvex2dConvex2dAlgorithm : public btActivatingCollisionAlgorithm
 
 public:
 
-	btConvex2dConvex2dAlgorithm(btPersistentManifold* mf,const btCollisionAlgorithmConstructionInfo& ci,btCollisionObject* body0,btCollisionObject* body1, btSimplexSolverInterface* simplexSolver, btConvexPenetrationDepthSolver* pdSolver, int numPerturbationIterations, int minimumPointsPerturbationThreshold);
+	btConvex2dConvex2dAlgorithm(btPersistentManifold* mf,const btCollisionAlgorithmConstructionInfo& ci,const btCollisionObjectWrapper* body0Wrap,const btCollisionObjectWrapper* body1Wrap, btSimplexSolverInterface* simplexSolver, btConvexPenetrationDepthSolver* pdSolver, int numPerturbationIterations, int minimumPointsPerturbationThreshold);
 
 
 	virtual ~btConvex2dConvex2dAlgorithm();
 
-	virtual void processCollision (btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
+	virtual void processCollision (const btCollisionObjectWrapper* body0Wrap,const btCollisionObjectWrapper* body1Wrap,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
 
 	virtual btScalar calculateTimeOfImpact(btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
 
@@ -82,14 +82,14 @@ public:
 		
 		virtual ~CreateFunc();
 
-		virtual	btCollisionAlgorithm* CreateCollisionAlgorithm(btCollisionAlgorithmConstructionInfo& ci, btCollisionObject* body0,btCollisionObject* body1)
+		virtual	btCollisionAlgorithm* CreateCollisionAlgorithm(btCollisionAlgorithmConstructionInfo& ci, const btCollisionObjectWrapper* body0Wrap,const btCollisionObjectWrapper* body1Wrap)
 		{
 			void* mem = ci.m_dispatcher1->allocateCollisionAlgorithm(sizeof(btConvex2dConvex2dAlgorithm));
-			return new(mem) btConvex2dConvex2dAlgorithm(ci.m_manifold,ci,body0,body1,m_simplexSolver,m_pdSolver,m_numPerturbationIterations,m_minimumPointsPerturbationThreshold);
+			return new(mem) btConvex2dConvex2dAlgorithm(ci.m_manifold,ci,body0Wrap,body1Wrap,m_simplexSolver,m_pdSolver,m_numPerturbationIterations,m_minimumPointsPerturbationThreshold);
 		}
 	};
 
 
 };
 
-#endif //CONVEX_2D_CONVEX_2D_ALGORITHM_H
+#endif //BT_CONVEX_2D_CONVEX_2D_ALGORITHM_H
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btConvexConcaveCollisionAlgorithm.cpp b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btConvexConcaveCollisionAlgorithm.cpp
index 6a556195b..6905e9737 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btConvexConcaveCollisionAlgorithm.cpp
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btConvexConcaveCollisionAlgorithm.cpp
@@ -25,11 +25,12 @@ subject to the following restrictions:
 #include "BulletCollision/CollisionShapes/btSphereShape.h"
 #include "LinearMath/btIDebugDraw.h"
 #include "BulletCollision/NarrowPhaseCollision/btSubSimplexConvexCast.h"
+#include "BulletCollision/CollisionDispatch/btCollisionObjectWrapper.h"
 
-btConvexConcaveCollisionAlgorithm::btConvexConcaveCollisionAlgorithm( const btCollisionAlgorithmConstructionInfo& ci, btCollisionObject* body0,btCollisionObject* body1,bool isSwapped)
-: btActivatingCollisionAlgorithm(ci,body0,body1),
+btConvexConcaveCollisionAlgorithm::btConvexConcaveCollisionAlgorithm( const btCollisionAlgorithmConstructionInfo& ci, const btCollisionObjectWrapper* body0Wrap,const btCollisionObjectWrapper* body1Wrap,bool isSwapped)
+: btActivatingCollisionAlgorithm(ci,body0Wrap,body1Wrap),
 m_isSwapped(isSwapped),
-m_btConvexTriangleCallback(ci.m_dispatcher1,body0,body1,isSwapped)
+m_btConvexTriangleCallback(ci.m_dispatcher1,body0Wrap,body1Wrap,isSwapped)
 {
 }
 
@@ -46,17 +47,17 @@ void	btConvexConcaveCollisionAlgorithm::getAllContactManifolds(btManifoldArray&
 }
 
 
-btConvexTriangleCallback::btConvexTriangleCallback(btDispatcher*  dispatcher,btCollisionObject* body0,btCollisionObject* body1,bool isSwapped):
+btConvexTriangleCallback::btConvexTriangleCallback(btDispatcher*  dispatcher,const btCollisionObjectWrapper* body0Wrap,const btCollisionObjectWrapper* body1Wrap,bool isSwapped):
 	  m_dispatcher(dispatcher),
 	m_dispatchInfoPtr(0)
 {
-	m_convexBody = isSwapped? body1:body0;
-	m_triBody = isSwapped? body0:body1;
+	m_convexBodyWrap = isSwapped? body1Wrap:body0Wrap;
+	m_triBodyWrap = isSwapped? body0Wrap:body1Wrap;
 	
 	  //
 	  // create the manifold from the dispatcher 'manifold pool'
 	  //
-	  m_manifoldPtr = m_dispatcher->getNewManifold(m_convexBody,m_triBody);
+	  m_manifoldPtr = m_dispatcher->getNewManifold(m_convexBodyWrap->getCollisionObject(),m_triBodyWrap->getCollisionObject());
 
   	  clearCache();
 }
@@ -88,66 +89,78 @@ void btConvexTriangleCallback::processTriangle(btVector3* triangle,int partId, i
 	btCollisionAlgorithmConstructionInfo ci;
 	ci.m_dispatcher1 = m_dispatcher;
 
-	btCollisionObject* ob = static_cast<btCollisionObject*>(m_triBody);
+	//const btCollisionObject* ob = static_cast<btCollisionObject*>(m_triBodyWrap->getCollisionObject());
 
 
-	
+#if 0	
 	///debug drawing of the overlapping triangles
 	if (m_dispatchInfoPtr && m_dispatchInfoPtr->m_debugDraw && (m_dispatchInfoPtr->m_debugDraw->getDebugMode() &btIDebugDraw::DBG_DrawWireframe ))
 	{
-		btVector3 color(255,255,0);
+		btVector3 color(1,1,0);
 		btTransform& tr = ob->getWorldTransform();
 		m_dispatchInfoPtr->m_debugDraw->drawLine(tr(triangle[0]),tr(triangle[1]),color);
 		m_dispatchInfoPtr->m_debugDraw->drawLine(tr(triangle[1]),tr(triangle[2]),color);
 		m_dispatchInfoPtr->m_debugDraw->drawLine(tr(triangle[2]),tr(triangle[0]),color);
-
-		//btVector3 center = triangle[0] + triangle[1]+triangle[2];
-		//center *= btScalar(0.333333);
-		//m_dispatchInfoPtr->m_debugDraw->drawLine(tr(triangle[0]),tr(center),color);
-		//m_dispatchInfoPtr->m_debugDraw->drawLine(tr(triangle[1]),tr(center),color);
-		//m_dispatchInfoPtr->m_debugDraw->drawLine(tr(triangle[2]),tr(center),color);
-
 	}
-
-
-	//btCollisionObject* colObj = static_cast<btCollisionObject*>(m_convexProxy->m_clientObject);
+#endif
 	
-	if (m_convexBody->getCollisionShape()->isConvex())
+	if (m_convexBodyWrap->getCollisionShape()->isConvex())
 	{
 		btTriangleShape tm(triangle[0],triangle[1],triangle[2]);	
 		tm.setMargin(m_collisionMarginTriangle);
 		
-		btCollisionShape* tmpShape = ob->getCollisionShape();
-		ob->internalSetTemporaryCollisionShape( &tm );
 		
-		btCollisionAlgorithm* colAlgo = ci.m_dispatcher1->findAlgorithm(m_convexBody,m_triBody,m_manifoldPtr);
-		///this should use the btDispatcher, so the actual registered algorithm is used
-		//		btConvexConvexAlgorithm cvxcvxalgo(m_manifoldPtr,ci,m_convexBody,m_triBody);
+		btCollisionObjectWrapper triObWrap(m_triBodyWrap,&tm,m_triBodyWrap->getCollisionObject(),m_triBodyWrap->getWorldTransform());//correct transform?
+		btCollisionAlgorithm* colAlgo = ci.m_dispatcher1->findAlgorithm(m_convexBodyWrap,&triObWrap,m_manifoldPtr);
 
-		m_resultOut->setShapeIdentifiersB(partId,triangleIndex);
+		const btCollisionObjectWrapper* tmpWrap = 0;
+
+		if (m_resultOut->getBody0Internal() == m_triBodyWrap->getCollisionObject())
+		{
+			tmpWrap = m_resultOut->getBody0Wrap();
+			m_resultOut->setBody0Wrap(&triObWrap);
+			m_resultOut->setShapeIdentifiersA(partId,triangleIndex);
+		}
+		else
+		{
+			tmpWrap = m_resultOut->getBody1Wrap();
+			m_resultOut->setBody1Wrap(&triObWrap);
+			m_resultOut->setShapeIdentifiersB(partId,triangleIndex);
+		}
 	
-//		cvxcvxalgo.processCollision(m_convexBody,m_triBody,*m_dispatchInfoPtr,m_resultOut);
-		colAlgo->processCollision(m_convexBody,m_triBody,*m_dispatchInfoPtr,m_resultOut);
+		colAlgo->processCollision(m_convexBodyWrap,&triObWrap,*m_dispatchInfoPtr,m_resultOut);
+
+		if (m_resultOut->getBody0Internal() == m_triBodyWrap->getCollisionObject())
+		{
+			m_resultOut->setBody0Wrap(tmpWrap);
+		} else
+		{
+			m_resultOut->setBody1Wrap(tmpWrap);
+		}
+		
+
+
 		colAlgo->~btCollisionAlgorithm();
 		ci.m_dispatcher1->freeCollisionAlgorithm(colAlgo);
-		ob->internalSetTemporaryCollisionShape( tmpShape);
 	}
 
-
 }
 
 
 
-void	btConvexTriangleCallback::setTimeStepAndCounters(btScalar collisionMarginTriangle,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut)
+void	btConvexTriangleCallback::setTimeStepAndCounters(btScalar collisionMarginTriangle,const btDispatcherInfo& dispatchInfo,const btCollisionObjectWrapper* convexBodyWrap, const btCollisionObjectWrapper* triBodyWrap, btManifoldResult* resultOut)
 {
+	m_convexBodyWrap = convexBodyWrap;
+	m_triBodyWrap = triBodyWrap;
+
 	m_dispatchInfoPtr = &dispatchInfo;
 	m_collisionMarginTriangle = collisionMarginTriangle;
 	m_resultOut = resultOut;
 
 	//recalc aabbs
 	btTransform convexInTriangleSpace;
-	convexInTriangleSpace = m_triBody->getWorldTransform().inverse() * m_convexBody->getWorldTransform();
-	btCollisionShape* convexShape = static_cast<btCollisionShape*>(m_convexBody->getCollisionShape());
+	convexInTriangleSpace = m_triBodyWrap->getWorldTransform().inverse() * m_convexBodyWrap->getWorldTransform();
+	const btCollisionShape* convexShape = static_cast<const btCollisionShape*>(m_convexBodyWrap->getCollisionShape());
 	//CollisionShape* triangleShape = static_cast<btCollisionShape*>(triBody->m_collisionShape);
 	convexShape->getAabb(convexInTriangleSpace,m_aabbMin,m_aabbMax);
 	btScalar extraMargin = collisionMarginTriangle;
@@ -164,35 +177,34 @@ void btConvexConcaveCollisionAlgorithm::clearCache()
 
 }
 
-void btConvexConcaveCollisionAlgorithm::processCollision (btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut)
+void btConvexConcaveCollisionAlgorithm::processCollision (const btCollisionObjectWrapper* body0Wrap,const btCollisionObjectWrapper* body1Wrap,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut)
 {
 	
 	
-	btCollisionObject* convexBody = m_isSwapped ? body1 : body0;
-	btCollisionObject* triBody = m_isSwapped ? body0 : body1;
+	const btCollisionObjectWrapper* convexBodyWrap = m_isSwapped ? body1Wrap : body0Wrap;
+	const btCollisionObjectWrapper* triBodyWrap = m_isSwapped ? body0Wrap : body1Wrap;
 
-	if (triBody->getCollisionShape()->isConcave())
+	if (triBodyWrap->getCollisionShape()->isConcave())
 	{
 
 
-		btCollisionObject*	triOb = triBody;
-		btConcaveShape* concaveShape = static_cast<btConcaveShape*>( triOb->getCollisionShape());
 		
-		if (convexBody->getCollisionShape()->isConvex())
+		const btConcaveShape* concaveShape = static_cast<const btConcaveShape*>( triBodyWrap->getCollisionShape());
+		
+		if (convexBodyWrap->getCollisionShape()->isConvex())
 		{
 			btScalar collisionMarginTriangle = concaveShape->getMargin();
 					
 			resultOut->setPersistentManifold(m_btConvexTriangleCallback.m_manifoldPtr);
-			m_btConvexTriangleCallback.setTimeStepAndCounters(collisionMarginTriangle,dispatchInfo,resultOut);
+			m_btConvexTriangleCallback.setTimeStepAndCounters(collisionMarginTriangle,dispatchInfo,convexBodyWrap,triBodyWrap,resultOut);
 
-			//Disable persistency. previously, some older algorithm calculated all contacts in one go, so you can clear it here.
-			//m_dispatcher->clearManifold(m_btConvexTriangleCallback.m_manifoldPtr);
-
-			m_btConvexTriangleCallback.m_manifoldPtr->setBodies(convexBody,triBody);
+			m_btConvexTriangleCallback.m_manifoldPtr->setBodies(convexBodyWrap->getCollisionObject(),triBodyWrap->getCollisionObject());
 
 			concaveShape->processAllTriangles( &m_btConvexTriangleCallback,m_btConvexTriangleCallback.getAabbMin(),m_btConvexTriangleCallback.getAabbMax());
 			
 			resultOut->refreshContactPoints();
+
+			m_btConvexTriangleCallback.clearWrapperData();
 	
 		}
 	
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btConvexConcaveCollisionAlgorithm.h b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btConvexConcaveCollisionAlgorithm.h
index 984a4c39e..e90d06eb1 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btConvexConcaveCollisionAlgorithm.h
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btConvexConcaveCollisionAlgorithm.h
@@ -13,8 +13,8 @@ subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-#ifndef CONVEX_CONCAVE_COLLISION_ALGORITHM_H
-#define CONVEX_CONCAVE_COLLISION_ALGORITHM_H
+#ifndef BT_CONVEX_CONCAVE_COLLISION_ALGORITHM_H
+#define BT_CONVEX_CONCAVE_COLLISION_ALGORITHM_H
 
 #include "btActivatingCollisionAlgorithm.h"
 #include "BulletCollision/BroadphaseCollision/btDispatcher.h"
@@ -28,8 +28,8 @@ class btDispatcher;
 ///For each triangle in the concave mesh that overlaps with the AABB of a convex (m_convexProxy), processTriangle is called.
 class btConvexTriangleCallback : public btTriangleCallback
 {
-	btCollisionObject* m_convexBody;
-	btCollisionObject* m_triBody;
+	const btCollisionObjectWrapper* m_convexBodyWrap;
+	const btCollisionObjectWrapper* m_triBodyWrap;
 
 	btVector3	m_aabbMin;
 	btVector3	m_aabbMax ;
@@ -45,10 +45,15 @@ int	m_triangleCount;
 	
 	btPersistentManifold*	m_manifoldPtr;
 
-	btConvexTriangleCallback(btDispatcher* dispatcher,btCollisionObject* body0,btCollisionObject* body1,bool isSwapped);
+	btConvexTriangleCallback(btDispatcher* dispatcher,const btCollisionObjectWrapper* body0Wrap,const btCollisionObjectWrapper* body1Wrap,bool isSwapped);
 
-	void	setTimeStepAndCounters(btScalar collisionMarginTriangle,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
+	void	setTimeStepAndCounters(btScalar collisionMarginTriangle,const btDispatcherInfo& dispatchInfo,const btCollisionObjectWrapper* convexBodyWrap, const btCollisionObjectWrapper* triBodyWrap, btManifoldResult* resultOut);
 
+	///Resets both cached wrapper pointers (convex and triangle side) to null.
+	void	clearWrapperData()
+	{
+		m_convexBodyWrap = m_triBodyWrap = 0;
+	}
 	virtual ~btConvexTriangleCallback();
 
 	virtual void processTriangle(btVector3* triangle, int partId, int triangleIndex);
@@ -81,11 +86,11 @@ class btConvexConcaveCollisionAlgorithm  : public btActivatingCollisionAlgorithm
 
 public:
 
-	btConvexConcaveCollisionAlgorithm( const btCollisionAlgorithmConstructionInfo& ci,btCollisionObject* body0,btCollisionObject* body1,bool isSwapped);
+	btConvexConcaveCollisionAlgorithm( const btCollisionAlgorithmConstructionInfo& ci,const btCollisionObjectWrapper* body0Wrap,const btCollisionObjectWrapper* body1Wrap,bool isSwapped);
 
 	virtual ~btConvexConcaveCollisionAlgorithm();
 
-	virtual void processCollision (btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
+	virtual void processCollision (const btCollisionObjectWrapper* body0Wrap,const btCollisionObjectWrapper* body1Wrap,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
 
 	btScalar	calculateTimeOfImpact(btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
 
@@ -95,22 +100,22 @@ public:
 
 	struct CreateFunc :public 	btCollisionAlgorithmCreateFunc
 	{
-		virtual	btCollisionAlgorithm* CreateCollisionAlgorithm(btCollisionAlgorithmConstructionInfo& ci, btCollisionObject* body0,btCollisionObject* body1)
+		virtual	btCollisionAlgorithm* CreateCollisionAlgorithm(btCollisionAlgorithmConstructionInfo& ci, const btCollisionObjectWrapper* body0Wrap,const btCollisionObjectWrapper* body1Wrap)
 		{
 			void* mem = ci.m_dispatcher1->allocateCollisionAlgorithm(sizeof(btConvexConcaveCollisionAlgorithm));
-			return new(mem) btConvexConcaveCollisionAlgorithm(ci,body0,body1,false);
+			return new(mem) btConvexConcaveCollisionAlgorithm(ci,body0Wrap,body1Wrap,false); //isSwapped=false: body0Wrap is treated as the convex side
 		}
 	};
 
 
 	struct SwappedCreateFunc :public 	btCollisionAlgorithmCreateFunc
 	{
-		virtual	btCollisionAlgorithm* CreateCollisionAlgorithm(btCollisionAlgorithmConstructionInfo& ci, btCollisionObject* body0,btCollisionObject* body1)
+		virtual	btCollisionAlgorithm* CreateCollisionAlgorithm(btCollisionAlgorithmConstructionInfo& ci, const btCollisionObjectWrapper* body0Wrap,const btCollisionObjectWrapper* body1Wrap)
 		{
 			void* mem = ci.m_dispatcher1->allocateCollisionAlgorithm(sizeof(btConvexConcaveCollisionAlgorithm));
-			return new(mem) btConvexConcaveCollisionAlgorithm(ci,body0,body1,true);
+			return new(mem) btConvexConcaveCollisionAlgorithm(ci,body0Wrap,body1Wrap,true); //isSwapped=true: body1Wrap is treated as the convex side
 		}
 	};
 
 
 };
 
-#endif //CONVEX_CONCAVE_COLLISION_ALGORITHM_H
+#endif //BT_CONVEX_CONCAVE_COLLISION_ALGORITHM_H
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btConvexConvexAlgorithm.cpp b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btConvexConvexAlgorithm.cpp
index 152506e58..62f98a846 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btConvexConvexAlgorithm.cpp
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btConvexConvexAlgorithm.cpp
@@ -17,6 +17,7 @@ subject to the following restrictions:
 ///If you experience problems with capsule-capsule collision, try to define BT_DISABLE_CAPSULE_CAPSULE_COLLIDER and report it in the Bullet forums
 ///with reproduction case
 //define BT_DISABLE_CAPSULE_CAPSULE_COLLIDER 1
+//#define ZERO_MARGIN
 
 #include "btConvexConvexAlgorithm.h"
 
@@ -26,6 +27,8 @@ subject to the following restrictions:
 #include "BulletCollision/CollisionDispatch/btCollisionObject.h"
 #include "BulletCollision/CollisionShapes/btConvexShape.h"
 #include "BulletCollision/CollisionShapes/btCapsuleShape.h"
+#include "BulletCollision/CollisionShapes/btTriangleShape.h"
+
 
 
 #include "BulletCollision/NarrowPhaseCollision/btGjkPairDetector.h"
@@ -48,8 +51,8 @@ subject to the following restrictions:
 
 #include "BulletCollision/NarrowPhaseCollision/btGjkEpa2.h"
 #include "BulletCollision/NarrowPhaseCollision/btGjkEpaPenetrationDepthSolver.h"
-
-
+#include "BulletCollision/NarrowPhaseCollision/btPolyhedralContactClipping.h"
+#include "BulletCollision/CollisionDispatch/btCollisionObjectWrapper.h"
 
 ///////////
 
@@ -188,8 +191,8 @@ btConvexConvexAlgorithm::CreateFunc::~CreateFunc()
 { 
 }
 
-btConvexConvexAlgorithm::btConvexConvexAlgorithm(btPersistentManifold* mf,const btCollisionAlgorithmConstructionInfo& ci,btCollisionObject* body0,btCollisionObject* body1,btSimplexSolverInterface* simplexSolver, btConvexPenetrationDepthSolver* pdSolver,int numPerturbationIterations, int minimumPointsPerturbationThreshold)
-: btActivatingCollisionAlgorithm(ci,body0,body1),
+btConvexConvexAlgorithm::btConvexConvexAlgorithm(btPersistentManifold* mf,const btCollisionAlgorithmConstructionInfo& ci,const btCollisionObjectWrapper* body0Wrap,const btCollisionObjectWrapper* body1Wrap,btSimplexSolverInterface* simplexSolver, btConvexPenetrationDepthSolver* pdSolver,int numPerturbationIterations, int minimumPointsPerturbationThreshold)
+: btActivatingCollisionAlgorithm(ci,body0Wrap,body1Wrap),
 m_simplexSolver(simplexSolver),
 m_pdSolver(pdSolver),
 m_ownManifold (false),
@@ -202,8 +205,8 @@ m_sepDistance((static_cast<btConvexShape*>(body0->getCollisionShape()))->getAngu
 m_numPerturbationIterations(numPerturbationIterations),
 m_minimumPointsPerturbationThreshold(minimumPointsPerturbationThreshold)
 {
-	(void)body0;
-	(void)body1;
+	(void)body0Wrap;
+	(void)body1Wrap;
 }
 
 
@@ -238,8 +241,8 @@ struct btPerturbedContactResult : public btManifoldResult
 		:m_originalManifoldResult(originalResult),
 		m_transformA(transformA),
 		m_transformB(transformB),
-		m_perturbA(perturbA),
 		m_unPerturbedTransform(unPerturbedTransform),
+		m_perturbA(perturbA),
 		m_debugDrawer(debugDrawer)
 	{
 	}
@@ -286,13 +289,13 @@ extern btScalar gContactBreakingThreshold;
 //
 // Convex-Convex collision algorithm
 //
-void btConvexConvexAlgorithm ::processCollision (btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut)
+void btConvexConvexAlgorithm ::processCollision (const btCollisionObjectWrapper* body0Wrap,const btCollisionObjectWrapper* body1Wrap,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut)
 {
 
 	if (!m_manifoldPtr)
 	{
 		//swapped?
-		m_manifoldPtr = m_dispatcher->getNewManifold(body0,body1);
+		m_manifoldPtr = m_dispatcher->getNewManifold(body0Wrap->getCollisionObject(),body1Wrap->getCollisionObject());
 		m_ownManifold = true;
 	}
 	resultOut->setPersistentManifold(m_manifoldPtr);
@@ -301,8 +304,8 @@ void btConvexConvexAlgorithm ::processCollision (btCollisionObject* body0,btColl
 	//resultOut->getPersistentManifold()->clearManifold();
 	
 
-	btConvexShape* min0 = static_cast<btConvexShape*>(body0->getCollisionShape());
-	btConvexShape* min1 = static_cast<btConvexShape*>(body1->getCollisionShape());
+	const btConvexShape* min0 = static_cast<const btConvexShape*>(body0Wrap->getCollisionShape());
+	const btConvexShape* min1 = static_cast<const btConvexShape*>(body1Wrap->getCollisionShape());
 
 	btVector3  normalOnB;
 		btVector3  pointOnBWorld;
@@ -311,14 +314,14 @@ void btConvexConvexAlgorithm ::processCollision (btCollisionObject* body0,btColl
 	{
 		btCapsuleShape* capsuleA = (btCapsuleShape*) min0;
 		btCapsuleShape* capsuleB = (btCapsuleShape*) min1;
-		btVector3 localScalingA = capsuleA->getLocalScaling();
-		btVector3 localScalingB = capsuleB->getLocalScaling();
+	//	btVector3 localScalingA = capsuleA->getLocalScaling();
+	//	btVector3 localScalingB = capsuleB->getLocalScaling();
 		
 		btScalar threshold = m_manifoldPtr->getContactBreakingThreshold();
 
 		btScalar dist = capsuleCapsuleDistance(normalOnB,	pointOnBWorld,capsuleA->getHalfHeight(),capsuleA->getRadius(),
 			capsuleB->getHalfHeight(),capsuleB->getRadius(),capsuleA->getUpAxis(),capsuleB->getUpAxis(),
-			body0->getWorldTransform(),body1->getWorldTransform(),threshold);
+			body0Wrap->getWorldTransform(),body1Wrap->getWorldTransform(),threshold);
 
 		if (dist<threshold)
 		{
@@ -331,8 +334,14 @@ void btConvexConvexAlgorithm ::processCollision (btCollisionObject* body0,btColl
 #endif //BT_DISABLE_CAPSULE_CAPSULE_COLLIDER
 
 
+
+
 #ifdef USE_SEPDISTANCE_UTIL2
-	m_sepDistance.updateSeparatingDistance(body0->getWorldTransform(),body1->getWorldTransform());
+	if (dispatchInfo.m_useConvexConservativeDistanceUtil) //only feed the separating-distance util when the dispatcher enables it
+	{
+		m_sepDistance.updateSeparatingDistance(body0Wrap->getWorldTransform(),body1Wrap->getWorldTransform()); //transforms come from the wrapper parameters; plain body0/body1 no longer exist in this signature
+	}
+
 	if (!dispatchInfo.m_useConvexConservativeDistanceUtil || m_sepDistance.getConservativeSeparatingDistance()<=0.f)
 #endif //USE_SEPDISTANCE_UTIL2
 
@@ -353,18 +362,23 @@ void btConvexConvexAlgorithm ::processCollision (btCollisionObject* body0,btColl
 	} else
 #endif //USE_SEPDISTANCE_UTIL2
 	{
+		//if (dispatchInfo.m_convexMaxDistanceUseCPT)
+		//{
+		//	input.m_maximumDistanceSquared = min0->getMargin() + min1->getMargin() + m_manifoldPtr->getContactProcessingThreshold();
+		//} else
+		//{
 		input.m_maximumDistanceSquared = min0->getMargin() + min1->getMargin() + m_manifoldPtr->getContactBreakingThreshold();
+//		}
+
 		input.m_maximumDistanceSquared*= input.m_maximumDistanceSquared;
 	}
 
 	input.m_stackAlloc = dispatchInfo.m_stackAllocator;
-	input.m_transformA = body0->getWorldTransform();
-	input.m_transformB = body1->getWorldTransform();
+	input.m_transformA = body0Wrap->getWorldTransform();
+	input.m_transformB = body1Wrap->getWorldTransform();
+
 
-	gjkPairDetector.getClosestPoints(input,*resultOut,dispatchInfo.m_debugDraw);
 
-	btVector3 v0,v1;
-	btVector3 sepNormalWorldSpace;
 	
 
 #ifdef USE_SEPDISTANCE_UTIL2
@@ -376,73 +390,278 @@ void btConvexConvexAlgorithm ::processCollision (btCollisionObject* body0,btColl
 		{
 			sepDist += dispatchInfo.m_convexConservativeDistanceThreshold;
 			//now perturbe directions to get multiple contact points
-			sepNormalWorldSpace = gjkPairDetector.getCachedSeparatingAxis().normalized();
-			btPlaneSpace1(sepNormalWorldSpace,v0,v1);
+			
 		}
 	}
 #endif //USE_SEPDISTANCE_UTIL2
 
+	if (min0->isPolyhedral() && min1->isPolyhedral())
+	{
+
+
+		//Result sink whose three callbacks are all empty:
+		//whatever a detector reports to it is simply dropped.
+		struct btDummyResult : public btDiscreteCollisionDetectorInterface::Result
+		{
+			virtual void setShapeIdentifiersA(int /*partId0*/,int /*index0*/) {}
+			virtual void setShapeIdentifiersB(int /*partId1*/,int /*index1*/) {}
+			virtual void addContactPoint(const btVector3& /*normalOnBInWorld*/,const btVector3& /*pointInWorld*/,btScalar /*depth*/) {}
+		};
+
+		
+		///Forwards each contact to m_originalResult with both margins added onto the distance and the point on B shifted along the normal by B's margin.
+		struct btWithoutMarginResult : public btDiscreteCollisionDetectorInterface::Result
+		{
+			btDiscreteCollisionDetectorInterface::Result* m_originalResult;
+			btVector3	m_reportedNormalOnWorld;
+			btScalar m_marginOnA;
+			btScalar m_marginOnB;
+			btScalar	m_reportedDistance;
+			bool		m_foundResult;
+			btWithoutMarginResult(btDiscreteCollisionDetectorInterface::Result* result, btScalar marginOnA, btScalar marginOnB)
+			:m_originalResult(result),
+			m_reportedNormalOnWorld(btScalar(0.),btScalar(0.),btScalar(0.)),
+			m_marginOnA(marginOnA),
+			m_marginOnB(marginOnB),
+			m_reportedDistance(btScalar(0.)),
+			m_foundResult(false)
+			{
+			}
+			virtual void setShapeIdentifiersA(int partId0,int index0){}
+			virtual void setShapeIdentifiersB(int partId1,int index1){}
+			virtual void addContactPoint(const btVector3& normalOnBInWorld,const btVector3& pointInWorldOrg,btScalar depthOrg) 
+			{
+				m_reportedNormalOnWorld = normalOnBInWorld;
+				//normal and distance are kept so the caller can read them back after the query; they stay zeroed if no contact is ever reported
+				btVector3 adjustedPointB = pointInWorldOrg - normalOnBInWorld*m_marginOnB;
+				m_reportedDistance = depthOrg+(m_marginOnA+m_marginOnB);
+				if (m_reportedDistance<0.f)
+				{
+					m_foundResult = true; //still negative after adding the margins back
+				}
+				m_originalResult->addContactPoint(normalOnBInWorld,adjustedPointB,m_reportedDistance);
+			}
+		};
+
+		
+		btDummyResult dummy;
+
+///btBoxShape is an exception: its vertices are created WITH margin so don't subtract it
+
+		btScalar min0Margin = min0->getShapeType()==BOX_SHAPE_PROXYTYPE? 0.f : min0->getMargin();
+		btScalar min1Margin = min1->getShapeType()==BOX_SHAPE_PROXYTYPE? 0.f : min1->getMargin();
+
+		btWithoutMarginResult	withoutMargin(resultOut, min0Margin,min1Margin);
+
+		btPolyhedralConvexShape* polyhedronA = (btPolyhedralConvexShape*) min0;
+		btPolyhedralConvexShape* polyhedronB = (btPolyhedralConvexShape*) min1;
+		if (polyhedronA->getConvexPolyhedron() && polyhedronB->getConvexPolyhedron())
+		{
+
+
+			
+
+			btScalar threshold = m_manifoldPtr->getContactBreakingThreshold();
+
+			btScalar minDist = -1e30f;
+			btVector3 sepNormalWorldSpace;
+			bool foundSepAxis  = true;
+
+			if (dispatchInfo.m_enableSatConvex)
+			{
+				foundSepAxis = btPolyhedralContactClipping::findSeparatingAxis(
+					*polyhedronA->getConvexPolyhedron(), *polyhedronB->getConvexPolyhedron(),
+					body0Wrap->getWorldTransform(), 
+					body1Wrap->getWorldTransform(),
+					sepNormalWorldSpace,*resultOut);
+			} else
+			{
+#ifdef ZERO_MARGIN
+				gjkPairDetector.setIgnoreMargin(true);
+				gjkPairDetector.getClosestPoints(input,*resultOut,dispatchInfo.m_debugDraw);
+#else
+
+
+				gjkPairDetector.getClosestPoints(input,withoutMargin,dispatchInfo.m_debugDraw);
+				//gjkPairDetector.getClosestPoints(input,dummy,dispatchInfo.m_debugDraw);
+#endif //ZERO_MARGIN
+				//btScalar l2 = gjkPairDetector.getCachedSeparatingAxis().length2();
+				//if (l2>SIMD_EPSILON)
+				{
+					sepNormalWorldSpace = withoutMargin.m_reportedNormalOnWorld;//gjkPairDetector.getCachedSeparatingAxis()*(1.f/l2);
+					//minDist = -1e30f;//gjkPairDetector.getCachedSeparatingDistance();
+					minDist = withoutMargin.m_reportedDistance;//gjkPairDetector.getCachedSeparatingDistance()+min0->getMargin()+min1->getMargin();
+	
+#ifdef ZERO_MARGIN
+					foundSepAxis = true;//gjkPairDetector.getCachedSeparatingDistance()<0.f;
+#else
+					foundSepAxis = withoutMargin.m_foundResult && minDist<0;//-(min0->getMargin()+min1->getMargin());
+#endif
+				}
+			}
+			if (foundSepAxis)
+			{
+				
+//				printf("sepNormalWorldSpace=%f,%f,%f\n",sepNormalWorldSpace.getX(),sepNormalWorldSpace.getY(),sepNormalWorldSpace.getZ());
+
+				btPolyhedralContactClipping::clipHullAgainstHull(sepNormalWorldSpace, *polyhedronA->getConvexPolyhedron(), *polyhedronB->getConvexPolyhedron(),
+					body0Wrap->getWorldTransform(), 
+					body1Wrap->getWorldTransform(), minDist-threshold, threshold, *resultOut);
+ 				
+			}
+			if (m_ownManifold)
+			{
+				resultOut->refreshContactPoints();
+			}
+			return;
+
+		} else
+		{
+			//we can also deal with convex versus triangle (without connectivity data)
+			if (polyhedronA->getConvexPolyhedron() && polyhedronB->getShapeType()==TRIANGLE_SHAPE_PROXYTYPE)
+			{
+
+				btVertexArray vertices;
+				btTriangleShape* tri = (btTriangleShape*)polyhedronB;
+				vertices.push_back(	body1Wrap->getWorldTransform()*tri->m_vertices1[0]);
+				vertices.push_back(	body1Wrap->getWorldTransform()*tri->m_vertices1[1]);
+				vertices.push_back(	body1Wrap->getWorldTransform()*tri->m_vertices1[2]);
+				
+				//tri->initializePolyhedralFeatures();
+
+				btScalar threshold = m_manifoldPtr->getContactBreakingThreshold();
+
+				btVector3 sepNormalWorldSpace;
+				btScalar minDist =-1e30f;
+				btScalar maxDist = threshold;
+				
+				bool foundSepAxis = false;
+				if (0)
+				{
+					polyhedronB->initializePolyhedralFeatures();
+					 foundSepAxis = btPolyhedralContactClipping::findSeparatingAxis(
+					*polyhedronA->getConvexPolyhedron(), *polyhedronB->getConvexPolyhedron(),
+					body0Wrap->getWorldTransform(), 
+					body1Wrap->getWorldTransform(),
+					sepNormalWorldSpace,*resultOut);
+				//	 printf("sepNormalWorldSpace=%f,%f,%f\n",sepNormalWorldSpace.getX(),sepNormalWorldSpace.getY(),sepNormalWorldSpace.getZ());
+
+				} else
+				{
+#ifdef ZERO_MARGIN
+					gjkPairDetector.setIgnoreMargin(true);
+					gjkPairDetector.getClosestPoints(input,*resultOut,dispatchInfo.m_debugDraw);
+#else
+					gjkPairDetector.getClosestPoints(input,dummy,dispatchInfo.m_debugDraw);
+#endif//ZERO_MARGIN
+					
+					btScalar l2 = gjkPairDetector.getCachedSeparatingAxis().length2();
+					if (l2>SIMD_EPSILON)
+					{
+						sepNormalWorldSpace = gjkPairDetector.getCachedSeparatingAxis()*(1.f/l2);
+						//minDist = gjkPairDetector.getCachedSeparatingDistance();
+						//maxDist = threshold;
+						minDist = gjkPairDetector.getCachedSeparatingDistance()-min0->getMargin()-min1->getMargin();
+						foundSepAxis = true;
+					}
+				}
+
+				
+			if (foundSepAxis)
+			{
+				btPolyhedralContactClipping::clipFaceAgainstHull(sepNormalWorldSpace, *polyhedronA->getConvexPolyhedron(), 
+					body0Wrap->getWorldTransform(), vertices, minDist-threshold, maxDist, *resultOut);
+			}
+				
+				
+				if (m_ownManifold)
+				{
+					resultOut->refreshContactPoints();
+				}
+				
+				return;
+			}
+			
+		}
+
+
+	}
+	
+	gjkPairDetector.getClosestPoints(input,*resultOut,dispatchInfo.m_debugDraw);
+
 	//now perform 'm_numPerturbationIterations' collision queries with the perturbated collision objects
 	
 	//perform perturbation when more then 'm_minimumPointsPerturbationThreshold' points
-	if (resultOut->getPersistentManifold()->getNumContacts() < m_minimumPointsPerturbationThreshold)
+	if (m_numPerturbationIterations && resultOut->getPersistentManifold()->getNumContacts() < m_minimumPointsPerturbationThreshold)
 	{
 		
 		int i;
-
-		bool perturbeA = true;
-		const btScalar angleLimit = 0.125f * SIMD_PI;
-		btScalar perturbeAngle;
-		btScalar radiusA = min0->getAngularMotionDisc();
-		btScalar radiusB = min1->getAngularMotionDisc();
-		if (radiusA < radiusB)
+		btVector3 v0,v1;
+		btVector3 sepNormalWorldSpace;
+		btScalar l2 = gjkPairDetector.getCachedSeparatingAxis().length2();
+	
+		if (l2>SIMD_EPSILON)
 		{
-			perturbeAngle = gContactBreakingThreshold /radiusA;
-			perturbeA = true;
-		} else
-		{
-			perturbeAngle = gContactBreakingThreshold / radiusB;
-			perturbeA = false;
-		}
-		if ( perturbeAngle > angleLimit ) 
-				perturbeAngle = angleLimit;
-
-		btTransform unPerturbedTransform;
-		if (perturbeA)
-		{
-			unPerturbedTransform = input.m_transformA;
-		} else
-		{
-			unPerturbedTransform = input.m_transformB;
-		}
-		
-		for ( i=0;i<m_numPerturbationIterations;i++)
-		{
-			btQuaternion perturbeRot(v0,perturbeAngle);
-			btScalar iterationAngle = i*(SIMD_2_PI/btScalar(m_numPerturbationIterations));
-			btQuaternion rotq(sepNormalWorldSpace,iterationAngle);
+			sepNormalWorldSpace = gjkPairDetector.getCachedSeparatingAxis()*(1.f/l2);
 			
-			
-			if (perturbeA)
+			btPlaneSpace1(sepNormalWorldSpace,v0,v1);
+
+
+			bool perturbeA = true;
+			const btScalar angleLimit = 0.125f * SIMD_PI;
+			btScalar perturbeAngle;
+			btScalar radiusA = min0->getAngularMotionDisc();
+			btScalar radiusB = min1->getAngularMotionDisc();
+			if (radiusA < radiusB)
 			{
-				input.m_transformA.setBasis(  btMatrix3x3(rotq.inverse()*perturbeRot*rotq)*body0->getWorldTransform().getBasis());
-				input.m_transformB = body1->getWorldTransform();
-#ifdef DEBUG_CONTACTS
-				dispatchInfo.m_debugDraw->drawTransform(input.m_transformA,10.0);
-#endif //DEBUG_CONTACTS
+				perturbeAngle = gContactBreakingThreshold /radiusA;
+				perturbeA = true;
 			} else
 			{
-				input.m_transformA = body0->getWorldTransform();
-				input.m_transformB.setBasis( btMatrix3x3(rotq.inverse()*perturbeRot*rotq)*body1->getWorldTransform().getBasis());
-#ifdef DEBUG_CONTACTS
-				dispatchInfo.m_debugDraw->drawTransform(input.m_transformB,10.0);
-#endif
+				perturbeAngle = gContactBreakingThreshold / radiusB;
+				perturbeA = false;
+			}
+			if ( perturbeAngle > angleLimit ) 
+					perturbeAngle = angleLimit;
+
+			btTransform unPerturbedTransform;
+			if (perturbeA)
+			{
+				unPerturbedTransform = input.m_transformA;
+			} else
+			{
+				unPerturbedTransform = input.m_transformB;
 			}
 			
-			btPerturbedContactResult perturbedResultOut(resultOut,input.m_transformA,input.m_transformB,unPerturbedTransform,perturbeA,dispatchInfo.m_debugDraw);
-			gjkPairDetector.getClosestPoints(input,perturbedResultOut,dispatchInfo.m_debugDraw);
-			
-			
+			for ( i=0;i<m_numPerturbationIterations;i++)
+			{
+				if (v0.length2()>SIMD_EPSILON)
+				{
+				btQuaternion perturbeRot(v0,perturbeAngle);
+				btScalar iterationAngle = i*(SIMD_2_PI/btScalar(m_numPerturbationIterations));
+				btQuaternion rotq(sepNormalWorldSpace,iterationAngle);
+				
+				
+				if (perturbeA)
+				{
+					input.m_transformA.setBasis(  btMatrix3x3(rotq.inverse()*perturbeRot*rotq)*body0Wrap->getWorldTransform().getBasis());
+					input.m_transformB = body1Wrap->getWorldTransform();
+	#ifdef DEBUG_CONTACTS
+					dispatchInfo.m_debugDraw->drawTransform(input.m_transformA,10.0);
+	#endif //DEBUG_CONTACTS
+				} else
+				{
+					input.m_transformA = body0Wrap->getWorldTransform();
+					input.m_transformB.setBasis( btMatrix3x3(rotq.inverse()*perturbeRot*rotq)*body1Wrap->getWorldTransform().getBasis());
+	#ifdef DEBUG_CONTACTS
+					dispatchInfo.m_debugDraw->drawTransform(input.m_transformB,10.0);
+	#endif
+				}
+				
+				btPerturbedContactResult perturbedResultOut(resultOut,input.m_transformA,input.m_transformB,unPerturbedTransform,perturbeA,dispatchInfo.m_debugDraw);
+				gjkPairDetector.getClosestPoints(input,perturbedResultOut,dispatchInfo.m_debugDraw);
+				}
+			}
 		}
 	}
 
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btConvexConvexAlgorithm.h b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btConvexConvexAlgorithm.h
index 4d10ffca7..51db0c654 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btConvexConvexAlgorithm.h
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btConvexConvexAlgorithm.h
@@ -13,8 +13,8 @@ subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-#ifndef CONVEX_CONVEX_ALGORITHM_H
-#define CONVEX_CONVEX_ALGORITHM_H
+#ifndef BT_CONVEX_CONVEX_ALGORITHM_H
+#define BT_CONVEX_CONVEX_ALGORITHM_H
 
 #include "btActivatingCollisionAlgorithm.h"
 #include "BulletCollision/NarrowPhaseCollision/btGjkPairDetector.h"
@@ -32,7 +32,7 @@ class btConvexPenetrationDepthSolver;
 ///Either improve GJK for large size ratios (testing a 100 units versus a 0.1 unit object) or only enable the util
 ///for certain pairs that have a small size ratio
 
-#define USE_SEPDISTANCE_UTIL2 1
+//#define USE_SEPDISTANCE_UTIL2 1
 
 ///The convexConvexAlgorithm collision algorithm implements time of impact, convex closest points and penetration depth calculations between two convex objects.
 ///Multiple contact points are calculated by perturbing the orientation of the smallest object orthogonal to the separating normal.
@@ -59,12 +59,11 @@ class btConvexConvexAlgorithm : public btActivatingCollisionAlgorithm
 
 public:
 
-	btConvexConvexAlgorithm(btPersistentManifold* mf,const btCollisionAlgorithmConstructionInfo& ci,btCollisionObject* body0,btCollisionObject* body1, btSimplexSolverInterface* simplexSolver, btConvexPenetrationDepthSolver* pdSolver, int numPerturbationIterations, int minimumPointsPerturbationThreshold);
-
+	btConvexConvexAlgorithm(btPersistentManifold* mf,const btCollisionAlgorithmConstructionInfo& ci,const btCollisionObjectWrapper* body0Wrap,const btCollisionObjectWrapper* body1Wrap, btSimplexSolverInterface* simplexSolver, btConvexPenetrationDepthSolver* pdSolver, int numPerturbationIterations, int minimumPointsPerturbationThreshold);
 
 	virtual ~btConvexConvexAlgorithm();
 
-	virtual void processCollision (btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
+	virtual void processCollision (const btCollisionObjectWrapper* body0Wrap,const btCollisionObjectWrapper* body1Wrap,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
 
 	virtual btScalar calculateTimeOfImpact(btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
 
@@ -96,14 +95,14 @@ public:
 		
 		virtual ~CreateFunc();
 
-		virtual	btCollisionAlgorithm* CreateCollisionAlgorithm(btCollisionAlgorithmConstructionInfo& ci, btCollisionObject* body0,btCollisionObject* body1)
+		virtual	btCollisionAlgorithm* CreateCollisionAlgorithm(btCollisionAlgorithmConstructionInfo& ci, const btCollisionObjectWrapper* body0Wrap,const btCollisionObjectWrapper* body1Wrap)
 		{
 			void* mem = ci.m_dispatcher1->allocateCollisionAlgorithm(sizeof(btConvexConvexAlgorithm));
-			return new(mem) btConvexConvexAlgorithm(ci.m_manifold,ci,body0,body1,m_simplexSolver,m_pdSolver,m_numPerturbationIterations,m_minimumPointsPerturbationThreshold);
+			return new(mem) btConvexConvexAlgorithm(ci.m_manifold,ci,body0Wrap,body1Wrap,m_simplexSolver,m_pdSolver,m_numPerturbationIterations,m_minimumPointsPerturbationThreshold);
 		}
 	};
 
 
 };
 
-#endif //CONVEX_CONVEX_ALGORITHM_H
+#endif //BT_CONVEX_CONVEX_ALGORITHM_H
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btConvexPlaneCollisionAlgorithm.cpp b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btConvexPlaneCollisionAlgorithm.cpp
index dda85dc69..cce2d95bc 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btConvexPlaneCollisionAlgorithm.cpp
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btConvexPlaneCollisionAlgorithm.cpp
@@ -19,10 +19,11 @@ subject to the following restrictions:
 #include "BulletCollision/CollisionDispatch/btCollisionObject.h"
 #include "BulletCollision/CollisionShapes/btConvexShape.h"
 #include "BulletCollision/CollisionShapes/btStaticPlaneShape.h"
+#include "BulletCollision/CollisionDispatch/btCollisionObjectWrapper.h"
 
 //#include <stdio.h>
 
-btConvexPlaneCollisionAlgorithm::btConvexPlaneCollisionAlgorithm(btPersistentManifold* mf,const btCollisionAlgorithmConstructionInfo& ci,btCollisionObject* col0,btCollisionObject* col1, bool isSwapped, int numPerturbationIterations,int minimumPointsPerturbationThreshold)
+btConvexPlaneCollisionAlgorithm::btConvexPlaneCollisionAlgorithm(btPersistentManifold* mf,const btCollisionAlgorithmConstructionInfo& ci,const btCollisionObjectWrapper* col0Wrap,const btCollisionObjectWrapper* col1Wrap, bool isSwapped, int numPerturbationIterations,int minimumPointsPerturbationThreshold)
 : btCollisionAlgorithm(ci),
 m_ownManifold(false),
 m_manifoldPtr(mf),
@@ -30,12 +31,12 @@ m_isSwapped(isSwapped),
 m_numPerturbationIterations(numPerturbationIterations),
 m_minimumPointsPerturbationThreshold(minimumPointsPerturbationThreshold)
 {
-	btCollisionObject* convexObj = m_isSwapped? col1 : col0;
-	btCollisionObject* planeObj = m_isSwapped? col0 : col1;
+	const btCollisionObjectWrapper* convexObjWrap = m_isSwapped? col1Wrap : col0Wrap;
+	const btCollisionObjectWrapper* planeObjWrap = m_isSwapped? col0Wrap : col1Wrap;
 
-	if (!m_manifoldPtr && m_dispatcher->needsCollision(convexObj,planeObj))
+	if (!m_manifoldPtr && m_dispatcher->needsCollision(convexObjWrap->getCollisionObject(),planeObjWrap->getCollisionObject()))
 	{
-		m_manifoldPtr = m_dispatcher->getNewManifold(convexObj,planeObj);
+		m_manifoldPtr = m_dispatcher->getNewManifold(convexObjWrap->getCollisionObject(),planeObjWrap->getCollisionObject());
 		m_ownManifold = true;
 	}
 }
@@ -50,25 +51,25 @@ btConvexPlaneCollisionAlgorithm::~btConvexPlaneCollisionAlgorithm()
 	}
 }
 
-void btConvexPlaneCollisionAlgorithm::collideSingleContact (const btQuaternion& perturbeRot, btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut)
+void btConvexPlaneCollisionAlgorithm::collideSingleContact (const btQuaternion& perturbeRot, const btCollisionObjectWrapper* body0Wrap,const btCollisionObjectWrapper* body1Wrap,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut)
 {
-    btCollisionObject* convexObj = m_isSwapped? body1 : body0;
-	btCollisionObject* planeObj = m_isSwapped? body0: body1;
+    const btCollisionObjectWrapper* convexObjWrap = m_isSwapped? body1Wrap : body0Wrap;
+	const btCollisionObjectWrapper* planeObjWrap = m_isSwapped? body0Wrap: body1Wrap;
 
-	btConvexShape* convexShape = (btConvexShape*) convexObj->getCollisionShape();
-	btStaticPlaneShape* planeShape = (btStaticPlaneShape*) planeObj->getCollisionShape();
+	btConvexShape* convexShape = (btConvexShape*) convexObjWrap->getCollisionShape();
+	btStaticPlaneShape* planeShape = (btStaticPlaneShape*) planeObjWrap->getCollisionShape();
 
     bool hasCollision = false;
 	const btVector3& planeNormal = planeShape->getPlaneNormal();
 	const btScalar& planeConstant = planeShape->getPlaneConstant();
 	
-	btTransform convexWorldTransform = convexObj->getWorldTransform();
+	btTransform convexWorldTransform = convexObjWrap->getWorldTransform();
 	btTransform convexInPlaneTrans;
-	convexInPlaneTrans= planeObj->getWorldTransform().inverse() * convexWorldTransform;
+	convexInPlaneTrans= planeObjWrap->getWorldTransform().inverse() * convexWorldTransform;
 	//now perturbe the convex-world transform
 	convexWorldTransform.getBasis()*=btMatrix3x3(perturbeRot);
 	btTransform planeInConvex;
-	planeInConvex= convexWorldTransform.inverse() * planeObj->getWorldTransform();
+	planeInConvex= convexWorldTransform.inverse() * planeObjWrap->getWorldTransform();
 	
 	btVector3 vtx = convexShape->localGetSupportingVertex(planeInConvex.getBasis()*-planeNormal);
 
@@ -76,43 +77,61 @@ void btConvexPlaneCollisionAlgorithm::collideSingleContact (const btQuaternion&
 	btScalar distance = (planeNormal.dot(vtxInPlane) - planeConstant);
 
 	btVector3 vtxInPlaneProjected = vtxInPlane - distance*planeNormal;
-	btVector3 vtxInPlaneWorld = planeObj->getWorldTransform() * vtxInPlaneProjected;
+	btVector3 vtxInPlaneWorld = planeObjWrap->getWorldTransform() * vtxInPlaneProjected;
 
 	hasCollision = distance < m_manifoldPtr->getContactBreakingThreshold();
 	resultOut->setPersistentManifold(m_manifoldPtr);
 	if (hasCollision)
 	{
 		/// report a contact. internally this will be kept persistent, and contact reduction is done
-		btVector3 normalOnSurfaceB = planeObj->getWorldTransform().getBasis() * planeNormal;
+		btVector3 normalOnSurfaceB = planeObjWrap->getWorldTransform().getBasis() * planeNormal;
 		btVector3 pOnB = vtxInPlaneWorld;
 		resultOut->addContactPoint(normalOnSurfaceB,pOnB,distance);
 	}
 }
 
 
-void btConvexPlaneCollisionAlgorithm::processCollision (btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut)
+void btConvexPlaneCollisionAlgorithm::processCollision (const btCollisionObjectWrapper* body0Wrap,const btCollisionObjectWrapper* body1Wrap,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut)
 {
 	(void)dispatchInfo;
 	if (!m_manifoldPtr)
 		return;
 
-    btCollisionObject* convexObj = m_isSwapped? body1 : body0;
-	btCollisionObject* planeObj = m_isSwapped? body0: body1;
+	const btCollisionObjectWrapper* convexObjWrap = m_isSwapped? body1Wrap : body0Wrap;
+	const btCollisionObjectWrapper* planeObjWrap = m_isSwapped? body0Wrap: body1Wrap;
 
-	btConvexShape* convexShape = (btConvexShape*) convexObj->getCollisionShape();
-	btStaticPlaneShape* planeShape = (btStaticPlaneShape*) planeObj->getCollisionShape();
+	btConvexShape* convexShape = (btConvexShape*) convexObjWrap->getCollisionShape();
+	btStaticPlaneShape* planeShape = (btStaticPlaneShape*) planeObjWrap->getCollisionShape();
 
-    
+	bool hasCollision = false;
 	const btVector3& planeNormal = planeShape->getPlaneNormal();
-	//const btScalar& planeConstant = planeShape->getPlaneConstant();
+	const btScalar& planeConstant = planeShape->getPlaneConstant();
+	btTransform planeInConvex;
+	planeInConvex= convexObjWrap->getWorldTransform().inverse() * planeObjWrap->getWorldTransform();
+	btTransform convexInPlaneTrans;
+	convexInPlaneTrans= planeObjWrap->getWorldTransform().inverse() * convexObjWrap->getWorldTransform();
 
-	//first perform a collision query with the non-perturbated collision objects
+	btVector3 vtx = convexShape->localGetSupportingVertex(planeInConvex.getBasis()*-planeNormal);
+	btVector3 vtxInPlane = convexInPlaneTrans(vtx);
+	btScalar distance = (planeNormal.dot(vtxInPlane) - planeConstant);
+
+	btVector3 vtxInPlaneProjected = vtxInPlane - distance*planeNormal;
+	btVector3 vtxInPlaneWorld = planeObjWrap->getWorldTransform() * vtxInPlaneProjected;
+
+	hasCollision = distance < m_manifoldPtr->getContactBreakingThreshold();
+	resultOut->setPersistentManifold(m_manifoldPtr);
+	if (hasCollision)
 	{
-		btQuaternion rotq(0,0,0,1);
-		collideSingleContact(rotq,body0,body1,dispatchInfo,resultOut);
+		/// report a contact. internally this will be kept persistent, and contact reduction is done
+		btVector3 normalOnSurfaceB = planeObjWrap->getWorldTransform().getBasis() * planeNormal;
+		btVector3 pOnB = vtxInPlaneWorld;
+		resultOut->addContactPoint(normalOnSurfaceB,pOnB,distance);
 	}
 
-	if (resultOut->getPersistentManifold()->getNumContacts()<m_minimumPointsPerturbationThreshold)
+	//the perturbation algorithm doesn't work well with implicit surfaces such as spheres, cylinders and cones:
+	//they keep on rolling forever because of the additional off-center contact points
+	//so only enable the feature for polyhedral shapes (btBoxShape, btConvexHullShape etc)
+	if (convexShape->isPolyhedral() && resultOut->getPersistentManifold()->getNumContacts()<m_minimumPointsPerturbationThreshold)
 	{
 		btVector3 v0,v1;
 		btPlaneSpace1(planeNormal,v0,v1);
@@ -130,7 +149,7 @@ void btConvexPlaneCollisionAlgorithm::processCollision (btCollisionObject* body0
 		{
 			btScalar iterationAngle = i*(SIMD_2_PI/btScalar(m_numPerturbationIterations));
 			btQuaternion rotq(planeNormal,iterationAngle);
-			collideSingleContact(rotq.inverse()*perturbeRot*rotq,body0,body1,dispatchInfo,resultOut);
+			collideSingleContact(rotq.inverse()*perturbeRot*rotq,body0Wrap,body1Wrap,dispatchInfo,resultOut);
 		}
 	}
 
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btConvexPlaneCollisionAlgorithm.h b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btConvexPlaneCollisionAlgorithm.h
index f49ac45e7..d28c430c4 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btConvexPlaneCollisionAlgorithm.h
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btConvexPlaneCollisionAlgorithm.h
@@ -13,8 +13,8 @@ subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-#ifndef CONVEX_PLANE_COLLISION_ALGORITHM_H
-#define CONVEX_PLANE_COLLISION_ALGORITHM_H
+#ifndef BT_CONVEX_PLANE_COLLISION_ALGORITHM_H
+#define BT_CONVEX_PLANE_COLLISION_ALGORITHM_H
 
 #include "BulletCollision/BroadphaseCollision/btCollisionAlgorithm.h"
 #include "BulletCollision/BroadphaseCollision/btBroadphaseProxy.h"
@@ -36,13 +36,13 @@ class btConvexPlaneCollisionAlgorithm : public btCollisionAlgorithm
 
 public:
 
-	btConvexPlaneCollisionAlgorithm(btPersistentManifold* mf,const btCollisionAlgorithmConstructionInfo& ci,btCollisionObject* col0,btCollisionObject* col1, bool isSwapped, int numPerturbationIterations,int minimumPointsPerturbationThreshold);
+	btConvexPlaneCollisionAlgorithm(btPersistentManifold* mf,const btCollisionAlgorithmConstructionInfo& ci,const btCollisionObjectWrapper* body0Wrap,const btCollisionObjectWrapper* body1Wrap, bool isSwapped, int numPerturbationIterations,int minimumPointsPerturbationThreshold);
 
 	virtual ~btConvexPlaneCollisionAlgorithm();
 
-	virtual void processCollision (btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
+	virtual void processCollision (const btCollisionObjectWrapper* body0Wrap,const btCollisionObjectWrapper* body1Wrap,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
 
-	void collideSingleContact (const btQuaternion& perturbeRot, btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
+	void collideSingleContact (const btQuaternion& perturbeRot, const btCollisionObjectWrapper* body0Wrap,const btCollisionObjectWrapper* body1Wrap,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
 
 	virtual btScalar calculateTimeOfImpact(btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
 
@@ -61,24 +61,24 @@ public:
 			
 		CreateFunc() 
 			: m_numPerturbationIterations(1),
-			m_minimumPointsPerturbationThreshold(1)
+			m_minimumPointsPerturbationThreshold(0)
 		{
 		}
 		
-		virtual	btCollisionAlgorithm* CreateCollisionAlgorithm(btCollisionAlgorithmConstructionInfo& ci, btCollisionObject* body0,btCollisionObject* body1)
+		virtual	btCollisionAlgorithm* CreateCollisionAlgorithm(btCollisionAlgorithmConstructionInfo& ci, const btCollisionObjectWrapper* body0Wrap,const btCollisionObjectWrapper* body1Wrap)
 		{
 			void* mem = ci.m_dispatcher1->allocateCollisionAlgorithm(sizeof(btConvexPlaneCollisionAlgorithm));
 			if (!m_swapped)
 			{
-				return new(mem) btConvexPlaneCollisionAlgorithm(0,ci,body0,body1,false,m_numPerturbationIterations,m_minimumPointsPerturbationThreshold);
+				return new(mem) btConvexPlaneCollisionAlgorithm(0,ci,body0Wrap,body1Wrap,false,m_numPerturbationIterations,m_minimumPointsPerturbationThreshold);
 			} else
 			{
-				return new(mem) btConvexPlaneCollisionAlgorithm(0,ci,body0,body1,true,m_numPerturbationIterations,m_minimumPointsPerturbationThreshold);
+				return new(mem) btConvexPlaneCollisionAlgorithm(0,ci,body0Wrap,body1Wrap,true,m_numPerturbationIterations,m_minimumPointsPerturbationThreshold);
 			}
 		}
 	};
 
 };
 
-#endif //CONVEX_PLANE_COLLISION_ALGORITHM_H
+#endif //BT_CONVEX_PLANE_COLLISION_ALGORITHM_H
 
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btDefaultCollisionConfiguration.cpp b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btDefaultCollisionConfiguration.cpp
index c27d8ce07..7faee6faf 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btDefaultCollisionConfiguration.cpp
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btDefaultCollisionConfiguration.cpp
@@ -296,3 +296,14 @@ void btDefaultCollisionConfiguration::setConvexConvexMultipointIterations(int nu
 	convexConvex->m_numPerturbationIterations = numPerturbationIterations;
 	convexConvex->m_minimumPointsPerturbationThreshold = minimumPointsPerturbationThreshold;
 }
+//Stores numPerturbationIterations and minimumPointsPerturbationThreshold on both convex/plane create functions (m_convexPlaneCF and m_planeConvexCF).
+void	btDefaultCollisionConfiguration::setPlaneConvexMultipointIterations(int numPerturbationIterations, int minimumPointsPerturbationThreshold)
+{
+	btConvexPlaneCollisionAlgorithm::CreateFunc* cpCF = (btConvexPlaneCollisionAlgorithm::CreateFunc*)m_convexPlaneCF;
+	cpCF->m_numPerturbationIterations = numPerturbationIterations;
+	cpCF->m_minimumPointsPerturbationThreshold = minimumPointsPerturbationThreshold;
+	//apply the same two settings to the second create function, m_planeConvexCF
+	btConvexPlaneCollisionAlgorithm::CreateFunc* pcCF = (btConvexPlaneCollisionAlgorithm::CreateFunc*)m_planeConvexCF;
+	pcCF->m_numPerturbationIterations = numPerturbationIterations;
+	pcCF->m_minimumPointsPerturbationThreshold = minimumPointsPerturbationThreshold;
+}
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btDefaultCollisionConfiguration.h b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btDefaultCollisionConfiguration.h
index 37748663a..474785bfc 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btDefaultCollisionConfiguration.h
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btDefaultCollisionConfiguration.h
@@ -78,10 +78,8 @@ protected:
 	btCollisionAlgorithmCreateFunc*	m_swappedCompoundCreateFunc;
 	btCollisionAlgorithmCreateFunc* m_emptyCreateFunc;
 	btCollisionAlgorithmCreateFunc* m_sphereSphereCF;
-#ifdef USE_BUGGY_SPHERE_BOX_ALGORITHM
 	btCollisionAlgorithmCreateFunc* m_sphereBoxCF;
 	btCollisionAlgorithmCreateFunc* m_boxSphereCF;
-#endif //USE_BUGGY_SPHERE_BOX_ALGORITHM
 
 	btCollisionAlgorithmCreateFunc* m_boxBoxCF;
 	btCollisionAlgorithmCreateFunc*	m_sphereTriangleCF;
@@ -112,6 +110,11 @@ public:
 		return m_stackAlloc;
 	}
 
+	virtual	btVoronoiSimplexSolver*	getSimplexSolver()	//accessor: hands out the m_simplexSolver member as-is
+	{
+		return m_simplexSolver;
+	}
+
 
 	virtual btCollisionAlgorithmCreateFunc* getCollisionAlgorithmCreateFunc(int proxyType0,int proxyType1);
 
@@ -124,6 +127,8 @@ public:
 	///@todo we could add a per-object setting of those parameters, for level-of-detail collision detection.
 	void	setConvexConvexMultipointIterations(int numPerturbationIterations=3, int minimumPointsPerturbationThreshold = 3);
 
+	void	setPlaneConvexMultipointIterations(int numPerturbationIterations=3, int minimumPointsPerturbationThreshold = 3);
+
 };
 
 #endif //BT_DEFAULT_COLLISION_CONFIGURATION
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btEmptyCollisionAlgorithm.cpp b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btEmptyCollisionAlgorithm.cpp
index 936054387..5fa1c8be5 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btEmptyCollisionAlgorithm.cpp
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btEmptyCollisionAlgorithm.cpp
@@ -22,7 +22,7 @@ btEmptyAlgorithm::btEmptyAlgorithm(const btCollisionAlgorithmConstructionInfo& c
 {
 }
 
-void btEmptyAlgorithm::processCollision (btCollisionObject* ,btCollisionObject* ,const btDispatcherInfo& ,btManifoldResult* )
+void btEmptyAlgorithm::processCollision (const btCollisionObjectWrapper* ,const btCollisionObjectWrapper* ,const btDispatcherInfo& ,btManifoldResult* )
 {
 }
 
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btEmptyCollisionAlgorithm.h b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btEmptyCollisionAlgorithm.h
index e54721dec..cb0f15218 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btEmptyCollisionAlgorithm.h
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btEmptyCollisionAlgorithm.h
@@ -13,8 +13,8 @@ subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-#ifndef EMPTY_ALGORITH
-#define EMPTY_ALGORITH
+#ifndef BT_EMPTY_ALGORITH
+#define BT_EMPTY_ALGORITH
 #include "BulletCollision/BroadphaseCollision/btCollisionAlgorithm.h"
 #include "btCollisionCreateFunc.h"
 #include "btCollisionDispatcher.h"
@@ -30,7 +30,7 @@ public:
 	
 	btEmptyAlgorithm(const btCollisionAlgorithmConstructionInfo& ci);
 
-	virtual void processCollision (btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
+	virtual void processCollision (const btCollisionObjectWrapper* body0Wrap,const btCollisionObjectWrapper* body1Wrap,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
 
 	virtual btScalar calculateTimeOfImpact(btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
 
@@ -40,10 +40,10 @@ public:
 
 	struct CreateFunc :public 	btCollisionAlgorithmCreateFunc
 	{
-		virtual	btCollisionAlgorithm* CreateCollisionAlgorithm(btCollisionAlgorithmConstructionInfo& ci, btCollisionObject* body0,btCollisionObject* body1)
-		{
-			(void)body0;
-			(void)body1;
+        virtual	btCollisionAlgorithm* CreateCollisionAlgorithm(btCollisionAlgorithmConstructionInfo& ci, const btCollisionObjectWrapper* body0Wrap,const btCollisionObjectWrapper* body1Wrap)
+        {
+			(void)body0Wrap;
+			(void)body1Wrap;
 			void* mem = ci.m_dispatcher1->allocateCollisionAlgorithm(sizeof(btEmptyAlgorithm));
 			return new(mem) btEmptyAlgorithm(ci);
 		}
@@ -51,4 +51,4 @@ public:
 
 } ATTRIBUTE_ALIGNED(16);
 
-#endif //EMPTY_ALGORITH
+#endif //BT_EMPTY_ALGORITH
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btGhostObject.h b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btGhostObject.h
index 95b575024..8ec861385 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btGhostObject.h
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btGhostObject.h
@@ -160,7 +160,7 @@ public:
 		return 0;
 	}
 
-	virtual void	removeOverlappingPairsContainingProxy(btBroadphaseProxy* proxy0,btDispatcher* dispatcher)
+	virtual void	removeOverlappingPairsContainingProxy(btBroadphaseProxy* /*proxy0*/,btDispatcher* /*dispatcher*/)
 	{
 		btAssert(0);
 		//need to keep track of all ghost objects and call them here
@@ -171,4 +171,5 @@ public:
 
 };
 
-#endif
\ No newline at end of file
+#endif
+
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btInternalEdgeUtility.cpp b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btInternalEdgeUtility.cpp
new file mode 100644
index 000000000..73fa4e87e
--- /dev/null
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btInternalEdgeUtility.cpp
@@ -0,0 +1,842 @@
+#include "btInternalEdgeUtility.h"
+
+#include "BulletCollision/CollisionShapes/btBvhTriangleMeshShape.h"
+#include "BulletCollision/CollisionShapes/btScaledBvhTriangleMeshShape.h"
+#include "BulletCollision/CollisionShapes/btTriangleShape.h"
+#include "BulletCollision/CollisionDispatch/btCollisionObject.h"
+#include "BulletCollision/NarrowPhaseCollision/btManifoldPoint.h"
+#include "LinearMath/btIDebugDraw.h"
+#include "BulletCollision/CollisionDispatch/btCollisionObjectWrapper.h"
+
+//#define DEBUG_INTERNAL_EDGE
+
+#ifdef DEBUG_INTERNAL_EDGE
+#include <stdio.h>
+#endif //DEBUG_INTERNAL_EDGE
+
+
+#ifdef BT_INTERNAL_EDGE_DEBUG_DRAW
+static btIDebugDraw* gDebugDrawer = 0;
+//Registers the drawer that btDebugDrawLine forwards to; only compiled when BT_INTERNAL_EDGE_DEBUG_DRAW is defined.
+void	btSetDebugDrawer(btIDebugDraw* debugDrawer)
+{
+	gDebugDrawer = debugDrawer;
+}
+//Draws a line from 'from' to 'to' in 'color' through gDebugDrawer; does nothing while gDebugDrawer is null.
+static void    btDebugDrawLine(const btVector3& from,const btVector3& to, const btVector3& color)
+{
+	if (gDebugDrawer)
+		gDebugDrawer->drawLine(from,to,color);
+}
+#endif //BT_INTERNAL_EDGE_DEBUG_DRAW
+//Packs (partId, triangleIndex) into a single int: partId is shifted left by (31-MAX_NUM_PARTS_IN_BITS) bits and OR-ed with triangleIndex.
+//NOTE(review): nothing here checks that triangleIndex stays below the shifted partId bits - presumably guaranteed by the caller, confirm.
+static int	btGetHash(int partId, int triangleIndex)
+{
+	int hash = (partId<<(31-MAX_NUM_PARTS_IN_BITS)) | triangleIndex;
+	return hash;
+}
+
+//Returns btAtan2(normalB.dot(edgeA), normalB.dot(normalA)): the angle of normalB measured from normalA towards edgeA.
+//NOTE(review): presumably radians with the usual atan2(y,x) sign convention - confirm against btAtan2.
+static btScalar btGetAngle(const btVector3& edgeA, const btVector3& normalA,const btVector3& normalB)
+{
+	const btVector3 refAxis0  = edgeA;
+	const btVector3 refAxis1  = normalA;
+	const btVector3 swingAxis = normalB;
+	btScalar angle = btAtan2(swingAxis.dot(refAxis0), swingAxis.dot(refAxis1));
+	return  angle;
+}
+//Triangle callback that compares every reported triangle (B) against one fixed reference triangle (A).
+//When A and B share exactly two vertices, the edge angle and convexity/swap flags for that edge of A are written to m_triangleInfoMap.
+struct btConnectivityProcessor : public btTriangleCallback
+{
+	int				m_partIdA;	//part id of reference triangle A
+	int				m_triangleIndexA;	//triangle index of reference triangle A
+	btVector3*		m_triangleVerticesA;	//the three vertices of triangle A (indexed 0..2 below)
+	btTriangleInfoMap*	m_triangleInfoMap;	//receives the per-triangle info; also supplies m_equalVertexThreshold and m_planarEpsilon
+
+	//Examines one triangle B (three vertices in 'triangle', identified by partId/triangleIndex) against triangle A.
+	virtual void processTriangle(btVector3* triangle, int partId, int triangleIndex)
+	{
+		//skip self-collisions
+		if ((m_partIdA == partId) && (m_triangleIndexA == triangleIndex))
+			return;
+
+		//skip duplicates (disabled for now)
+		//if ((m_partIdA <= partId) && (m_triangleIndexA <= triangleIndex))
+		//	return;
+
+		//search for shared vertices and edges
+		int numshared = 0;
+		int sharedVertsA[3]={-1,-1,-1};
+		int sharedVertsB[3]={-1,-1,-1};
+
+		///skip degenerate triangles
+		btScalar crossBSqr = ((triangle[1]-triangle[0]).cross(triangle[2]-triangle[0])).length2();
+		if (crossBSqr < m_triangleInfoMap->m_equalVertexThreshold)
+			return;
+
+
+		btScalar crossASqr = ((m_triangleVerticesA[1]-m_triangleVerticesA[0]).cross(m_triangleVerticesA[2]-m_triangleVerticesA[0])).length2();
+		///skip degenerate triangles
+		if (crossASqr< m_triangleInfoMap->m_equalVertexThreshold)
+			return;
+
+#if 0
+		printf("triangle A[0]	=	(%f,%f,%f)\ntriangle A[1]	=	(%f,%f,%f)\ntriangle A[2]	=	(%f,%f,%f)\n",
+			m_triangleVerticesA[0].getX(),m_triangleVerticesA[0].getY(),m_triangleVerticesA[0].getZ(),
+			m_triangleVerticesA[1].getX(),m_triangleVerticesA[1].getY(),m_triangleVerticesA[1].getZ(),
+			m_triangleVerticesA[2].getX(),m_triangleVerticesA[2].getY(),m_triangleVerticesA[2].getZ());
+
+		printf("partId=%d, triangleIndex=%d\n",partId,triangleIndex);
+		printf("triangle B[0]	=	(%f,%f,%f)\ntriangle B[1]	=	(%f,%f,%f)\ntriangle B[2]	=	(%f,%f,%f)\n",
+			triangle[0].getX(),triangle[0].getY(),triangle[0].getZ(),
+			triangle[1].getX(),triangle[1].getY(),triangle[1].getZ(),
+			triangle[2].getX(),triangle[2].getY(),triangle[2].getZ());
+#endif
+		//collect vertex pairs (i of A, j of B) whose squared distance is below m_equalVertexThreshold
+		for (int i=0;i<3;i++)
+		{
+			for (int j=0;j<3;j++)
+			{
+				if ( (m_triangleVerticesA[i]-triangle[j]).length2() < m_triangleInfoMap->m_equalVertexThreshold)
+				{
+					sharedVertsA[numshared] = i;
+					sharedVertsB[numshared] = j;
+					numshared++;
+					///degenerate case
+					if(numshared >= 3)
+						return;
+				}
+			}
+			///degenerate case
+			if(numshared >= 3)
+				return;
+		}
+		switch (numshared)
+		{
+		case 0:
+			{
+				break;
+			}
+		case 1:
+			{
+				//shared vertex
+				break;
+			}
+		case 2:
+			{
+				//shared edge
+				//we need to make sure the edge is in the order V2V0 and not V0V2 so that the signs are correct
+				if (sharedVertsA[0] == 0 && sharedVertsA[1] == 2)
+				{
+					sharedVertsA[0] = 2;
+					sharedVertsA[1] = 0;
+					int tmp = sharedVertsB[1];
+					sharedVertsB[1] = sharedVertsB[0];
+					sharedVertsB[0] = tmp;
+				}
+
+				int hash = btGetHash(m_partIdA,m_triangleIndexA);
+				//fetch the info record of triangle A, inserting a default-constructed one first if none exists yet
+				btTriangleInfo* info = m_triangleInfoMap->find(hash);
+				if (!info)
+				{
+					btTriangleInfo tmp;
+					m_triangleInfoMap->insert(hash,tmp);
+					info = m_triangleInfoMap->find(hash);
+				}
+
+				int sumvertsA = sharedVertsA[0]+sharedVertsA[1];
+				int otherIndexA = 3-sumvertsA;	//index of A's vertex that is not on the shared edge (0+1+2 == 3)
+
+				
+				btVector3 edge(m_triangleVerticesA[sharedVertsA[1]]-m_triangleVerticesA[sharedVertsA[0]]);
+
+				btTriangleShape tA(m_triangleVerticesA[0],m_triangleVerticesA[1],m_triangleVerticesA[2]);
+				int otherIndexB = 3-(sharedVertsB[0]+sharedVertsB[1]);
+				//B is rebuilt from its two shared vertices in swapped order, followed by its unshared vertex
+				btTriangleShape tB(triangle[sharedVertsB[1]],triangle[sharedVertsB[0]],triangle[otherIndexB]);
+				//btTriangleShape tB(triangle[0],triangle[1],triangle[2]);
+
+				btVector3 normalA;
+				btVector3 normalB;
+				tA.calcNormal(normalA);
+				tB.calcNormal(normalB);
+				edge.normalize();
+				btVector3 edgeCrossA = edge.cross(normalA).normalize();
+				//negate edgeCrossA when its dot product with the vector towards A's unshared vertex is negative
+				{
+					btVector3 tmp = m_triangleVerticesA[otherIndexA]-m_triangleVerticesA[sharedVertsA[0]];
+					if (edgeCrossA.dot(tmp) < 0)
+					{
+						edgeCrossA*=-1;
+					}
+				}
+
+				btVector3 edgeCrossB = edge.cross(normalB).normalize();
+				//same orientation fix for B, using the vector towards B's unshared vertex
+				{
+					btVector3 tmp = triangle[otherIndexB]-triangle[sharedVertsB[0]];
+					if (edgeCrossB.dot(tmp) < 0)
+					{
+						edgeCrossB*=-1;
+					}
+				}
+
+				btScalar	angle2 = 0;
+				btScalar	ang4 = 0.f;
+
+
+				btVector3 calculatedEdge = edgeCrossA.cross(edgeCrossB);
+				btScalar len2 = calculatedEdge.length2();
+
+				btScalar correctedAngle(0);
+				btVector3 calculatedNormalB = normalA;
+				bool isConvex = false;
+				//squared length of edgeCrossA x edgeCrossB below m_planarEpsilon: angle stays 0 and isConvex stays false
+				if (len2<m_triangleInfoMap->m_planarEpsilon)
+				{
+					angle2 = 0.f;
+					ang4 = 0.f;
+				} else
+				{
+
+					calculatedEdge.normalize();
+					btVector3 calculatedNormalA = calculatedEdge.cross(edgeCrossA);
+					calculatedNormalA.normalize();
+					angle2 = btGetAngle(calculatedNormalA,edgeCrossA,edgeCrossB);
+					ang4 = SIMD_PI-angle2;
+					btScalar dotA = normalA.dot(edgeCrossB);
+					///@todo: check if we need some epsilon, due to floating point imprecision
+					isConvex = (dotA<0.);
+
+					correctedAngle = isConvex ? ang4 : -ang4;
+					btQuaternion orn2(calculatedEdge,-correctedAngle);
+					calculatedNormalB = btMatrix3x3(orn2)*normalA;	//NOTE(review): calculatedNormalB is not read again in this method
+
+
+				}
+
+				
+
+				
+							
+				//alternatively use 
+				//btVector3 calculatedNormalB2 = quatRotate(orn,normalA);
+
+				//sumvertsA identifies the shared edge of A: 1 -> V0V1, 2 -> V2V0, 3 -> V1V2
+				switch (sumvertsA)
+				{
+				case 1:
+					{
+						btVector3 edge = m_triangleVerticesA[0]-m_triangleVerticesA[1];
+						btQuaternion orn(edge,-correctedAngle);
+						btVector3 computedNormalB = quatRotate(orn,normalA);
+						btScalar bla = computedNormalB.dot(normalB);
+						if (bla<0)
+						{
+							computedNormalB*=-1;
+							info->m_flags |= TRI_INFO_V0V1_SWAP_NORMALB;
+						}
+#ifdef DEBUG_INTERNAL_EDGE
+						if ((computedNormalB-normalB).length()>0.0001)
+						{
+							printf("warning: normals not identical\n");
+						}
+#endif//DEBUG_INTERNAL_EDGE
+
+						info->m_edgeV0V1Angle = -correctedAngle;
+
+						if (isConvex)
+							info->m_flags |= TRI_INFO_V0V1_CONVEX;
+						break;
+					}
+				case 2:
+					{
+						btVector3 edge = m_triangleVerticesA[2]-m_triangleVerticesA[0];
+						btQuaternion orn(edge,-correctedAngle);
+						btVector3 computedNormalB = quatRotate(orn,normalA);
+						if (computedNormalB.dot(normalB)<0)
+						{
+							computedNormalB*=-1;
+							info->m_flags |= TRI_INFO_V2V0_SWAP_NORMALB;
+						}
+
+#ifdef DEBUG_INTERNAL_EDGE
+						if ((computedNormalB-normalB).length()>0.0001)
+						{
+							printf("warning: normals not identical\n");
+						}
+#endif //DEBUG_INTERNAL_EDGE
+						info->m_edgeV2V0Angle = -correctedAngle;
+						if (isConvex)
+							info->m_flags |= TRI_INFO_V2V0_CONVEX;
+						break;	
+					}
+				case 3:
+					{
+						btVector3 edge = m_triangleVerticesA[1]-m_triangleVerticesA[2];
+						btQuaternion orn(edge,-correctedAngle);
+						btVector3 computedNormalB = quatRotate(orn,normalA);
+						if (computedNormalB.dot(normalB)<0)
+						{
+							info->m_flags |= TRI_INFO_V1V2_SWAP_NORMALB;
+							computedNormalB*=-1;
+						}
+#ifdef DEBUG_INTERNAL_EDGE
+						if ((computedNormalB-normalB).length()>0.0001)
+						{
+							printf("warning: normals not identical\n");
+						}
+#endif //DEBUG_INTERNAL_EDGE
+						info->m_edgeV1V2Angle = -correctedAngle;
+
+						if (isConvex)
+							info->m_flags |= TRI_INFO_V1V2_CONVEX;
+						break;
+					}
+				}
+
+				break;
+			}
+		default:
+			{
+				//				printf("warning: duplicate triangle\n");
+			}
+
+		}
+	}
+};
+/////////////////////////////////////////////////////////
+//Fills triangleInfoMap with edge connectivity info for trimeshShape: the map is attached to the shape, then for every
+//triangle of every sub part a btConnectivityProcessor is run over the triangles the shape reports within that triangle's AABB.
+void btGenerateInternalEdgeInfo (btBvhTriangleMeshShape*trimeshShape, btTriangleInfoMap* triangleInfoMap)
+{
+	//do nothing if the shape already has a triangle info map attached; otherwise the given map is attached below
+	if (trimeshShape->getTriangleInfoMap())
+		return;
+
+	trimeshShape->setTriangleInfoMap(triangleInfoMap);
+
+	btStridingMeshInterface* meshInterface = trimeshShape->getMeshInterface();
+	const btVector3& meshScaling = meshInterface->getScaling();
+
+	for (int partId = 0; partId< meshInterface->getNumSubParts();partId++)
+	{
+		const unsigned char *vertexbase = 0;
+		int numverts = 0;
+		PHY_ScalarType type = PHY_INTEGER;
+		int stride = 0;
+		const unsigned char *indexbase = 0;
+		int indexstride = 0;
+		int numfaces = 0;
+		PHY_ScalarType indicestype = PHY_INTEGER;
+		//PHY_ScalarType indexType=0;
+
+		btVector3 triangleVerts[3];
+		meshInterface->getLockedReadOnlyVertexIndexBase(&vertexbase,numverts,	type,stride,&indexbase,indexstride,numfaces,indicestype,partId);
+		btVector3 aabbMin,aabbMax;
+		//for each triangle of this part: read its three vertices scaled by meshScaling, then query the shape with the triangle's AABB
+		for (int triangleIndex = 0 ; triangleIndex < numfaces;triangleIndex++)
+		{
+			unsigned int* gfxbase = (unsigned int*)(indexbase+triangleIndex*indexstride);
+
+			for (int j=2;j>=0;j--)
+			{
+				//indices are read as unsigned short when indicestype is PHY_SHORT, as unsigned int for any other value
+				int graphicsindex = indicestype==PHY_SHORT?((unsigned short*)gfxbase)[j]:gfxbase[j];
+				if (type == PHY_FLOAT)
+				{
+					float* graphicsbase = (float*)(vertexbase+graphicsindex*stride);
+					triangleVerts[j] = btVector3(
+						graphicsbase[0]*meshScaling.getX(),
+						graphicsbase[1]*meshScaling.getY(),
+						graphicsbase[2]*meshScaling.getZ());
+				}
+				else	//any vertex type other than PHY_FLOAT is read as double
+				{
+					double* graphicsbase = (double*)(vertexbase+graphicsindex*stride);
+					triangleVerts[j] = btVector3( btScalar(graphicsbase[0]*meshScaling.getX()), btScalar(graphicsbase[1]*meshScaling.getY()), btScalar(graphicsbase[2]*meshScaling.getZ()));
+				}
+			}
+			aabbMin.setValue(btScalar(BT_LARGE_FLOAT),btScalar(BT_LARGE_FLOAT),btScalar(BT_LARGE_FLOAT));
+			aabbMax.setValue(btScalar(-BT_LARGE_FLOAT),btScalar(-BT_LARGE_FLOAT),btScalar(-BT_LARGE_FLOAT)); 
+			aabbMin.setMin(triangleVerts[0]);
+			aabbMax.setMax(triangleVerts[0]);
+			aabbMin.setMin(triangleVerts[1]);
+			aabbMax.setMax(triangleVerts[1]);
+			aabbMin.setMin(triangleVerts[2]);
+			aabbMax.setMax(triangleVerts[2]);
+			//the processor compares every triangle reported for this AABB against triangle (partId,triangleIndex)
+			btConnectivityProcessor connectivityProcessor;
+			connectivityProcessor.m_partIdA = partId;
+			connectivityProcessor.m_triangleIndexA = triangleIndex;
+			connectivityProcessor.m_triangleVerticesA = &triangleVerts[0];
+			connectivityProcessor.m_triangleInfoMap  = triangleInfoMap;
+
+			trimeshShape->processAllTriangles(&connectivityProcessor,aabbMin,aabbMax);
+		}
+		//NOTE(review): no unlock call follows getLockedReadOnlyVertexIndexBase in this function - confirm none is required
+	}
+
+}
+
+
+
+
+// Projects 'point' onto the segment line0-line1 and writes the result to 'nearestPoint'.
+// The projection is limited to the segment, so it never lies beyond either endpoint.
+void btNearestPointInLineSegment(const btVector3 &point, const btVector3& line0, const btVector3& line1, btVector3& nearestPoint)
+{
+	const btVector3 segment = line1 - line0;
+
+	// Degenerate segment (its delta is fuzzy-zero): answer with the start point
+	if (segment.fuzzyZero())
+	{
+		nearestPoint = line0;
+		return;
+	}
+
+	// Parametric position of the projection: 0 at line0, 1 at line1
+	btScalar t = (point-line0).dot(segment) / segment.dot(segment);
+
+	// Keep the parameter inside the segment
+	if (t < 0)
+		t = 0;
+	else if (t > 1)
+		t = 1;
+
+	nearestPoint = line0 + segment*t;
+}
+
+
+
+
+bool	btClampNormal(const btVector3& edge,const btVector3& tri_normal_org,const btVector3& localContactNormalOnB, btScalar correctedEdgeAngle, btVector3 & clampedLocalNormal)
+{
+	//Limits the contact normal to the angle 'correctedEdgeAngle' measured around 'edge'.
+	//Returns true and writes the rotated normal to 'clampedLocalNormal' when the normal lies beyond that angle;
+	//returns false and leaves 'clampedLocalNormal' untouched otherwise.
+	//we only have a local triangle normal, not a local contact normal -> only normal in world space...
+	//either compute the current angle all in local space, or all in world space
+	const btVector3 triNormal = tri_normal_org;
+	const btVector3 edgeCross = edge.cross(triNormal).normalize();
+	const btScalar currentAngle = btGetAngle(edgeCross,triNormal,localContactNormalOnB);
+
+	//a negative limit is exceeded from below, a non-negative limit from above
+	bool exceedsLimit;
+	if (correctedEdgeAngle<0)
+	{
+		exceedsLimit = (currentAngle < correctedEdgeAngle);
+	}
+	else
+	{
+		exceedsLimit = (currentAngle > correctedEdgeAngle);
+	}
+
+	if (!exceedsLimit)
+	{
+		return false;
+	}
+
+	//rotate the contact normal about the edge by the difference between the limit and the current angle
+	const btQuaternion rotation(edge,correctedEdgeAngle-currentAngle);
+	clampedLocalNormal = btMatrix3x3(rotation)*localContactNormalOnB;
+	return true;
+}
+
+
+
+/// Changes a btManifoldPoint collision normal to the normal from the mesh.
+void btAdjustInternalEdgeContacts(btManifoldPoint& cp, const btCollisionObjectWrapper* colObj0Wrap,const btCollisionObjectWrapper* colObj1Wrap, int partId0, int index0, int normalAdjustFlags)
+{
+	//cp is only adjusted when colObj0Wrap's shape is a single triangle; partId0/index0 select that triangle's entry in the info map
+	if (colObj0Wrap->getCollisionShape()->getShapeType() != TRIANGLE_SHAPE_PROXYTYPE)
+		return;
+
+	btBvhTriangleMeshShape* trimesh = 0;
+	//the triangle info map is read from the root mesh shape; for a scaled mesh it is read from the child shape
+	if( colObj0Wrap->getCollisionObject()->getCollisionShape()->getShapeType() == SCALED_TRIANGLE_MESH_SHAPE_PROXYTYPE )
+		trimesh = ((btScaledBvhTriangleMeshShape*)colObj0Wrap->getCollisionObject()->getCollisionShape())->getChildShape();
+	else
+		trimesh = (btBvhTriangleMeshShape*)colObj0Wrap->getCollisionObject()->getCollisionShape();
+	//NOTE(review): the else-branch cast assumes the root shape is a btBvhTriangleMeshShape - confirm for other root shape types
+	btTriangleInfoMap* triangleInfoMapPtr = (btTriangleInfoMap*) trimesh->getTriangleInfoMap();
+	if (!triangleInfoMapPtr)
+		return;
+
+	int hash = btGetHash(partId0,index0);
+
+	//no stored edge info for this triangle: leave the contact untouched
+	btTriangleInfo* info = triangleInfoMapPtr->find(hash);
+	if (!info)
+		return;
+
+	btScalar frontFacing = (normalAdjustFlags & BT_TRIANGLE_CONVEX_BACKFACE_MODE)==0? 1.f : -1.f;
+	//frontFacing is -1 in BT_TRIANGLE_CONVEX_BACKFACE_MODE and +1 otherwise
+	const btTriangleShape* tri_shape = static_cast<const btTriangleShape*>(colObj0Wrap->getCollisionShape());
+	btVector3 v0,v1,v2;
+	tri_shape->getVertex(0,v0);
+	tri_shape->getVertex(1,v1);
+	tri_shape->getVertex(2,v2);
+
+	//btVector3 center = (v0+v1+v2)*btScalar(1./3.);
+
+	btVector3 red(1,0,0), green(0,1,0),blue(0,0,1),white(1,1,1),black(0,0,0);
+	btVector3 tri_normal;
+	tri_shape->calcNormal(tri_normal);
+
+	//btScalar dot = tri_normal.dot(cp.m_normalWorldOnB);
+	btVector3 nearest;
+	btNearestPointInLineSegment(cp.m_localPointB,v0,v1,nearest);
+
+	btVector3 contact = cp.m_localPointB;
+#ifdef BT_INTERNAL_EDGE_DEBUG_DRAW
+	const btTransform& tr = colObj0Wrap->getWorldTransform(); //only used to place the debug lines
+	btDebugDrawLine(tr*nearest,tr*cp.m_localPointB,red);
+#endif //BT_INTERNAL_EDGE_DEBUG_DRAW
+
+	//NOTE(review): the DEBUG_INTERNAL_EDGE sections below also use 'tr', which is only declared when BT_INTERNAL_EDGE_DEBUG_DRAW is defined
+
+	bool isNearEdge = false;
+
+	int numConcaveEdgeHits = 0;
+	int numConvexEdgeHits = 0;
+
+	btVector3 localContactNormalOnB = colObj0Wrap->getWorldTransform().getBasis().transpose() * cp.m_normalWorldOnB;
+	localContactNormalOnB.normalize();//is this necessary?
+
+	// Get closest edge
+	int      bestedge=-1;
+	btScalar    disttobestedge=BT_LARGE_FLOAT;
+	// only edges whose stored angle is below m_maxEdgeAngleThreshold are candidates
+	// Edge 0 -> 1
+	if (btFabs(info->m_edgeV0V1Angle)< triangleInfoMapPtr->m_maxEdgeAngleThreshold)
+	{
+		btVector3 nearest;
+		btNearestPointInLineSegment( cp.m_localPointB, v0, v1, nearest );
+		btScalar     len=(contact-nearest).length();
+		//
+		if( len < disttobestedge )
+		{
+			bestedge=0;
+			disttobestedge=len;
+		}
+	}
+	// Edge 1 -> 2
+	if (btFabs(info->m_edgeV1V2Angle)< triangleInfoMapPtr->m_maxEdgeAngleThreshold)
+	{
+		btVector3 nearest;
+		btNearestPointInLineSegment( cp.m_localPointB, v1, v2, nearest );
+		btScalar     len=(contact-nearest).length();
+		//
+		if( len < disttobestedge )
+		{
+			bestedge=1;
+			disttobestedge=len;
+		}
+	}
+	// Edge 2 -> 0
+	if (btFabs(info->m_edgeV2V0Angle)< triangleInfoMapPtr->m_maxEdgeAngleThreshold)
+	{
+		btVector3 nearest;
+		btNearestPointInLineSegment( cp.m_localPointB, v2, v0, nearest );
+		btScalar     len=(contact-nearest).length();
+		//
+		if( len < disttobestedge )
+		{
+			bestedge=2;
+			disttobestedge=len;
+		}
+	}
+
+#ifdef BT_INTERNAL_EDGE_DEBUG_DRAW
+   btVector3 upfix=tri_normal * btVector3(0.1f,0.1f,0.1f);
+   btDebugDrawLine(tr * v0 + upfix, tr * v1 + upfix, red );
+#endif   
+	if (btFabs(info->m_edgeV0V1Angle)< triangleInfoMapPtr->m_maxEdgeAngleThreshold)
+	{
+#ifdef BT_INTERNAL_EDGE_DEBUG_DRAW
+		btDebugDrawLine(tr*contact,tr*(contact+cp.m_normalWorldOnB*10),black);
+#endif
+		btScalar len = (contact-nearest).length();
+		if(len<triangleInfoMapPtr->m_edgeDistanceThreshold)
+		if( bestedge==0 )
+		{
+			btVector3 edge(v0-v1);
+			isNearEdge = true;
+
+			if (info->m_edgeV0V1Angle==btScalar(0))
+			{
+				numConcaveEdgeHits++;
+			} else
+			{
+				//swapFactor is +1 for an edge flagged convex and -1 otherwise
+				bool isEdgeConvex = (info->m_flags & TRI_INFO_V0V1_CONVEX)!=0;
+				btScalar swapFactor = isEdgeConvex ? btScalar(1) : btScalar(-1);
+	#ifdef BT_INTERNAL_EDGE_DEBUG_DRAW
+				btDebugDrawLine(tr*nearest,tr*(nearest+swapFactor*tri_normal*10),white);
+	#endif //BT_INTERNAL_EDGE_DEBUG_DRAW
+
+				btVector3 nA = swapFactor * tri_normal;
+
+				btQuaternion orn(edge,info->m_edgeV0V1Angle);
+				btVector3 computedNormalB = quatRotate(orn,tri_normal);
+				if (info->m_flags & TRI_INFO_V0V1_SWAP_NORMALB)
+					computedNormalB*=-1;
+				btVector3 nB = swapFactor*computedNormalB;
+
+				btScalar	NdotA = localContactNormalOnB.dot(nA);
+				btScalar	NdotB = localContactNormalOnB.dot(nB);
+				bool backFacingNormal = (NdotA< triangleInfoMapPtr->m_convexEpsilon) && (NdotB<triangleInfoMapPtr->m_convexEpsilon);
+
+#ifdef DEBUG_INTERNAL_EDGE
+				{
+					
+					btDebugDrawLine(cp.getPositionWorldOnB(),cp.getPositionWorldOnB()+tr.getBasis()*(nB*20),red);
+				}
+#endif //DEBUG_INTERNAL_EDGE
+
+
+				if (backFacingNormal)
+				{
+					numConcaveEdgeHits++;
+				}
+				else
+				{
+					numConvexEdgeHits++;
+					btVector3 clampedLocalNormal;
+					bool isClamped = btClampNormal(edge,swapFactor*tri_normal,localContactNormalOnB, info->m_edgeV0V1Angle,clampedLocalNormal);
+					if (isClamped)
+					{
+						if (((normalAdjustFlags & BT_TRIANGLE_CONVEX_DOUBLE_SIDED)!=0) || (clampedLocalNormal.dot(frontFacing*tri_normal)>0))
+						{
+							btVector3 newNormal = colObj0Wrap->getWorldTransform().getBasis() * clampedLocalNormal;
+							//					cp.m_distance1 = cp.m_distance1 * newNormal.dot(cp.m_normalWorldOnB);
+							cp.m_normalWorldOnB = newNormal;
+							// Reproject collision point along normal. (what about cp.m_distance1?)
+							cp.m_positionWorldOnB = cp.m_positionWorldOnA - cp.m_normalWorldOnB * cp.m_distance1;
+							cp.m_localPointB = colObj0Wrap->getWorldTransform().invXform(cp.m_positionWorldOnB);
+							
+						}
+					}
+				}
+			}
+		}
+	}
+
+	btNearestPointInLineSegment(contact,v1,v2,nearest);
+#ifdef BT_INTERNAL_EDGE_DEBUG_DRAW
+	btDebugDrawLine(tr*nearest,tr*cp.m_localPointB,green);
+#endif //BT_INTERNAL_EDGE_DEBUG_DRAW
+
+#ifdef BT_INTERNAL_EDGE_DEBUG_DRAW
+   btDebugDrawLine(tr * v1 + upfix, tr * v2 + upfix , green );
+#endif   
+
+	if (btFabs(info->m_edgeV1V2Angle)< triangleInfoMapPtr->m_maxEdgeAngleThreshold)
+	{
+#ifdef BT_INTERNAL_EDGE_DEBUG_DRAW
+		btDebugDrawLine(tr*contact,tr*(contact+cp.m_normalWorldOnB*10),black);
+#endif //BT_INTERNAL_EDGE_DEBUG_DRAW
+
+
+
+		btScalar len = (contact-nearest).length();
+		if(len<triangleInfoMapPtr->m_edgeDistanceThreshold)
+		if( bestedge==1 )
+		{
+			isNearEdge = true;
+#ifdef BT_INTERNAL_EDGE_DEBUG_DRAW
+			btDebugDrawLine(tr*nearest,tr*(nearest+tri_normal*10),white);
+#endif //BT_INTERNAL_EDGE_DEBUG_DRAW
+
+			btVector3 edge(v1-v2);
+
+			isNearEdge = true;
+
+			if (info->m_edgeV1V2Angle == btScalar(0))
+			{
+				numConcaveEdgeHits++;
+			} else
+			{
+				bool isEdgeConvex = (info->m_flags & TRI_INFO_V1V2_CONVEX)!=0;
+				btScalar swapFactor = isEdgeConvex ? btScalar(1) : btScalar(-1);
+	#ifdef BT_INTERNAL_EDGE_DEBUG_DRAW
+				btDebugDrawLine(tr*nearest,tr*(nearest+swapFactor*tri_normal*10),white);
+	#endif //BT_INTERNAL_EDGE_DEBUG_DRAW
+
+				btVector3 nA = swapFactor * tri_normal;
+				
+				btQuaternion orn(edge,info->m_edgeV1V2Angle);
+				btVector3 computedNormalB = quatRotate(orn,tri_normal);
+				if (info->m_flags & TRI_INFO_V1V2_SWAP_NORMALB)
+					computedNormalB*=-1;
+				btVector3 nB = swapFactor*computedNormalB;
+
+#ifdef DEBUG_INTERNAL_EDGE
+				{
+					btDebugDrawLine(cp.getPositionWorldOnB(),cp.getPositionWorldOnB()+tr.getBasis()*(nB*20),red);
+				}
+#endif //DEBUG_INTERNAL_EDGE
+
+
+				btScalar	NdotA = localContactNormalOnB.dot(nA);
+				btScalar	NdotB = localContactNormalOnB.dot(nB);
+				bool backFacingNormal = (NdotA< triangleInfoMapPtr->m_convexEpsilon) && (NdotB<triangleInfoMapPtr->m_convexEpsilon);
+
+				if (backFacingNormal)
+				{
+					numConcaveEdgeHits++;
+				}
+				else
+				{
+					numConvexEdgeHits++;
+					btVector3 localContactNormalOnB = colObj0Wrap->getWorldTransform().getBasis().transpose() * cp.m_normalWorldOnB;
+					btVector3 clampedLocalNormal;
+					bool isClamped = btClampNormal(edge,swapFactor*tri_normal,localContactNormalOnB, info->m_edgeV1V2Angle,clampedLocalNormal);
+					if (isClamped)
+					{
+						if (((normalAdjustFlags & BT_TRIANGLE_CONVEX_DOUBLE_SIDED)!=0) || (clampedLocalNormal.dot(frontFacing*tri_normal)>0))
+						{
+							btVector3 newNormal = colObj0Wrap->getWorldTransform().getBasis() * clampedLocalNormal;
+							//					cp.m_distance1 = cp.m_distance1 * newNormal.dot(cp.m_normalWorldOnB);
+							cp.m_normalWorldOnB = newNormal;
+							// Reproject collision point along normal.
+							cp.m_positionWorldOnB = cp.m_positionWorldOnA - cp.m_normalWorldOnB * cp.m_distance1;
+							cp.m_localPointB = colObj0Wrap->getWorldTransform().invXform(cp.m_positionWorldOnB);
+						}
+					}
+				}
+			}
+		}
+	}
+
+	btNearestPointInLineSegment(contact,v2,v0,nearest);
+#ifdef BT_INTERNAL_EDGE_DEBUG_DRAW
+	btDebugDrawLine(tr*nearest,tr*cp.m_localPointB,blue);
+#endif //BT_INTERNAL_EDGE_DEBUG_DRAW
+#ifdef BT_INTERNAL_EDGE_DEBUG_DRAW
+   btDebugDrawLine(tr * v2 + upfix, tr * v0 + upfix , blue );
+#endif   
+
+	if (btFabs(info->m_edgeV2V0Angle)< triangleInfoMapPtr->m_maxEdgeAngleThreshold)
+	{
+
+#ifdef BT_INTERNAL_EDGE_DEBUG_DRAW
+		btDebugDrawLine(tr*contact,tr*(contact+cp.m_normalWorldOnB*10),black);
+#endif //BT_INTERNAL_EDGE_DEBUG_DRAW
+
+		btScalar len = (contact-nearest).length();
+		if(len<triangleInfoMapPtr->m_edgeDistanceThreshold)
+		if( bestedge==2 )
+		{
+			isNearEdge = true;
+#ifdef BT_INTERNAL_EDGE_DEBUG_DRAW
+			btDebugDrawLine(tr*nearest,tr*(nearest+tri_normal*10),white);
+#endif //BT_INTERNAL_EDGE_DEBUG_DRAW
+
+			btVector3 edge(v2-v0);
+
+			if (info->m_edgeV2V0Angle==btScalar(0))
+			{
+				numConcaveEdgeHits++;
+			} else
+			{
+
+				bool isEdgeConvex = (info->m_flags & TRI_INFO_V2V0_CONVEX)!=0;
+				btScalar swapFactor = isEdgeConvex ? btScalar(1) : btScalar(-1);
+	#ifdef BT_INTERNAL_EDGE_DEBUG_DRAW
+				btDebugDrawLine(tr*nearest,tr*(nearest+swapFactor*tri_normal*10),white);
+	#endif //BT_INTERNAL_EDGE_DEBUG_DRAW
+
+				btVector3 nA = swapFactor * tri_normal;
+				btQuaternion orn(edge,info->m_edgeV2V0Angle);
+				btVector3 computedNormalB = quatRotate(orn,tri_normal);
+				if (info->m_flags & TRI_INFO_V2V0_SWAP_NORMALB)
+					computedNormalB*=-1;
+				btVector3 nB = swapFactor*computedNormalB;
+
+#ifdef DEBUG_INTERNAL_EDGE
+				{
+					btDebugDrawLine(cp.getPositionWorldOnB(),cp.getPositionWorldOnB()+tr.getBasis()*(nB*20),red);
+				}
+#endif //DEBUG_INTERNAL_EDGE
+
+				btScalar	NdotA = localContactNormalOnB.dot(nA);
+				btScalar	NdotB = localContactNormalOnB.dot(nB);
+				bool backFacingNormal = (NdotA< triangleInfoMapPtr->m_convexEpsilon) && (NdotB<triangleInfoMapPtr->m_convexEpsilon);
+
+				if (backFacingNormal)
+				{
+					numConcaveEdgeHits++;
+				}
+				else
+				{
+					numConvexEdgeHits++;
+					//				printf("hitting convex edge\n");
+
+
+					btVector3 localContactNormalOnB = colObj0Wrap->getWorldTransform().getBasis().transpose() * cp.m_normalWorldOnB;
+					btVector3 clampedLocalNormal;
+					bool isClamped = btClampNormal(edge,swapFactor*tri_normal,localContactNormalOnB,info->m_edgeV2V0Angle,clampedLocalNormal);
+					if (isClamped)
+					{
+						if (((normalAdjustFlags & BT_TRIANGLE_CONVEX_DOUBLE_SIDED)!=0) || (clampedLocalNormal.dot(frontFacing*tri_normal)>0))
+						{
+							btVector3 newNormal = colObj0Wrap->getWorldTransform().getBasis() * clampedLocalNormal;
+							//					cp.m_distance1 = cp.m_distance1 * newNormal.dot(cp.m_normalWorldOnB);
+							cp.m_normalWorldOnB = newNormal;
+							// Reproject collision point along normal.
+							cp.m_positionWorldOnB = cp.m_positionWorldOnA - cp.m_normalWorldOnB * cp.m_distance1;
+							cp.m_localPointB = colObj0Wrap->getWorldTransform().invXform(cp.m_positionWorldOnB);
+						}
+					}
+				} 
+			}
+			
+
+		}
+	}
+
+#ifdef DEBUG_INTERNAL_EDGE
+	{
+		btVector3 color(0,1,1);
+		btDebugDrawLine(cp.getPositionWorldOnB(),cp.getPositionWorldOnB()+cp.m_normalWorldOnB*10,color);
+	}
+#endif //DEBUG_INTERNAL_EDGE
+
+	if (isNearEdge)
+	{
+		//a concave hit (this includes an edge whose stored angle is exactly 0) replaces the contact normal by the triangle normal
+		if (numConcaveEdgeHits>0)
+		{
+			if ((normalAdjustFlags & BT_TRIANGLE_CONCAVE_DOUBLE_SIDED)!=0)
+			{
+				//fix tri_normal so it pointing the same direction as the current local contact normal
+				if (tri_normal.dot(localContactNormalOnB) < 0)
+				{
+					tri_normal *= -1;
+				}
+				cp.m_normalWorldOnB = colObj0Wrap->getWorldTransform().getBasis()*tri_normal;
+			} else
+			{
+				btVector3 newNormal = tri_normal *frontFacing;
+				//if the tri_normal is pointing opposite direction as the current local contact normal, skip it
+				btScalar d = newNormal.dot(localContactNormalOnB) ;
+				if (d< 0)
+				{
+					return;
+				}
+				//modify the normal to be the triangle normal (or backfacing normal)
+				cp.m_normalWorldOnB = colObj0Wrap->getWorldTransform().getBasis() *newNormal;
+			}
+						
+			// Reproject collision point along normal.
+			cp.m_positionWorldOnB = cp.m_positionWorldOnA - cp.m_normalWorldOnB * cp.m_distance1;
+			cp.m_localPointB = colObj0Wrap->getWorldTransform().invXform(cp.m_positionWorldOnB);
+		}
+	}
+}
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btInternalEdgeUtility.h b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btInternalEdgeUtility.h
new file mode 100644
index 000000000..7d9aafeee
--- /dev/null
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btInternalEdgeUtility.h
@@ -0,0 +1,47 @@
+
+#ifndef BT_INTERNAL_EDGE_UTILITY_H
+#define BT_INTERNAL_EDGE_UTILITY_H
+
+#include "LinearMath/btHashMap.h"
+#include "LinearMath/btVector3.h"
+
+#include "BulletCollision/CollisionShapes/btTriangleInfoMap.h"
+
+///The btInternalEdgeUtility helps to avoid or reduce artifacts due to wrong collision normals caused by internal edges.
+///See also http://code.google.com/p/bullet/issues/detail?id=27
+
+class btBvhTriangleMeshShape;
+class btCollisionObject;
+struct btCollisionObjectWrapper;
+class btManifoldPoint;
+class btIDebugDraw;
+
+
+
+enum btInternalEdgeAdjustFlags //bit flags, combined in the normalAdjustFlags argument of btAdjustInternalEdgeContacts
+{
+	BT_TRIANGLE_CONVEX_BACKFACE_MODE = 1, //use the reversed triangle normal as the front-facing direction
+	BT_TRIANGLE_CONCAVE_DOUBLE_SIDED = 2, //double sided options are experimental, single sided is recommended
+	BT_TRIANGLE_CONVEX_DOUBLE_SIDED = 4 //accept a clamped convex-edge normal even when it does not face the front side
+};
+
+
+///Call btGenerateInternalEdgeInfo to create the triangle info and store it in the shape (via setTriangleInfoMap); nothing is done if the shape already has a triangle info map
+void	btGenerateInternalEdgeInfo (btBvhTriangleMeshShape*trimeshShape, btTriangleInfoMap* triangleInfoMap);
+
+
+///Call btAdjustInternalEdgeContacts to adjust the collision normal, using the triangle info map (generated using btGenerateInternalEdgeInfo)
+///If this info map is missing, or the triangle is not stored in this map, nothing will be done
+void	btAdjustInternalEdgeContacts(btManifoldPoint& cp, const btCollisionObjectWrapper* trimeshColObj0Wrap,const btCollisionObjectWrapper* otherColObj1Wrap, int partId0, int index0, int normalAdjustFlags = 0);
+
+///Enable the BT_INTERNAL_EDGE_DEBUG_DRAW define and call btSetDebugDrawer, to get visual info to see if the internal edge utility works properly.
+///If the utility doesn't work properly, you might have to adjust the threshold values in btTriangleInfoMap
+//#define BT_INTERNAL_EDGE_DEBUG_DRAW
+
+#ifdef BT_INTERNAL_EDGE_DEBUG_DRAW
+void	btSetDebugDrawer(btIDebugDraw* debugDrawer);
+#endif //BT_INTERNAL_EDGE_DEBUG_DRAW
+
+
+#endif //BT_INTERNAL_EDGE_UTILITY_H
+
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btManifoldResult.cpp b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btManifoldResult.cpp
index e607bdbee..4b2986a00 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btManifoldResult.cpp
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btManifoldResult.cpp
@@ -17,13 +17,30 @@ subject to the following restrictions:
 #include "btManifoldResult.h"
 #include "BulletCollision/NarrowPhaseCollision/btPersistentManifold.h"
 #include "BulletCollision/CollisionDispatch/btCollisionObject.h"
-
+#include "BulletCollision/CollisionDispatch/btCollisionObjectWrapper.h"
 
 ///This is to allow MaterialCombiner/Custom Friction/Restitution values
 ContactAddedCallback		gContactAddedCallback=0;
 
+
+
 ///User can override this material combiner by implementing gContactAddedCallback and setting body0->m_collisionFlags |= btCollisionObject::customMaterialCallback;
-inline btScalar	calculateCombinedFriction(const btCollisionObject* body0,const btCollisionObject* body1)
+inline btScalar	calculateCombinedRollingFriction(const btCollisionObject* body0,const btCollisionObject* body1)
+{
+	//the rolling friction of both bodies is multiplied and the product is limited to [-10,10]
+	const btScalar limit = btScalar(10.);
+	const btScalar combined = body0->getRollingFriction() * body1->getRollingFriction();
+
+	if (combined < -limit)
+		return -limit;
+	if (combined > limit)
+		return limit;
+	return combined;
+}
+
+
+///User can override this material combiner by implementing gContactAddedCallback and setting body0->m_collisionFlags |= btCollisionObject::customMaterialCallback;
+btScalar	btManifoldResult::calculateCombinedFriction(const btCollisionObject* body0,const btCollisionObject* body1)
 {
 	btScalar friction = body0->getFriction() * body1->getFriction();
 
@@ -36,17 +53,17 @@ inline btScalar	calculateCombinedFriction(const btCollisionObject* body0,const b
 
 }
 
-inline btScalar	calculateCombinedRestitution(const btCollisionObject* body0,const btCollisionObject* body1)
+btScalar	btManifoldResult::calculateCombinedRestitution(const btCollisionObject* body0,const btCollisionObject* body1)
 {
 	return body0->getRestitution() * body1->getRestitution();
 }
 
 
 
-btManifoldResult::btManifoldResult(btCollisionObject* body0,btCollisionObject* body1)
+btManifoldResult::btManifoldResult(const btCollisionObjectWrapper* body0Wrap,const btCollisionObjectWrapper* body1Wrap)
 		:m_manifoldPtr(0),
-		m_body0(body0),
-		m_body1(body1)
+		m_body0Wrap(body0Wrap),
+		m_body1Wrap(body1Wrap)
 #ifdef DEBUG_PART_INDEX
 		,m_partId0(-1),
 	m_partId1(-1),
@@ -54,8 +71,6 @@ btManifoldResult::btManifoldResult(btCollisionObject* body0,btCollisionObject* b
 	m_index1(-1)
 #endif //DEBUG_PART_INDEX
 {
-	m_rootTransA = body0->getWorldTransform();
-	m_rootTransB = body1->getWorldTransform();
 }
 
 
@@ -63,11 +78,12 @@ void btManifoldResult::addContactPoint(const btVector3& normalOnBInWorld,const b
 {
 	btAssert(m_manifoldPtr);
 	//order in manifold needs to match
-	
+
 	if (depth > m_manifoldPtr->getContactBreakingThreshold())
+//	if (depth > m_manifoldPtr->getContactProcessingThreshold())
 		return;
 
-	bool isSwapped = m_manifoldPtr->getBody0() != m_body0;
+	bool isSwapped = m_manifoldPtr->getBody0() != m_body0Wrap->getCollisionObject();
 
 	btVector3 pointA = pointInWorld + normalOnBInWorld * depth;
 
@@ -76,12 +92,12 @@ void btManifoldResult::addContactPoint(const btVector3& normalOnBInWorld,const b
 	
 	if (isSwapped)
 	{
-		localA = m_rootTransB.invXform(pointA );
-		localB = m_rootTransA.invXform(pointInWorld);
+		localA = m_body1Wrap->getCollisionObject()->getWorldTransform().invXform(pointA );
+		localB = m_body0Wrap->getCollisionObject()->getWorldTransform().invXform(pointInWorld);
 	} else
 	{
-		localA = m_rootTransA.invXform(pointA );
-		localB = m_rootTransB.invXform(pointInWorld);
+		localA = m_body0Wrap->getCollisionObject()->getWorldTransform().invXform(pointA );
+		localB = m_body1Wrap->getCollisionObject()->getWorldTransform().invXform(pointInWorld);
 	}
 
 	btManifoldPoint newPt(localA,localB,normalOnBInWorld,depth);
@@ -90,9 +106,13 @@ void btManifoldResult::addContactPoint(const btVector3& normalOnBInWorld,const b
 	
 	int insertIndex = m_manifoldPtr->getCacheEntry(newPt);
 
-	newPt.m_combinedFriction = calculateCombinedFriction(m_body0,m_body1);
-	newPt.m_combinedRestitution = calculateCombinedRestitution(m_body0,m_body1);
+	newPt.m_combinedFriction = calculateCombinedFriction(m_body0Wrap->getCollisionObject(),m_body1Wrap->getCollisionObject());
+	newPt.m_combinedRestitution = calculateCombinedRestitution(m_body0Wrap->getCollisionObject(),m_body1Wrap->getCollisionObject());
+	newPt.m_combinedRollingFriction = calculateCombinedRollingFriction(m_body0Wrap->getCollisionObject(),m_body1Wrap->getCollisionObject());
+	btPlaneSpace1(newPt.m_normalWorldOnB,newPt.m_lateralFrictionDir1,newPt.m_lateralFrictionDir2);
+	
 
+	
    //BP mod, store contact triangles.
 	if (isSwapped)
 	{
@@ -121,13 +141,13 @@ void btManifoldResult::addContactPoint(const btVector3& normalOnBInWorld,const b
 	//User can override friction and/or restitution
 	if (gContactAddedCallback &&
 		//and if either of the two bodies requires custom material
-		 ((m_body0->getCollisionFlags() & btCollisionObject::CF_CUSTOM_MATERIAL_CALLBACK) ||
-		   (m_body1->getCollisionFlags() & btCollisionObject::CF_CUSTOM_MATERIAL_CALLBACK)))
+		 ((m_body0Wrap->getCollisionObject()->getCollisionFlags() & btCollisionObject::CF_CUSTOM_MATERIAL_CALLBACK) ||
+		   (m_body1Wrap->getCollisionObject()->getCollisionFlags() & btCollisionObject::CF_CUSTOM_MATERIAL_CALLBACK)))
 	{
 		//experimental feature info, for per-triangle material etc.
-		btCollisionObject* obj0 = isSwapped? m_body1 : m_body0;
-		btCollisionObject* obj1 = isSwapped? m_body0 : m_body1;
-		(*gContactAddedCallback)(m_manifoldPtr->getContactPoint(insertIndex),obj0,newPt.m_partId0,newPt.m_index0,obj1,newPt.m_partId1,newPt.m_index1);
+		const btCollisionObjectWrapper* obj0Wrap = isSwapped? m_body1Wrap : m_body0Wrap;
+		const btCollisionObjectWrapper* obj1Wrap = isSwapped? m_body0Wrap : m_body1Wrap;
+		(*gContactAddedCallback)(m_manifoldPtr->getContactPoint(insertIndex),obj0Wrap,newPt.m_partId0,newPt.m_index0,obj1Wrap,newPt.m_partId1,newPt.m_index1);
 	}
 
 }
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btManifoldResult.h b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btManifoldResult.h
index 978033997..977b9a02f 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btManifoldResult.h
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btManifoldResult.h
@@ -14,18 +14,22 @@ subject to the following restrictions:
 */
 
 
-#ifndef MANIFOLD_RESULT_H
-#define MANIFOLD_RESULT_H
+#ifndef BT_MANIFOLD_RESULT_H
+#define BT_MANIFOLD_RESULT_H
 
 class btCollisionObject;
+struct btCollisionObjectWrapper;
+
 #include "BulletCollision/NarrowPhaseCollision/btPersistentManifold.h"
 class btManifoldPoint;
 
 #include "BulletCollision/NarrowPhaseCollision/btDiscreteCollisionDetectorInterface.h"
 
 #include "LinearMath/btTransform.h"
+#include "BulletCollision/CollisionDispatch/btCollisionObjectWrapper.h"
+#include "BulletCollision/CollisionDispatch/btCollisionObject.h"
 
-typedef bool (*ContactAddedCallback)(btManifoldPoint& cp,	const btCollisionObject* colObj0,int partId0,int index0,const btCollisionObject* colObj1,int partId1,int index1);
+typedef bool (*ContactAddedCallback)(btManifoldPoint& cp,	const btCollisionObjectWrapper* colObj0Wrap,int partId0,int index0,const btCollisionObjectWrapper* colObj1Wrap,int partId1,int index1);
 extern ContactAddedCallback		gContactAddedCallback;
 
 //#define DEBUG_PART_INDEX 1
@@ -34,14 +38,12 @@ extern ContactAddedCallback		gContactAddedCallback;
 ///btManifoldResult is a helper class to manage  contact results.
 class btManifoldResult : public btDiscreteCollisionDetectorInterface::Result
 {
+protected:
+
 	btPersistentManifold* m_manifoldPtr;
 
-	//we need this for compounds
-	btTransform	m_rootTransA;
-	btTransform	m_rootTransB;
-
-	btCollisionObject* m_body0;
-	btCollisionObject* m_body1;
+	const btCollisionObjectWrapper* m_body0Wrap;
+	const btCollisionObjectWrapper* m_body1Wrap;
 	int	m_partId0;
 	int m_partId1;
 	int m_index0;
@@ -61,7 +63,7 @@ public:
 	{
 	}
 
-	btManifoldResult(btCollisionObject* body0,btCollisionObject* body1);
+	btManifoldResult(const btCollisionObjectWrapper* body0Wrap,const btCollisionObjectWrapper* body1Wrap);
 
 	virtual ~btManifoldResult() {};
 
@@ -100,27 +102,49 @@ public:
 		if (!m_manifoldPtr->getNumContacts())
 			return;
 
-		bool isSwapped = m_manifoldPtr->getBody0() != m_body0;
+		bool isSwapped = m_manifoldPtr->getBody0() != m_body0Wrap->getCollisionObject();
 
 		if (isSwapped)
 		{
-			m_manifoldPtr->refreshContactPoints(m_rootTransB,m_rootTransA);
+			m_manifoldPtr->refreshContactPoints(m_body1Wrap->getCollisionObject()->getWorldTransform(),m_body0Wrap->getCollisionObject()->getWorldTransform());
 		} else
 		{
-			m_manifoldPtr->refreshContactPoints(m_rootTransA,m_rootTransB);
+			m_manifoldPtr->refreshContactPoints(m_body0Wrap->getCollisionObject()->getWorldTransform(),m_body1Wrap->getCollisionObject()->getWorldTransform());
 		}
 	}
 
+	const btCollisionObjectWrapper* getBody0Wrap() const
+	{
+		return m_body0Wrap;
+	}
+	const btCollisionObjectWrapper* getBody1Wrap() const
+	{
+		return m_body1Wrap;
+	}
+
+	void setBody0Wrap(const btCollisionObjectWrapper* obj0Wrap)
+	{
+		m_body0Wrap = obj0Wrap;
+	}
+
+	void setBody1Wrap(const btCollisionObjectWrapper* obj1Wrap)
+	{
+		m_body1Wrap = obj1Wrap;
+	}
+
 	const btCollisionObject* getBody0Internal() const
 	{
-		return m_body0;
+		return m_body0Wrap->getCollisionObject();
 	}
 
 	const btCollisionObject* getBody1Internal() const
 	{
-		return m_body1;
+		return m_body1Wrap->getCollisionObject();
 	}
-	
+
+	/// in the future we can let the user override the methods to combine restitution and friction
+	static btScalar	calculateCombinedRestitution(const btCollisionObject* body0,const btCollisionObject* body1);
+	static btScalar	calculateCombinedFriction(const btCollisionObject* body0,const btCollisionObject* body1);
 };
 
-#endif //MANIFOLD_RESULT_H
+#endif //BT_MANIFOLD_RESULT_H
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btSimulationIslandManager.cpp b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btSimulationIslandManager.cpp
index 0328d0f73..134478225 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btSimulationIslandManager.cpp
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btSimulationIslandManager.cpp
@@ -1,3 +1,4 @@
+
 /*
 Bullet Continuous Collision Detection and Physics Library
 Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
@@ -44,10 +45,14 @@ void btSimulationIslandManager::findUnions(btDispatcher* /* dispatcher */,btColl
 {
 	
 	{
-		
-		for (int i=0;i<colWorld->getPairCache()->getNumOverlappingPairs();i++)
+		btOverlappingPairCache* pairCachePtr = colWorld->getPairCache();
+		const int numOverlappingPairs = pairCachePtr->getNumOverlappingPairs();
+		if (numOverlappingPairs)
+		{
+		btBroadphasePair* pairPtr = pairCachePtr->getOverlappingPairArrayPtr();
+		
+		for (int i=0;i<numOverlappingPairs;i++)
 		{
-			btBroadphasePair* pairPtr = colWorld->getPairCache()->getOverlappingPairArrayPtr();
 			const btBroadphasePair& collisionPair = pairPtr[i];
 			btCollisionObject* colObj0 = (btCollisionObject*)collisionPair.m_pProxy0->m_clientObject;
 			btCollisionObject* colObj1 = (btCollisionObject*)collisionPair.m_pProxy1->m_clientObject;
@@ -60,18 +65,73 @@ void btSimulationIslandManager::findUnions(btDispatcher* /* dispatcher */,btColl
 					(colObj1)->getIslandTag());
 			}
 		}
+		}
+	}
+}
+
+#ifdef STATIC_SIMULATION_ISLAND_OPTIMIZATION
+void   btSimulationIslandManager::updateActivationState(btCollisionWorld* colWorld,btDispatcher* dispatcher)
+{
+	//STATIC_SIMULATION_ISLAND_OPTIMIZATION variant: only non static/kinematic objects receive an island tag and a union-find slot
+	// put the index into m_controllers into m_tag   
+	int index = 0;
+	{
+		//companion id and hit fraction are reset for every object, tagged or not
+		int i;
+		for (i=0;i<colWorld->getCollisionObjectArray().size(); i++)
+		{
+			btCollisionObject*   collisionObject= colWorld->getCollisionObjectArray()[i];
+			//Adding filtering here
+			if (!collisionObject->isStaticOrKinematicObject())
+			{
+				collisionObject->setIslandTag(index++);
+			}
+			collisionObject->setCompanionId(-1);
+			collisionObject->setHitFraction(btScalar(1.));
+		}
+	}
+	// do the union find
+	//the union find is sized by the number of tagged objects counted above, not by the total object count
+	initUnionFind( index );
+
+	findUnions(dispatcher,colWorld);
+}
+
+void   btSimulationIslandManager::storeIslandActivationState(btCollisionWorld* colWorld)
+{
+	// STATIC_SIMULATION_ISLAND_OPTIMIZATION variant: put the islandId ('find' value) into m_tag of every non static/kinematic object
+	{
+		int index = 0; //union-find slot of the next non static/kinematic object
+		int i;
+		for (i=0;i<colWorld->getCollisionObjectArray().size();i++)
+		{
+			btCollisionObject* collisionObject= colWorld->getCollisionObjectArray()[i];
+			if (!collisionObject->isStaticOrKinematicObject())
+			{
+				collisionObject->setIslandTag( m_unionFind.find(index) );
+				//Set the correct object offset in Collision Object Array
+				m_unionFind.getElement(index).m_sz = i;
+				collisionObject->setCompanionId(-1);
+				index++;
+			} else
+			{
+				collisionObject->setIslandTag(-1); //static and kinematic objects get island tag -1 ...
+				collisionObject->setCompanionId(-2); //... and companion id -2
+			}
+		}
 	}
 }
 
 
+#else //STATIC_SIMULATION_ISLAND_OPTIMIZATION
 void	btSimulationIslandManager::updateActivationState(btCollisionWorld* colWorld,btDispatcher* dispatcher)
 {
-	
+
 	initUnionFind( int (colWorld->getCollisionObjectArray().size()));
-	
+
 	// put the index into m_controllers into m_tag	
 	{
-		
+
 		int index = 0;
 		int i;
 		for (i=0;i<colWorld->getCollisionObjectArray().size(); i++)
@@ -81,26 +141,20 @@ void	btSimulationIslandManager::updateActivationState(btCollisionWorld* colWorld
 			collisionObject->setCompanionId(-1);
 			collisionObject->setHitFraction(btScalar(1.));
 			index++;
-			
+
 		}
 	}
 	// do the union find
-	
+
 	findUnions(dispatcher,colWorld);
-	
-
-	
 }
 
-
-
-
 void	btSimulationIslandManager::storeIslandActivationState(btCollisionWorld* colWorld)
 {
 	// put the islandId ('find' value) into m_tag	
 	{
-		
-		
+
+
 		int index = 0;
 		int i;
 		for (i=0;i<colWorld->getCollisionObjectArray().size();i++)
@@ -120,6 +174,8 @@ void	btSimulationIslandManager::storeIslandActivationState(btCollisionWorld* col
 	}
 }
 
+#endif //STATIC_SIMULATION_ISLAND_OPTIMIZATION
+
 inline	int	getIslandId(const btPersistentManifold* lhs)
 {
 	int islandId;
@@ -137,7 +193,7 @@ class btPersistentManifoldSortPredicate
 {
 	public:
 
-		SIMD_FORCE_INLINE bool operator() ( const btPersistentManifold* lhs, const btPersistentManifold* rhs )
+		SIMD_FORCE_INLINE bool operator() ( const btPersistentManifold* lhs, const btPersistentManifold* rhs ) const
 		{
 			return getIslandId(lhs) < getIslandId(rhs);
 		}
@@ -263,8 +319,8 @@ void btSimulationIslandManager::buildIslands(btDispatcher* dispatcher,btCollisio
 	{
 		 btPersistentManifold* manifold = dispatcher->getManifoldByIndexInternal(i);
 		 
-		 btCollisionObject* colObj0 = static_cast<btCollisionObject*>(manifold->getBody0());
-		 btCollisionObject* colObj1 = static_cast<btCollisionObject*>(manifold->getBody1());
+		 const btCollisionObject* colObj0 = static_cast<const btCollisionObject*>(manifold->getBody0());
+		 const btCollisionObject* colObj1 = static_cast<const btCollisionObject*>(manifold->getBody1());
 		
 		 ///@todo: check sleeping conditions!
 		 if (((colObj0) && colObj0->getActivationState() != ISLAND_SLEEPING) ||
@@ -274,11 +330,13 @@ void btSimulationIslandManager::buildIslands(btDispatcher* dispatcher,btCollisio
 			//kinematic objects don't merge islands, but wake up all connected objects
 			if (colObj0->isKinematicObject() && colObj0->getActivationState() != ISLAND_SLEEPING)
 			{
-				colObj1->activate();
+				if (colObj0->hasContactResponse())
+					colObj1->activate();
 			}
 			if (colObj1->isKinematicObject() && colObj1->getActivationState() != ISLAND_SLEEPING)
 			{
-				colObj0->activate();
+				if (colObj1->hasContactResponse())
+					colObj0->activate();
 			}
 			if(m_splitIslands)
 			{ 
@@ -309,7 +367,7 @@ void btSimulationIslandManager::buildAndProcessIslands(btDispatcher* dispatcher,
 	{
 		btPersistentManifold** manifold = dispatcher->getInternalManifoldPointer();
 		int maxNumManifolds = dispatcher->getNumManifolds();
-		callback->ProcessIsland(&collisionObjects[0],collisionObjects.size(),manifold,maxNumManifolds, -1);
+		callback->processIsland(&collisionObjects[0],collisionObjects.size(),manifold,maxNumManifolds, -1);
 	}
 	else
 	{
@@ -319,8 +377,10 @@ void btSimulationIslandManager::buildAndProcessIslands(btDispatcher* dispatcher,
 
 		int numManifolds = int (m_islandmanifold.size());
 
-		//we should do radix sort, it it much faster (O(n) instead of O (n log2(n))
+		//tried a radix sort, but quicksort/heapsort still seems to be faster
+		//@todo rewrite island management
 		m_islandmanifold.quickSort(btPersistentManifoldSortPredicate());
+		//m_islandmanifold.heapSort(btPersistentManifoldSortPredicate());
 
 		//now process all active islands (sets of manifolds for now)
 
@@ -339,15 +399,15 @@ void btSimulationIslandManager::buildAndProcessIslands(btDispatcher* dispatcher,
 			int islandId = getUnionFind().getElement(startIslandIndex).m_id;
 
 
-			   bool islandSleeping = false;
+			   bool islandSleeping = true;
 	                
 					for (endIslandIndex = startIslandIndex;(endIslandIndex<numElem) && (getUnionFind().getElement(endIslandIndex).m_id == islandId);endIslandIndex++)
 					{
 							int i = getUnionFind().getElement(endIslandIndex).m_sz;
 							btCollisionObject* colObj0 = collisionObjects[i];
 							m_islandBodies.push_back(colObj0);
-							if (!colObj0->isActive())
-									islandSleeping = true;
+							if (colObj0->isActive())
+									islandSleeping = false;
 					}
 	                
 
@@ -374,7 +434,7 @@ void btSimulationIslandManager::buildAndProcessIslands(btDispatcher* dispatcher,
 
 			if (!islandSleeping)
 			{
-				callback->ProcessIsland(&m_islandBodies[0],m_islandBodies.size(),startManifold,numIslandManifolds, islandId);
+				callback->processIsland(&m_islandBodies[0],m_islandBodies.size(),startManifold,numIslandManifolds, islandId);
 	//			printf("Island callback of size:%d bodies, %d manifolds\n",islandBodies.size(),numIslandManifolds);
 			}
 			
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btSimulationIslandManager.h b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btSimulationIslandManager.h
index d059f5d6b..e24c6afec 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btSimulationIslandManager.h
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btSimulationIslandManager.h
@@ -13,8 +13,8 @@ subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-#ifndef SIMULATION_ISLAND_MANAGER_H
-#define SIMULATION_ISLAND_MANAGER_H
+#ifndef BT_SIMULATION_ISLAND_MANAGER_H
+#define BT_SIMULATION_ISLAND_MANAGER_H
 
 #include "BulletCollision/CollisionDispatch/btUnionFind.h"
 #include "btCollisionCreateFunc.h"
@@ -59,7 +59,7 @@ public:
 	{
 		virtual ~IslandCallback() {};
 
-		virtual	void	ProcessIsland(btCollisionObject** bodies,int numBodies,class btPersistentManifold**	manifolds,int numManifolds, int islandId) = 0;
+		virtual	void	processIsland(btCollisionObject** bodies,int numBodies,class btPersistentManifold**	manifolds,int numManifolds, int islandId) = 0;
 	};
 
 	void	buildAndProcessIslands(btDispatcher* dispatcher,btCollisionWorld* collisionWorld, IslandCallback* callback);
@@ -77,5 +77,5 @@ public:
 
 };
 
-#endif //SIMULATION_ISLAND_MANAGER_H
+#endif //BT_SIMULATION_ISLAND_MANAGER_H
 
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btSphereBoxCollisionAlgorithm.cpp b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btSphereBoxCollisionAlgorithm.cpp
index 8df876928..e8b567e0e 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btSphereBoxCollisionAlgorithm.cpp
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btSphereBoxCollisionAlgorithm.cpp
@@ -18,20 +18,21 @@ subject to the following restrictions:
 #include "BulletCollision/CollisionShapes/btSphereShape.h"
 #include "BulletCollision/CollisionShapes/btBoxShape.h"
 #include "BulletCollision/CollisionDispatch/btCollisionObject.h"
+#include "BulletCollision/CollisionDispatch/btCollisionObjectWrapper.h"
 //#include <stdio.h>
 
-btSphereBoxCollisionAlgorithm::btSphereBoxCollisionAlgorithm(btPersistentManifold* mf,const btCollisionAlgorithmConstructionInfo& ci,btCollisionObject* col0,btCollisionObject* col1, bool isSwapped)
-: btActivatingCollisionAlgorithm(ci,col0,col1),
+btSphereBoxCollisionAlgorithm::btSphereBoxCollisionAlgorithm(btPersistentManifold* mf,const btCollisionAlgorithmConstructionInfo& ci,const btCollisionObjectWrapper* col0Wrap,const btCollisionObjectWrapper* col1Wrap, bool isSwapped)
+: btActivatingCollisionAlgorithm(ci,col0Wrap,col1Wrap),
 m_ownManifold(false),
 m_manifoldPtr(mf),
 m_isSwapped(isSwapped)
 {
-	btCollisionObject* sphereObj = m_isSwapped? col1 : col0;
-	btCollisionObject* boxObj = m_isSwapped? col0 : col1;
+	const btCollisionObjectWrapper* sphereObjWrap = m_isSwapped? col1Wrap : col0Wrap;
+	const btCollisionObjectWrapper* boxObjWrap = m_isSwapped? col0Wrap : col1Wrap;
 	
-	if (!m_manifoldPtr && m_dispatcher->needsCollision(sphereObj,boxObj))
+	if (!m_manifoldPtr && m_dispatcher->needsCollision(sphereObjWrap->getCollisionObject(),boxObjWrap->getCollisionObject()))
 	{
-		m_manifoldPtr = m_dispatcher->getNewManifold(sphereObj,boxObj);
+		m_manifoldPtr = m_dispatcher->getNewManifold(sphereObjWrap->getCollisionObject(),boxObjWrap->getCollisionObject());
 		m_ownManifold = true;
 	}
 }
@@ -48,36 +49,31 @@ btSphereBoxCollisionAlgorithm::~btSphereBoxCollisionAlgorithm()
 
 
 
-void btSphereBoxCollisionAlgorithm::processCollision (btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut)
+void btSphereBoxCollisionAlgorithm::processCollision (const btCollisionObjectWrapper* body0Wrap, const btCollisionObjectWrapper* body1Wrap, const btDispatcherInfo& dispatchInfo, btManifoldResult* resultOut)
 {
 	(void)dispatchInfo;
 	(void)resultOut;
 	if (!m_manifoldPtr)
 		return;
 
-	btCollisionObject* sphereObj = m_isSwapped? body1 : body0;
-	btCollisionObject* boxObj = m_isSwapped? body0 : body1;
+	const btCollisionObjectWrapper* sphereObjWrap = m_isSwapped? body1Wrap : body0Wrap;
+	const btCollisionObjectWrapper* boxObjWrap = m_isSwapped? body0Wrap : body1Wrap;
 
-
-	btSphereShape* sphere0 = (btSphereShape*)sphereObj->getCollisionShape();
+	btVector3 pOnBox;
 
 	btVector3 normalOnSurfaceB;
-	btVector3 pOnBox,pOnSphere;
-	btVector3 sphereCenter = sphereObj->getWorldTransform().getOrigin();
+	btScalar penetrationDepth;
+	btVector3 sphereCenter = sphereObjWrap->getWorldTransform().getOrigin();
+	const btSphereShape* sphere0 = (const btSphereShape*)sphereObjWrap->getCollisionShape();
 	btScalar radius = sphere0->getRadius();
-	
-	btScalar dist = getSphereDistance(boxObj,pOnBox,pOnSphere,sphereCenter,radius);
+	btScalar maxContactDistance = m_manifoldPtr->getContactBreakingThreshold();
 
 	resultOut->setPersistentManifold(m_manifoldPtr);
 
-	if (dist < SIMD_EPSILON)
+	if (getSphereDistance(boxObjWrap, pOnBox, normalOnSurfaceB, penetrationDepth, sphereCenter, radius, maxContactDistance))
 	{
-		btVector3 normalOnSurfaceB = (pOnBox- pOnSphere).normalize();
-
 		/// report a contact. internally this will be kept persistent, and contact reduction is done
-
-		resultOut->addContactPoint(normalOnSurfaceB,pOnBox,dist);
-		
+		resultOut->addContactPoint(normalOnSurfaceB, pOnBox, penetrationDepth);
 	}
 
 	if (m_ownManifold)
@@ -102,159 +98,117 @@ btScalar btSphereBoxCollisionAlgorithm::calculateTimeOfImpact(btCollisionObject*
 }
 
 
-btScalar btSphereBoxCollisionAlgorithm::getSphereDistance(btCollisionObject* boxObj, btVector3& pointOnBox, btVector3& v3PointOnSphere, const btVector3& sphereCenter, btScalar fRadius ) 
+bool btSphereBoxCollisionAlgorithm::getSphereDistance(const btCollisionObjectWrapper* boxObjWrap, btVector3& pointOnBox, btVector3& normal, btScalar& penetrationDepth, const btVector3& sphereCenter, btScalar fRadius, btScalar maxContactDistance ) 
 {
+	const btBoxShape* boxShape= (const btBoxShape*)boxObjWrap->getCollisionShape();
+	btVector3 const &boxHalfExtent = boxShape->getHalfExtentsWithoutMargin();
+	btScalar boxMargin = boxShape->getMargin();
+	penetrationDepth = 1.0f;
 
-	btScalar margins;
-	btVector3 bounds[2];
-	btBoxShape* boxShape= (btBoxShape*)boxObj->getCollisionShape();
+	// convert the sphere position to the box's local space
+	btTransform const &m44T = boxObjWrap->getWorldTransform();
+	btVector3 sphereRelPos = m44T.invXform(sphereCenter);
+
+	// Determine the closest point to the sphere center in the box
+	btVector3 closestPoint = sphereRelPos;
+	closestPoint.setX( btMin(boxHalfExtent.getX(), closestPoint.getX()) );
+	closestPoint.setX( btMax(-boxHalfExtent.getX(), closestPoint.getX()) );
+	closestPoint.setY( btMin(boxHalfExtent.getY(), closestPoint.getY()) );
+	closestPoint.setY( btMax(-boxHalfExtent.getY(), closestPoint.getY()) );
+	closestPoint.setZ( btMin(boxHalfExtent.getZ(), closestPoint.getZ()) );
+	closestPoint.setZ( btMax(-boxHalfExtent.getZ(), closestPoint.getZ()) );
 	
-	bounds[0] = -boxShape->getHalfExtentsWithoutMargin();
-	bounds[1] = boxShape->getHalfExtentsWithoutMargin();
+	btScalar intersectionDist = fRadius + boxMargin;
+	btScalar contactDist = intersectionDist + maxContactDistance;
+	normal = sphereRelPos - closestPoint;
 
-	margins = boxShape->getMargin();//also add sphereShape margin?
-
-	const btTransform&	m44T = boxObj->getWorldTransform();
-
-	btVector3	boundsVec[2];
-	btScalar	fPenetration;
-
-	boundsVec[0] = bounds[0];
-	boundsVec[1] = bounds[1];
-
-	btVector3	marginsVec( margins, margins, margins );
-
-	// add margins
-	bounds[0] += marginsVec;
-	bounds[1] -= marginsVec;
-
-	/////////////////////////////////////////////////
-
-	btVector3	tmp, prel, n[6], normal, v3P;
-	btScalar   fSep = btScalar(10000000.0), fSepThis;
-
-	n[0].setValue( btScalar(-1.0),  btScalar(0.0),  btScalar(0.0) );
-	n[1].setValue(  btScalar(0.0), btScalar(-1.0),  btScalar(0.0) );
-	n[2].setValue(  btScalar(0.0),  btScalar(0.0), btScalar(-1.0) );
-	n[3].setValue(  btScalar(1.0),  btScalar(0.0),  btScalar(0.0) );
-	n[4].setValue(  btScalar(0.0),  btScalar(1.0),  btScalar(0.0) );
-	n[5].setValue(  btScalar(0.0),  btScalar(0.0),  btScalar(1.0) );
-
-	// convert  point in local space
-	prel = m44T.invXform( sphereCenter);
-	
-	bool	bFound = false;
-
-	v3P = prel;
-
-	for (int i=0;i<6;i++)
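+	//if the sphere center is farther from the box than the contact distance (radius + margin + maxContactDistance), there is no contact to report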
+	btScalar dist2 = normal.length2();
+	if (dist2 > contactDist * contactDist)
 	{
-		int j = i<3? 0:1;
-		if ( (fSepThis = ((v3P-bounds[j]) .dot(n[i]))) > btScalar(0.0) )
-		{
-			v3P = v3P - n[i]*fSepThis;		
-			bFound = true;
-		}
-	}
-	
-	//
-
-	if ( bFound )
-	{
-		bounds[0] = boundsVec[0];
-		bounds[1] = boundsVec[1];
-
-		normal = (prel - v3P).normalize();
-		pointOnBox = v3P + normal*margins;
-		v3PointOnSphere = prel - normal*fRadius;
-
-		if ( ((v3PointOnSphere - pointOnBox) .dot (normal)) > btScalar(0.0) )
-		{
-			return btScalar(1.0);
-		}
-
-		// transform back in world space
-		tmp = m44T( pointOnBox);
-		pointOnBox    = tmp;
-		tmp  = m44T( v3PointOnSphere);		
-		v3PointOnSphere = tmp;
-		btScalar fSeps2 = (pointOnBox-v3PointOnSphere).length2();
-		
-		//if this fails, fallback into deeper penetration case, below
-		if (fSeps2 > SIMD_EPSILON)
-		{
-			fSep = - btSqrt(fSeps2);
-			normal = (pointOnBox-v3PointOnSphere);
-			normal *= btScalar(1.)/fSep;
-		}
-
-		return fSep;
+		return false;
 	}
 
-	//////////////////////////////////////////////////
-	// Deep penetration case
+	btScalar distance;
 
-	fPenetration = getSpherePenetration( boxObj,pointOnBox, v3PointOnSphere, sphereCenter, fRadius,bounds[0],bounds[1] );
-
-	bounds[0] = boundsVec[0];
-	bounds[1] = boundsVec[1];
-
-	if ( fPenetration <= btScalar(0.0) )
-		return (fPenetration-margins);
-	else
-		return btScalar(1.0);
-}
-
-btScalar btSphereBoxCollisionAlgorithm::getSpherePenetration( btCollisionObject* boxObj,btVector3& pointOnBox, btVector3& v3PointOnSphere, const btVector3& sphereCenter, btScalar fRadius, const btVector3& aabbMin, const btVector3& aabbMax) 
-{
-
-	btVector3 bounds[2];
-
-	bounds[0] = aabbMin;
-	bounds[1] = aabbMax;
-
-	btVector3	p0, tmp, prel, n[6], normal;
-	btScalar   fSep = btScalar(-10000000.0), fSepThis;
-
-	// set p0 and normal to a default value to shup up GCC
-	p0.setValue(btScalar(0.), btScalar(0.), btScalar(0.));
-	normal.setValue(btScalar(0.), btScalar(0.), btScalar(0.));
-
-	n[0].setValue( btScalar(-1.0),  btScalar(0.0),  btScalar(0.0) );
-	n[1].setValue(  btScalar(0.0), btScalar(-1.0),  btScalar(0.0) );
-	n[2].setValue(  btScalar(0.0),  btScalar(0.0), btScalar(-1.0) );
-	n[3].setValue(  btScalar(1.0),  btScalar(0.0),  btScalar(0.0) );
-	n[4].setValue(  btScalar(0.0),  btScalar(1.0),  btScalar(0.0) );
-	n[5].setValue(  btScalar(0.0),  btScalar(0.0),  btScalar(1.0) );
-
-	const btTransform&	m44T = boxObj->getWorldTransform();
-
-	// convert  point in local space
-	prel = m44T.invXform( sphereCenter);
-
-	///////////
-
-	for (int i=0;i<6;i++)
+	//special case if the sphere center is inside the box
+	if (dist2 <= SIMD_EPSILON)
 	{
-		int j = i<3 ? 0:1;
-		if ( (fSepThis = ((prel-bounds[j]) .dot( n[i]))-fRadius) > btScalar(0.0) )	return btScalar(1.0);
-		if ( fSepThis > fSep )
-		{
-			p0 = bounds[j];	normal = (btVector3&)n[i];
-			fSep = fSepThis;
-		}
+		distance = -getSpherePenetration(boxHalfExtent, sphereRelPos, closestPoint, normal);
+	}
+	else //compute the penetration details
+	{
+		distance = normal.length();
+		normal /= distance;
 	}
 
-	pointOnBox = prel - normal*(normal.dot((prel-p0)));
-	v3PointOnSphere = pointOnBox + normal*fSep;
+	pointOnBox = closestPoint + normal * boxMargin;
+//	v3PointOnSphere = sphereRelPos - (normal * fRadius);	
+	penetrationDepth = distance - intersectionDist;
 
 	// transform back in world space
-	tmp  = m44T( pointOnBox);		
-	pointOnBox    = tmp;
-	tmp  = m44T( v3PointOnSphere);		v3PointOnSphere = tmp;
-	normal = (pointOnBox-v3PointOnSphere).normalize();
-
-	return fSep;
+	btVector3 tmp = m44T(pointOnBox);
+	pointOnBox = tmp;
+//	tmp = m44T(v3PointOnSphere);
+//	v3PointOnSphere = tmp;
+	tmp = m44T.getBasis() * normal;
+	normal = tmp;
 
+	return true;
 }
 
+btScalar btSphereBoxCollisionAlgorithm::getSpherePenetration( btVector3 const &boxHalfExtent, btVector3 const &sphereRelPos, btVector3 &closestPoint, btVector3& normal ) 
+{
+	//project the center of the sphere onto the closest face of the box
+	btScalar faceDist = boxHalfExtent.getX() - sphereRelPos.getX();
+	btScalar minDist = faceDist;
+	closestPoint.setX( boxHalfExtent.getX() );
+	normal.setValue(btScalar(1.0f),  btScalar(0.0f),  btScalar(0.0f));
+
+	faceDist = boxHalfExtent.getX() + sphereRelPos.getX();
+	if (faceDist < minDist)
+	{
+		minDist = faceDist;
+		closestPoint = sphereRelPos;
+		closestPoint.setX( -boxHalfExtent.getX() );
+		normal.setValue(btScalar(-1.0f),  btScalar(0.0f),  btScalar(0.0f));
+	}
+
+	faceDist = boxHalfExtent.getY() - sphereRelPos.getY();
+	if (faceDist < minDist)
+	{
+		minDist = faceDist;
+		closestPoint = sphereRelPos;
+		closestPoint.setY( boxHalfExtent.getY() );
+		normal.setValue(btScalar(0.0f),  btScalar(1.0f),  btScalar(0.0f));
+	}
+
+	faceDist = boxHalfExtent.getY() + sphereRelPos.getY();
+	if (faceDist < minDist)
+	{
+		minDist = faceDist;
+		closestPoint = sphereRelPos;
+		closestPoint.setY( -boxHalfExtent.getY() );
+		normal.setValue(btScalar(0.0f),  btScalar(-1.0f),  btScalar(0.0f));
+	}
+
+	faceDist = boxHalfExtent.getZ() - sphereRelPos.getZ();
+	if (faceDist < minDist)
+	{
+		minDist = faceDist;
+		closestPoint = sphereRelPos;
+		closestPoint.setZ( boxHalfExtent.getZ() );
+		normal.setValue(btScalar(0.0f),  btScalar(0.0f),  btScalar(1.0f));
+	}
+
+	faceDist = boxHalfExtent.getZ() + sphereRelPos.getZ();
+	if (faceDist < minDist)
+	{
+		minDist = faceDist;
+		closestPoint = sphereRelPos;
+		closestPoint.setZ( -boxHalfExtent.getZ() );
+		normal.setValue(btScalar(0.0f),  btScalar(0.0f),  btScalar(-1.0f));
+	}
+
+	return minDist;
+}
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btSphereBoxCollisionAlgorithm.h b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btSphereBoxCollisionAlgorithm.h
index 47111d1c4..eefaedc9e 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btSphereBoxCollisionAlgorithm.h
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btSphereBoxCollisionAlgorithm.h
@@ -13,8 +13,8 @@ subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-#ifndef SPHERE_BOX_COLLISION_ALGORITHM_H
-#define SPHERE_BOX_COLLISION_ALGORITHM_H
+#ifndef BT_SPHERE_BOX_COLLISION_ALGORITHM_H
+#define BT_SPHERE_BOX_COLLISION_ALGORITHM_H
 
 #include "btActivatingCollisionAlgorithm.h"
 #include "BulletCollision/BroadphaseCollision/btBroadphaseProxy.h"
@@ -34,11 +34,11 @@ class btSphereBoxCollisionAlgorithm : public btActivatingCollisionAlgorithm
 	
 public:
 
-	btSphereBoxCollisionAlgorithm(btPersistentManifold* mf,const btCollisionAlgorithmConstructionInfo& ci,btCollisionObject* col0,btCollisionObject* col1, bool isSwapped);
+	btSphereBoxCollisionAlgorithm(btPersistentManifold* mf,const btCollisionAlgorithmConstructionInfo& ci,const btCollisionObjectWrapper* body0Wrap,const btCollisionObjectWrapper* body1Wrap, bool isSwapped);
 
 	virtual ~btSphereBoxCollisionAlgorithm();
 
-	virtual void processCollision (btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
+	virtual void processCollision (const btCollisionObjectWrapper* body0Wrap,const btCollisionObjectWrapper* body1Wrap,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
 
 	virtual btScalar calculateTimeOfImpact(btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
 
@@ -50,26 +50,26 @@ public:
 		}
 	}
 
-	btScalar getSphereDistance( btCollisionObject* boxObj,btVector3& v3PointOnBox, btVector3& v3PointOnSphere, const btVector3& v3SphereCenter, btScalar fRadius );
+	bool getSphereDistance( const btCollisionObjectWrapper* boxObjWrap, btVector3& v3PointOnBox, btVector3& normal, btScalar& penetrationDepth, const btVector3& v3SphereCenter, btScalar fRadius, btScalar maxContactDistance );
 
-	btScalar getSpherePenetration( btCollisionObject* boxObj, btVector3& v3PointOnBox, btVector3& v3PointOnSphere, const btVector3& v3SphereCenter, btScalar fRadius, const btVector3& aabbMin, const btVector3& aabbMax);
+	btScalar getSpherePenetration( btVector3 const &boxHalfExtent, btVector3 const &sphereRelPos, btVector3 &closestPoint, btVector3& normal );
 	
 	struct CreateFunc :public 	btCollisionAlgorithmCreateFunc
 	{
-		virtual	btCollisionAlgorithm* CreateCollisionAlgorithm(btCollisionAlgorithmConstructionInfo& ci, btCollisionObject* body0,btCollisionObject* body1)
+		virtual	btCollisionAlgorithm* CreateCollisionAlgorithm(btCollisionAlgorithmConstructionInfo& ci, const btCollisionObjectWrapper* body0Wrap,const btCollisionObjectWrapper* body1Wrap)
 		{
 			void* mem = ci.m_dispatcher1->allocateCollisionAlgorithm(sizeof(btSphereBoxCollisionAlgorithm));
 			if (!m_swapped)
 			{
-				return new(mem) btSphereBoxCollisionAlgorithm(0,ci,body0,body1,false);
+				return new(mem) btSphereBoxCollisionAlgorithm(0,ci,body0Wrap,body1Wrap,false);
 			} else
 			{
-				return new(mem) btSphereBoxCollisionAlgorithm(0,ci,body0,body1,true);
+				return new(mem) btSphereBoxCollisionAlgorithm(0,ci,body0Wrap,body1Wrap,true);
 			}
 		}
 	};
 
 };
 
-#endif //SPHERE_BOX_COLLISION_ALGORITHM_H
+#endif //BT_SPHERE_BOX_COLLISION_ALGORITHM_H
 
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btSphereSphereCollisionAlgorithm.cpp b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btSphereSphereCollisionAlgorithm.cpp
index 5c4e78fe5..36ba21f5b 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btSphereSphereCollisionAlgorithm.cpp
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btSphereSphereCollisionAlgorithm.cpp
@@ -17,15 +17,16 @@ subject to the following restrictions:
 #include "BulletCollision/CollisionDispatch/btCollisionDispatcher.h"
 #include "BulletCollision/CollisionShapes/btSphereShape.h"
 #include "BulletCollision/CollisionDispatch/btCollisionObject.h"
+#include "BulletCollision/CollisionDispatch/btCollisionObjectWrapper.h"
 
-btSphereSphereCollisionAlgorithm::btSphereSphereCollisionAlgorithm(btPersistentManifold* mf,const btCollisionAlgorithmConstructionInfo& ci,btCollisionObject* col0,btCollisionObject* col1)
-: btActivatingCollisionAlgorithm(ci,col0,col1),
+btSphereSphereCollisionAlgorithm::btSphereSphereCollisionAlgorithm(btPersistentManifold* mf,const btCollisionAlgorithmConstructionInfo& ci,const btCollisionObjectWrapper* col0Wrap,const btCollisionObjectWrapper* col1Wrap)
+: btActivatingCollisionAlgorithm(ci,col0Wrap,col1Wrap),
 m_ownManifold(false),
 m_manifoldPtr(mf)
 {
 	if (!m_manifoldPtr)
 	{
-		m_manifoldPtr = m_dispatcher->getNewManifold(col0,col1);
+		m_manifoldPtr = m_dispatcher->getNewManifold(col0Wrap->getCollisionObject(),col1Wrap->getCollisionObject());
 		m_ownManifold = true;
 	}
 }
@@ -39,7 +40,7 @@ btSphereSphereCollisionAlgorithm::~btSphereSphereCollisionAlgorithm()
 	}
 }
 
-void btSphereSphereCollisionAlgorithm::processCollision (btCollisionObject* col0,btCollisionObject* col1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut)
+void btSphereSphereCollisionAlgorithm::processCollision (const btCollisionObjectWrapper* col0Wrap,const btCollisionObjectWrapper* col1Wrap,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut)
 {
 	(void)dispatchInfo;
 
@@ -48,10 +49,10 @@ void btSphereSphereCollisionAlgorithm::processCollision (btCollisionObject* col0
 
 	resultOut->setPersistentManifold(m_manifoldPtr);
 
-	btSphereShape* sphere0 = (btSphereShape*)col0->getCollisionShape();
-	btSphereShape* sphere1 = (btSphereShape*)col1->getCollisionShape();
+	btSphereShape* sphere0 = (btSphereShape*)col0Wrap->getCollisionShape();
+	btSphereShape* sphere1 = (btSphereShape*)col1Wrap->getCollisionShape();
 
-	btVector3 diff = col0->getWorldTransform().getOrigin()-  col1->getWorldTransform().getOrigin();
+	btVector3 diff = col0Wrap->getWorldTransform().getOrigin()-  col1Wrap->getWorldTransform().getOrigin();
 	btScalar len = diff.length();
 	btScalar radius0 = sphere0->getRadius();
 	btScalar radius1 = sphere1->getRadius();
@@ -80,7 +81,7 @@ void btSphereSphereCollisionAlgorithm::processCollision (btCollisionObject* col0
 	///point on A (worldspace)
 	///btVector3 pos0 = col0->getWorldTransform().getOrigin() - radius0 * normalOnSurfaceB;
 	///point on B (worldspace)
-	btVector3 pos1 = col1->getWorldTransform().getOrigin() + radius1* normalOnSurfaceB;
+	btVector3 pos1 = col1Wrap->getWorldTransform().getOrigin() + radius1* normalOnSurfaceB;
 
 	/// report a contact. internally this will be kept persistent, and contact reduction is done
 	
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btSphereSphereCollisionAlgorithm.h b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btSphereSphereCollisionAlgorithm.h
index 7d07512ca..3517a568a 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btSphereSphereCollisionAlgorithm.h
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btSphereSphereCollisionAlgorithm.h
@@ -13,8 +13,8 @@ subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-#ifndef SPHERE_SPHERE_COLLISION_ALGORITHM_H
-#define SPHERE_SPHERE_COLLISION_ALGORITHM_H
+#ifndef BT_SPHERE_SPHERE_COLLISION_ALGORITHM_H
+#define BT_SPHERE_SPHERE_COLLISION_ALGORITHM_H
 
 #include "btActivatingCollisionAlgorithm.h"
 #include "BulletCollision/BroadphaseCollision/btBroadphaseProxy.h"
@@ -32,12 +32,12 @@ class btSphereSphereCollisionAlgorithm : public btActivatingCollisionAlgorithm
 	btPersistentManifold*	m_manifoldPtr;
 	
 public:
-	btSphereSphereCollisionAlgorithm(btPersistentManifold* mf,const btCollisionAlgorithmConstructionInfo& ci,btCollisionObject* body0,btCollisionObject* body1);
+	btSphereSphereCollisionAlgorithm(btPersistentManifold* mf,const btCollisionAlgorithmConstructionInfo& ci,const btCollisionObjectWrapper* col0Wrap,const btCollisionObjectWrapper* col1Wrap);
 
 	btSphereSphereCollisionAlgorithm(const btCollisionAlgorithmConstructionInfo& ci)
 		: btActivatingCollisionAlgorithm(ci) {}
 
-	virtual void processCollision (btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
+	virtual void processCollision (const btCollisionObjectWrapper* body0Wrap,const btCollisionObjectWrapper* body1Wrap,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
 
 	virtual btScalar calculateTimeOfImpact(btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
 
@@ -53,14 +53,14 @@ public:
 
 	struct CreateFunc :public 	btCollisionAlgorithmCreateFunc
 	{
-		virtual	btCollisionAlgorithm* CreateCollisionAlgorithm(btCollisionAlgorithmConstructionInfo& ci, btCollisionObject* body0,btCollisionObject* body1)
+		virtual	btCollisionAlgorithm* CreateCollisionAlgorithm(btCollisionAlgorithmConstructionInfo& ci, const btCollisionObjectWrapper* col0Wrap,const btCollisionObjectWrapper* col1Wrap)
 		{
 			void* mem = ci.m_dispatcher1->allocateCollisionAlgorithm(sizeof(btSphereSphereCollisionAlgorithm));
-			return new(mem) btSphereSphereCollisionAlgorithm(0,ci,body0,body1);
+			return new(mem) btSphereSphereCollisionAlgorithm(0,ci,col0Wrap,col1Wrap);
 		}
 	};
 
 };
 
-#endif //SPHERE_SPHERE_COLLISION_ALGORITHM_H
+#endif //BT_SPHERE_SPHERE_COLLISION_ALGORITHM_H
 
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btSphereTriangleCollisionAlgorithm.cpp b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btSphereTriangleCollisionAlgorithm.cpp
index c327c3ff7..280a4d355 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btSphereTriangleCollisionAlgorithm.cpp
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btSphereTriangleCollisionAlgorithm.cpp
@@ -19,17 +19,17 @@ subject to the following restrictions:
 #include "BulletCollision/CollisionShapes/btSphereShape.h"
 #include "BulletCollision/CollisionDispatch/btCollisionObject.h"
 #include "SphereTriangleDetector.h"
+#include "BulletCollision/CollisionDispatch/btCollisionObjectWrapper.h"
 
-
-btSphereTriangleCollisionAlgorithm::btSphereTriangleCollisionAlgorithm(btPersistentManifold* mf,const btCollisionAlgorithmConstructionInfo& ci,btCollisionObject* col0,btCollisionObject* col1,bool swapped)
-: btActivatingCollisionAlgorithm(ci,col0,col1),
+btSphereTriangleCollisionAlgorithm::btSphereTriangleCollisionAlgorithm(btPersistentManifold* mf,const btCollisionAlgorithmConstructionInfo& ci,const btCollisionObjectWrapper* body0Wrap,const btCollisionObjectWrapper* body1Wrap,bool swapped)
+: btActivatingCollisionAlgorithm(ci,body0Wrap,body1Wrap),
 m_ownManifold(false),
 m_manifoldPtr(mf),
 m_swapped(swapped)
 {
 	if (!m_manifoldPtr)
 	{
-		m_manifoldPtr = m_dispatcher->getNewManifold(col0,col1);
+		m_manifoldPtr = m_dispatcher->getNewManifold(body0Wrap->getCollisionObject(),body1Wrap->getCollisionObject());
 		m_ownManifold = true;
 	}
 }
@@ -43,16 +43,16 @@ btSphereTriangleCollisionAlgorithm::~btSphereTriangleCollisionAlgorithm()
 	}
 }
 
-void btSphereTriangleCollisionAlgorithm::processCollision (btCollisionObject* col0,btCollisionObject* col1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut)
+void btSphereTriangleCollisionAlgorithm::processCollision (const btCollisionObjectWrapper* col0Wrap,const btCollisionObjectWrapper* col1Wrap,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut)
 {
 	if (!m_manifoldPtr)
 		return;
 
-	btCollisionObject* sphereObj = m_swapped? col1 : col0;
-	btCollisionObject* triObj = m_swapped? col0 : col1;
+	const btCollisionObjectWrapper* sphereObjWrap = m_swapped? col1Wrap : col0Wrap;
+	const btCollisionObjectWrapper* triObjWrap = m_swapped? col0Wrap : col1Wrap;
 
-	btSphereShape* sphere = (btSphereShape*)sphereObj->getCollisionShape();
-	btTriangleShape* triangle = (btTriangleShape*)triObj->getCollisionShape();
+	btSphereShape* sphere = (btSphereShape*)sphereObjWrap->getCollisionShape();
+	btTriangleShape* triangle = (btTriangleShape*)triObjWrap->getCollisionShape();
 	
 	/// report a contact. internally this will be kept persistent, and contact reduction is done
 	resultOut->setPersistentManifold(m_manifoldPtr);
@@ -60,8 +60,8 @@ void btSphereTriangleCollisionAlgorithm::processCollision (btCollisionObject* co
 	
 	btDiscreteCollisionDetectorInterface::ClosestPointInput input;
 	input.m_maximumDistanceSquared = btScalar(BT_LARGE_FLOAT);///@todo: tighter bounds
-	input.m_transformA = sphereObj->getWorldTransform();
-	input.m_transformB = triObj->getWorldTransform();
+	input.m_transformA = sphereObjWrap->getWorldTransform();
+	input.m_transformB = triObjWrap->getWorldTransform();
 
 	bool swapResults = m_swapped;
 
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btSphereTriangleCollisionAlgorithm.h b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btSphereTriangleCollisionAlgorithm.h
index 606c3635a..6b6e39a72 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btSphereTriangleCollisionAlgorithm.h
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btSphereTriangleCollisionAlgorithm.h
@@ -13,8 +13,8 @@ subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-#ifndef SPHERE_TRIANGLE_COLLISION_ALGORITHM_H
-#define SPHERE_TRIANGLE_COLLISION_ALGORITHM_H
+#ifndef BT_SPHERE_TRIANGLE_COLLISION_ALGORITHM_H
+#define BT_SPHERE_TRIANGLE_COLLISION_ALGORITHM_H
 
 #include "btActivatingCollisionAlgorithm.h"
 #include "BulletCollision/BroadphaseCollision/btBroadphaseProxy.h"
@@ -32,12 +32,12 @@ class btSphereTriangleCollisionAlgorithm : public btActivatingCollisionAlgorithm
 	bool	m_swapped;
 	
 public:
-	btSphereTriangleCollisionAlgorithm(btPersistentManifold* mf,const btCollisionAlgorithmConstructionInfo& ci,btCollisionObject* body0,btCollisionObject* body1,bool swapped);
+	btSphereTriangleCollisionAlgorithm(btPersistentManifold* mf,const btCollisionAlgorithmConstructionInfo& ci,const btCollisionObjectWrapper* body0Wrap,const btCollisionObjectWrapper* body1Wrap,bool swapped);
 
 	btSphereTriangleCollisionAlgorithm(const btCollisionAlgorithmConstructionInfo& ci)
 		: btActivatingCollisionAlgorithm(ci) {}
 
-	virtual void processCollision (btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
+	virtual void processCollision (const btCollisionObjectWrapper* body0Wrap,const btCollisionObjectWrapper* body1Wrap,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
 
 	virtual btScalar calculateTimeOfImpact(btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
 
@@ -54,16 +54,16 @@ public:
 	struct CreateFunc :public 	btCollisionAlgorithmCreateFunc
 	{
 		
-		virtual	btCollisionAlgorithm* CreateCollisionAlgorithm(btCollisionAlgorithmConstructionInfo& ci, btCollisionObject* body0,btCollisionObject* body1)
+		virtual	btCollisionAlgorithm* CreateCollisionAlgorithm(btCollisionAlgorithmConstructionInfo& ci, const btCollisionObjectWrapper* body0Wrap,const btCollisionObjectWrapper* body1Wrap)
 		{
 			
 			void* mem = ci.m_dispatcher1->allocateCollisionAlgorithm(sizeof(btSphereTriangleCollisionAlgorithm));
 
-			return new(mem) btSphereTriangleCollisionAlgorithm(ci.m_manifold,ci,body0,body1,m_swapped);
+			return new(mem) btSphereTriangleCollisionAlgorithm(ci.m_manifold,ci,body0Wrap,body1Wrap,m_swapped);
 		}
 	};
 
 };
 
-#endif //SPHERE_TRIANGLE_COLLISION_ALGORITHM_H
+#endif //BT_SPHERE_TRIANGLE_COLLISION_ALGORITHM_H
 
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btUnionFind.cpp b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btUnionFind.cpp
index c561df061..522293359 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btUnionFind.cpp
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btUnionFind.cpp
@@ -53,7 +53,7 @@ class btUnionFindElementSortPredicate
 {
 	public:
 
-		bool operator() ( const btElement& lhs, const btElement& rhs )
+		bool operator() ( const btElement& lhs, const btElement& rhs ) const
 		{
 			return lhs.m_id < rhs.m_id;
 		}
@@ -70,7 +70,9 @@ void	btUnionFind::sortIslands()
 	for (int i=0;i<numElements;i++)
 	{
 		m_elements[i].m_id = find(i);
+#ifndef STATIC_SIMULATION_ISLAND_OPTIMIZATION
 		m_elements[i].m_sz = i;
+#endif //STATIC_SIMULATION_ISLAND_OPTIMIZATION
 	}
 	
 	 // Sort the vector using predicate and std::sort
@@ -78,4 +80,3 @@ void	btUnionFind::sortIslands()
 	  m_elements.quickSort(btUnionFindElementSortPredicate());
 
 }
-
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btUnionFind.h b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btUnionFind.h
index e105ecbff..ef2a29202 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btUnionFind.h
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionDispatch/btUnionFind.h
@@ -13,12 +13,15 @@ subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-#ifndef UNION_FIND_H
-#define UNION_FIND_H
+#ifndef BT_UNION_FIND_H
+#define BT_UNION_FIND_H
 
 #include "LinearMath/btAlignedObjectArray.h"
 
-	#define USE_PATH_COMPRESSION 1
+#define USE_PATH_COMPRESSION 1
+
+///see the discussion of static island optimizations by Vroonsh here: http://code.google.com/p/bullet/issues/detail?id=406
+#define STATIC_SIMULATION_ISLAND_OPTIMIZATION 1
 
 struct	btElement
 {
@@ -106,10 +109,12 @@ class btUnionFind
 		//not really a reason not to use path compression, and it flattens the trees/improves find performance dramatically
 	
 		#ifdef USE_PATH_COMPRESSION
-				//
-				m_elements[x].m_id = m_elements[m_elements[x].m_id].m_id;
-		#endif //
+				const btElement* elementPtr = &m_elements[m_elements[x].m_id];
+				m_elements[x].m_id = elementPtr->m_id;
+				x = elementPtr->m_id;			
+		#else//
 				x = m_elements[x].m_id;
+		#endif		
 				//btAssert(x < m_N);
 				//btAssert(x >= 0);
 
@@ -121,4 +126,4 @@ class btUnionFind
   };
 
 
-#endif //UNION_FIND_H
+#endif //BT_UNION_FIND_H
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btBox2dShape.h b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btBox2dShape.h
index fc032069c..ce333783e 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btBox2dShape.h
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btBox2dShape.h
@@ -13,8 +13,8 @@ subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-#ifndef OBB_BOX_2D_SHAPE_H
-#define OBB_BOX_2D_SHAPE_H
+#ifndef BT_OBB_BOX_2D_SHAPE_H
+#define BT_OBB_BOX_2D_SHAPE_H
 
 #include "BulletCollision/CollisionShapes/btPolyhedralConvexShape.h"
 #include "BulletCollision/CollisionShapes/btCollisionMargin.h"
@@ -23,7 +23,7 @@ subject to the following restrictions:
 #include "LinearMath/btMinMax.h"
 
 ///The btBox2dShape is a box primitive around the origin, its sides axis aligned with length specified by half extents, in local shape coordinates. When used as part of a btCollisionObject or btRigidBody it will be an oriented box in world space.
-class btBox2dShape: public btPolyhedralConvexShape
+ATTRIBUTE_ALIGNED16(class) btBox2dShape: public btPolyhedralConvexShape
 {
 
 	//btVector3	m_boxHalfExtents1; //use m_implicitShapeDimensions instead
@@ -34,6 +34,8 @@ class btBox2dShape: public btPolyhedralConvexShape
 
 public:
 
+	BT_DECLARE_ALIGNED_ALLOCATOR();
+
 	btVector3 getHalfExtentsWithMargin() const
 	{
 		btVector3 halfExtents = getHalfExtentsWithoutMargin();
@@ -83,6 +85,7 @@ public:
 	}
 
 
+	///a btBox2dShape is a flat 2D box in the X-Y plane (Z extents are zero)
 	btBox2dShape( const btVector3& boxHalfExtents) 
 		: btPolyhedralConvexShape(),
 		m_centroid(0,0,0)
@@ -97,6 +100,11 @@ public:
 		m_normals[2].setValue(0,1,0);
 		m_normals[3].setValue(-1,0,0);
 
+		btScalar minDimension = boxHalfExtents.getX();
+		if (minDimension>boxHalfExtents.getY())
+			minDimension = boxHalfExtents.getY();
+		setSafeMargin(minDimension);
+
 		m_shapeType = BOX_2D_SHAPE_PROXYTYPE;
 		btVector3 margin(getMargin(),getMargin(),getMargin());
 		m_implicitShapeDimensions = (boxHalfExtents * m_localScaling) - margin;
@@ -358,6 +366,6 @@ public:
 
 };
 
-#endif //OBB_BOX_2D_SHAPE_H
+#endif //BT_OBB_BOX_2D_SHAPE_H
 
 
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btBoxShape.cpp b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btBoxShape.cpp
index c6644efbe..3859138f1 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btBoxShape.cpp
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btBoxShape.cpp
@@ -14,8 +14,18 @@ subject to the following restrictions:
 */
 #include "btBoxShape.h"
 
+btBoxShape::btBoxShape( const btVector3& boxHalfExtents) 
+: btPolyhedralConvexShape()
+{
+	m_shapeType = BOX_SHAPE_PROXYTYPE;
+	//setSafeMargin runs before getMargin() is read below (presumably it limits the margin for small boxes - TODO confirm in btConvexInternalShape)
+	setSafeMargin(boxHalfExtents);
+	//the stored dimensions are the scaled half extents with the margin subtracted on every axis
+	btVector3 margin(getMargin(),getMargin(),getMargin());
+	m_implicitShapeDimensions = (boxHalfExtents * m_localScaling) - margin;
+};
+
 
-//{ 
 
 
 void btBoxShape::getAabb(const btTransform& t,btVector3& aabbMin,btVector3& aabbMax) const
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btBoxShape.h b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btBoxShape.h
index c71318dd3..715e3f2ab 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btBoxShape.h
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btBoxShape.h
@@ -13,8 +13,8 @@ subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-#ifndef OBB_BOX_MINKOWSKI_H
-#define OBB_BOX_MINKOWSKI_H
+#ifndef BT_OBB_BOX_MINKOWSKI_H
+#define BT_OBB_BOX_MINKOWSKI_H
 
 #include "btPolyhedralConvexShape.h"
 #include "btCollisionMargin.h"
@@ -23,7 +23,7 @@ subject to the following restrictions:
 #include "LinearMath/btMinMax.h"
 
 ///The btBoxShape is a box primitive around the origin, its sides axis aligned with length specified by half extents, in local shape coordinates. When used as part of a btCollisionObject or btRigidBody it will be an oriented box in world space.
-class btBoxShape: public btPolyhedralConvexShape
+ATTRIBUTE_ALIGNED16(class) btBoxShape: public btPolyhedralConvexShape
 {
 
 	//btVector3	m_boxHalfExtents1; //use m_implicitShapeDimensions instead
@@ -31,6 +31,8 @@ class btBoxShape: public btPolyhedralConvexShape
 
 public:
 
+BT_DECLARE_ALIGNED_ALLOCATOR();
+
 	btVector3 getHalfExtentsWithMargin() const
 	{
 		btVector3 halfExtents = getHalfExtentsWithoutMargin();
@@ -41,7 +43,7 @@ public:
 	
 	const btVector3& getHalfExtentsWithoutMargin() const
 	{
-		return m_implicitShapeDimensions;//changed in Bullet 2.63: assume the scaling and margin are included
+		return m_implicitShapeDimensions;//scaling is included, margin is not
 	}
 	
 
@@ -80,13 +82,7 @@ public:
 	}
 
 
-	btBoxShape( const btVector3& boxHalfExtents) 
-		: btPolyhedralConvexShape()
-	{
-		m_shapeType = BOX_SHAPE_PROXYTYPE;
-		btVector3 margin(getMargin(),getMargin(),getMargin());
-		m_implicitShapeDimensions = (boxHalfExtents * m_localScaling) - margin;
-	};
+	btBoxShape( const btVector3& boxHalfExtents);
 
 	virtual void setMargin(btScalar collisionMargin)
 	{
@@ -145,7 +141,7 @@ public:
 
 	virtual void getVertex(int i,btVector3& vtx) const
 	{
-		btVector3 halfExtents = getHalfExtentsWithoutMargin();
+		btVector3 halfExtents = getHalfExtentsWithMargin();
 
 		vtx = btVector3(
 				halfExtents.x() * (1-(i&1)) - halfExtents.x() * (i&1),
@@ -312,6 +308,7 @@ public:
 
 };
 
-#endif //OBB_BOX_MINKOWSKI_H
+
+#endif //BT_OBB_BOX_MINKOWSKI_H
 
 
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btBvhTriangleMeshShape.cpp b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btBvhTriangleMeshShape.cpp
index b8e76d6f8..ace4cfa26 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btBvhTriangleMeshShape.cpp
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btBvhTriangleMeshShape.cpp
@@ -17,12 +17,14 @@ subject to the following restrictions:
 
 #include "BulletCollision/CollisionShapes/btBvhTriangleMeshShape.h"
 #include "BulletCollision/CollisionShapes/btOptimizedBvh.h"
+#include "LinearMath/btSerializer.h"
 
 ///Bvh Concave triangle mesh is a static-triangle mesh shape with Bounding Volume Hierarchy optimization.
 ///Uses an interface to access the triangles to allow for sharing graphics/physics triangles.
 btBvhTriangleMeshShape::btBvhTriangleMeshShape(btStridingMeshInterface* meshInterface, bool useQuantizedAabbCompression, bool buildBvh)
 :btTriangleMeshShape(meshInterface),
 m_bvh(0),
+m_triangleInfoMap(0),
 m_useQuantizedAabbCompression(useQuantizedAabbCompression),
 m_ownsBvh(false)
 {
@@ -30,22 +32,9 @@ m_ownsBvh(false)
 	//construct bvh from meshInterface
 #ifndef DISABLE_BVH
 
-	btVector3 bvhAabbMin,bvhAabbMax;
-	if(meshInterface->hasPremadeAabb())
-	{
-		meshInterface->getPremadeAabb(&bvhAabbMin, &bvhAabbMax);
-	}
-	else
-	{
-		meshInterface->calculateAabbBruteForce(bvhAabbMin,bvhAabbMax);
-	}
-	
 	if (buildBvh)
 	{
-		void* mem = btAlignedAlloc(sizeof(btOptimizedBvh),16);
-		m_bvh = new (mem) btOptimizedBvh();
-		m_bvh->build(meshInterface,m_useQuantizedAabbCompression,bvhAabbMin,bvhAabbMax);
-		m_ownsBvh = true;
+		buildOptimizedBvh();
 	}
 
 #endif //DISABLE_BVH
@@ -55,6 +44,7 @@ m_ownsBvh(false)
 btBvhTriangleMeshShape::btBvhTriangleMeshShape(btStridingMeshInterface* meshInterface, bool useQuantizedAabbCompression,const btVector3& bvhAabbMin,const btVector3& bvhAabbMax,bool buildBvh)
 :btTriangleMeshShape(meshInterface),
 m_bvh(0),
+m_triangleInfoMap(0),
 m_useQuantizedAabbCompression(useQuantizedAabbCompression),
 m_ownsBvh(false)
 {
@@ -287,13 +277,13 @@ void	btBvhTriangleMeshShape::processAllTriangles(btTriangleCallback* callback,co
 				nodeSubPart);
 
 			unsigned int* gfxbase = (unsigned int*)(indexbase+nodeTriangleIndex*indexstride);
-			btAssert(indicestype==PHY_INTEGER||indicestype==PHY_SHORT);
+			btAssert(indicestype==PHY_INTEGER||indicestype==PHY_SHORT||indicestype==PHY_UCHAR);
 	
 			const btVector3& meshScaling = m_meshInterface->getScaling();
 			for (int j=2;j>=0;j--)
 			{
 				
-				int graphicsindex = indicestype==PHY_SHORT?((unsigned short*)gfxbase)[j]:gfxbase[j];
+				int graphicsindex = indicestype==PHY_SHORT?((unsigned short*)gfxbase)[j]:indicestype==PHY_INTEGER?gfxbase[j]:((unsigned char*)gfxbase)[j];
 
 
 #ifdef DEBUG_TRIANGLE_MESH
@@ -343,20 +333,25 @@ void   btBvhTriangleMeshShape::setLocalScaling(const btVector3& scaling)
    if ((getLocalScaling() -scaling).length2() > SIMD_EPSILON)
    {
       btTriangleMeshShape::setLocalScaling(scaling);
-      if (m_ownsBvh)
-      {
-         m_bvh->~btOptimizedBvh();
-         btAlignedFree(m_bvh);
-      }
-      ///m_localAabbMin/m_localAabbMax is already re-calculated in btTriangleMeshShape. We could just scale aabb, but this needs some more work
-      void* mem = btAlignedAlloc(sizeof(btOptimizedBvh),16);
-      m_bvh = new(mem) btOptimizedBvh();
-      //rebuild the bvh...
-      m_bvh->build(m_meshInterface,m_useQuantizedAabbCompression,m_localAabbMin,m_localAabbMax);
-      m_ownsBvh = true;
+	  buildOptimizedBvh();
    }
 }
 
+void   btBvhTriangleMeshShape::buildOptimizedBvh() //(re)builds m_bvh from m_meshInterface using the current local aabb
+{
+	if (m_ownsBvh) //only a bvh previously allocated by this shape is destroyed and freed here
+	{
+		m_bvh->~btOptimizedBvh();
+		btAlignedFree(m_bvh);
+	}
+	///m_localAabbMin/m_localAabbMax is already re-calculated in btTriangleMeshShape. We could just scale aabb, but this needs some more work
+	void* mem = btAlignedAlloc(sizeof(btOptimizedBvh),16); //raw storage for the placement new below
+	m_bvh = new(mem) btOptimizedBvh();
+	//rebuild the bvh...
+	m_bvh->build(m_meshInterface,m_useQuantizedAabbCompression,m_localAabbMin,m_localAabbMax);
+	m_ownsBvh = true; //mark the freshly allocated bvh as owned so the next rebuild releases it
+}
+
 void   btBvhTriangleMeshShape::setOptimizedBvh(btOptimizedBvh* bvh, const btVector3& scaling)
 {
    btAssert(!m_bvh);
@@ -372,3 +367,100 @@ void   btBvhTriangleMeshShape::setOptimizedBvh(btOptimizedBvh* bvh, const btVect
 }
 
 
+
+///Fills dataBuffer (a btTriangleMeshShapeData) for this shape and returns the struct name. The bvh and the triangle info map are written as separate chunks unless findPointer already returns a value for them or the serializer flags exclude them.
+const char*	btBvhTriangleMeshShape::serialize(void* dataBuffer, btSerializer* serializer) const
+{
+	btTriangleMeshShapeData* trimeshData = (btTriangleMeshShapeData*) dataBuffer;
+
+	btCollisionShape::serialize(&trimeshData->m_collisionShapeData,serializer);
+
+	m_meshInterface->serialize(&trimeshData->m_meshInterface, serializer);
+
+	trimeshData->m_collisionMargin = float(m_collisionMargin);
+	//zero the explicit padding so no uninitialized bytes end up in the serialized data
+	for (unsigned int i=0;i<sizeof(trimeshData->m_pad3);i++) trimeshData->m_pad3[i] = 0;
+
+	if (m_bvh && !(serializer->getSerializationFlags()&BT_SERIALIZE_NO_BVH))
+	{
+		void* knownBvh = serializer->findPointer(m_bvh);
+		if (knownBvh)
+		{
+#ifdef BT_USE_DOUBLE_PRECISION
+			trimeshData->m_quantizedDoubleBvh = (btQuantizedBvhData*)knownBvh;
+			trimeshData->m_quantizedFloatBvh = 0;
+#else
+			trimeshData->m_quantizedFloatBvh  = (btQuantizedBvhData*)knownBvh;
+			trimeshData->m_quantizedDoubleBvh= 0;
+#endif //BT_USE_DOUBLE_PRECISION
+		} else
+		{
+			//findPointer returned 0: store the unique pointer for the bvh and write the bvh itself as its own chunk
+#ifdef BT_USE_DOUBLE_PRECISION
+			trimeshData->m_quantizedDoubleBvh = (btQuantizedBvhData*)serializer->getUniquePointer(m_bvh);
+			trimeshData->m_quantizedFloatBvh = 0;
+#else
+			trimeshData->m_quantizedFloatBvh  = (btQuantizedBvhData*)serializer->getUniquePointer(m_bvh);
+			trimeshData->m_quantizedDoubleBvh= 0;
+#endif //BT_USE_DOUBLE_PRECISION
+	
+			int sz = m_bvh->calculateSerializeBufferSizeNew();
+			btChunk* bvhChunk = serializer->allocate(sz,1);
+			const char* structType = m_bvh->serialize(bvhChunk->m_oldPtr, serializer);
+			serializer->finalizeChunk(bvhChunk,structType,BT_QUANTIZED_BVH_CODE,m_bvh);
+		}
+	} else
+	{
+		trimeshData->m_quantizedFloatBvh = 0;
+		trimeshData->m_quantizedDoubleBvh = 0;
+	}
+
+	//the triangle info map follows the same scheme as the bvh above
+
+	if (m_triangleInfoMap && !(serializer->getSerializationFlags()&BT_SERIALIZE_NO_TRIANGLEINFOMAP))
+	{
+		void* knownMap = serializer->findPointer(m_triangleInfoMap);
+		if (knownMap)
+		{
+			trimeshData->m_triangleInfoMap = (btTriangleInfoMapData*)knownMap;
+		} else
+		{
+			trimeshData->m_triangleInfoMap = (btTriangleInfoMapData*)serializer->getUniquePointer(m_triangleInfoMap);
+			int sz = m_triangleInfoMap->calculateSerializeBufferSize();
+			btChunk* mapChunk = serializer->allocate(sz,1);
+			const char* structType = m_triangleInfoMap->serialize(mapChunk->m_oldPtr, serializer);
+			serializer->finalizeChunk(mapChunk,structType,BT_TRIANLGE_INFO_MAP,m_triangleInfoMap);
+		}
+	} else
+	{
+		trimeshData->m_triangleInfoMap = 0;
+	}
+
+	return "btTriangleMeshShapeData";
+}
+
+void	btBvhTriangleMeshShape::serializeSingleBvh(btSerializer* serializer) const //writes m_bvh as one BT_QUANTIZED_BVH_CODE chunk; does nothing when m_bvh is 0
+{
+	if (m_bvh)
+	{
+		int len = m_bvh->calculateSerializeBufferSizeNew(); //do not use calculateSerializeBufferSize here: that one is used for in-place serialization
+		btChunk* chunk = serializer->allocate(len,1);
+		const char* structType = m_bvh->serialize(chunk->m_oldPtr, serializer); //the returned struct name is handed to finalizeChunk
+		serializer->finalizeChunk(chunk,structType,BT_QUANTIZED_BVH_CODE,(void*)m_bvh);
+	}
+}
+
+void	btBvhTriangleMeshShape::serializeSingleTriangleInfoMap(btSerializer* serializer) const //writes m_triangleInfoMap as one BT_TRIANLGE_INFO_MAP chunk; does nothing when it is 0
+{
+	if (m_triangleInfoMap)
+	{
+		int len = m_triangleInfoMap->calculateSerializeBufferSize();
+		btChunk* chunk = serializer->allocate(len,1);
+		const char* structType = m_triangleInfoMap->serialize(chunk->m_oldPtr, serializer); //the returned struct name is handed to finalizeChunk
+		serializer->finalizeChunk(chunk,structType,BT_TRIANLGE_INFO_MAP,(void*)m_triangleInfoMap);
+	}
+}
+
+
+
+
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btBvhTriangleMeshShape.h b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btBvhTriangleMeshShape.h
index 8458a7ebc..d1c216298 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btBvhTriangleMeshShape.h
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btBvhTriangleMeshShape.h
@@ -13,13 +13,13 @@ subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-#ifndef BVH_TRIANGLE_MESH_SHAPE_H
-#define BVH_TRIANGLE_MESH_SHAPE_H
+#ifndef BT_BVH_TRIANGLE_MESH_SHAPE_H
+#define BT_BVH_TRIANGLE_MESH_SHAPE_H
 
 #include "btTriangleMeshShape.h"
 #include "btOptimizedBvh.h"
 #include "LinearMath/btAlignedAllocator.h"
-
+#include "btTriangleInfoMap.h"
 
 ///The btBvhTriangleMeshShape is a static-triangle mesh shape with several optimizations, such as bounding volume hierarchy and cache friendly traversal for PlayStation 3 Cell SPU. It is recommended to enable useQuantizedAabbCompression for better memory usage.
 ///It takes a triangle mesh as input, for example a btTriangleMesh or btTriangleIndexVertexArray. The btBvhTriangleMeshShape class allows for triangle mesh deformations by a refit or partialRefit method.
@@ -29,6 +29,8 @@ ATTRIBUTE_ALIGNED16(class) btBvhTriangleMeshShape : public btTriangleMeshShape
 {
 
 	btOptimizedBvh*	m_bvh;
+	btTriangleInfoMap*	m_triangleInfoMap;
+
 	bool m_useQuantizedAabbCompression;
 	bool m_ownsBvh;
 	bool m_pad[11];////need padding due to alignment
@@ -37,7 +39,7 @@ public:
 
 	BT_DECLARE_ALIGNED_ALLOCATOR();
 
-	btBvhTriangleMeshShape() : btTriangleMeshShape(0),m_bvh(0),m_ownsBvh(false) {m_shapeType = TRIANGLE_MESH_SHAPE_PROXYTYPE;};
+	
 	btBvhTriangleMeshShape(btStridingMeshInterface* meshInterface, bool useQuantizedAabbCompression, bool buildBvh = true);
 
 	///optionally pass in a larger bvh aabb, used for quantization. This allows for deformations within this aabb
@@ -73,14 +75,65 @@ public:
 		return m_bvh;
 	}
 
-
 	void	setOptimizedBvh(btOptimizedBvh* bvh, const btVector3& localScaling=btVector3(1,1,1));
 
+	void    buildOptimizedBvh();
+
 	bool	usesQuantizedAabbCompression() const
 	{
 		return	m_useQuantizedAabbCompression;
 	}
-}
-;
 
-#endif //BVH_TRIANGLE_MESH_SHAPE_H
+	void	setTriangleInfoMap(btTriangleInfoMap* triangleInfoMap)
+	{
+		m_triangleInfoMap = triangleInfoMap;
+	}
+
+	const btTriangleInfoMap*	getTriangleInfoMap() const
+	{
+		return m_triangleInfoMap;
+	}
+	
+	btTriangleInfoMap*	getTriangleInfoMap()
+	{
+		return m_triangleInfoMap;
+	}
+
+	virtual	int	calculateSerializeBufferSize() const;
+
+	///fills the dataBuffer and returns the struct name (and 0 on failure)
+	virtual	const char*	serialize(void* dataBuffer, btSerializer* serializer) const;
+
+	virtual void	serializeSingleBvh(btSerializer* serializer) const;
+
+	virtual void	serializeSingleTriangleInfoMap(btSerializer* serializer) const;
+
+};
+
+///Serialized layout filled by btBvhTriangleMeshShape::serialize. Do not change these serialization structures: any change requires an updated sBulletDNAstr/sBulletDNAstr64.
+struct	btTriangleMeshShapeData
+{
+	btCollisionShapeData	m_collisionShapeData;
+	//filled by the mesh interface's own serialize call
+	btStridingMeshInterfaceData m_meshInterface;
+	//serialize sets only the pointer matching the build precision (BT_USE_DOUBLE_PRECISION) and writes 0 to the other one
+	btQuantizedBvhFloatData		*m_quantizedFloatBvh;
+	btQuantizedBvhDoubleData	*m_quantizedDoubleBvh;
+	//0 when the shape has no triangle info map or the serializer flags exclude it
+	btTriangleInfoMapData	*m_triangleInfoMap;
+	
+	float	m_collisionMargin;
+
+	char m_pad3[4];
+	
+};
+
+
+SIMD_FORCE_INLINE	int	btBvhTriangleMeshShape::calculateSerializeBufferSize() const
+{
+	return sizeof(btTriangleMeshShapeData); //size of the fixed struct only; serialize writes the bvh and triangle info map into separate chunks
+}
+
+
+
+#endif //BT_BVH_TRIANGLE_MESH_SHAPE_H
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btCapsuleShape.cpp b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btCapsuleShape.cpp
index 2faa11d43..864df26e9 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btCapsuleShape.cpp
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btCapsuleShape.cpp
@@ -55,7 +55,7 @@ btCapsuleShape::btCapsuleShape(btScalar radius, btScalar height) : btConvexInter
 		btVector3 pos(0,0,0);
 		pos[getUpAxis()] = getHalfHeight();
 
-		vtx = pos +vec*m_localScaling*(radius) - vec * getMargin();
+		vtx = pos +vec*(radius) - vec * getMargin();
 		newDot = vec.dot(vtx);
 		if (newDot > maxDot)
 		{
@@ -67,7 +67,7 @@ btCapsuleShape::btCapsuleShape(btScalar radius, btScalar height) : btConvexInter
 		btVector3 pos(0,0,0);
 		pos[getUpAxis()] = -getHalfHeight();
 
-		vtx = pos +vec*m_localScaling*(radius) - vec * getMargin();
+		vtx = pos +vec*(radius) - vec * getMargin();
 		newDot = vec.dot(vtx);
 		if (newDot > maxDot)
 		{
@@ -96,7 +96,7 @@ btCapsuleShape::btCapsuleShape(btScalar radius, btScalar height) : btConvexInter
 		{
 			btVector3 pos(0,0,0);
 			pos[getUpAxis()] = getHalfHeight();
-			vtx = pos +vec*m_localScaling*(radius) - vec * getMargin();
+			vtx = pos +vec*(radius) - vec * getMargin();
 			newDot = vec.dot(vtx);
 			if (newDot > maxDot)
 			{
@@ -107,7 +107,7 @@ btCapsuleShape::btCapsuleShape(btScalar radius, btScalar height) : btConvexInter
 		{
 			btVector3 pos(0,0,0);
 			pos[getUpAxis()] = -getHalfHeight();
-			vtx = pos +vec*m_localScaling*(radius) - vec * getMargin();
+			vtx = pos +vec*(radius) - vec * getMargin();
 			newDot = vec.dot(vtx);
 			if (newDot > maxDot)
 			{
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btCapsuleShape.h b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btCapsuleShape.h
index 782efb235..7578bb258 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btCapsuleShape.h
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btCapsuleShape.h
@@ -23,7 +23,7 @@ subject to the following restrictions:
 ///The btCapsuleShape represents a capsule around the Y axis, there is also the btCapsuleShapeX aligned around the X axis and btCapsuleShapeZ around the Z axis.
 ///The total height is height+2*radius, so the height is just the height between the center of each 'sphere' of the capsule caps.
 ///The btCapsuleShape is a convex hull of two spheres. The btMultiSphereShape is a more general collision shape that takes the convex hull of multiple sphere, so it can also represent a capsule when just using two spheres.
-class btCapsuleShape : public btConvexInternalShape
+ATTRIBUTE_ALIGNED16(class) btCapsuleShape : public btConvexInternalShape
 {
 protected:
 	int	m_upAxis;
@@ -33,6 +33,9 @@ protected:
 	btCapsuleShape() : btConvexInternalShape() {m_shapeType = CAPSULE_SHAPE_PROXYTYPE;};
 
 public:
+	
+	BT_DECLARE_ALIGNED_ALLOCATOR();
+	
 	btCapsuleShape(btScalar radius,btScalar height);
 
 	///CollisionShape Interface
@@ -43,6 +46,18 @@ public:
 
 	virtual void	batchedUnitVectorGetSupportingVertexWithoutMargin(const btVector3* vectors,btVector3* supportVerticesOut,int numVectors) const;
 	
+	virtual void setMargin(btScalar collisionMargin)
+	{
+		//m_implicitShapeDimensions is kept with the margin subtracted, so add the old margin back first
+		btVector3 oldMargin(getMargin(),getMargin(),getMargin());
+		btVector3 implicitShapeDimensionsWithMargin = m_implicitShapeDimensions+oldMargin;
+		//update the margin through the base class, then subtract the new value again
+		btConvexInternalShape::setMargin(collisionMargin);
+		btVector3 newMargin(getMargin(),getMargin(),getMargin());
+		m_implicitShapeDimensions = implicitShapeDimensionsWithMargin - newMargin;
+		//net effect: the dimensions including margin are the same before and after this call
+	}
+
 	virtual void getAabb (const btTransform& t, btVector3& aabbMin, btVector3& aabbMax) const
 	{
 			btVector3 halfExtents(getRadius(),getRadius(),getRadius());
@@ -50,8 +65,8 @@ public:
 			halfExtents += btVector3(getMargin(),getMargin(),getMargin());
 			btMatrix3x3 abs_b = t.getBasis().absolute();  
 			btVector3 center = t.getOrigin();
-			btVector3 extent = btVector3(abs_b[0].dot(halfExtents),abs_b[1].dot(halfExtents),abs_b[2].dot(halfExtents));		  
-			
+            btVector3 extent = halfExtents.dot3(abs_b[0], abs_b[1], abs_b[2]);
+        
 			aabbMin = center - extent;
 			aabbMax = center + extent;
 	}
@@ -88,6 +103,21 @@ public:
 		m_implicitShapeDimensions = (unScaledImplicitShapeDimensionsWithMargin * m_localScaling) - oldMargin;
 
 	}
+
+	virtual btVector3	getAnisotropicRollingFrictionDirection() const
+	{
+		btVector3 aniDir(0,0,0);
+		aniDir[getUpAxis()]=1; //1 on the capsule's up axis, the other two components stay 0
+		return aniDir;
+	}
+
+
+	virtual	int	calculateSerializeBufferSize() const;
+
+	///fills the dataBuffer and returns the struct name (and 0 on failure)
+	virtual	const char*	serialize(void* dataBuffer, btSerializer* serializer) const;
+
+
 };
 
 ///btCapsuleShapeX represents a capsule around the Z axis
@@ -124,6 +154,31 @@ public:
 	
 };
 
+///Serialized layout filled by btCapsuleShape::serialize. Do not change these serialization structures: any change requires an updated sBulletDNAstr/sBulletDNAstr64.
+struct	btCapsuleShapeData
+{
+	btConvexInternalShapeData	m_convexInternalShapeData;
 
+	int	m_upAxis;
+	//explicit padding bytes following m_upAxis
+	char	m_padding[4];
+};
+
+SIMD_FORCE_INLINE	int	btCapsuleShape::calculateSerializeBufferSize() const
+{
+	return sizeof(btCapsuleShapeData); //serialize fills exactly one btCapsuleShapeData
+}
+
+///Fills dataBuffer (a btCapsuleShapeData) with the convex-internal-shape data and the up axis, zeroes the padding, and returns the struct name.
+SIMD_FORCE_INLINE	const char*	btCapsuleShape::serialize(void* dataBuffer, btSerializer* serializer) const
+{
+	btCapsuleShapeData* shapeData = (btCapsuleShapeData*) dataBuffer;
+	
+	btConvexInternalShape::serialize(&shapeData->m_convexInternalShapeData,serializer);
+	shapeData->m_upAxis = m_upAxis;
+	//zero the explicit padding so no uninitialized bytes end up in the serialized data
+	for (unsigned int i=0;i<sizeof(shapeData->m_padding);i++) shapeData->m_padding[i] = 0;
+	return "btCapsuleShapeData";
+}
 
 #endif //BT_CAPSULE_SHAPE_H
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btCollisionMargin.h b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btCollisionMargin.h
index 18fd02604..474bf1fb4 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btCollisionMargin.h
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btCollisionMargin.h
@@ -13,14 +13,15 @@ subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-#ifndef COLLISION_MARGIN_H
-#define COLLISION_MARGIN_H
-
-//used by Gjk and some other algorithms
+#ifndef BT_COLLISION_MARGIN_H
+#define BT_COLLISION_MARGIN_H
 
+///The CONVEX_DISTANCE_MARGIN is a default collision margin for convex collision shapes derived from btConvexInternalShape.
+///This collision margin is used by Gjk and some other algorithms
+///Note that when creating small objects, you need to make sure to set a smaller collision margin, using the 'setMargin' API
 #define CONVEX_DISTANCE_MARGIN btScalar(0.04)// btScalar(0.1)//;//btScalar(0.01)
 
 
 
-#endif //COLLISION_MARGIN_H
+#endif //BT_COLLISION_MARGIN_H
 
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btCollisionShape.cpp b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btCollisionShape.cpp
index b534998a1..39ee21cad 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btCollisionShape.cpp
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btCollisionShape.cpp
@@ -13,10 +13,7 @@ subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 #include "BulletCollision/CollisionShapes/btCollisionShape.h"
-
-
-btScalar gContactThresholdFactor=btScalar(0.02);
-
+#include "LinearMath/btSerializer.h"
 
 /*
   Make sure this dummy function never changes so that it
@@ -45,10 +42,11 @@ void	btCollisionShape::getBoundingSphere(btVector3& center,btScalar& radius) con
 }
 
 
-btScalar	btCollisionShape::getContactBreakingThreshold() const
+btScalar	btCollisionShape::getContactBreakingThreshold(btScalar defaultContactThreshold) const
 {
-	return getAngularMotionDisc() * gContactThresholdFactor;
+	return getAngularMotionDisc() * defaultContactThreshold;
 }
+
 btScalar	btCollisionShape::getAngularMotionDisc() const
 {
 	///@todo cache this value, to improve performance
@@ -96,3 +94,26 @@ void btCollisionShape::calculateTemporalAabb(const btTransform& curTrans,const b
 	temporalAabbMin -= angularMotion3d;
 	temporalAabbMax += angularMotion3d;
 }
+
+///Fills dataBuffer (a btCollisionShapeData) with the shape's name pointer and shape type and returns the struct name; no path in this implementation returns 0.
+const char*	btCollisionShape::serialize(void* dataBuffer, btSerializer* serializer) const
+{
+	btCollisionShapeData* shapeData = (btCollisionShapeData*) dataBuffer;
+	char* name = (char*) serializer->findNameForPointer(this);
+	shapeData->m_name = (char*)serializer->getUniquePointer(name);
+	if (shapeData->m_name) //the name string is only serialized when getUniquePointer returned non-null
+	{
+		serializer->serializeName(name);
+	}
+	shapeData->m_shapeType = m_shapeType;
+	//NOTE(review): shapeData->m_padding is not written here - confirm whether the serializer hands out zeroed buffers
+	return "btCollisionShapeData";
+}
+
+void	btCollisionShape::serializeSingleShape(btSerializer* serializer) const //writes this shape as one BT_SHAPE_CODE chunk
+{
+	int len = calculateSerializeBufferSize();
+	btChunk* chunk = serializer->allocate(len,1);
+	const char* structType = serialize(chunk->m_oldPtr, serializer); //the returned struct name is handed to finalizeChunk
+	serializer->finalizeChunk(chunk,structType,BT_SHAPE_CODE,(void*)this);
+}
\ No newline at end of file
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btCollisionShape.h b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btCollisionShape.h
index 215758ef5..ff017a206 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btCollisionShape.h
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btCollisionShape.h
@@ -13,16 +13,18 @@ subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-#ifndef COLLISION_SHAPE_H
-#define COLLISION_SHAPE_H
+#ifndef BT_COLLISION_SHAPE_H
+#define BT_COLLISION_SHAPE_H
 
 #include "LinearMath/btTransform.h"
 #include "LinearMath/btVector3.h"
 #include "LinearMath/btMatrix3x3.h"
 #include "BulletCollision/BroadphaseCollision/btBroadphaseProxy.h" //for the shape types
+class btSerializer;
+
 
 ///The btCollisionShape class provides an interface for collision shapes that can be shared among btCollisionObjects.
-class btCollisionShape
+ATTRIBUTE_ALIGNED16(class) btCollisionShape
 {
 protected:
 	int m_shapeType;
@@ -30,6 +32,8 @@ protected:
 
 public:
 
+	BT_DECLARE_ALIGNED_ALLOCATOR();
+
 	btCollisionShape() : m_shapeType (INVALID_SHAPE_PROXYTYPE), m_userPointer(0)
 	{
 	}
@@ -46,14 +50,14 @@ public:
 	///getAngularMotionDisc returns the maximus radius needed for Conservative Advancement to handle time-of-impact with rotations.
 	virtual btScalar	getAngularMotionDisc() const;
 
-	virtual btScalar	getContactBreakingThreshold() const;
+	virtual btScalar	getContactBreakingThreshold(btScalar defaultContactThresholdFactor) const;
 
 
 	///calculateTemporalAabb calculates the enclosing aabb for the moving object over interval [0..timeStep)
 	///result is conservative
 	void calculateTemporalAabb(const btTransform& curTrans,const btVector3& linvel,const btVector3& angvel,btScalar timeStep, btVector3& temporalAabbMin,btVector3& temporalAabbMax) const;
 
-#ifndef __SPU__
+
 
 	SIMD_FORCE_INLINE bool	isPolyhedral() const
 	{
@@ -69,6 +73,10 @@ public:
 	{
 		return btBroadphaseProxy::isConvex(getShapeType());
 	}
+	SIMD_FORCE_INLINE bool	isNonMoving() const
+	{
+		return btBroadphaseProxy::isNonMoving(getShapeType());
+	}
 	SIMD_FORCE_INLINE bool	isConcave() const
 	{
 		return btBroadphaseProxy::isConcave(getShapeType());
@@ -78,13 +86,18 @@ public:
 		return btBroadphaseProxy::isCompound(getShapeType());
 	}
 
+	SIMD_FORCE_INLINE bool	isSoftBody() const
+	{
+		return btBroadphaseProxy::isSoftBody(getShapeType());
+	}
+
 	///isInfinite is used to catch simulation error (aabb check)
 	SIMD_FORCE_INLINE bool isInfinite() const
 	{
 		return btBroadphaseProxy::isInfinite(getShapeType());
 	}
 
-	
+#ifndef __SPU__
 	virtual void	setLocalScaling(const btVector3& scaling) =0;
 	virtual const btVector3& getLocalScaling() const =0;
 	virtual void	calculateLocalInertia(btScalar mass,btVector3& inertia) const = 0;
@@ -96,6 +109,13 @@ public:
 
 	
 	int		getShapeType() const { return m_shapeType; }
+
+	///the getAnisotropicRollingFrictionDirection can be used in combination with setAnisotropicFriction
+	///See Bullet/Demos/RollingFrictionDemo for an example
+	virtual btVector3	getAnisotropicRollingFrictionDirection() const
+	{
+		return btVector3(1,1,1);
+	}
 	virtual void	setMargin(btScalar margin) = 0;
 	virtual btScalar	getMargin() const = 0;
 
@@ -111,7 +131,29 @@ public:
 		return m_userPointer;
 	}
 
+	virtual	int	calculateSerializeBufferSize() const;
+
+	///fills the dataBuffer and returns the struct name (and 0 on failure)
+	virtual	const char*	serialize(void* dataBuffer, btSerializer* serializer) const;
+
+	virtual void	serializeSingleShape(btSerializer* serializer) const;
+
 };	
 
-#endif //COLLISION_SHAPE_H
+///do not change those serialization structures, it requires an updated sBulletDNAstr/sBulletDNAstr64
+struct	btCollisionShapeData
+{
+	char	*m_name;
+	int		m_shapeType;
+	char	m_padding[4];
+};
+
+SIMD_FORCE_INLINE	int	btCollisionShape::calculateSerializeBufferSize() const
+{
+	return sizeof(btCollisionShapeData);
+}
+
+
+
+#endif //BT_COLLISION_SHAPE_H
 
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btCompoundShape.cpp b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btCompoundShape.cpp
index d77193874..12f422f19 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btCompoundShape.cpp
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btCompoundShape.cpp
@@ -16,6 +16,7 @@ subject to the following restrictions:
 #include "btCompoundShape.h"
 #include "btCollisionShape.h"
 #include "BulletCollision/BroadphaseCollision/btDbvt.h"
+#include "LinearMath/btSerializer.h"
 
 btCompoundShape::btCompoundShape(bool enableDynamicAabbTree)
 : m_localAabbMin(btScalar(BT_LARGE_FLOAT),btScalar(BT_LARGE_FLOAT),btScalar(BT_LARGE_FLOAT)),
@@ -51,6 +52,7 @@ void	btCompoundShape::addChildShape(const btTransform& localTransform,btCollisio
 	//m_childTransforms.push_back(localTransform);
 	//m_childShapes.push_back(shape);
 	btCompoundShapeChild child;
+	child.m_node = 0;
 	child.m_transform = localTransform;
 	child.m_childShape = shape;
 	child.m_childShapeType = shape->getShapeType();
@@ -83,7 +85,7 @@ void	btCompoundShape::addChildShape(const btTransform& localTransform,btCollisio
 
 }
 
-void	btCompoundShape::updateChildTransform(int childIndex, const btTransform& newChildTransform)
+void	btCompoundShape::updateChildTransform(int childIndex, const btTransform& newChildTransform,bool shouldRecalculateLocalAabb)
 {
 	m_children[childIndex].m_transform = newChildTransform;
 
@@ -97,7 +99,10 @@ void	btCompoundShape::updateChildTransform(int childIndex, const btTransform& ne
 		m_dynamicAabbTree->update(m_children[childIndex].m_node,bounds);
 	}
 
-	recalculateLocalAabb();
+	if (shouldRecalculateLocalAabb)
+	{
+		recalculateLocalAabb();
+	}
 }
 
 void btCompoundShape::removeChildShapeByIndex(int childShapeIndex)
@@ -109,6 +114,8 @@ void btCompoundShape::removeChildShapeByIndex(int childShapeIndex)
 		m_dynamicAabbTree->remove(m_children[childShapeIndex].m_node);
 	}
 	m_children.swap(childShapeIndex,m_children.size()-1);
+    if (m_dynamicAabbTree) 
+		m_children[childShapeIndex].m_node->dataAsInt = childShapeIndex;
 	m_children.pop_back();
 
 }
@@ -175,9 +182,7 @@ void btCompoundShape::getAabb(const btTransform& trans,btVector3& aabbMin,btVect
 
 	btVector3 center = trans(localCenter);
 
-	btVector3 extent = btVector3(abs_b[0].dot(localHalfExtents),
-		abs_b[1].dot(localHalfExtents),
-		abs_b[2].dot(localHalfExtents));
+    btVector3 extent = localHalfExtents.dot3(abs_b[0], abs_b[1], abs_b[2]);
 	aabbMin = center-extent;
 	aabbMax = center+extent;
 	
@@ -216,9 +221,13 @@ void btCompoundShape::calculatePrincipalAxisTransform(btScalar* masses, btTransf
 
 	for (k = 0; k < n; k++)
 	{
+		btAssert(masses[k]>0);
 		center += m_children[k].m_transform.getOrigin() * masses[k];
 		totalMass += masses[k];
 	}
+
+	btAssert(totalMass>0);
+
 	center /= totalMass;
 	principal.setOrigin(center);
 
@@ -264,3 +273,82 @@ void btCompoundShape::calculatePrincipalAxisTransform(btScalar* masses, btTransf
 
 
 
+void btCompoundShape::setLocalScaling(const btVector3& scaling)
+{
+	//Rescales every child (its shape scaling and its local origin) to 'scaling', then stores it and refreshes the local aabb once.
+	for(int i = 0; i < m_children.size(); i++)
+	{
+		btTransform childTrans = getChildTransform(i);
+		btVector3 childScale = m_children[i].m_childShape->getLocalScaling();
+		//divide out the previously applied compound scaling, for the scale and the origin alike, so repeated calls do not compound
+		childScale = childScale * scaling / m_localScaling;
+		m_children[i].m_childShape->setLocalScaling(childScale);
+		childTrans.setOrigin((childTrans.getOrigin())*scaling / m_localScaling);
+		updateChildTransform(i, childTrans,false);
+	}
+	
+	m_localScaling = scaling;
+	recalculateLocalAabb();
+
+}
+
+
+void btCompoundShape::createAabbTreeFromChildren()
+{
+    //Creates the dynamic aabb tree and inserts one leaf per existing child; does nothing when a tree already exists.
+    if ( !m_dynamicAabbTree )
+    {
+        void* mem = btAlignedAlloc(sizeof(btDbvt),16);
+        m_dynamicAabbTree = new(mem) btDbvt();
+        btAssert(mem==m_dynamicAabbTree);
+
+        for ( int index = 0; index < m_children.size(); index++ )
+        {
+            btCompoundShapeChild &child = m_children[index];
+            //aabb of the child shape under its own child transform
+            btVector3 localAabbMin,localAabbMax;
+            child.m_childShape->getAabb(child.m_transform,localAabbMin,localAabbMax);
+            const btDbvtVolume  bounds=btDbvtVolume::FromMM(localAabbMin,localAabbMax);
+            //the leaf carries the child index; widen through size_t because int and void* differ in size on 64-bit targets
+            child.m_node = m_dynamicAabbTree->insert(bounds,reinterpret_cast<void*>(static_cast<size_t>(index)));
+        }
+    }
+}
+
+
+///fills the dataBuffer with a btCompoundShapeData and returns the struct name; the child records, and child shapes not serialized yet, go into separate chunks
+const char*	btCompoundShape::serialize(void* dataBuffer, btSerializer* serializer) const
+{
+	//base record first, then the compound specific fields
+	btCompoundShapeData* shapeData = (btCompoundShapeData*) dataBuffer;
+	btCollisionShape::serialize(&shapeData->m_collisionShapeData, serializer);
+
+	shapeData->m_collisionMargin = float(m_collisionMargin);
+	shapeData->m_numChildShapes = m_children.size();
+	shapeData->m_childShapePtr = 0;
+	if (shapeData->m_numChildShapes)
+	{
+		btChunk* childArrayChunk = serializer->allocate(sizeof(btCompoundShapeChildData),shapeData->m_numChildShapes);
+		btCompoundShapeChildData* memPtr = (btCompoundShapeChildData*)childArrayChunk->m_oldPtr;
+		shapeData->m_childShapePtr = (btCompoundShapeChildData*)serializer->getUniquePointer(memPtr);
+
+		for (int i=0;i<shapeData->m_numChildShapes;i++,memPtr++)
+		{
+			memPtr->m_childMargin = float(m_children[i].m_childMargin);
+			memPtr->m_childShape = (btCollisionShapeData*)serializer->getUniquePointer(m_children[i].m_childShape);
+			//don't serialize shapes that already have been serialized
+			if (!serializer->findPointer(m_children[i].m_childShape))
+			{
+				btChunk* shapeChunk = serializer->allocate(m_children[i].m_childShape->calculateSerializeBufferSize(),1);
+				const char* structType = m_children[i].m_childShape->serialize(shapeChunk->m_oldPtr,serializer);
+				serializer->finalizeChunk(shapeChunk,structType,BT_SHAPE_CODE,m_children[i].m_childShape);
+			} 
+
+			memPtr->m_childShapeType = m_children[i].m_childShapeType;
+			m_children[i].m_transform.serializeFloat(memPtr->m_transform);
+		}
+		serializer->finalizeChunk(childArrayChunk,"btCompoundShapeChildData",BT_ARRAY_CODE,childArrayChunk->m_oldPtr);
+	}
+	return "btCompoundShapeData";
+}
+
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btCompoundShape.h b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btCompoundShape.h
index 577ef9513..141034a8e 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btCompoundShape.h
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btCompoundShape.h
@@ -13,8 +13,8 @@ subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-#ifndef COMPOUND_SHAPE_H
-#define COMPOUND_SHAPE_H
+#ifndef BT_COMPOUND_SHAPE_H
+#define BT_COMPOUND_SHAPE_H
 
 #include "btCollisionShape.h"
 
@@ -62,6 +62,11 @@ ATTRIBUTE_ALIGNED16(class) btCompoundShape	: public btCollisionShape
 	///increment m_updateRevision when adding/removing/replacing child shapes, so that some caches can be updated
 	int								m_updateRevision;
 
+	btScalar	m_collisionMargin;
+
+protected:
+	btVector3	m_localScaling;
+
 public:
 	BT_DECLARE_ALIGNED_ALLOCATOR();
 
@@ -101,7 +106,7 @@ public:
 	}
 
 	///set a new transform for a child, and update internal data structures (local aabb and dynamic tree)
-	void	updateChildTransform(int childIndex, const btTransform& newChildTransform);
+	void	updateChildTransform(int childIndex, const btTransform& newChildTransform, bool shouldRecalculateLocalAabb = true);
 
 
 	btCompoundShapeChild* getChildList()
@@ -116,10 +121,8 @@ public:
 	Use this yourself if you modify the children or their transforms. */
 	virtual void recalculateLocalAabb(); 
 
-	virtual void	setLocalScaling(const btVector3& scaling)
-	{
-		m_localScaling = scaling;
-	}
+	virtual void	setLocalScaling(const btVector3& scaling);
+
 	virtual const btVector3& getLocalScaling() const 
 	{
 		return m_localScaling;
@@ -140,13 +143,17 @@ public:
 		return "Compound";
 	}
 
-	//this is optional, but should make collision queries faster, by culling non-overlapping nodes
-	void	createAabbTreeFromChildren();
-
-	btDbvt*							getDynamicAabbTree()
+	const btDbvt*	getDynamicAabbTree() const
 	{
 		return m_dynamicAabbTree;
 	}
+	
+	btDbvt*	getDynamicAabbTree()
+	{
+		return m_dynamicAabbTree;
+	}
+
+	void createAabbTreeFromChildren();
 
 	///computes the exact moment of inertia and the transform from the coordinate system defined by the principal axes of the moment of inertia
 	///and the center of mass to the current coordinate system. "masses" points to an array of masses of the children. The resulting transform
@@ -160,13 +167,46 @@ public:
 		return m_updateRevision;
 	}
 
-private:
-	btScalar	m_collisionMargin;
-protected:
-	btVector3	m_localScaling;
+	virtual	int	calculateSerializeBufferSize() const;
+
+	///fills the dataBuffer and returns the struct name (and 0 on failure)
+	virtual	const char*	serialize(void* dataBuffer, btSerializer* serializer) const;
+
+
+};
+
+///do not change those serialization structures, it requires an updated sBulletDNAstr/sBulletDNAstr64
+struct btCompoundShapeChildData
+{
+	btTransformFloatData	m_transform;
+	btCollisionShapeData	*m_childShape;
+	int						m_childShapeType;
+	float					m_childMargin;
+};
+
+///do not change those serialization structures, it requires an updated sBulletDNAstr/sBulletDNAstr64
+struct	btCompoundShapeData
+{
+	btCollisionShapeData		m_collisionShapeData;
+
+	btCompoundShapeChildData	*m_childShapePtr;
+
+	int							m_numChildShapes;
+
+	float	m_collisionMargin;
 
 };
 
 
+SIMD_FORCE_INLINE	int	btCompoundShape::calculateSerializeBufferSize() const
+{
+	return sizeof(btCompoundShapeData);
+}
 
-#endif //COMPOUND_SHAPE_H
+
+
+
+
+
+
+#endif //BT_COMPOUND_SHAPE_H
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btConcaveShape.h b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btConcaveShape.h
index 2a370a47c..2917cc5b6 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btConcaveShape.h
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btConcaveShape.h
@@ -13,8 +13,8 @@ subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-#ifndef CONCAVE_SHAPE_H
-#define CONCAVE_SHAPE_H
+#ifndef BT_CONCAVE_SHAPE_H
+#define BT_CONCAVE_SHAPE_H
 
 #include "btCollisionShape.h"
 #include "BulletCollision/BroadphaseCollision/btBroadphaseProxy.h" // for the types
@@ -33,12 +33,14 @@ typedef enum PHY_ScalarType {
 
 ///The btConcaveShape class provides an interface for non-moving (static) concave shapes.
 ///It has been implemented by the btStaticPlaneShape, btBvhTriangleMeshShape and btHeightfieldTerrainShape.
-class btConcaveShape : public btCollisionShape
+ATTRIBUTE_ALIGNED16(class) btConcaveShape : public btCollisionShape
 {
 protected:
 	btScalar m_collisionMargin;
 
 public:
+	BT_DECLARE_ALIGNED_ALLOCATOR();
+	
 	btConcaveShape();
 
 	virtual ~btConcaveShape();
@@ -57,4 +59,4 @@ public:
 
 };
 
-#endif //CONCAVE_SHAPE_H
+#endif //BT_CONCAVE_SHAPE_H
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btConeShape.cpp b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btConeShape.cpp
index dfa3f941e..5e83087b3 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btConeShape.cpp
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btConeShape.cpp
@@ -131,3 +131,13 @@ btVector3	btConeShape::localGetSupportingVertex(const btVector3& vec)  const
 }
 
 
+void	btConeShape::setLocalScaling(const btVector3& scaling)
+{
+	int axis = m_coneIndices[1];	//component used for the height
+	int r1 = m_coneIndices[0];	//r1 and r2: the two components used for the radius
+	int r2 = m_coneIndices[2];
+	m_height *= scaling[axis] / m_localScaling[axis];	//ratio new/old scaling, so the previous scaling is divided out
+	m_radius *= (scaling[r1] / m_localScaling[r1] + scaling[r2] / m_localScaling[r2]) / 2;	//single radius: scaled by the mean of the two ratios
+	m_sinAngle = (m_radius / btSqrt(m_radius * m_radius + m_height * m_height));	//recomputed from the new radius and height
+	btConvexInternalShape::setLocalScaling(scaling);
+}
\ No newline at end of file
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btConeShape.h b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btConeShape.h
index 9766c7ef2..5966ae48f 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btConeShape.h
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btConeShape.h
@@ -13,14 +13,14 @@ subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-#ifndef CONE_MINKOWSKI_H
-#define CONE_MINKOWSKI_H
+#ifndef BT_CONE_MINKOWSKI_H
+#define BT_CONE_MINKOWSKI_H
 
 #include "btConvexInternalShape.h"
 #include "BulletCollision/BroadphaseCollision/btBroadphaseProxy.h" // for the types
 
 ///The btConeShape implements a cone shape primitive, centered around the origin and aligned with the Y axis. The btConeShapeX is aligned around the X axis and btConeShapeZ around the Z axis.
-class btConeShape : public btConvexInternalShape
+ATTRIBUTE_ALIGNED16(class) btConeShape : public btConvexInternalShape
 
 {
 
@@ -32,6 +32,8 @@ class btConeShape : public btConvexInternalShape
 
 
 public:
+	BT_DECLARE_ALIGNED_ALLOCATOR();
+	
 	btConeShape (btScalar radius,btScalar height);
 	
 	virtual btVector3	localGetSupportingVertex(const btVector3& vec) const;
@@ -81,6 +83,14 @@ public:
 		{
 			return m_coneIndices[1];
 		}
+
+	virtual btVector3	getAnisotropicRollingFrictionDirection() const
+	{
+		return btVector3 (0,1,0);
+	}
+
+	virtual void	setLocalScaling(const btVector3& scaling);
+
 };
 
 ///btConeShape implements a Cone shape, around the X axis
@@ -88,6 +98,12 @@ class btConeShapeX : public btConeShape
 {
 	public:
 		btConeShapeX(btScalar radius,btScalar height);
+
+	virtual btVector3	getAnisotropicRollingFrictionDirection() const
+	{
+		return btVector3 (1,0,0);
+	}
+
 };
 
 ///btConeShapeZ implements a Cone shape, around the Z axis
@@ -95,6 +111,12 @@ class btConeShapeZ : public btConeShape
 {
 	public:
 		btConeShapeZ(btScalar radius,btScalar height);
-};
-#endif //CONE_MINKOWSKI_H
+
+	virtual btVector3	getAnisotropicRollingFrictionDirection() const
+	{
+		return btVector3 (0,0,1);
+	}
+
+};
+#endif //BT_CONE_MINKOWSKI_H
 
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btConvex2dShape.h b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btConvex2dShape.h
index 58166c829..bbd1caf42 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btConvex2dShape.h
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btConvex2dShape.h
@@ -19,14 +19,16 @@ subject to the following restrictions:
 #include "BulletCollision/CollisionShapes/btConvexShape.h"
 #include "BulletCollision/BroadphaseCollision/btBroadphaseProxy.h" // for the types
 
-///The btConvex2dShape allows to use arbitrary convex shapes are 2d convex shapes, with the Z component assumed to be 0.
+///The btConvex2dShape allows to use arbitrary convex shapes as 2d convex shapes, with the Z component assumed to be 0.
 ///For 2d boxes, the btBox2dShape is recommended.
-class btConvex2dShape : public btConvexShape
+ATTRIBUTE_ALIGNED16(class) btConvex2dShape : public btConvexShape
 {
 	btConvexShape*	m_childConvexShape;
 
 	public:
 	
+	BT_DECLARE_ALIGNED_ALLOCATOR();
+	
 	btConvex2dShape(	btConvexShape* convexChildShape);
 	
 	virtual ~btConvex2dShape();
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btConvexHullShape.cpp b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btConvexHullShape.cpp
index 3008524e0..4d0ca1451 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btConvexHullShape.cpp
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btConvexHullShape.cpp
@@ -17,7 +17,7 @@ subject to the following restrictions:
 #include "BulletCollision/CollisionShapes/btCollisionMargin.h"
 
 #include "LinearMath/btQuaternion.h"
-
+#include "LinearMath/btSerializer.h"
 
 btConvexHullShape ::btConvexHullShape (const btScalar* points,int numPoints,int stride) : btPolyhedralConvexAabbCachingShape ()
 {
@@ -52,35 +52,20 @@ void btConvexHullShape::addPoint(const btVector3& point)
 
 }
 
-btVector3	btConvexHullShape::localGetSupportingVertexWithoutMargin(const btVector3& vec0)const
+btVector3	btConvexHullShape::localGetSupportingVertexWithoutMargin(const btVector3& vec)const
 {
 	btVector3 supVec(btScalar(0.),btScalar(0.),btScalar(0.));
-	btScalar newDot,maxDot = btScalar(-BT_LARGE_FLOAT);
+	btScalar maxDot = btScalar(-BT_LARGE_FLOAT);
 
-	btVector3 vec = vec0;
-	btScalar lenSqr = vec.length2();
-	if (lenSqr < btScalar(0.0001))
-	{
-		vec.setValue(1,0,0);
-	} else
-	{
-		btScalar rlen = btScalar(1.) / btSqrt(lenSqr );
-		vec *= rlen;
-	}
+    // Here we take advantage of dot(a, b*c) = dot(a*b, c).  Note: This is true mathematically, but not numerically. 
+    if( 0 < m_unscaledPoints.size() )
+    {
+        btVector3 scaled = vec * m_localScaling;
+        int index = (int) scaled.maxDot( &m_unscaledPoints[0], m_unscaledPoints.size(), maxDot); // FIXME: may violate encapsulation of m_unscaledPoints
+        return m_unscaledPoints[index] * m_localScaling;
+    }
 
-
-	for (int i=0;i<m_unscaledPoints.size();i++)
-	{
-		btVector3 vtx = m_unscaledPoints[i] * m_localScaling;
-
-		newDot = vec.dot(vtx);
-		if (newDot > maxDot)
-		{
-			maxDot = newDot;
-			supVec = vtx;
-		}
-	}
-	return supVec;
+    return supVec;
 }
 
 void	btConvexHullShape::batchedUnitVectorGetSupportingVertexWithoutMargin(const btVector3* vectors,btVector3* supportVerticesOut,int numVectors) const
@@ -93,23 +78,19 @@ void	btConvexHullShape::batchedUnitVectorGetSupportingVertexWithoutMargin(const
 			supportVerticesOut[i][3] = btScalar(-BT_LARGE_FLOAT);
 		}
 	}
-	for (int i=0;i<m_unscaledPoints.size();i++)
-	{
-		btVector3 vtx = getScaledPoint(i);
 
-		for (int j=0;j<numVectors;j++)
-		{
-			const btVector3& vec = vectors[j];
-			
-			newDot = vec.dot(vtx);
-			if (newDot > supportVerticesOut[j][3])
-			{
-				//WARNING: don't swap next lines, the w component would get overwritten!
-				supportVerticesOut[j] = vtx;
-				supportVerticesOut[j][3] = newDot;
-			}
-		}
-	}
+    for (int j=0;j<numVectors;j++)
+    {
+        btVector3 vec = vectors[j] * m_localScaling;        // dot(a*b,c) = dot(a,b*c)
+        if( 0 <  m_unscaledPoints.size() )
+        {
+            int i = (int) vec.maxDot( &m_unscaledPoints[0], m_unscaledPoints.size(), newDot);
+            supportVerticesOut[j] = getScaledPoint(i);
+            supportVerticesOut[j][3] = newDot;        
+        }
+        else
+            supportVerticesOut[j][3] = -BT_LARGE_FLOAT;
+    }
 
 
 
@@ -186,3 +167,79 @@ bool btConvexHullShape::isInside(const btVector3& ,btScalar ) const
 	return false;
 }
 
+///fills the dataBuffer and returns the struct name (and 0 on failure)
+const char*	btConvexHullShape::serialize(void* dataBuffer, btSerializer* serializer) const
+{
+	//Fills a btConvexHullShapeData in dataBuffer and writes the unscaled points as a separate array chunk.
+	btConvexHullShapeData* shapeData = (btConvexHullShapeData*) dataBuffer;
+	btConvexInternalShape::serialize(&shapeData->m_convexInternalShapeData, serializer);
+
+	int numElem = m_unscaledPoints.size();
+	shapeData->m_numUnscaledPoints = numElem;
+#ifdef BT_USE_DOUBLE_PRECISION
+	shapeData->m_unscaledPointsFloatPtr = 0;
+	shapeData->m_unscaledPointsDoublePtr = numElem ? (btVector3Data*)serializer->getUniquePointer((void*)&m_unscaledPoints[0]):  0;
+#else
+	shapeData->m_unscaledPointsFloatPtr = numElem ? (btVector3Data*)serializer->getUniquePointer((void*)&m_unscaledPoints[0]):  0;
+	shapeData->m_unscaledPointsDoublePtr = 0;
+#endif
+	//clear the padding explicitly so the bytes written out never depend on what dataBuffer held before
+	for (int i=0;i<int(sizeof(shapeData->m_padding3));i++) shapeData->m_padding3[i] = 0;
+	
+	if (numElem)
+	{
+		int sz = sizeof(btVector3Data);
+		btChunk* chunk = serializer->allocate(sz,numElem);
+		btVector3Data* memPtr = (btVector3Data*)chunk->m_oldPtr;
+		for (int i=0;i<numElem;i++,memPtr++)
+		{
+			m_unscaledPoints[i].serialize(*memPtr);
+		}
+		serializer->finalizeChunk(chunk,btVector3DataName,BT_ARRAY_CODE,(void*)&m_unscaledPoints[0]);
+	}
+	
+	return "btConvexHullShapeData";
+}
+
+void btConvexHullShape::project(const btTransform& trans, const btVector3& dir, btScalar& minProj, btScalar& maxProj, btVector3& witnesPtMin,btVector3& witnesPtMax) const
+{
+#if 1
+	minProj = FLT_MAX;
+	maxProj = -FLT_MAX;
+
+	int numVerts = m_unscaledPoints.size();
+	for(int i=0;i<numVerts;i++)
+	{
+		btVector3 vtx = m_unscaledPoints[i] * m_localScaling;
+		btVector3 pt = trans * vtx;
+		btScalar dp = pt.dot(dir);
+		if(dp < minProj)	
+		{
+			minProj = dp;
+			witnesPtMin = pt;
+		}
+		if(dp > maxProj)	
+		{
+			maxProj = dp;
+			witnesPtMax=pt;
+		}
+	}
+#else
+	btVector3 localAxis = dir*trans.getBasis();
+	witnesPtMin  = trans(localGetSupportingVertex(localAxis));
+	witnesPtMax = trans(localGetSupportingVertex(-localAxis));
+
+	minProj = witnesPtMin.dot(dir);
+	maxProj = witnesPtMax.dot(dir);
+#endif
+
+	if(minProj>maxProj)
+	{
+		btSwap(minProj,maxProj);
+		btSwap(witnesPtMin,witnesPtMax);
+	}
+
+
+}
+
+
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btConvexHullShape.h b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btConvexHullShape.h
index 2b0494d19..f4e8f644b 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btConvexHullShape.h
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btConvexHullShape.h
@@ -13,8 +13,8 @@ subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-#ifndef CONVEX_HULL_SHAPE_H
-#define CONVEX_HULL_SHAPE_H
+#ifndef BT_CONVEX_HULL_SHAPE_H
+#define BT_CONVEX_HULL_SHAPE_H
 
 #include "btPolyhedralConvexShape.h"
 #include "BulletCollision/BroadphaseCollision/btBroadphaseProxy.h" // for the types
@@ -73,6 +73,8 @@ public:
 	virtual void	batchedUnitVectorGetSupportingVertexWithoutMargin(const btVector3* vectors,btVector3* supportVerticesOut,int numVectors) const;
 	
 
+	virtual void project(const btTransform& trans, const btVector3& dir, btScalar& minProj, btScalar& maxProj, btVector3& witnesPtMin,btVector3& witnesPtMax) const;
+
 
 	//debugging
 	virtual const char*	getName()const {return "Convex";}
@@ -89,8 +91,32 @@ public:
 	///in case we receive negative scaling
 	virtual void	setLocalScaling(const btVector3& scaling);
 
+	virtual	int	calculateSerializeBufferSize() const;
+
+	///fills the dataBuffer and returns the struct name (and 0 on failure)
+	virtual	const char*	serialize(void* dataBuffer, btSerializer* serializer) const;
+
+};
+
+///do not change those serialization structures, it requires an updated sBulletDNAstr/sBulletDNAstr64
+struct	btConvexHullShapeData
+{
+	btConvexInternalShapeData	m_convexInternalShapeData;
+
+	btVector3FloatData	*m_unscaledPointsFloatPtr;
+	btVector3DoubleData	*m_unscaledPointsDoublePtr;
+
+	int		m_numUnscaledPoints;
+	char m_padding3[4];
+
 };
 
 
-#endif //CONVEX_HULL_SHAPE_H
+SIMD_FORCE_INLINE	int	btConvexHullShape::calculateSerializeBufferSize() const
+{
+	return sizeof(btConvexHullShapeData);
+}
+
+
+#endif //BT_CONVEX_HULL_SHAPE_H
 
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btConvexInternalShape.h b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btConvexInternalShape.h
index 3865d8e4b..37e04f5fc 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btConvexInternalShape.h
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btConvexInternalShape.h
@@ -19,8 +19,14 @@ subject to the following restrictions:
 #include "btConvexShape.h"
 #include "LinearMath/btAabbUtil2.h"
 
+
 ///The btConvexInternalShape is an internal base class, shared by most convex shape implementations.
-class btConvexInternalShape : public btConvexShape
+///The btConvexInternalShape uses a default collision margin set to CONVEX_DISTANCE_MARGIN.
+///This collision margin is used by Gjk and some other algorithms, see also btCollisionMargin.h
+///Note that when creating small shapes (derived from btConvexInternalShape), 
+///you need to make sure to set a smaller collision margin, using the 'setMargin' API
+///There is an automatic mechanism 'setSafeMargin' used by btBoxShape and btCylinderShape
+ATTRIBUTE_ALIGNED16(class) btConvexInternalShape : public btConvexShape
 {
 
 	protected:
@@ -38,7 +44,7 @@ class btConvexInternalShape : public btConvexShape
 
 public:
 
-	
+	BT_DECLARE_ALIGNED_ALLOCATOR();
 
 	virtual ~btConvexInternalShape()
 	{
@@ -52,6 +58,32 @@ public:
 		return m_implicitShapeDimensions;
 	}
 
+	///warning: use setImplicitShapeDimensions with care
+	///changing a collision shape while the body is in the world is not recommended,
+	///it is best to remove the body from the world, then make the change, and re-add it
+	///alternatively flush the contact points, see documentation for 'cleanProxyFromPairs'
+	void	setImplicitShapeDimensions(const btVector3& dimensions)
+	{
+		m_implicitShapeDimensions = dimensions;
+	}
+
+	void	setSafeMargin(btScalar minDimension, btScalar defaultMarginMultiplier = 0.1f)
+	{
+		btScalar safeMargin = defaultMarginMultiplier*minDimension;
+		if (safeMargin < getMargin())
+		{
+			setMargin(safeMargin);
+		}
+	}
+	void	setSafeMargin(const btVector3& halfExtents, btScalar defaultMarginMultiplier = 0.1f)
+	{
+		//see http://code.google.com/p/bullet/issues/detail?id=349
+		//this margin check could could be added to other collision shapes too,
+		//or add some assert/warning somewhere
+		btScalar minDimension=halfExtents[halfExtents.minAxis()]; 		
+		setSafeMargin(minDimension, defaultMarginMultiplier);
+	}
+
 	///getAabb's default implementation is brute force, expected derived classes to implement a fast dedicated version
 	void getAabb(const btTransform& t,btVector3& aabbMin,btVector3& aabbMax) const
 	{
@@ -100,10 +132,51 @@ public:
 		btAssert(0);
 	}
 
+	virtual	int	calculateSerializeBufferSize() const;
+
+	///fills the dataBuffer and returns the struct name (and 0 on failure)
+	virtual	const char*	serialize(void* dataBuffer, btSerializer* serializer) const;
 
 	
 };
 
+///do not change those serialization structures, it requires an updated sBulletDNAstr/sBulletDNAstr64
+struct	btConvexInternalShapeData
+{
+	btCollisionShapeData	m_collisionShapeData;
+
+	btVector3FloatData	m_localScaling;
+
+	btVector3FloatData	m_implicitShapeDimensions;
+	
+	float			m_collisionMargin;
+
+	int	m_padding;
+
+};
+
+
+
+SIMD_FORCE_INLINE	int	btConvexInternalShape::calculateSerializeBufferSize() const
+{
+	return sizeof(btConvexInternalShapeData);
+}
+
+///fills the dataBuffer and returns the struct name (and 0 on failure)
+SIMD_FORCE_INLINE	const char*	btConvexInternalShape::serialize(void* dataBuffer, btSerializer* serializer) const
+{
+	btConvexInternalShapeData* shapeData = (btConvexInternalShapeData*) dataBuffer;
+	btCollisionShape::serialize(&shapeData->m_collisionShapeData, serializer);
+	//the record stores single precision values: btVector3FloatData vectors and a float margin
+	m_implicitShapeDimensions.serializeFloat(shapeData->m_implicitShapeDimensions);
+	m_localScaling.serializeFloat(shapeData->m_localScaling);
+	shapeData->m_collisionMargin = float(m_collisionMargin);
+	shapeData->m_padding = 0;	//written explicitly so the padding never carries over what dataBuffer held before
+	return "btConvexInternalShapeData";
+}
+
+
+
 
 ///btConvexInternalAabbCachingShape adds local aabb caching for convex shapes, to avoid expensive bounding box calculations
 class btConvexInternalAabbCachingShape : public btConvexInternalShape
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btConvexPointCloudShape.cpp b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btConvexPointCloudShape.cpp
index c1b155aef..ad1d1bf78 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btConvexPointCloudShape.cpp
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btConvexPointCloudShape.cpp
@@ -28,7 +28,7 @@ void btConvexPointCloudShape::setLocalScaling(const btVector3& scaling)
 btVector3	btConvexPointCloudShape::localGetSupportingVertexWithoutMargin(const btVector3& vec0)const
 {
 	btVector3 supVec(btScalar(0.),btScalar(0.),btScalar(0.));
-	btScalar newDot,maxDot = btScalar(-BT_LARGE_FLOAT);
+	btScalar maxDot = btScalar(-BT_LARGE_FLOAT);
 
 	btVector3 vec = vec0;
 	btScalar lenSqr = vec.length2();
@@ -40,51 +40,33 @@ btVector3	btConvexPointCloudShape::localGetSupportingVertexWithoutMargin(const b
 		btScalar rlen = btScalar(1.) / btSqrt(lenSqr );
 		vec *= rlen;
 	}
+    
+    if( m_numPoints > 0 )
+    {
+        // dot(a*b, c) = dot(a, b*c): searching the unscaled points with the scaled direction finds the support vertex of the scaled points (true mathematically, not numerically).
+        btVector3 scaled = vec * m_localScaling;
+        int index = (int) scaled.maxDot( &m_unscaledPoints[0], m_numPoints, maxDot);   //FIXME: may violate encapsulation of m_unscaledPoints
+        return getScaledPoint(index);
+    }
 
-
-	for (int i=0;i<m_numPoints;i++)
-	{
-		btVector3 vtx = getScaledPoint(i);
-
-		newDot = vec.dot(vtx);
-		if (newDot > maxDot)
-		{
-			maxDot = newDot;
-			supVec = vtx;
-		}
-	}
 	return supVec;
 }
 
 void	btConvexPointCloudShape::batchedUnitVectorGetSupportingVertexWithoutMargin(const btVector3* vectors,btVector3* supportVerticesOut,int numVectors) const
 {
-	btScalar newDot;
-	//use 'w' component of supportVerticesOut?
-	{
-		for (int i=0;i<numVectors;i++)
-		{
-			supportVerticesOut[i][3] = btScalar(-BT_LARGE_FLOAT);
-		}
-	}
-	for (int i=0;i<m_numPoints;i++)
-	{
-		btVector3 vtx = getScaledPoint(i);
-
-		for (int j=0;j<numVectors;j++)
-		{
-			const btVector3& vec = vectors[j];
-			
-			newDot = vec.dot(vtx);
-			if (newDot > supportVerticesOut[j][3])
-			{
-				//WARNING: don't swap next lines, the w component would get overwritten!
-				supportVerticesOut[j] = vtx;
-				supportVerticesOut[j][3] = newDot;
-			}
-		}
-	}
-
-
+    for( int j = 0; j < numVectors; j++ )
+    {
+        const btVector3& vec = vectors[j] * m_localScaling;  // dot( a*c, b) = dot(a, b*c)
+        btScalar maxDot;
+        int index = (int) vec.maxDot( &m_unscaledPoints[0], m_numPoints, maxDot);
+        supportVerticesOut[j][3] = btScalar(-BT_LARGE_FLOAT);
+        if( 0 <= index )
+        {
+            //WARNING: don't swap next lines, the w component would get overwritten!
+            supportVerticesOut[j] = getScaledPoint(index);
+            supportVerticesOut[j][3] = maxDot;
+        }
+    }
 
 }
 	
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btConvexPolyhedron.cpp b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btConvexPolyhedron.cpp
new file mode 100644
index 000000000..f4324c1f4
--- /dev/null
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btConvexPolyhedron.cpp
@@ -0,0 +1,302 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2011 Advanced Micro Devices, Inc.  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+///This file was written by Erwin Coumans
+///Separating axis test based on work from Pierre Terdiman
+///Contact clipping based on work from Simon Hobbs
+
+#include "btConvexPolyhedron.h"
+#include "LinearMath/btHashMap.h"
+
+btConvexPolyhedron::btConvexPolyhedron()
+{
+	// Empty body: no member is explicitly initialised here.
+}
+btConvexPolyhedron::~btConvexPolyhedron()
+{
+	// Empty body: no explicit cleanup is performed here.
+}
+
+
+inline bool IsAlmostZero(const btVector3& v)
+{
+	// True when no component magnitude exceeds 1e-6. btFabs works on btScalar directly, so double-precision builds are not narrowed to float as with fabsf.
+	return !(btFabs(v.x())>1e-6 || btFabs(v.y())>1e-6 || btFabs(v.z())>1e-6);
+}
+
+struct btInternalVertexPair	// pair of vertex indices stored in a fixed order so (a,b) and (b,a) compare equal
+{
+	btInternalVertexPair(short int v0,short int v1)	// stores the two indices, then orders them
+		:m_v0(v0),
+		m_v1(v1)
+	{
+		if (m_v1>m_v0)	// keep the larger index in m_v0
+			btSwap(m_v0,m_v1);
+	}
+	short int m_v0;	// larger (or equal) index after construction
+	short int m_v1;	// smaller (or equal) index after construction
+	int getHash() const	// hash value: m_v0 plus m_v1 shifted left by 16 bits
+	{
+		return m_v0+(m_v1<<16);
+	}
+	bool equals(const btInternalVertexPair& other) const	// true when both stored indices match
+	{
+		return m_v0==other.m_v0 && m_v1==other.m_v1;
+	}
+};
+
+struct btInternalEdge	// the (up to two) face indices recorded for one edge
+{
+	btInternalEdge()	// both slots start at -1, i.e. "no face recorded yet"
+		:m_face0(-1),
+		m_face1(-1)
+	{
+	}
+	short int m_face0;	// first face recorded for the edge, -1 if none
+	short int m_face1;	// second face recorded for the edge, -1 if none
+};
+
+//
+
+#ifdef TEST_INTERNAL_OBJECTS
+bool btConvexPolyhedron::testContainment() const	// true when all 8 corners of the box m_localCenter +/- m_extents satisfy every face plane
+{
+	for(int p=0;p<8;p++)	// p enumerates the eight sign combinations of the extents
+	{
+		btVector3 LocalPt;
+		if(p==0)		LocalPt = m_localCenter + btVector3(m_extents[0], m_extents[1], m_extents[2]);
+		else if(p==1)	LocalPt = m_localCenter + btVector3(m_extents[0], m_extents[1], -m_extents[2]);
+		else if(p==2)	LocalPt = m_localCenter + btVector3(m_extents[0], -m_extents[1], m_extents[2]);
+		else if(p==3)	LocalPt = m_localCenter + btVector3(m_extents[0], -m_extents[1], -m_extents[2]);
+		else if(p==4)	LocalPt = m_localCenter + btVector3(-m_extents[0], m_extents[1], m_extents[2]);
+		else if(p==5)	LocalPt = m_localCenter + btVector3(-m_extents[0], m_extents[1], -m_extents[2]);
+		else if(p==6)	LocalPt = m_localCenter + btVector3(-m_extents[0], -m_extents[1], m_extents[2]);
+		else if(p==7)	LocalPt = m_localCenter + btVector3(-m_extents[0], -m_extents[1], -m_extents[2]);
+		// Evaluate the corner against every face plane; a positive value rejects the box.
+		for(int i=0;i<m_faces.size();i++)
+		{
+			const btVector3 Normal(m_faces[i].m_plane[0], m_faces[i].m_plane[1], m_faces[i].m_plane[2]);
+			const btScalar d = LocalPt.dot(Normal) + m_faces[i].m_plane[3];	// plane expression n.p + w
+			if(d>0.0f)
+				return false;
+		}
+	}
+	return true;
+}
+#endif
+
+void	btConvexPolyhedron::initialize()
+{
+	// Derives m_uniqueEdges and m_localCenter (plus m_radius, mC, mE and m_extents under TEST_INTERNAL_OBJECTS) from m_vertices and m_faces.
+	btHashMap<btInternalVertexPair,btInternalEdge> edges;
+	// Sum of the fan-triangle areas; it is the weight total used to normalise the centre below.
+	btScalar TotalArea = 0.0f;
+	
+	m_localCenter.setValue(0, 0, 0);
+	for(int i=0;i<m_faces.size();i++)
+	{
+		int numVertices = m_faces[i].m_indices.size();
+		int NbTris = numVertices;	// here: one edge per vertex, k wraps around to close the polygon
+		for(int j=0;j<NbTris;j++)
+		{
+			int k = (j+1)%numVertices;
+			btInternalVertexPair vp(m_faces[i].m_indices[j],m_faces[i].m_indices[k]);
+			btInternalEdge* edptr = edges.find(vp);
+			btVector3 edge = m_vertices[vp.m_v1]-m_vertices[vp.m_v0];
+			edge.normalize();
+			// Keep each edge direction once; a direction and its negation count as the same entry.
+			bool found = false;
+
+			for (int p=0;p<m_uniqueEdges.size();p++)
+			{
+				
+				if (IsAlmostZero(m_uniqueEdges[p]-edge) || 
+					IsAlmostZero(m_uniqueEdges[p]+edge))
+				{
+					found = true;
+					break;
+				}
+			}
+
+			if (!found)
+			{
+				m_uniqueEdges.push_back(edge);
+			}
+			// Record which faces use this vertex pair: first sighting fills m_face0, the second fills m_face1.
+			if (edptr)
+			{
+				btAssert(edptr->m_face0>=0);
+				btAssert(edptr->m_face1<0);
+				edptr->m_face1 = i;
+			} else
+			{
+				btInternalEdge ed;
+				ed.m_face0 = i;
+				edges.insert(vp,ed);
+			}
+		}
+	}
+
+#ifdef USE_CONNECTED_FACES
+	for(int i=0;i<m_faces.size();i++)
+	{
+		int numVertices = m_faces[i].m_indices.size();
+		m_faces[i].m_connectedFaces.resize(numVertices);
+
+		for(int j=0;j<numVertices;j++)
+		{
+			int k = (j+1)%numVertices;
+			btInternalVertexPair vp(m_faces[i].m_indices[j],m_faces[i].m_indices[k]);
+			btInternalEdge* edptr = edges.find(vp);
+			btAssert(edptr);
+			btAssert(edptr->m_face0>=0);
+			btAssert(edptr->m_face1>=0);
+
+			int connectedFace = (edptr->m_face0==i)?edptr->m_face1:edptr->m_face0;
+			m_faces[i].m_connectedFaces[j] = connectedFace;
+		}
+	}
+#endif//USE_CONNECTED_FACES
+	// Area-weighted centre: each face is split into a triangle fan around its first vertex.
+	for(int i=0;i<m_faces.size();i++)
+	{
+		int numVertices = m_faces[i].m_indices.size();
+		int NbTris = numVertices-2;
+		
+		const btVector3& p0 = m_vertices[m_faces[i].m_indices[0]];
+		for(int j=1;j<=NbTris;j++)
+		{
+			int k = (j+1)%numVertices;
+			const btVector3& p1 = m_vertices[m_faces[i].m_indices[j]];
+			const btVector3& p2 = m_vertices[m_faces[i].m_indices[k]];
+			btScalar Area = ((p0 - p1).cross(p0 - p2)).length() * 0.5f;
+			btVector3 Center = (p0+p1+p2)/3.0f;
+			m_localCenter += Area * Center;
+			TotalArea += Area;
+		}
+	}
+	// Guard the normalisation: with no faces (or only zero-area ones) TotalArea is 0 and dividing would turn the centre into NaN.
+	if (TotalArea > btScalar(0))
+	{
+		m_localCenter /= TotalArea;
+	}
+#ifdef TEST_INTERNAL_OBJECTS
+	if(1)
+	{
+		m_radius = FLT_MAX;	// becomes the smallest |plane expression| evaluated at m_localCenter
+		for(int i=0;i<m_faces.size();i++)
+		{
+			const btVector3 Normal(m_faces[i].m_plane[0], m_faces[i].m_plane[1], m_faces[i].m_plane[2]);
+			const btScalar dist = btFabs(m_localCenter.dot(Normal) + m_faces[i].m_plane[3]);
+			if(dist<m_radius)
+				m_radius = dist;
+		}
+
+		// Per-axis bounds of the vertex set.
+		btScalar MinX = FLT_MAX;
+		btScalar MinY = FLT_MAX;
+		btScalar MinZ = FLT_MAX;
+		btScalar MaxX = -FLT_MAX;
+		btScalar MaxY = -FLT_MAX;
+		btScalar MaxZ = -FLT_MAX;
+		for(int i=0; i<m_vertices.size(); i++)
+		{
+			const btVector3& pt = m_vertices[i];
+			if(pt.x()<MinX)	MinX = pt.x();
+			if(pt.x()>MaxX)	MaxX = pt.x();
+			if(pt.y()<MinY)	MinY = pt.y();
+			if(pt.y()>MaxY)	MaxY = pt.y();
+			if(pt.z()<MinZ)	MinZ = pt.z();
+			if(pt.z()>MaxZ)	MaxZ = pt.z();
+		}
+		mC.setValue(MaxX+MinX, MaxY+MinY, MaxZ+MinZ);	// per-axis max+min (the sum, not halved)
+		mE.setValue(MaxX-MinX, MaxY-MinY, MaxZ-MinZ);	// per-axis max-min (full size, not halved)
+
+		// Start from a cube with half-extent m_radius/sqrt(3), stretch it to half the bounds along the largest axis,
+		// then shrink that axis in up to 1024 equal steps until testContainment() accepts the box.
+//		const btScalar r = m_radius / sqrtf(2.0f);
+		const btScalar r = m_radius / sqrtf(3.0f);
+		const int LargestExtent = mE.maxAxis();
+		const btScalar Step = (mE[LargestExtent]*0.5f - r)/1024.0f;
+		m_extents[0] = m_extents[1] = m_extents[2] = r;
+		m_extents[LargestExtent] = mE[LargestExtent]*0.5f;
+		bool FoundBox = false;
+		for(int j=0;j<1024;j++)
+		{
+			if(testContainment())
+			{
+				FoundBox = true;
+				break;
+			}
+
+			m_extents[LargestExtent] -= Step;
+		}
+		if(!FoundBox)
+		{
+			m_extents[0] = m_extents[1] = m_extents[2] = r;	// fall back to the cube
+		}
+		else
+		{
+			// Refine the box
+			const btScalar Step = (m_radius - r)/1024.0f;	// shadows the outer Step on purpose: separate step size for the other two axes
+			const int e0 = (1<<LargestExtent) & 3;	// next axis cyclically: 0->1, 1->2, 2->0
+			const int e1 = (1<<e0) & 3;	// the remaining axis, by the same mapping
+
+			for(int j=0;j<1024;j++)
+			{
+				const btScalar Saved0 = m_extents[e0];
+				const btScalar Saved1 = m_extents[e1];
+				m_extents[e0] += Step;
+				m_extents[e1] += Step;
+
+				if(!testContainment())	// grew too far: restore the last accepted extents and stop
+				{
+					m_extents[e0] = Saved0;
+					m_extents[e1] = Saved1;
+					break;
+				}
+			}
+		}
+	}
+#endif
+}
+
+void btConvexPolyhedron::project(const btTransform& trans, const btVector3& dir, btScalar& minProj, btScalar& maxProj, btVector3& witnesPtMin,btVector3& witnesPtMax) const	// projects every transformed vertex onto dir; outputs the extreme values and the vertices producing them
+{
+	minProj = FLT_MAX;	// sentinels, replaced by the first vertex visited
+	maxProj = -FLT_MAX;
+	int numVerts = m_vertices.size();
+	for(int i=0;i<numVerts;i++)
+	{
+		btVector3 pt = trans * m_vertices[i];	// vertex after applying trans
+		btScalar dp = pt.dot(dir);	// its coordinate along dir
+		if(dp < minProj)
+		{
+			minProj = dp;
+			witnesPtMin = pt;
+		}
+		if(dp > maxProj)
+		{
+			maxProj = dp;
+			witnesPtMax = pt;
+		}
+	}
+	if(minProj>maxProj)	// only holds when no vertex was visited (sentinels untouched); the outputs are then swapped
+	{
+		btSwap(minProj,maxProj);
+		btSwap(witnesPtMin,witnesPtMax);
+	}
+}
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btConvexPolyhedron.h b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btConvexPolyhedron.h
new file mode 100644
index 000000000..d3cd066ac
--- /dev/null
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btConvexPolyhedron.h
@@ -0,0 +1,65 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2011 Advanced Micro Devices, Inc.  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+///This file was written by Erwin Coumans
+
+
+#ifndef _BT_POLYHEDRAL_FEATURES_H
+#define _BT_POLYHEDRAL_FEATURES_H
+
+#include "LinearMath/btTransform.h"
+#include "LinearMath/btAlignedObjectArray.h"
+
+#define TEST_INTERNAL_OBJECTS 1
+
+
+struct btFace	// one polygonal face of a btConvexPolyhedron
+{
+	btAlignedObjectArray<int>	m_indices;	// indices into btConvexPolyhedron::m_vertices, walked in order as the face polygon
+//	btAlignedObjectArray<int>	m_connectedFaces;
+	btScalar	m_plane[4];	// plane coefficients: [0..2] are read as the normal, [3] as the constant term
+};
+
+
+ATTRIBUTE_ALIGNED16(class) btConvexPolyhedron
+{
+	public:
+	// Vertex/face description of a convex polyhedron plus data derived from it by initialize().
+	BT_DECLARE_ALIGNED_ALLOCATOR();
+		
+	btConvexPolyhedron();
+	virtual	~btConvexPolyhedron();
+
+	btAlignedObjectArray<btVector3>	m_vertices;	// input: vertex positions
+	btAlignedObjectArray<btFace>	m_faces;	// input: faces referencing m_vertices by index
+	btAlignedObjectArray<btVector3> m_uniqueEdges;	// filled by initialize(): normalized edge directions, one per direction up to sign
+
+	btVector3		m_localCenter;	// filled by initialize(): area-weighted centre of the face triangles
+	btVector3		m_extents;	// filled by initialize(): half-extents of the box checked by testContainment()
+	btScalar		m_radius;	// filled by initialize(): smallest absolute face-plane value at m_localCenter
+	btVector3		mC;	// filled by initialize(): per-axis max+min of the vertices
+	btVector3		mE;	// filled by initialize(): per-axis max-min of the vertices
+
+	void	initialize();	// computes the derived members above from m_vertices and m_faces
+	bool testContainment() const;	// true when the box m_localCenter +/- m_extents satisfies every face plane
+
+	void project(const btTransform& trans, const btVector3& dir, btScalar& minProj, btScalar& maxProj, btVector3& witnesPtMin,btVector3& witnesPtMax) const;	// min/max of the transformed vertices along dir, with the vertices that produce them
+};
+
+	
+#endif //_BT_POLYHEDRAL_FEATURES_H
+
+
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btConvexShape.cpp b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btConvexShape.cpp
index f5f3aa58a..3ffa42228 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btConvexShape.cpp
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btConvexShape.cpp
@@ -43,6 +43,23 @@ btConvexShape::~btConvexShape()
 }
 
 
+void btConvexShape::project(const btTransform& trans, const btVector3& dir, btScalar& min, btScalar& max) const	// interval covered by the transformed shape along dir, from two support-vertex queries
+{
+	btVector3 localAxis = dir*trans.getBasis();	// dir expressed in the shape's local frame
+	btVector3 vtx1 = trans(localGetSupportingVertex(localAxis));	// support vertex along +axis, moved back through trans
+	btVector3 vtx2 = trans(localGetSupportingVertex(-localAxis));	// support vertex along -axis, moved back through trans
+	// The two projections are assigned without regard to order; the swap below sorts them.
+	min = vtx1.dot(dir);
+	max = vtx2.dot(dir);
+	// Ensure min <= max on return.
+	if(min>max)
+	{
+		btScalar tmp = min;
+		min = max;
+		max = tmp;
+	}
+}
+
 
 static btVector3 convexHullSupport (const btVector3& localDirOrg, const btVector3* points, int numPoints, const btVector3& localScaling)
 {	
@@ -92,19 +109,8 @@ static btVector3 convexHullSupport (const btVector3& localDirOrg, const btVector
 	return supVec;
 #else
 
-	btScalar newDot,maxDot = btScalar(-BT_LARGE_FLOAT);
-	int ptIndex = -1;
-
-	for (int i=0;i<numPoints;i++)
-	{
-
-		newDot = vec.dot(points[i]);
-		if (newDot > maxDot)
-		{
-			maxDot = newDot;
-			ptIndex = i;
-		}
-	}
+    btScalar maxDot;
+    long ptIndex = vec.maxDot( points, numPoints, maxDot);
 	btAssert(ptIndex >= 0);
 	btVector3 supVec = points[ptIndex] * localScaling;
 	return supVec;
@@ -124,16 +130,26 @@ btVector3 btConvexShape::localGetSupportVertexWithoutMarginNonVirtual (const btV
 		btBoxShape* convexShape = (btBoxShape*)this;
 		const btVector3& halfExtents = convexShape->getImplicitShapeDimensions();
 
+#if defined( __APPLE__ ) && (defined( BT_USE_SSE )||defined( BT_USE_NEON ))
+    #if defined( BT_USE_SSE )
+            return btVector3( _mm_xor_ps( _mm_and_ps( localDir.mVec128, (__m128){-0.0f, -0.0f, -0.0f, -0.0f }), halfExtents.mVec128 ));
+    #elif defined( BT_USE_NEON )
+            return btVector3( (float32x4_t) (((uint32x4_t) localDir.mVec128 & (uint32x4_t){ 0x80000000, 0x80000000, 0x80000000, 0x80000000}) ^ (uint32x4_t) halfExtents.mVec128 ));
+    #else
+        #error unknown vector arch
+    #endif
+#else
 		return btVector3(btFsels(localDir.x(), halfExtents.x(), -halfExtents.x()),
 			btFsels(localDir.y(), halfExtents.y(), -halfExtents.y()),
 			btFsels(localDir.z(), halfExtents.z(), -halfExtents.z()));
+#endif
 	}
 	case TRIANGLE_SHAPE_PROXYTYPE:
 	{
 		btTriangleShape* triangleShape = (btTriangleShape*)this;
 		btVector3 dir(localDir.getX(),localDir.getY(),localDir.getZ());
 		btVector3* vertices = &triangleShape->m_vertices1[0];
-		btVector3 dots(dir.dot(vertices[0]), dir.dot(vertices[1]), dir.dot(vertices[2]));
+        btVector3 dots = dir.dot3(vertices[0], vertices[1], vertices[2]);
 		btVector3 sup = vertices[dots.maxAxis()];
 		return btVector3(sup.getX(),sup.getY(),sup.getZ());
 	}
@@ -227,7 +243,7 @@ btVector3 btConvexShape::localGetSupportVertexWithoutMarginNonVirtual (const btV
 			pos[capsuleUpAxis] = halfHeight;
 
 			//vtx = pos +vec*(radius);
-			vtx = pos +vec*capsuleShape->getLocalScalingNV()*(radius) - vec * capsuleShape->getMarginNV();
+			vtx = pos +vec*(radius) - vec * capsuleShape->getMarginNV();
 			newDot = vec.dot(vtx);
 			
 
@@ -242,7 +258,7 @@ btVector3 btConvexShape::localGetSupportVertexWithoutMarginNonVirtual (const btV
 			pos[capsuleUpAxis] = -halfHeight;
 
 			//vtx = pos +vec*(radius);
-			vtx = pos +vec*capsuleShape->getLocalScalingNV()*(radius) - vec * capsuleShape->getMarginNV();
+			vtx = pos +vec*(radius) - vec * capsuleShape->getMarginNV();
 			newDot = vec.dot(vtx);
 			if (newDot > maxDot)
 			{
@@ -366,8 +382,8 @@ void btConvexShape::getAabbNonVirtual (const btTransform& t, btVector3& aabbMin,
 		halfExtents += btVector3(margin,margin,margin);
 		btMatrix3x3 abs_b = t.getBasis().absolute();  
 		btVector3 center = t.getOrigin();
-		btVector3 extent = btVector3(abs_b[0].dot(halfExtents),abs_b[1].dot(halfExtents),abs_b[2].dot(halfExtents));
-		
+        btVector3 extent = halfExtents.dot3(abs_b[0], abs_b[1], abs_b[2]);    
+        
 		aabbMin = center - extent;
 		aabbMax = center + extent;
 		break;
@@ -400,7 +416,7 @@ void btConvexShape::getAabbNonVirtual (const btTransform& t, btVector3& aabbMin,
 		halfExtents += btVector3(capsuleShape->getMarginNonVirtual(),capsuleShape->getMarginNonVirtual(),capsuleShape->getMarginNonVirtual());
 		btMatrix3x3 abs_b = t.getBasis().absolute();  
 		btVector3 center = t.getOrigin();
-		btVector3 extent = btVector3(abs_b[0].dot(halfExtents),abs_b[1].dot(halfExtents),abs_b[2].dot(halfExtents));		  	
+        btVector3 extent = halfExtents.dot3(abs_b[0], abs_b[1], abs_b[2]);    
 		aabbMin = center - extent;
 		aabbMax = center + extent;
 	}
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btConvexShape.h b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btConvexShape.h
index 9c158259c..290cd9fd1 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btConvexShape.h
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btConvexShape.h
@@ -13,8 +13,8 @@ subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-#ifndef CONVEX_SHAPE_INTERFACE1
-#define CONVEX_SHAPE_INTERFACE1
+#ifndef BT_CONVEX_SHAPE_INTERFACE1
+#define BT_CONVEX_SHAPE_INTERFACE1
 
 #include "btCollisionShape.h"
 
@@ -52,6 +52,8 @@ public:
 	btScalar getMarginNonVirtual () const;
 	void getAabbNonVirtual (const btTransform& t, btVector3& aabbMin, btVector3& aabbMax) const;
 
+	virtual void project(const btTransform& trans, const btVector3& dir, btScalar& min, btScalar& max) const;
+
 	
 	//notice that the vectors should be unit length
 	virtual void	batchedUnitVectorGetSupportingVertexWithoutMargin(const btVector3* vectors,btVector3* supportVerticesOut,int numVectors) const= 0;
@@ -79,4 +81,4 @@ public:
 
 
 
-#endif //CONVEX_SHAPE_INTERFACE1
+#endif //BT_CONVEX_SHAPE_INTERFACE1
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btConvexTriangleMeshShape.h b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btConvexTriangleMeshShape.h
index f5167e74b..f338865ca 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btConvexTriangleMeshShape.h
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btConvexTriangleMeshShape.h
@@ -12,8 +12,8 @@ subject to the following restrictions:
 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
 3. This notice may not be removed or altered from any source distribution.
 */
-#ifndef CONVEX_TRIANGLEMESH_SHAPE_H
-#define CONVEX_TRIANGLEMESH_SHAPE_H
+#ifndef BT_CONVEX_TRIANGLEMESH_SHAPE_H
+#define BT_CONVEX_TRIANGLEMESH_SHAPE_H
 
 
 #include "btPolyhedralConvexShape.h"
@@ -22,12 +22,14 @@ subject to the following restrictions:
 
 /// The btConvexTriangleMeshShape is a convex hull of a triangle mesh, but the performance is not as good as btConvexHullShape.
 /// A small benefit of this class is that it uses the btStridingMeshInterface, so you can avoid the duplication of the triangle mesh data. Nevertheless, most users should use the much better performing btConvexHullShape instead.
-class btConvexTriangleMeshShape : public btPolyhedralConvexAabbCachingShape
+ATTRIBUTE_ALIGNED16(class) btConvexTriangleMeshShape : public btPolyhedralConvexAabbCachingShape
 {
 
 	class btStridingMeshInterface*	m_stridingMesh;
 
 public:
+	BT_DECLARE_ALIGNED_ALLOCATOR();
+	
 	btConvexTriangleMeshShape(btStridingMeshInterface* meshInterface, bool calcAabb = true);
 
 	class btStridingMeshInterface*	getMeshInterface()
@@ -69,7 +71,7 @@ public:
 
 
 
-#endif //CONVEX_TRIANGLEMESH_SHAPE_H
+#endif //BT_CONVEX_TRIANGLEMESH_SHAPE_H
 
 
 
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btCylinderShape.cpp b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btCylinderShape.cpp
index 268809304..6cfe43be4 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btCylinderShape.cpp
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btCylinderShape.cpp
@@ -19,6 +19,8 @@ btCylinderShape::btCylinderShape (const btVector3& halfExtents)
 :btConvexInternalShape(),
 m_upAxis(1)
 {
+	setSafeMargin(halfExtents);
+
 	btVector3 margin(getMargin(),getMargin(),getMargin());
 	m_implicitShapeDimensions = (halfExtents * m_localScaling) - margin;
 	m_shapeType = CYLINDER_SHAPE_PROXYTYPE;
@@ -47,7 +49,64 @@ void btCylinderShape::getAabb(const btTransform& t,btVector3& aabbMin,btVector3&
 
 void	btCylinderShape::calculateLocalInertia(btScalar mass,btVector3& inertia) const
 {
-	//approximation of box shape, todo: implement cylinder shape inertia before people notice ;-)
+
+//Until Bullet 2.77 a box approximation was used, so uncomment this if you need backwards compatibility
+//#define USE_BOX_INERTIA_APPROXIMATION 1
+#ifndef USE_BOX_INERTIA_APPROXIMATION
+
+	/*
+	cylinder is defined as following:
+	*
+	* - principal axis aligned along y by default, radius in x, z-value not used
+	* - for btCylinderShapeX: principal axis aligned along x, radius in y direction, z-value not used
+	* - for btCylinderShapeZ: principal axis aligned along z, radius in x direction, y-value not used
+	*
+	*/
+
+	btScalar radius2;	// square of cylinder radius
+	btScalar height2;	// square of cylinder height
+	btVector3 halfExtents = getHalfExtentsWithMargin();	// get cylinder dimension
+	btScalar div12 = mass / 12.f;
+	btScalar div4 = mass / 4.f;
+	btScalar div2 = mass / 2.f;
+	int idxRadius, idxHeight;
+
+	switch (m_upAxis)	// get indices of radius and height of cylinder
+	{
+		case 0:		// cylinder is aligned along x
+			idxRadius = 1;
+			idxHeight = 0;
+			break;
+		case 2:		// cylinder is aligned along z
+			idxRadius = 0;
+			idxHeight = 2;
+			break;
+		default:	// cylinder is aligned along y
+			idxRadius = 0;
+			idxHeight = 1;
+	}
+
+	// calculate squares
+	radius2 = halfExtents[idxRadius] * halfExtents[idxRadius];
+	height2 = btScalar(4.) * halfExtents[idxHeight] * halfExtents[idxHeight];
+
+	// calculate tensor terms
+	btScalar t1 = div12 * height2 + div4 * radius2;
+	btScalar t2 = div2 * radius2;
+
+	switch (m_upAxis)	// set diagonal elements of inertia tensor
+	{
+		case 0:		// cylinder is aligned along x
+			inertia.setValue(t2,t1,t1);
+			break;
+		case 2:		// cylinder is aligned along z
+			inertia.setValue(t1,t1,t2);
+			break;
+		default:	// cylinder is aligned along y
+			inertia.setValue(t1,t2,t1);
+	}
+#else //USE_BOX_INERTIA_APPROXIMATION
+	//approximation of box shape
 	btVector3 halfExtents = getHalfExtentsWithMargin();
 
 	btScalar lx=btScalar(2.)*(halfExtents.x());
@@ -57,7 +116,7 @@ void	btCylinderShape::calculateLocalInertia(btScalar mass,btVector3& inertia) co
 	inertia.setValue(mass/(btScalar(12.0)) * (ly*ly + lz*lz),
 					mass/(btScalar(12.0)) * (lx*lx + lz*lz),
 					mass/(btScalar(12.0)) * (lx*lx + ly*ly));
-
+#endif //USE_BOX_INERTIA_APPROXIMATION
 }
 
 
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btCylinderShape.h b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btCylinderShape.h
index a92f9fc44..6f796950e 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btCylinderShape.h
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btCylinderShape.h
@@ -13,15 +13,15 @@ subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-#ifndef CYLINDER_MINKOWSKI_H
-#define CYLINDER_MINKOWSKI_H
+#ifndef BT_CYLINDER_MINKOWSKI_H
+#define BT_CYLINDER_MINKOWSKI_H
 
 #include "btBoxShape.h"
 #include "BulletCollision/BroadphaseCollision/btBroadphaseProxy.h" // for the types
 #include "LinearMath/btVector3.h"
 
 /// The btCylinderShape class implements a cylinder shape primitive, centered around the origin. Its central axis aligned with the Y axis. btCylinderShapeX is aligned with the X axis and btCylinderShapeZ around the Z axis.
-class btCylinderShape : public btConvexInternalShape
+ATTRIBUTE_ALIGNED16(class) btCylinderShape : public btConvexInternalShape
 
 {
 
@@ -31,6 +31,8 @@ protected:
 
 public:
 
+BT_DECLARE_ALIGNED_ALLOCATOR();
+
 	btVector3 getHalfExtentsWithMargin() const
 	{
 		btVector3 halfExtents = getHalfExtentsWithoutMargin();
@@ -95,24 +97,48 @@ public:
 		return m_upAxis;
 	}
 
+	virtual btVector3	getAnisotropicRollingFrictionDirection() const	// returns the unit vector along the cylinder's up axis
+	{
+		btVector3 aniDir(0,0,0);
+		aniDir[getUpAxis()]=1;	// set only the component selected by getUpAxis()
+		return aniDir;
+	}
+
 	virtual btScalar getRadius() const
 	{
 		return getHalfExtentsWithMargin().getX();
 	}
 
+	virtual void	setLocalScaling(const btVector3& scaling)	// applies a new scaling and re-derives m_implicitShapeDimensions so the margin itself is not scaled
+	{
+		btVector3 oldMargin(getMargin(),getMargin(),getMargin());
+		btVector3 implicitShapeDimensionsWithMargin = m_implicitShapeDimensions+oldMargin;
+		btVector3 unScaledImplicitShapeDimensionsWithMargin = implicitShapeDimensionsWithMargin / m_localScaling;
+		// Above: margin added back and the current scaling divided out. Next: the base class takes the new scaling (presumably updating m_localScaling - confirm in btConvexInternalShape).
+		btConvexInternalShape::setLocalScaling(scaling);
+		// Re-apply m_localScaling as it stands after the base-class call, then subtract the margin again.
+		m_implicitShapeDimensions = (unScaledImplicitShapeDimensionsWithMargin * m_localScaling) - oldMargin;
+
+	}
+
 	//debugging
 	virtual const char*	getName()const
 	{
 		return "CylinderY";
 	}
 
+	virtual	int	calculateSerializeBufferSize() const;
 
+	///fills the dataBuffer and returns the struct name (and 0 on failure)
+	virtual	const char*	serialize(void* dataBuffer, btSerializer* serializer) const;
 
 };
 
 class btCylinderShapeX : public btCylinderShape
 {
 public:
+	BT_DECLARE_ALIGNED_ALLOCATOR();
+	
 	btCylinderShapeX (const btVector3& halfExtents);
 
 	virtual btVector3	localGetSupportingVertexWithoutMargin(const btVector3& vec)const;
@@ -134,15 +160,13 @@ public:
 class btCylinderShapeZ : public btCylinderShape
 {
 public:
+	BT_DECLARE_ALIGNED_ALLOCATOR();
+	
 	btCylinderShapeZ (const btVector3& halfExtents);
 
 	virtual btVector3	localGetSupportingVertexWithoutMargin(const btVector3& vec)const;
 	virtual void	batchedUnitVectorGetSupportingVertexWithoutMargin(const btVector3* vectors,btVector3* supportVerticesOut,int numVectors) const;
 
-	virtual int	getUpAxis() const
-	{
-		return 2;
-	}
 		//debugging
 	virtual const char*	getName()const
 	{
@@ -156,6 +180,34 @@ public:
 
 };
 
+///do not change those serialization structures, it requires an updated sBulletDNAstr/sBulletDNAstr64
+struct	btCylinderShapeData	// serialized form of btCylinderShape, written by btCylinderShape::serialize()
+{
+	btConvexInternalShapeData	m_convexInternalShapeData;	// filled by btConvexInternalShape::serialize()
 
-#endif //CYLINDER_MINKOWSKI_H
+	int	m_upAxis;	// copy of btCylinderShape::m_upAxis
+
+	char	m_padding[4];	// explicit padding bytes
+};
+
+SIMD_FORCE_INLINE	int	btCylinderShape::calculateSerializeBufferSize() const	// size in bytes of the structure that serialize() fills
+{
+	return sizeof(btCylinderShapeData);
+}
+
+	///fills the dataBuffer and returns the struct name (and 0 on failure)
+SIMD_FORCE_INLINE	const char*	btCylinderShape::serialize(void* dataBuffer, btSerializer* serializer) const
+{
+	btCylinderShapeData* shapeData = (btCylinderShapeData*) dataBuffer;	// dataBuffer is treated as a btCylinderShapeData
+	// Base-class part first: fills the embedded btConvexInternalShapeData.
+	btConvexInternalShape::serialize(&shapeData->m_convexInternalShapeData,serializer);
+	shapeData->m_upAxis = m_upAxis;
+	// Zero the explicit padding so no uninitialised bytes end up in the serialized output.
+	shapeData->m_padding[0] = shapeData->m_padding[1] = shapeData->m_padding[2] = shapeData->m_padding[3] = 0;
+	return "btCylinderShapeData";
+}
+
+
+
+#endif //BT_CYLINDER_MINKOWSKI_H
 
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btEmptyShape.h b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btEmptyShape.h
index 9f6b4435c..069a79402 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btEmptyShape.h
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btEmptyShape.h
@@ -13,8 +13,8 @@ subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-#ifndef EMPTY_SHAPE_H
-#define EMPTY_SHAPE_H
+#ifndef BT_EMPTY_SHAPE_H
+#define BT_EMPTY_SHAPE_H
 
 #include "btConcaveShape.h"
 
@@ -28,9 +28,11 @@ subject to the following restrictions:
 
 /// The btEmptyShape is a collision shape without actual collision detection shape, so most users should ignore this class.
 /// It can be replaced by another shape during runtime, but the inertia tensor should be recomputed.
-class btEmptyShape	: public btConcaveShape
+ATTRIBUTE_ALIGNED16(class) btEmptyShape	: public btConcaveShape
 {
 public:
+	BT_DECLARE_ALIGNED_ALLOCATOR();
+	
 	btEmptyShape();
 
 	virtual ~btEmptyShape();
@@ -67,4 +69,4 @@ protected:
 
 
 
-#endif //EMPTY_SHAPE_H
+#endif //BT_EMPTY_SHAPE_H
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btHeightfieldTerrainShape.cpp b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btHeightfieldTerrainShape.cpp
index 3a1e6f4a2..8d4080a63 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btHeightfieldTerrainShape.cpp
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btHeightfieldTerrainShape.cpp
@@ -21,7 +21,7 @@ subject to the following restrictions:
 
 btHeightfieldTerrainShape::btHeightfieldTerrainShape
 (
-int heightStickWidth, int heightStickLength, void* heightfieldData,
+int heightStickWidth, int heightStickLength, const void* heightfieldData,
 btScalar heightScale, btScalar minHeight, btScalar maxHeight,int upAxis,
 PHY_ScalarType hdt, bool flipQuadEdges
 )
@@ -33,12 +33,12 @@ PHY_ScalarType hdt, bool flipQuadEdges
 
 
 
-btHeightfieldTerrainShape::btHeightfieldTerrainShape(int heightStickWidth, int heightStickLength,void* heightfieldData,btScalar maxHeight,int upAxis,bool useFloatData,bool flipQuadEdges)
+btHeightfieldTerrainShape::btHeightfieldTerrainShape(int heightStickWidth, int heightStickLength,const void* heightfieldData,btScalar maxHeight,int upAxis,bool useFloatData,bool flipQuadEdges)
 {
 	// legacy constructor: support only float or unsigned char,
 	// 	and min height is zero
 	PHY_ScalarType hdt = (useFloatData) ? PHY_FLOAT : PHY_UCHAR;
-	btScalar minHeight = 0.0;
+	btScalar minHeight = 0.0f;
 
 	// previously, height = uchar * maxHeight / 65535.
 	// So to preserve legacy behavior, heightScale = maxHeight / 65535
@@ -53,7 +53,7 @@ btHeightfieldTerrainShape::btHeightfieldTerrainShape(int heightStickWidth, int h
 
 void btHeightfieldTerrainShape::initialize
 (
-int heightStickWidth, int heightStickLength, void* heightfieldData,
+int heightStickWidth, int heightStickLength, const void* heightfieldData,
 btScalar heightScale, btScalar minHeight, btScalar maxHeight, int upAxis,
 PHY_ScalarType hdt, bool flipQuadEdges
 )
@@ -82,6 +82,7 @@ PHY_ScalarType hdt, bool flipQuadEdges
 	m_heightDataType = hdt;
 	m_flipQuadEdges = flipQuadEdges;
 	m_useDiamondSubdivision = false;
+	m_useZigzagSubdivision = false;
 	m_upAxis = upAxis;
 	m_localScaling.setValue(btScalar(1.), btScalar(1.), btScalar(1.));
 
@@ -135,9 +136,7 @@ void btHeightfieldTerrainShape::getAabb(const btTransform& t,btVector3& aabbMin,
 
 	btMatrix3x3 abs_b = t.getBasis().absolute();  
 	btVector3 center = t.getOrigin();
-	btVector3 extent = btVector3(abs_b[0].dot(halfExtents),
-		   abs_b[1].dot(halfExtents),
-		  abs_b[2].dot(halfExtents));
+    btVector3 extent = halfExtents.dot3(abs_b[0], abs_b[1], abs_b[2]);
 	extent += btVector3(getMargin(),getMargin(),getMargin());
 
 	aabbMin = center - extent;
@@ -362,7 +361,7 @@ void	btHeightfieldTerrainShape::processAllTriangles(btTriangleCallback* callback
 		for(int x=startX; x<endX; x++)
 		{
 			btVector3 vertices[3];
-			if (m_flipQuadEdges || (m_useDiamondSubdivision && !((j+x) & 1)))
+			if (m_flipQuadEdges || (m_useDiamondSubdivision && !((j+x) & 1))|| (m_useZigzagSubdivision  && !(j & 1)))
 			{
         //first triangle
         getVertex(x,j,vertices[0]);
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btHeightfieldTerrainShape.h b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btHeightfieldTerrainShape.h
index 4f5d1e35b..4a7a4a4bd 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btHeightfieldTerrainShape.h
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btHeightfieldTerrainShape.h
@@ -13,8 +13,8 @@ subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-#ifndef HEIGHTFIELD_TERRAIN_SHAPE_H
-#define HEIGHTFIELD_TERRAIN_SHAPE_H
+#ifndef BT_HEIGHTFIELD_TERRAIN_SHAPE_H
+#define BT_HEIGHTFIELD_TERRAIN_SHAPE_H
 
 #include "btConcaveShape.h"
 
@@ -68,7 +68,7 @@ subject to the following restrictions:
 
   For usage and testing see the TerrainDemo.
  */
-class btHeightfieldTerrainShape : public btConcaveShape
+ATTRIBUTE_ALIGNED16(class) btHeightfieldTerrainShape : public btConcaveShape
 {
 protected:
 	btVector3	m_localAabbMin;
@@ -85,15 +85,16 @@ protected:
 	btScalar m_heightScale;
 	union
 	{
-		unsigned char*	m_heightfieldDataUnsignedChar;
-		short*		m_heightfieldDataShort;
-		btScalar*			m_heightfieldDataFloat;
-		void*			m_heightfieldDataUnknown;
+		const unsigned char*	m_heightfieldDataUnsignedChar;
+		const short*		m_heightfieldDataShort;
+		const btScalar*			m_heightfieldDataFloat;
+		const void*	m_heightfieldDataUnknown;
 	};
 
 	PHY_ScalarType	m_heightDataType;	
 	bool	m_flipQuadEdges;
-  bool  m_useDiamondSubdivision;
+  	bool  m_useDiamondSubdivision;
+	bool m_useZigzagSubdivision;
 
 	int	m_upAxis;
 	
@@ -111,11 +112,14 @@ protected:
 	  backwards-compatible without a lot of copy/paste.
 	 */
 	void initialize(int heightStickWidth, int heightStickLength,
-	                void* heightfieldData, btScalar heightScale,
+	                const void* heightfieldData, btScalar heightScale,
 	                btScalar minHeight, btScalar maxHeight, int upAxis,
 	                PHY_ScalarType heightDataType, bool flipQuadEdges);
 
 public:
+	
+	BT_DECLARE_ALIGNED_ALLOCATOR();
+	
 	/// preferred constructor
 	/**
 	  This constructor supports a range of heightfield
@@ -123,7 +127,7 @@ public:
 	  heightScale is needed for any integer-based heightfield data types.
 	 */
 	btHeightfieldTerrainShape(int heightStickWidth,int heightStickLength,
-	                          void* heightfieldData, btScalar heightScale,
+	                          const void* heightfieldData, btScalar heightScale,
 	                          btScalar minHeight, btScalar maxHeight,
 	                          int upAxis, PHY_ScalarType heightDataType,
 	                          bool flipQuadEdges);
@@ -135,13 +139,15 @@ public:
 	  compatibility reasons, heightScale is calculated as maxHeight / 65535 
 	  (and is only used when useFloatData = false).
  	 */
-	btHeightfieldTerrainShape(int heightStickWidth,int heightStickLength,void* heightfieldData, btScalar maxHeight,int upAxis,bool useFloatData,bool flipQuadEdges);
+	btHeightfieldTerrainShape(int heightStickWidth,int heightStickLength,const void* heightfieldData, btScalar maxHeight,int upAxis,bool useFloatData,bool flipQuadEdges);
 
 	virtual ~btHeightfieldTerrainShape();
 
 
 	void setUseDiamondSubdivision(bool useDiamondSubdivision=true) { m_useDiamondSubdivision = useDiamondSubdivision;}
 
+	///could help compatibility with Ogre heightfields. See https://code.google.com/p/bullet/issues/detail?id=625	
+	void setUseZigzagSubdivision(bool useZigzagSubdivision=true) { m_useZigzagSubdivision = useZigzagSubdivision;}
 
 	virtual void getAabb(const btTransform& t,btVector3& aabbMin,btVector3& aabbMax) const;
 
@@ -158,4 +164,4 @@ public:
 
 };
 
-#endif //HEIGHTFIELD_TERRAIN_SHAPE_H
+#endif //BT_HEIGHTFIELD_TERRAIN_SHAPE_H
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btMaterial.h b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btMaterial.h
index 3f2c892a4..866f9b4da 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btMaterial.h
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btMaterial.h
@@ -15,8 +15,8 @@ subject to the following restrictions:
 
 /// This file was created by Alex Silverman
 
-#ifndef MATERIAL_H
-#define MATERIAL_H
+#ifndef BT_MATERIAL_H
+#define BT_MATERIAL_H
 
 // Material class to be used by btMultimaterialTriangleMeshShape to store triangle properties
 class btMaterial
@@ -31,4 +31,5 @@ public:
     btMaterial(btScalar fric, btScalar rest) { m_friction = fric; m_restitution = rest; }
 };
 
-#endif // MATERIAL_H
\ No newline at end of file
+#endif // BT_MATERIAL_H
+
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btMinkowskiSumShape.h b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btMinkowskiSumShape.h
index d6fd04021..a3f9a4723 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btMinkowskiSumShape.h
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btMinkowskiSumShape.h
@@ -13,14 +13,14 @@ subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-#ifndef MINKOWSKI_SUM_SHAPE_H
-#define MINKOWSKI_SUM_SHAPE_H
+#ifndef BT_MINKOWSKI_SUM_SHAPE_H
+#define BT_MINKOWSKI_SUM_SHAPE_H
 
 #include "btConvexInternalShape.h"
 #include "BulletCollision/BroadphaseCollision/btBroadphaseProxy.h" // for the types
 
 /// The btMinkowskiSumShape is only for advanced users. This shape represents implicit based minkowski sum of two convex implicit shapes.
-class btMinkowskiSumShape : public btConvexInternalShape
+ATTRIBUTE_ALIGNED16(class) btMinkowskiSumShape : public btConvexInternalShape
 {
 
 	btTransform	m_transA;
@@ -30,6 +30,8 @@ class btMinkowskiSumShape : public btConvexInternalShape
 
 public:
 
+BT_DECLARE_ALIGNED_ALLOCATOR();
+
 	btMinkowskiSumShape(const btConvexShape* shapeA,const btConvexShape* shapeB);
 
 	virtual btVector3	localGetSupportingVertexWithoutMargin(const btVector3& vec)const;
@@ -57,4 +59,4 @@ public:
 	}
 };
 
-#endif //MINKOWSKI_SUM_SHAPE_H
+#endif //BT_MINKOWSKI_SUM_SHAPE_H
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btMultiSphereShape.cpp b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btMultiSphereShape.cpp
index f130d30fa..5bae24250 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btMultiSphereShape.cpp
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btMultiSphereShape.cpp
@@ -18,6 +18,7 @@ subject to the following restrictions:
 #include "btMultiSphereShape.h"
 #include "BulletCollision/CollisionShapes/btCollisionMargin.h"
 #include "LinearMath/btQuaternion.h"
+#include "LinearMath/btSerializer.h"
 
 btMultiSphereShape::btMultiSphereShape (const btVector3* positions,const btScalar* radi,int numSpheres)
 :btConvexInternalAabbCachingShape ()
@@ -38,10 +39,11 @@ btMultiSphereShape::btMultiSphereShape (const btVector3* positions,const btScala
 
 }
 
- 
+#ifndef MIN
+    #define MIN( _a, _b)    ((_a) < (_b) ? (_a) : (_b))
+#endif
  btVector3	btMultiSphereShape::localGetSupportingVertexWithoutMargin(const btVector3& vec0)const
 {
-	int i;
 	btVector3 supVec(0,0,0);
 
 	btScalar maxDot(btScalar(-BT_LARGE_FLOAT));
@@ -65,18 +67,23 @@ btMultiSphereShape::btMultiSphereShape (const btVector3* positions,const btScala
 	const btScalar* rad = &m_radiArray[0];
 	int numSpheres = m_localPositionArray.size();
 
-	for (i=0;i<numSpheres;i++)
-	{
-		vtx = (*pos) +vec*m_localScaling*(*rad) - vec * getMargin();
-		pos++;
-		rad++;
-		newDot = vec.dot(vtx);
-		if (newDot > maxDot)
+    for( int k = 0; k < numSpheres; k+= 128 )
+    {
+        btVector3 temp[128];
+        int inner_count = MIN( numSpheres - k, 128 );
+        for( long i = 0; i < inner_count; i++ )
+        {
+            temp[i] = (*pos) +vec*m_localScaling*(*rad) - vec * getMargin();
+            pos++;
+            rad++;
+        }
+        long i = vec.maxDot( temp, inner_count, newDot);
+        if( newDot > maxDot )
 		{
 			maxDot = newDot;
-			supVec = vtx;
+			supVec = temp[i];
 		}
-	}
+    }
 
 	return supVec;
 
@@ -97,18 +104,25 @@ btMultiSphereShape::btMultiSphereShape (const btVector3* positions,const btScala
 		const btVector3* pos = &m_localPositionArray[0];
 		const btScalar* rad = &m_radiArray[0];
 		int numSpheres = m_localPositionArray.size();
-		for (int i=0;i<numSpheres;i++)
-		{
-			vtx = (*pos) +vec*m_localScaling*(*rad) - vec * getMargin();
-			pos++;
-			rad++;
-			newDot = vec.dot(vtx);
-			if (newDot > maxDot)
-			{
-				maxDot = newDot;
-				supportVerticesOut[j] = vtx;
-			}
-		}
+
+        for( int k = 0; k < numSpheres; k+= 128 )
+        {
+            btVector3 temp[128];
+            int inner_count = MIN( numSpheres - k, 128 );
+            for( long i = 0; i < inner_count; i++ )
+            {
+                temp[i] = (*pos) +vec*m_localScaling*(*rad) - vec * getMargin();
+                pos++;
+                rad++;
+            }
+            long i = vec.maxDot( temp, inner_count, newDot);
+            if( newDot > maxDot )
+            {
+                maxDot = newDot;
+                supportVerticesOut[j] = temp[i];
+            }
+        }
+        
 	}
 }
 
@@ -138,3 +152,29 @@ void	btMultiSphereShape::calculateLocalInertia(btScalar mass,btVector3& inertia)
 }
 
 
+///Fills dataBuffer (interpreted as a btMultiSphereShapeData) and returns the struct name; no path in this implementation returns 0.
+const char*	btMultiSphereShape::serialize(void* dataBuffer, btSerializer* serializer) const
+{
+	btMultiSphereShapeData* shapeData = (btMultiSphereShapeData*) dataBuffer;
+	btConvexInternalShape::serialize(&shapeData->m_convexInternalShapeData, serializer); //base class fills the embedded member
+
+	int numElem = m_localPositionArray.size();
+	shapeData->m_localPositionArrayPtr = numElem ? (btPositionAndRadius*)serializer->getUniquePointer((void*)&m_localPositionArray[0]):  0; //0 when there are no spheres
+	
+	shapeData->m_localPositionArraySize = numElem;
+	if (numElem)
+	{
+		btChunk* chunk = serializer->allocate(sizeof(btPositionAndRadius),numElem);
+		btPositionAndRadius* memPtr = (btPositionAndRadius*)chunk->m_oldPtr; //elements are written through this cursor
+		for (int i=0;i<numElem;i++,memPtr++)
+		{
+			m_localPositionArray[i].serializeFloat(memPtr->m_pos);
+			memPtr->m_radius = float(m_radiArray[i]); //radius is stored as float whatever btScalar is
+		}
+		serializer->finalizeChunk(chunk,"btPositionAndRadius",BT_ARRAY_CODE,(void*)&m_localPositionArray[0]); //same address that was passed to getUniquePointer above
+	}
+	
+	return "btMultiSphereShapeData";
+}
+
+
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btMultiSphereShape.h b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btMultiSphereShape.h
index f1c4cbf34..5d3b40268 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btMultiSphereShape.h
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btMultiSphereShape.h
@@ -13,23 +13,27 @@ subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-#ifndef MULTI_SPHERE_MINKOWSKI_H
-#define MULTI_SPHERE_MINKOWSKI_H
+#ifndef BT_MULTI_SPHERE_MINKOWSKI_H
+#define BT_MULTI_SPHERE_MINKOWSKI_H
 
 #include "btConvexInternalShape.h"
 #include "BulletCollision/BroadphaseCollision/btBroadphaseProxy.h" // for the types
 #include "LinearMath/btAlignedObjectArray.h"
 #include "LinearMath/btAabbUtil2.h"
 
+
+
 ///The btMultiSphereShape represents the convex hull of a collection of spheres. You can create special capsules or other smooth volumes.
 ///It is possible to animate the spheres for deformation, but call 'recalcLocalAabb' after changing any sphere position/radius
-class btMultiSphereShape : public btConvexInternalAabbCachingShape
+ATTRIBUTE_ALIGNED16(class) btMultiSphereShape : public btConvexInternalAabbCachingShape
 {
 	
 	btAlignedObjectArray<btVector3> m_localPositionArray;
 	btAlignedObjectArray<btScalar>  m_radiArray;
 	
 public:
+	BT_DECLARE_ALIGNED_ALLOCATOR();
+	
 	btMultiSphereShape (const btVector3* positions,const btScalar* radi,int numSpheres);
 
 	///CollisionShape Interface
@@ -61,8 +65,37 @@ public:
 		return "MultiSphere";
 	}
 
+	virtual	int	calculateSerializeBufferSize() const;
+
+	///fills the dataBuffer and returns the struct name (and 0 on failure)
+	virtual	const char*	serialize(void* dataBuffer, btSerializer* serializer) const;
+
 
 };
 
 
-#endif //MULTI_SPHERE_MINKOWSKI_H
+struct	btPositionAndRadius	//serialized form of one sphere; btMultiSphereShape::serialize writes one per element
+{
+	btVector3FloatData	m_pos;	//filled by serializeFloat from m_localPositionArray[i]
+	float		m_radius;	//float(m_radiArray[i])
+};
+
+struct	btMultiSphereShapeData	//layout that btMultiSphereShape::serialize writes into its dataBuffer
+{
+	btConvexInternalShapeData	m_convexInternalShapeData;	//filled by btConvexInternalShape::serialize
+
+	btPositionAndRadius	*m_localPositionArrayPtr;	//value returned by the serializer's getUniquePointer, or 0 when there are no spheres
+	int				m_localPositionArraySize;	//number of spheres (m_localPositionArray.size())
+	char	m_padding[4];	//never written by serialize; presumably keeps the struct size aligned - TODO confirm
+};
+
+
+
+SIMD_FORCE_INLINE	int	btMultiSphereShape::calculateSerializeBufferSize() const
+{
+	return sizeof(btMultiSphereShapeData); //the struct that serialize() casts its dataBuffer to
+}
+
+
+
+#endif //BT_MULTI_SPHERE_MINKOWSKI_H
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btMultimaterialTriangleMeshShape.h b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btMultimaterialTriangleMeshShape.h
index 231c58549..2b92ab7d1 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btMultimaterialTriangleMeshShape.h
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btMultimaterialTriangleMeshShape.h
@@ -15,8 +15,8 @@ subject to the following restrictions:
 
 /// This file was created by Alex Silverman
 
-#ifndef BVH_TRIANGLE_MATERIAL_MESH_SHAPE_H
-#define BVH_TRIANGLE_MATERIAL_MESH_SHAPE_H
+#ifndef BT_BVH_TRIANGLE_MATERIAL_MESH_SHAPE_H
+#define BT_BVH_TRIANGLE_MATERIAL_MESH_SHAPE_H
 
 #include "btBvhTriangleMeshShape.h"
 #include "btMaterial.h"
@@ -31,13 +31,11 @@ public:
 
 	BT_DECLARE_ALIGNED_ALLOCATOR();
 
-    btMultimaterialTriangleMeshShape(): btBvhTriangleMeshShape() {m_shapeType = MULTIMATERIAL_TRIANGLE_MESH_PROXYTYPE;}
     btMultimaterialTriangleMeshShape(btStridingMeshInterface* meshInterface, bool useQuantizedAabbCompression, bool buildBvh = true):
         btBvhTriangleMeshShape(meshInterface, useQuantizedAabbCompression, buildBvh)
         {
             m_shapeType = MULTIMATERIAL_TRIANGLE_MESH_PROXYTYPE;
 
-            btVector3 m_triangle[3];
             const unsigned char *vertexbase;
             int numverts;
             PHY_ScalarType type;
@@ -71,7 +69,6 @@ public:
         {
             m_shapeType = MULTIMATERIAL_TRIANGLE_MESH_PROXYTYPE;
 
-            btVector3 m_triangle[3];
             const unsigned char *vertexbase;
             int numverts;
             PHY_ScalarType type;
@@ -120,4 +117,4 @@ public:
 }
 ;
 
-#endif //BVH_TRIANGLE_MATERIAL_MESH_SHAPE_H
+#endif //BT_BVH_TRIANGLE_MATERIAL_MESH_SHAPE_H
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btOptimizedBvh.cpp b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btOptimizedBvh.cpp
index 981b8a265..6f36775f7 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btOptimizedBvh.cpp
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btOptimizedBvh.cpp
@@ -43,7 +43,7 @@ void btOptimizedBvh::build(btStridingMeshInterface* triangles, bool useQuantized
 
 		NodeTriangleCallback& operator=(NodeTriangleCallback& other)
 		{
-			m_triangleNodes = other.m_triangleNodes;
+			m_triangleNodes.copyFromArray(other.m_triangleNodes);
 			return *this;
 		}
 		
@@ -84,7 +84,7 @@ void btOptimizedBvh::build(btStridingMeshInterface* triangles, bool useQuantized
 
 		QuantizedNodeTriangleCallback& operator=(QuantizedNodeTriangleCallback& other)
 		{
-			m_triangleNodes = other.m_triangleNodes;
+			m_triangleNodes.copyFromArray(other.m_triangleNodes);
 			m_optimizedTree = other.m_optimizedTree;
 			return *this;
 		}
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btOptimizedBvh.h b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btOptimizedBvh.h
index 8f3a29f7e..715961f55 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btOptimizedBvh.h
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btOptimizedBvh.h
@@ -15,8 +15,8 @@ subject to the following restrictions:
 
 ///Contains contributions from Disney Studio's
 
-#ifndef OPTIMIZED_BVH_H
-#define OPTIMIZED_BVH_H
+#ifndef BT_OPTIMIZED_BVH_H
+#define BT_OPTIMIZED_BVH_H
 
 #include "BulletCollision/BroadphaseCollision/btQuantizedBvh.h"
 
@@ -47,7 +47,7 @@ public:
 	void	updateBvhNodes(btStridingMeshInterface* meshInterface,int firstNode,int endNode,int index);
 
 	/// Data buffer MUST be 16 byte aligned
-	virtual bool serialize(void *o_alignedDataBuffer, unsigned i_dataBufferSize, bool i_swapEndian)
+	virtual bool serializeInPlace(void *o_alignedDataBuffer, unsigned i_dataBufferSize, bool i_swapEndian) const
 	{
 		return btQuantizedBvh::serialize(o_alignedDataBuffer,i_dataBufferSize,i_swapEndian);
 
@@ -60,6 +60,6 @@ public:
 };
 
 
-#endif //OPTIMIZED_BVH_H
+#endif //BT_OPTIMIZED_BVH_H
 
 
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btPolyhedralConvexShape.cpp b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btPolyhedralConvexShape.cpp
index b1ecb3e43..7dec689bd 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btPolyhedralConvexShape.cpp
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btPolyhedralConvexShape.cpp
@@ -14,12 +14,309 @@ subject to the following restrictions:
 */
 
 #include "BulletCollision/CollisionShapes/btPolyhedralConvexShape.h"
+#include "btConvexPolyhedron.h"
+#include "LinearMath/btConvexHullComputer.h"
+#include <new>
+#include "LinearMath/btGeometryUtil.h"
+#include "LinearMath/btGrahamScan2dConvexHull.h"
 
-btPolyhedralConvexShape::btPolyhedralConvexShape() :btConvexInternalShape()
+
+btPolyhedralConvexShape::btPolyhedralConvexShape() :btConvexInternalShape(),
+m_polyhedron(0)
 {
 
 }
 
+btPolyhedralConvexShape::~btPolyhedralConvexShape()
+{
+	//initializePolyhedralFeatures placement-news m_polyhedron into btAlignedAlloc storage: destroy by hand, then free
+	if (!m_polyhedron)
+		return;
+	m_polyhedron->~btConvexPolyhedron();
+	btAlignedFree(m_polyhedron);
+}
+
+
+bool	btPolyhedralConvexShape::initializePolyhedralFeatures(int shiftVerticesByMargin)
+{
+
+	if (m_polyhedron)
+	{
+		m_polyhedron->~btConvexPolyhedron();
+		btAlignedFree(m_polyhedron);
+	}
+	
+	void* mem = btAlignedAlloc(sizeof(btConvexPolyhedron),16);
+	m_polyhedron = new (mem) btConvexPolyhedron;
+
+	btAlignedObjectArray<btVector3> orgVertices;
+
+	for (int i=0;i<getNumVertices();i++)
+	{
+		btVector3& newVertex = orgVertices.expand();
+		getVertex(i,newVertex);
+	}
+	
+	btConvexHullComputer conv;
+	
+	if (shiftVerticesByMargin)
+	{
+		btAlignedObjectArray<btVector3> planeEquations;
+		btGeometryUtil::getPlaneEquationsFromVertices(orgVertices,planeEquations);
+
+		btAlignedObjectArray<btVector3> shiftedPlaneEquations;
+		for (int p=0;p<planeEquations.size();p++)
+		{
+			   btVector3 plane = planeEquations[p];
+		//	   btScalar margin = getMargin();
+			   plane[3] -= getMargin();
+			   shiftedPlaneEquations.push_back(plane);
+		}
+
+		btAlignedObjectArray<btVector3> tmpVertices;
+
+		btGeometryUtil::getVerticesFromPlaneEquations(shiftedPlaneEquations,tmpVertices);
+	
+		conv.compute(&tmpVertices[0].getX(), sizeof(btVector3),tmpVertices.size(),0.f,0.f);
+	} else
+	{
+		
+		conv.compute(&orgVertices[0].getX(), sizeof(btVector3),orgVertices.size(),0.f,0.f);
+	}
+
+
+
+	btAlignedObjectArray<btVector3> faceNormals;
+	int numFaces = conv.faces.size();
+	faceNormals.resize(numFaces);
+	btConvexHullComputer* convexUtil = &conv;
+
+	
+	btAlignedObjectArray<btFace>	tmpFaces;
+	tmpFaces.resize(numFaces);
+
+	int numVertices = convexUtil->vertices.size();
+	m_polyhedron->m_vertices.resize(numVertices);
+	for (int p=0;p<numVertices;p++)
+	{
+		m_polyhedron->m_vertices[p] = convexUtil->vertices[p];
+	}
+
+
+	for (int i=0;i<numFaces;i++)
+	{
+		int face = convexUtil->faces[i];
+		//printf("face=%d\n",face);
+		const btConvexHullComputer::Edge*  firstEdge = &convexUtil->edges[face];
+		const btConvexHullComputer::Edge*  edge = firstEdge;
+
+		btVector3 edges[3];
+		int numEdges = 0;
+		//compute face normals
+
+		do
+		{
+			
+			int src = edge->getSourceVertex();
+			tmpFaces[i].m_indices.push_back(src);
+			int targ = edge->getTargetVertex();
+			btVector3 wa = convexUtil->vertices[src];
+
+			btVector3 wb = convexUtil->vertices[targ];
+			btVector3 newEdge = wb-wa;
+			newEdge.normalize();
+			if (numEdges<2)
+				edges[numEdges++] = newEdge;
+
+			edge = edge->getNextEdgeOfFace();
+		} while (edge!=firstEdge);
+
+		btScalar planeEq = 1e30f;
+
+		
+		if (numEdges==2)
+		{
+			faceNormals[i] = edges[0].cross(edges[1]);
+			faceNormals[i].normalize();
+			tmpFaces[i].m_plane[0] = faceNormals[i].getX();
+			tmpFaces[i].m_plane[1] = faceNormals[i].getY();
+			tmpFaces[i].m_plane[2] = faceNormals[i].getZ();
+			tmpFaces[i].m_plane[3] = planeEq;
+
+		}
+		else
+		{
+			btAssert(0);//degenerate?
+			faceNormals[i].setZero();
+		}
+
+		for (int v=0;v<tmpFaces[i].m_indices.size();v++)
+		{
+			btScalar eq = m_polyhedron->m_vertices[tmpFaces[i].m_indices[v]].dot(faceNormals[i]);
+			if (planeEq>eq)
+			{
+				planeEq=eq;
+			}
+		}
+		tmpFaces[i].m_plane[3] = -planeEq;
+	}
+
+	//merge coplanar faces and copy them to m_polyhedron
+
+	btScalar faceWeldThreshold= 0.999f;
+	btAlignedObjectArray<int> todoFaces;
+	for (int i=0;i<tmpFaces.size();i++)
+		todoFaces.push_back(i);
+
+	while (todoFaces.size())
+	{
+		btAlignedObjectArray<int> coplanarFaceGroup;
+		int refFace = todoFaces[todoFaces.size()-1];
+
+		coplanarFaceGroup.push_back(refFace);
+		btFace& faceA = tmpFaces[refFace];
+		todoFaces.pop_back();
+
+		btVector3 faceNormalA(faceA.m_plane[0],faceA.m_plane[1],faceA.m_plane[2]);
+		for (int j=todoFaces.size()-1;j>=0;j--)
+		{
+			int i = todoFaces[j];
+			btFace& faceB = tmpFaces[i];
+			btVector3 faceNormalB(faceB.m_plane[0],faceB.m_plane[1],faceB.m_plane[2]);
+			if (faceNormalA.dot(faceNormalB)>faceWeldThreshold)
+			{
+				coplanarFaceGroup.push_back(i);
+				todoFaces.remove(i);
+			}
+		}
+
+
+		bool did_merge = false;
+		if (coplanarFaceGroup.size()>1)
+		{
+			//do the merge: use Graham Scan 2d convex hull
+
+			btAlignedObjectArray<GrahamVector3> orgpoints;
+			btVector3 averageFaceNormal(0,0,0);
+
+			for (int i=0;i<coplanarFaceGroup.size();i++)
+			{
+//				m_polyhedron->m_faces.push_back(tmpFaces[coplanarFaceGroup[i]]);
+
+				btFace& face = tmpFaces[coplanarFaceGroup[i]];
+				btVector3 faceNormal(face.m_plane[0],face.m_plane[1],face.m_plane[2]);
+				averageFaceNormal+=faceNormal;
+				for (int f=0;f<face.m_indices.size();f++)
+				{
+					int orgIndex = face.m_indices[f];
+					btVector3 pt = m_polyhedron->m_vertices[orgIndex];
+					
+					bool found = false;
+
+					for (int p=0;p<orgpoints.size();p++)
+					{
+						//points are matched by original vertex index only (no distance tolerance); p avoids shadowing the enclosing loop's i
+						if (orgpoints[p].m_orgIndex == orgIndex)
+						{
+							found=true;
+							break;
+						}
+					}
+					if (!found)
+						orgpoints.push_back(GrahamVector3(pt,orgIndex));
+				}
+			}
+
+			
+
+			btFace combinedFace;
+			for (int i=0;i<4;i++)
+				combinedFace.m_plane[i] = tmpFaces[coplanarFaceGroup[0]].m_plane[i];
+
+			btAlignedObjectArray<GrahamVector3> hull;
+
+			averageFaceNormal.normalize();
+			GrahamScanConvexHull2D(orgpoints,hull,averageFaceNormal);
+
+			for (int i=0;i<hull.size();i++)
+			{
+				combinedFace.m_indices.push_back(hull[i].m_orgIndex);
+				for(int k = 0; k < orgpoints.size(); k++) 
+				{
+					if(orgpoints[k].m_orgIndex == hull[i].m_orgIndex) 
+					{
+						orgpoints[k].m_orgIndex = -1; // invalidate...
+						break;
+					}
+				}
+			}
+
+			// are there rejected vertices?
+			bool reject_merge = false;
+			
+
+
+			for(int i = 0; i < orgpoints.size(); i++) {
+				if(orgpoints[i].m_orgIndex == -1)
+					continue; // this is in the hull...
+				// this vertex is rejected -- is anybody else using this vertex?
+				for(int j = 0; j < tmpFaces.size(); j++) {
+					
+					btFace& face = tmpFaces[j];
+					// is this a face of the current coplanar group?
+					bool is_in_current_group = false;
+					for(int k = 0; k < coplanarFaceGroup.size(); k++) {
+						if(coplanarFaceGroup[k] == j) {
+							is_in_current_group = true;
+							break;
+						}
+					}
+					if(is_in_current_group) // ignore this face...
+						continue;
+					// does this face use this rejected vertex?
+					for(int v = 0; v < face.m_indices.size(); v++) {
+						if(face.m_indices[v] == orgpoints[i].m_orgIndex) {
+							// this rejected vertex is used in another face -- reject merge
+							reject_merge = true;
+							break;
+						}
+					}
+					if(reject_merge)
+						break;
+				}
+				if(reject_merge)
+					break;
+			}
+
+			if (!reject_merge)
+			{
+				// do this merge!
+				did_merge = true;
+				m_polyhedron->m_faces.push_back(combinedFace);
+			}
+		}
+		if(!did_merge)
+		{
+			for (int i=0;i<coplanarFaceGroup.size();i++)
+			{
+				btFace face = tmpFaces[coplanarFaceGroup[i]];
+				m_polyhedron->m_faces.push_back(face);
+			}
+
+		} 
+
+
+
+	}
+	
+	m_polyhedron->initialize();
+
+	return true;
+}
+
+#ifndef MIN
+    #define MIN(_a, _b)     ((_a) < (_b) ? (_a) : (_b))
+#endif
 
 btVector3	btPolyhedralConvexShape::localGetSupportingVertexWithoutMargin(const btVector3& vec0)const
 {
@@ -44,17 +341,19 @@ btVector3	btPolyhedralConvexShape::localGetSupportingVertexWithoutMargin(const b
 	btVector3 vtx;
 	btScalar newDot;
 
-	for (i=0;i<getNumVertices();i++)
-	{
-		getVertex(i,vtx);
-		newDot = vec.dot(vtx);
+    for( int k = 0; k < getNumVertices(); k += 128 )
+    {
+        btVector3 temp[128];
+        int inner_count = MIN(getNumVertices() - k, 128);
+        for( i = 0; i < inner_count; i++ )
+            getVertex(k + i,temp[i]); //k is the batch offset: plain i re-read vertices [0,128) for every batch
+        i = (int) vec.maxDot( temp, inner_count, newDot);
 		if (newDot > maxDot)
 		{
 			maxDot = newDot;
-			supVec = vtx;
-		}
-	}
-
+			supVec = temp[i];
+		}        
+    }
 	
 #endif //__SPU__
 	return supVec;
@@ -77,21 +376,23 @@ void	btPolyhedralConvexShape::batchedUnitVectorGetSupportingVertexWithoutMargin(
 
 	for (int j=0;j<numVectors;j++)
 	{
-	
-		const btVector3& vec = vectors[j];
-
-		for (i=0;i<getNumVertices();i++)
-		{
-			getVertex(i,vtx);
-			newDot = vec.dot(vtx);
-			if (newDot > supportVerticesOut[j][3])
-			{
-				//WARNING: don't swap next lines, the w component would get overwritten!
-				supportVerticesOut[j] = vtx;
+        const btVector3& vec = vectors[j];
+        
+        for( int k = 0; k < getNumVertices(); k += 128 )
+        {
+            btVector3 temp[128];
+            int inner_count = MIN(getNumVertices() - k, 128);
+            for( i = 0; i < inner_count; i++ )
+                getVertex(k + i,temp[i]); //k is the batch offset: plain i re-read vertices [0,128) for every batch
+            i = (int) vec.maxDot( temp, inner_count, newDot);
+            if (newDot > supportVerticesOut[j][3])
+            {
+				supportVerticesOut[j] = temp[i];
 				supportVerticesOut[j][3] = newDot;
-			}
-		}
-	}
+            }        
+        }
+    }
+
 #endif //__SPU__
 }
 
@@ -183,11 +484,14 @@ void	btPolyhedralConvexAabbCachingShape::recalcLocalAabb()
 		btVector3 vec(btScalar(0.),btScalar(0.),btScalar(0.));
 		vec[i] = btScalar(1.);
 		btVector3 tmp = localGetSupportingVertex(vec);
-		m_localAabbMax[i] = tmp[i]+m_collisionMargin;
+		m_localAabbMax[i] = tmp[i];
 		vec[i] = btScalar(-1.);
 		tmp = localGetSupportingVertex(vec);
-		m_localAabbMin[i] = tmp[i]-m_collisionMargin;
+		m_localAabbMin[i] = tmp[i];
 	}
 	#endif
 }
 
+
+
+
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btPolyhedralConvexShape.h b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btPolyhedralConvexShape.h
index 2c691b956..961d001a9 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btPolyhedralConvexShape.h
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btPolyhedralConvexShape.h
@@ -13,23 +13,41 @@ subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-#ifndef BU_SHAPE
-#define BU_SHAPE
+#ifndef BT_POLYHEDRAL_CONVEX_SHAPE_H
+#define BT_POLYHEDRAL_CONVEX_SHAPE_H
 
 #include "LinearMath/btMatrix3x3.h"
 #include "btConvexInternalShape.h"
+class btConvexPolyhedron;
 
 
 ///The btPolyhedralConvexShape is an internal interface class for polyhedral convex shapes.
-class btPolyhedralConvexShape : public btConvexInternalShape
+ATTRIBUTE_ALIGNED16(class) btPolyhedralConvexShape : public btConvexInternalShape
 {
+	
 
 protected:
 	
+	btConvexPolyhedron* m_polyhedron;
+
 public:
+	
+	BT_DECLARE_ALIGNED_ALLOCATOR();
+	
 
 	btPolyhedralConvexShape();
 
+	virtual ~btPolyhedralConvexShape();
+
+	///optional method mainly used to generate multiple contact points by clipping polyhedral features (faces/edges)
+	///experimental/work-in-progress
+	virtual bool	initializePolyhedralFeatures(int shiftVerticesByMargin=0);
+
+	const btConvexPolyhedron*	getConvexPolyhedron() const
+	{
+		return m_polyhedron;
+	}
+
 	//brute force implementations
 
 	virtual btVector3	localGetSupportingVertexWithoutMargin(const btVector3& vec)const;
@@ -95,4 +113,4 @@ public:
 
 };
 
-#endif //BU_SHAPE
+#endif //BT_POLYHEDRAL_CONVEX_SHAPE_H
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btScaledBvhTriangleMeshShape.cpp b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btScaledBvhTriangleMeshShape.cpp
index d964e1e48..6a337c786 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btScaledBvhTriangleMeshShape.cpp
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btScaledBvhTriangleMeshShape.cpp
@@ -62,10 +62,12 @@ void	btScaledBvhTriangleMeshShape::processAllTriangles(btTriangleCallback* callb
 	scaledAabbMin[0] = m_localScaling.getX() >= 0. ? aabbMin[0] * invLocalScaling[0] : aabbMax[0] * invLocalScaling[0];
 	scaledAabbMin[1] = m_localScaling.getY() >= 0. ? aabbMin[1] * invLocalScaling[1] : aabbMax[1] * invLocalScaling[1];
 	scaledAabbMin[2] = m_localScaling.getZ() >= 0. ? aabbMin[2] * invLocalScaling[2] : aabbMax[2] * invLocalScaling[2];
+	scaledAabbMin[3] = 0.f;
 	
 	scaledAabbMax[0] = m_localScaling.getX() <= 0. ? aabbMin[0] * invLocalScaling[0] : aabbMax[0] * invLocalScaling[0];
 	scaledAabbMax[1] = m_localScaling.getY() <= 0. ? aabbMin[1] * invLocalScaling[1] : aabbMax[1] * invLocalScaling[1];
 	scaledAabbMax[2] = m_localScaling.getZ() <= 0. ? aabbMin[2] * invLocalScaling[2] : aabbMax[2] * invLocalScaling[2];
+	scaledAabbMax[3] = 0.f;
 	
 	
 	m_bvhTriMeshShape->processAllTriangles(&scaledCallback,scaledAabbMin,scaledAabbMax);
@@ -96,9 +98,7 @@ void	btScaledBvhTriangleMeshShape::getAabb(const btTransform& trans,btVector3& a
 
 	btVector3 center = trans(localCenter);
 
-	btVector3 extent = btVector3(abs_b[0].dot(localHalfExtents),
-		   abs_b[1].dot(localHalfExtents),
-		  abs_b[2].dot(localHalfExtents));
+    btVector3 extent = localHalfExtents.dot3(abs_b[0], abs_b[1], abs_b[2]);
 	aabbMin = center - extent;
 	aabbMax = center + extent;
 
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btScaledBvhTriangleMeshShape.h b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btScaledBvhTriangleMeshShape.h
index d720b1b4f..39049eaf0 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btScaledBvhTriangleMeshShape.h
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btScaledBvhTriangleMeshShape.h
@@ -13,8 +13,8 @@ subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-#ifndef SCALED_BVH_TRIANGLE_MESH_SHAPE_H
-#define SCALED_BVH_TRIANGLE_MESH_SHAPE_H
+#ifndef BT_SCALED_BVH_TRIANGLE_MESH_SHAPE_H
+#define BT_SCALED_BVH_TRIANGLE_MESH_SHAPE_H
 
 #include "BulletCollision/CollisionShapes/btBvhTriangleMeshShape.h"
 
@@ -31,6 +31,8 @@ ATTRIBUTE_ALIGNED16(class) btScaledBvhTriangleMeshShape : public btConcaveShape
 
 public:
 
+	BT_DECLARE_ALIGNED_ALLOCATOR();
+
 
 	btScaledBvhTriangleMeshShape(btBvhTriangleMeshShape* childShape,const btVector3& localScaling);
 
@@ -57,6 +59,37 @@ public:
 	//debugging
 	virtual const char*	getName()const {return "SCALEDBVHTRIANGLEMESH";}
 
+	virtual	int	calculateSerializeBufferSize() const;
+
+	///fills the dataBuffer and returns the struct name (and 0 on failure)
+	virtual	const char*	serialize(void* dataBuffer, btSerializer* serializer) const;
+
 };
 
-#endif //BVH_TRIANGLE_MESH_SHAPE_H
+///do not change these serialization structures: any change requires an updated sBulletDNAstr/sBulletDNAstr64
+struct	btScaledTriangleMeshShapeData
+{
+	btTriangleMeshShapeData	m_trimeshShapeData;	//filled by the child btBvhTriangleMeshShape's serialize
+
+	btVector3FloatData	m_localScaling;	//m_localScaling written via serializeFloat
+};
+
+
+SIMD_FORCE_INLINE	int	btScaledBvhTriangleMeshShape::calculateSerializeBufferSize() const
+{
+	return sizeof(btScaledTriangleMeshShapeData); //the struct that serialize() casts its dataBuffer to
+}
+
+
+///Fills dataBuffer (interpreted as a btScaledTriangleMeshShapeData) and returns the struct name; no path in this implementation returns 0.
+SIMD_FORCE_INLINE	const char*	btScaledBvhTriangleMeshShape::serialize(void* dataBuffer, btSerializer* serializer) const
+{
+	btScaledTriangleMeshShapeData* scaledMeshData = (btScaledTriangleMeshShapeData*) dataBuffer;
+	m_bvhTriMeshShape->serialize(&scaledMeshData->m_trimeshShapeData,serializer); //child mesh fills the embedded struct; its return value is discarded
+	scaledMeshData->m_trimeshShapeData.m_collisionShapeData.m_shapeType = SCALED_TRIANGLE_MESH_SHAPE_PROXYTYPE; //set after the child call so this value is the one stored
+	m_localScaling.serializeFloat(scaledMeshData->m_localScaling);
+	return "btScaledTriangleMeshShapeData";
+}
+
+
+#endif //BT_SCALED_BVH_TRIANGLE_MESH_SHAPE_H
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btShapeHull.cpp b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btShapeHull.cpp
index 061054444..3beaf8658 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btShapeHull.cpp
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btShapeHull.cpp
@@ -21,52 +21,6 @@ subject to the following restrictions:
 
 #define NUM_UNITSPHERE_POINTS 42
 
-static btVector3 btUnitSpherePoints[NUM_UNITSPHERE_POINTS+MAX_PREFERRED_PENETRATION_DIRECTIONS*2] = 
-{
-	btVector3(btScalar(0.000000) , btScalar(-0.000000),btScalar(-1.000000)),
-	btVector3(btScalar(0.723608) , btScalar(-0.525725),btScalar(-0.447219)),
-	btVector3(btScalar(-0.276388) , btScalar(-0.850649),btScalar(-0.447219)),
-	btVector3(btScalar(-0.894426) , btScalar(-0.000000),btScalar(-0.447216)),
-	btVector3(btScalar(-0.276388) , btScalar(0.850649),btScalar(-0.447220)),
-	btVector3(btScalar(0.723608) , btScalar(0.525725),btScalar(-0.447219)),
-	btVector3(btScalar(0.276388) , btScalar(-0.850649),btScalar(0.447220)),
-	btVector3(btScalar(-0.723608) , btScalar(-0.525725),btScalar(0.447219)),
-	btVector3(btScalar(-0.723608) , btScalar(0.525725),btScalar(0.447219)),
-	btVector3(btScalar(0.276388) , btScalar(0.850649),btScalar(0.447219)),
-	btVector3(btScalar(0.894426) , btScalar(0.000000),btScalar(0.447216)),
-	btVector3(btScalar(-0.000000) , btScalar(0.000000),btScalar(1.000000)),
-	btVector3(btScalar(0.425323) , btScalar(-0.309011),btScalar(-0.850654)),
-	btVector3(btScalar(-0.162456) , btScalar(-0.499995),btScalar(-0.850654)),
-	btVector3(btScalar(0.262869) , btScalar(-0.809012),btScalar(-0.525738)),
-	btVector3(btScalar(0.425323) , btScalar(0.309011),btScalar(-0.850654)),
-	btVector3(btScalar(0.850648) , btScalar(-0.000000),btScalar(-0.525736)),
-	btVector3(btScalar(-0.525730) , btScalar(-0.000000),btScalar(-0.850652)),
-	btVector3(btScalar(-0.688190) , btScalar(-0.499997),btScalar(-0.525736)),
-	btVector3(btScalar(-0.162456) , btScalar(0.499995),btScalar(-0.850654)),
-	btVector3(btScalar(-0.688190) , btScalar(0.499997),btScalar(-0.525736)),
-	btVector3(btScalar(0.262869) , btScalar(0.809012),btScalar(-0.525738)),
-	btVector3(btScalar(0.951058) , btScalar(0.309013),btScalar(0.000000)),
-	btVector3(btScalar(0.951058) , btScalar(-0.309013),btScalar(0.000000)),
-	btVector3(btScalar(0.587786) , btScalar(-0.809017),btScalar(0.000000)),
-	btVector3(btScalar(0.000000) , btScalar(-1.000000),btScalar(0.000000)),
-	btVector3(btScalar(-0.587786) , btScalar(-0.809017),btScalar(0.000000)),
-	btVector3(btScalar(-0.951058) , btScalar(-0.309013),btScalar(-0.000000)),
-	btVector3(btScalar(-0.951058) , btScalar(0.309013),btScalar(-0.000000)),
-	btVector3(btScalar(-0.587786) , btScalar(0.809017),btScalar(-0.000000)),
-	btVector3(btScalar(-0.000000) , btScalar(1.000000),btScalar(-0.000000)),
-	btVector3(btScalar(0.587786) , btScalar(0.809017),btScalar(-0.000000)),
-	btVector3(btScalar(0.688190) , btScalar(-0.499997),btScalar(0.525736)),
-	btVector3(btScalar(-0.262869) , btScalar(-0.809012),btScalar(0.525738)),
-	btVector3(btScalar(-0.850648) , btScalar(0.000000),btScalar(0.525736)),
-	btVector3(btScalar(-0.262869) , btScalar(0.809012),btScalar(0.525738)),
-	btVector3(btScalar(0.688190) , btScalar(0.499997),btScalar(0.525736)),
-	btVector3(btScalar(0.525730) , btScalar(0.000000),btScalar(0.850652)),
-	btVector3(btScalar(0.162456) , btScalar(-0.499995),btScalar(0.850654)),
-	btVector3(btScalar(-0.425323) , btScalar(-0.309011),btScalar(0.850654)),
-	btVector3(btScalar(-0.425323) , btScalar(0.309011),btScalar(0.850654)),
-	btVector3(btScalar(0.162456) , btScalar(0.499995),btScalar(0.850654))
-};
-
 btShapeHull::btShapeHull (const btConvexShape* shape)
 {
 	m_shape = shape;
@@ -93,7 +47,7 @@ btShapeHull::buildHull (btScalar /*margin*/)
 			{
 				btVector3 norm;
 				m_shape->getPreferredPenetrationDirection(i,norm);
-				btUnitSpherePoints[numSampleDirections] = norm;
+				getUnitSpherePoints()[numSampleDirections] = norm;
 				numSampleDirections++;
 			}
 		}
@@ -103,7 +57,7 @@ btShapeHull::buildHull (btScalar /*margin*/)
 	int i;
 	for (i = 0; i < numSampleDirections; i++)
 	{
-		supportPoints[i] = m_shape->localGetSupportingVertex(btUnitSpherePoints[i]);
+		supportPoints[i] = m_shape->localGetSupportingVertex(getUnitSpherePoints()[i]);
 	}
 
 	HullDesc hd;
@@ -163,3 +117,54 @@ btShapeHull::numIndices () const
 	return static_cast<int>(m_numIndices);
 }
 
+
+btVector3* btShapeHull::getUnitSpherePoints()	//returns a pointer to the function-local static table of sample directions used by buildHull()
+{
+	static btVector3 sUnitSpherePoints[NUM_UNITSPHERE_POINTS+MAX_PREFERRED_PENETRATION_DIRECTIONS*2] = //only the first 42 (NUM_UNITSPHERE_POINTS) entries have initializers; buildHull() also writes preferred penetration directions into this same array, so it is mutable shared state
+	{
+		btVector3(btScalar(0.000000) , btScalar(-0.000000),btScalar(-1.000000)),
+		btVector3(btScalar(0.723608) , btScalar(-0.525725),btScalar(-0.447219)),
+		btVector3(btScalar(-0.276388) , btScalar(-0.850649),btScalar(-0.447219)),
+		btVector3(btScalar(-0.894426) , btScalar(-0.000000),btScalar(-0.447216)),
+		btVector3(btScalar(-0.276388) , btScalar(0.850649),btScalar(-0.447220)),
+		btVector3(btScalar(0.723608) , btScalar(0.525725),btScalar(-0.447219)),
+		btVector3(btScalar(0.276388) , btScalar(-0.850649),btScalar(0.447220)),
+		btVector3(btScalar(-0.723608) , btScalar(-0.525725),btScalar(0.447219)),
+		btVector3(btScalar(-0.723608) , btScalar(0.525725),btScalar(0.447219)),
+		btVector3(btScalar(0.276388) , btScalar(0.850649),btScalar(0.447219)),
+		btVector3(btScalar(0.894426) , btScalar(0.000000),btScalar(0.447216)),
+		btVector3(btScalar(-0.000000) , btScalar(0.000000),btScalar(1.000000)),
+		btVector3(btScalar(0.425323) , btScalar(-0.309011),btScalar(-0.850654)),
+		btVector3(btScalar(-0.162456) , btScalar(-0.499995),btScalar(-0.850654)),
+		btVector3(btScalar(0.262869) , btScalar(-0.809012),btScalar(-0.525738)),
+		btVector3(btScalar(0.425323) , btScalar(0.309011),btScalar(-0.850654)),
+		btVector3(btScalar(0.850648) , btScalar(-0.000000),btScalar(-0.525736)),
+		btVector3(btScalar(-0.525730) , btScalar(-0.000000),btScalar(-0.850652)),
+		btVector3(btScalar(-0.688190) , btScalar(-0.499997),btScalar(-0.525736)),
+		btVector3(btScalar(-0.162456) , btScalar(0.499995),btScalar(-0.850654)),
+		btVector3(btScalar(-0.688190) , btScalar(0.499997),btScalar(-0.525736)),
+		btVector3(btScalar(0.262869) , btScalar(0.809012),btScalar(-0.525738)),
+		btVector3(btScalar(0.951058) , btScalar(0.309013),btScalar(0.000000)),
+		btVector3(btScalar(0.951058) , btScalar(-0.309013),btScalar(0.000000)),
+		btVector3(btScalar(0.587786) , btScalar(-0.809017),btScalar(0.000000)),
+		btVector3(btScalar(0.000000) , btScalar(-1.000000),btScalar(0.000000)),
+		btVector3(btScalar(-0.587786) , btScalar(-0.809017),btScalar(0.000000)),
+		btVector3(btScalar(-0.951058) , btScalar(-0.309013),btScalar(-0.000000)),
+		btVector3(btScalar(-0.951058) , btScalar(0.309013),btScalar(-0.000000)),
+		btVector3(btScalar(-0.587786) , btScalar(0.809017),btScalar(-0.000000)),
+		btVector3(btScalar(-0.000000) , btScalar(1.000000),btScalar(-0.000000)),
+		btVector3(btScalar(0.587786) , btScalar(0.809017),btScalar(-0.000000)),
+		btVector3(btScalar(0.688190) , btScalar(-0.499997),btScalar(0.525736)),
+		btVector3(btScalar(-0.262869) , btScalar(-0.809012),btScalar(0.525738)),
+		btVector3(btScalar(-0.850648) , btScalar(0.000000),btScalar(0.525736)),
+		btVector3(btScalar(-0.262869) , btScalar(0.809012),btScalar(0.525738)),
+		btVector3(btScalar(0.688190) , btScalar(0.499997),btScalar(0.525736)),
+		btVector3(btScalar(0.525730) , btScalar(0.000000),btScalar(0.850652)),
+		btVector3(btScalar(0.162456) , btScalar(-0.499995),btScalar(0.850654)),
+		btVector3(btScalar(-0.425323) , btScalar(-0.309011),btScalar(0.850654)),
+		btVector3(btScalar(-0.425323) , btScalar(0.309011),btScalar(0.850654)),
+		btVector3(btScalar(0.162456) , btScalar(0.499995),btScalar(0.850654))
+	};
+	return sUnitSpherePoints;	//callers index and modify the table through this pointer
+}
+
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btShapeHull.h b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btShapeHull.h
index 708655d21..e959f198b 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btShapeHull.h
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btShapeHull.h
@@ -15,8 +15,8 @@ subject to the following restrictions:
 
 ///btShapeHull implemented by John McCutchan.
 
-#ifndef _SHAPE_HULL_H
-#define _SHAPE_HULL_H
+#ifndef BT_SHAPE_HULL_H
+#define BT_SHAPE_HULL_H
 
 #include "LinearMath/btAlignedObjectArray.h"
 #include "BulletCollision/CollisionShapes/btConvexShape.h"
@@ -25,9 +25,20 @@ subject to the following restrictions:
 ///The btShapeHull class takes a btConvexShape, builds a simplified convex hull using btConvexHull and provides triangle indices and vertices.
 ///It can be useful for to simplify a complex convex object and for visualization of a non-polyhedral convex object.
 ///It approximates the convex hull using the supporting vertex of 42 directions.
-class btShapeHull
+ATTRIBUTE_ALIGNED16(class) btShapeHull
 {
+protected:
+
+	btAlignedObjectArray<btVector3> m_vertices;
+	btAlignedObjectArray<unsigned int> m_indices;
+	unsigned int m_numIndices;
+	const btConvexShape* m_shape;
+
+	static btVector3* getUnitSpherePoints();
+
 public:
+	BT_DECLARE_ALIGNED_ALLOCATOR();
+	
 	btShapeHull (const btConvexShape* shape);
 	~btShapeHull ();
 
@@ -45,12 +56,6 @@ public:
 	{
 		return &m_indices[0];
 	}
-
-protected:
-	btAlignedObjectArray<btVector3> m_vertices;
-	btAlignedObjectArray<unsigned int> m_indices;
-	unsigned int m_numIndices;
-	const btConvexShape* m_shape;
 };
 
-#endif //_SHAPE_HULL_H
+#endif //BT_SHAPE_HULL_H
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btSphereShape.h b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btSphereShape.h
index f98372442..b192efeeb 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btSphereShape.h
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btSphereShape.h
@@ -12,8 +12,8 @@ subject to the following restrictions:
 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
 3. This notice may not be removed or altered from any source distribution.
 */
-#ifndef SPHERE_MINKOWSKI_H
-#define SPHERE_MINKOWSKI_H
+#ifndef BT_SPHERE_MINKOWSKI_H
+#define BT_SPHERE_MINKOWSKI_H
 
 #include "btConvexInternalShape.h"
 #include "BulletCollision/BroadphaseCollision/btBroadphaseProxy.h" // for the types
@@ -70,4 +70,4 @@ public:
 };
 
 
-#endif //SPHERE_MINKOWSKI_H
+#endif //BT_SPHERE_MINKOWSKI_H
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btStaticPlaneShape.h b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btStaticPlaneShape.h
index 2b5d4b338..e6e328839 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btStaticPlaneShape.h
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btStaticPlaneShape.h
@@ -13,14 +13,14 @@ subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-#ifndef STATIC_PLANE_SHAPE_H
-#define STATIC_PLANE_SHAPE_H
+#ifndef BT_STATIC_PLANE_SHAPE_H
+#define BT_STATIC_PLANE_SHAPE_H
 
 #include "btConcaveShape.h"
 
 
 ///The btStaticPlaneShape simulates an infinite non-moving (static) collision plane.
-class btStaticPlaneShape : public btConcaveShape
+ATTRIBUTE_ALIGNED16(class) btStaticPlaneShape : public btConcaveShape
 {
 protected:
 	btVector3	m_localAabbMin;
@@ -31,6 +31,8 @@ protected:
 	btVector3	m_localScaling;
 
 public:
+	BT_DECLARE_ALIGNED_ALLOCATOR();
+
 	btStaticPlaneShape(const btVector3& planeNormal,btScalar planeConstant);
 
 	virtual ~btStaticPlaneShape();
@@ -58,7 +60,46 @@ public:
 	//debugging
 	virtual const char*	getName()const {return "STATICPLANE";}
 
+	virtual	int	calculateSerializeBufferSize() const;
+
+	///fills the dataBuffer and returns the struct name (and 0 on failure)
+	virtual	const char*	serialize(void* dataBuffer, btSerializer* serializer) const;
+
 
 };
 
-#endif //STATIC_PLANE_SHAPE_H
+///do not change these serialization structures: any change requires an updated sBulletDNAstr/sBulletDNAstr64
+struct	btStaticPlaneShapeData
+{
+	btCollisionShapeData	m_collisionShapeData;	//filled by btCollisionShape::serialize()
+
+	btVector3FloatData	m_localScaling;
+	btVector3FloatData	m_planeNormal;
+	float			m_planeConstant;	//the shape's m_planeConstant, cast to float
+	char	m_pad[4];	//padding; btStaticPlaneShape::serialize() does not write it
+};
+
+
+SIMD_FORCE_INLINE	int	btStaticPlaneShape::calculateSerializeBufferSize() const	//size of the btStaticPlaneShapeData record that serialize() fills
+{
+	return sizeof(btStaticPlaneShapeData);
+}
+
+///fills the dataBuffer (treated as a btStaticPlaneShapeData) and returns the struct name (and 0 on failure)
+SIMD_FORCE_INLINE	const char*	btStaticPlaneShape::serialize(void* dataBuffer, btSerializer* serializer) const
+{
+	btStaticPlaneShapeData* planeData = (btStaticPlaneShapeData*) dataBuffer;
+	btCollisionShape::serialize(&planeData->m_collisionShapeData,serializer);	//base-class part first; its return value is ignored
+
+	m_localScaling.serializeFloat(planeData->m_localScaling);
+	m_planeNormal.serializeFloat(planeData->m_planeNormal);
+	planeData->m_planeConstant = float(m_planeConstant);	//explicit cast to float; planeData->m_pad is left untouched
+		
+	return "btStaticPlaneShapeData";
+}
+
+
+#endif //BT_STATIC_PLANE_SHAPE_H
+
+
+
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btStridingMeshInterface.cpp b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btStridingMeshInterface.cpp
index 34389f7c5..b3d449676 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btStridingMeshInterface.cpp
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btStridingMeshInterface.cpp
@@ -14,6 +14,7 @@ subject to the following restrictions:
 */
 
 #include "btStridingMeshInterface.h"
+#include "LinearMath/btSerializer.h"
 
 btStridingMeshInterface::~btStridingMeshInterface()
 {
@@ -87,6 +88,21 @@ void	btStridingMeshInterface::InternalProcessAllTriangles(btInternalTriangleInde
 					 }
 					 break;
 				 }
+			case PHY_UCHAR:
+				 {
+					 for (gfxindex=0;gfxindex<numtriangles;gfxindex++)
+					 {
+						 unsigned char* tri_indices= (unsigned char*)(indexbase+gfxindex*indexstride);
+						 graphicsbase = (float*)(vertexbase+tri_indices[0]*stride);
+						 triangle[0].setValue(graphicsbase[0]*meshScaling.getX(),graphicsbase[1]*meshScaling.getY(),graphicsbase[2]*meshScaling.getZ());
+						 graphicsbase = (float*)(vertexbase+tri_indices[1]*stride);
+						 triangle[1].setValue(graphicsbase[0]*meshScaling.getX(),graphicsbase[1]*meshScaling.getY(),	graphicsbase[2]*meshScaling.getZ());
+						 graphicsbase = (float*)(vertexbase+tri_indices[2]*stride);
+						 triangle[2].setValue(graphicsbase[0]*meshScaling.getX(),graphicsbase[1]*meshScaling.getY(),	graphicsbase[2]*meshScaling.getZ());
+						 callback->internalProcessTriangleIndex(triangle,part,gfxindex);
+					 }
+					 break;
+				 }
 			 default:
 				 btAssert((gfxindextype == PHY_INTEGER) || (gfxindextype == PHY_SHORT));
 			 }
@@ -129,6 +145,21 @@ void	btStridingMeshInterface::InternalProcessAllTriangles(btInternalTriangleInde
 						}
 						break;
 					}
+				case PHY_UCHAR:
+					{
+						for (gfxindex=0;gfxindex<numtriangles;gfxindex++)
+						{
+							unsigned char* tri_indices= (unsigned char*)(indexbase+gfxindex*indexstride);
+							graphicsbase = (double*)(vertexbase+tri_indices[0]*stride);
+							triangle[0].setValue((btScalar)graphicsbase[0]*meshScaling.getX(),(btScalar)graphicsbase[1]*meshScaling.getY(),(btScalar)graphicsbase[2]*meshScaling.getZ());
+							graphicsbase = (double*)(vertexbase+tri_indices[1]*stride);
+							triangle[1].setValue((btScalar)graphicsbase[0]*meshScaling.getX(),(btScalar)graphicsbase[1]*meshScaling.getY(),  (btScalar)graphicsbase[2]*meshScaling.getZ());
+							graphicsbase = (double*)(vertexbase+tri_indices[2]*stride);
+							triangle[2].setValue((btScalar)graphicsbase[0]*meshScaling.getX(),(btScalar)graphicsbase[1]*meshScaling.getY(),  (btScalar)graphicsbase[2]*meshScaling.getZ());
+							callback->internalProcessTriangleIndex(triangle,part,gfxindex);
+						}
+						break;
+					}
 				default:
 					btAssert((gfxindextype == PHY_INTEGER) || (gfxindextype == PHY_SHORT));
 				}
@@ -179,3 +210,172 @@ void	btStridingMeshInterface::calculateAabbBruteForce(btVector3& aabbMin,btVecto
 	aabbMin = aabbCallback.m_aabbMin;
 	aabbMax = aabbCallback.m_aabbMax;
 }
+
+
+
+///fills the dataBuffer (treated as a btStridingMeshInterfaceData) and returns the struct name (and 0 on failure); per-part index and vertex arrays are written as separate serializer chunks
+const char*	btStridingMeshInterface::serialize(void* dataBuffer, btSerializer* serializer) const
+{
+	btStridingMeshInterfaceData* trimeshData = (btStridingMeshInterfaceData*) dataBuffer;
+
+	trimeshData->m_numMeshParts = getNumSubParts();
+
+	//void* uniquePtr = 0;
+
+	trimeshData->m_meshPartsPtr = 0;
+
+	if (trimeshData->m_numMeshParts)
+	{
+		btChunk* chunk = serializer->allocate(sizeof(btMeshPartData),trimeshData->m_numMeshParts);	//one btMeshPartData per sub part
+		btMeshPartData* memPtr = (btMeshPartData*)chunk->m_oldPtr;
+		trimeshData->m_meshPartsPtr = (btMeshPartData *)serializer->getUniquePointer(memPtr);
+
+
+	//	int numtotalphysicsverts = 0;
+		int part,graphicssubparts = getNumSubParts();
+		const unsigned char * vertexbase;
+		const unsigned char * indexbase;
+		int indexstride;
+		PHY_ScalarType type;
+		PHY_ScalarType gfxindextype;
+		int stride,numverts,numtriangles;
+		int gfxindex;
+	//	btVector3 triangle[3];
+
+	//	btVector3 meshScaling = getScaling();
+
+		///if the number of parts is big, the performance might drop due to the innerloop switch on indextype
+		for (part=0;part<graphicssubparts ;part++,memPtr++)
+		{
+			getLockedReadOnlyVertexIndexBase(&vertexbase,numverts,type,stride,&indexbase,indexstride,numtriangles,gfxindextype,part);
+			memPtr->m_numTriangles = numtriangles;//indices = 3*numtriangles
+			memPtr->m_numVertices = numverts;
+			memPtr->m_indices16 = 0;	//never set to anything but 0 in this function
+			memPtr->m_indices32 = 0;
+			memPtr->m_3indices16 = 0;
+			memPtr->m_3indices8 = 0;
+			memPtr->m_vertices3f = 0;
+			memPtr->m_vertices3d = 0;
+
+
+			switch (gfxindextype)
+			{
+			case PHY_INTEGER:
+				{
+					int numindices = numtriangles*3;
+				
+					if (numindices)
+					{
+						btChunk* chunk = serializer->allocate(sizeof(btIntIndexData),numindices);	//local chunk; shadows the outer mesh-part chunk inside this scope
+						btIntIndexData* tmpIndices = (btIntIndexData*)chunk->m_oldPtr;
+						memPtr->m_indices32 = (btIntIndexData*)serializer->getUniquePointer(tmpIndices);
+						for (gfxindex=0;gfxindex<numtriangles;gfxindex++)
+						{
+							unsigned int* tri_indices= (unsigned int*)(indexbase+gfxindex*indexstride);
+							tmpIndices[gfxindex*3].m_value = tri_indices[0];
+							tmpIndices[gfxindex*3+1].m_value = tri_indices[1];
+							tmpIndices[gfxindex*3+2].m_value = tri_indices[2];
+						}
+						serializer->finalizeChunk(chunk,"btIntIndexData",BT_ARRAY_CODE,(void*)chunk->m_oldPtr);
+					}
+					break;
+				}
+			case PHY_SHORT:
+				{
+					if (numtriangles)
+					{
+						btChunk* chunk = serializer->allocate(sizeof(btShortIntIndexTripletData),numtriangles);	//one triplet record per triangle
+						btShortIntIndexTripletData* tmpIndices = (btShortIntIndexTripletData*)chunk->m_oldPtr;
+						memPtr->m_3indices16 = (btShortIntIndexTripletData*) serializer->getUniquePointer(tmpIndices);
+						for (gfxindex=0;gfxindex<numtriangles;gfxindex++)
+						{
+							unsigned short int* tri_indices= (unsigned short int*)(indexbase+gfxindex*indexstride);
+							tmpIndices[gfxindex].m_values[0] = tri_indices[0];
+							tmpIndices[gfxindex].m_values[1] = tri_indices[1];
+							tmpIndices[gfxindex].m_values[2] = tri_indices[2];	//m_pad of the record is not written
+						}
+						serializer->finalizeChunk(chunk,"btShortIntIndexTripletData",BT_ARRAY_CODE,(void*)chunk->m_oldPtr);
+					}
+					break;
+				}
+				case PHY_UCHAR:
+				{
+					if (numtriangles)
+					{
+						btChunk* chunk = serializer->allocate(sizeof(btCharIndexTripletData),numtriangles);	//one triplet record per triangle
+						btCharIndexTripletData* tmpIndices = (btCharIndexTripletData*)chunk->m_oldPtr;
+						memPtr->m_3indices8 = (btCharIndexTripletData*) serializer->getUniquePointer(tmpIndices);
+						for (gfxindex=0;gfxindex<numtriangles;gfxindex++)
+						{
+							unsigned char* tri_indices= (unsigned char*)(indexbase+gfxindex*indexstride);
+							tmpIndices[gfxindex].m_values[0] = tri_indices[0];
+							tmpIndices[gfxindex].m_values[1] = tri_indices[1];
+							tmpIndices[gfxindex].m_values[2] = tri_indices[2];	//m_pad of the record is not written
+						}
+						serializer->finalizeChunk(chunk,"btCharIndexTripletData",BT_ARRAY_CODE,(void*)chunk->m_oldPtr);
+					}
+					break;
+				}
+			default:
+				{
+					btAssert(0);
+					//unknown index type: no index array is written for this part
+				}
+			}
+
+			switch (type)
+			{
+			case PHY_FLOAT:
+			 {
+				 float* graphicsbase;
+
+				 if (numverts)
+				 {
+					 btChunk* chunk = serializer->allocate(sizeof(btVector3FloatData),numverts);
+					 btVector3FloatData* tmpVertices = (btVector3FloatData*) chunk->m_oldPtr;
+					 memPtr->m_vertices3f = (btVector3FloatData *)serializer->getUniquePointer(tmpVertices);
+					 for (int i=0;i<numverts;i++)
+					 {
+						 graphicsbase = (float*)(vertexbase+i*stride);
+						 tmpVertices[i].m_floats[0] = graphicsbase[0];
+						 tmpVertices[i].m_floats[1] = graphicsbase[1];
+						 tmpVertices[i].m_floats[2] = graphicsbase[2];
+					 }
+					 serializer->finalizeChunk(chunk,"btVector3FloatData",BT_ARRAY_CODE,(void*)chunk->m_oldPtr);
+				 }
+				 break;
+				}
+
+			case PHY_DOUBLE:
+				{
+					if (numverts)
+					{
+						btChunk* chunk = serializer->allocate(sizeof(btVector3DoubleData),numverts);
+						btVector3DoubleData* tmpVertices = (btVector3DoubleData*) chunk->m_oldPtr;
+						memPtr->m_vertices3d = (btVector3DoubleData *) serializer->getUniquePointer(tmpVertices);
+						for (int i=0;i<numverts;i++)
+					 {
+						 double* graphicsbase = (double*)(vertexbase+i*stride);//read as double and copied into btVector3DoubleData; NOTE(review): the earlier "convert to float" remark looks stale - confirm
+						 tmpVertices[i].m_floats[0] = graphicsbase[0];
+						 tmpVertices[i].m_floats[1] = graphicsbase[1];
+						 tmpVertices[i].m_floats[2] = graphicsbase[2];
+					 }
+						serializer->finalizeChunk(chunk,"btVector3DoubleData",BT_ARRAY_CODE,(void*)chunk->m_oldPtr);
+					}
+					break;
+				}
+
+			default:
+				btAssert((type == PHY_FLOAT) || (type == PHY_DOUBLE));
+			}
+
+			unLockReadOnlyVertexBase(part);
+		}
+
+		serializer->finalizeChunk(chunk,"btMeshPartData",BT_ARRAY_CODE,chunk->m_oldPtr);	//this is the outer chunk holding the btMeshPartData array
+	}
+
+
+	m_scaling.serializeFloat(trimeshData->m_scaling);
+	return "btStridingMeshInterfaceData";
+}
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btStridingMeshInterface.h b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btStridingMeshInterface.h
index 09641cf53..9fbe13976 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btStridingMeshInterface.h
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btStridingMeshInterface.h
@@ -13,8 +13,8 @@ subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-#ifndef STRIDING_MESHINTERFACE_H
-#define STRIDING_MESHINTERFACE_H
+#ifndef BT_STRIDING_MESHINTERFACE_H
+#define BT_STRIDING_MESHINTERFACE_H
 
 #include "LinearMath/btVector3.h"
 #include "btTriangleCallback.h"
@@ -22,16 +22,20 @@ subject to the following restrictions:
 
 
 
+
+
 ///	The btStridingMeshInterface is the interface class for high performance generic access to triangle meshes, used in combination with btBvhTriangleMeshShape and some other collision shapes.
 /// Using index striding of 3*sizeof(integer) it can use triangle arrays, using index striding of 1*sizeof(integer) it can handle triangle strips.
 /// It allows for sharing graphics and collision meshes. Also it provides locking/unlocking of graphics meshes that are in gpu memory.
-class  btStridingMeshInterface
+ATTRIBUTE_ALIGNED16(class ) btStridingMeshInterface
 {
 	protected:
 	
 		btVector3 m_scaling;
 
 	public:
+		BT_DECLARE_ALIGNED_ALLOCATOR();
+		
 		btStridingMeshInterface() :m_scaling(btScalar(1.),btScalar(1.),btScalar(1.))
 		{
 
@@ -89,8 +93,72 @@ class  btStridingMeshInterface
 			m_scaling = scaling;
 		}
 
-	
+		virtual	int	calculateSerializeBufferSize() const;
+
+		///fills the dataBuffer and returns the struct name (and 0 on failure)
+		virtual	const char*	serialize(void* dataBuffer, btSerializer* serializer) const;
+
 
 };
 
-#endif //STRIDING_MESHINTERFACE_H
+struct	btIntIndexData	//one int index; btStridingMeshInterface::serialize() writes three of these per triangle
+{
+	int	m_value;
+};
+
+struct	btShortIntIndexData	//element type of btMeshPartData::m_indices16 (kept for backwards compatibility)
+{
+	short m_value;
+	char m_pad[2];	//padding
+};
+
+struct	btShortIntIndexTripletData	//the three short indices of one triangle (PHY_SHORT meshes)
+{
+	short	m_values[3];
+	char	m_pad[2];	//padding; not written by btStridingMeshInterface::serialize()
+};
+
+struct	btCharIndexTripletData	//the three unsigned char indices of one triangle (PHY_UCHAR meshes)
+{
+	unsigned char m_values[3];
+	char	m_pad;	//padding; not written by btStridingMeshInterface::serialize()
+};
+
+
+///do not change these serialization structures: any change requires an updated sBulletDNAstr/sBulletDNAstr64
+struct	btMeshPartData
+{
+	btVector3FloatData			*m_vertices3f;	//set for PHY_FLOAT vertices, otherwise 0
+	btVector3DoubleData			*m_vertices3d;	//set for PHY_DOUBLE vertices, otherwise 0
+
+	btIntIndexData				*m_indices32;	//set for PHY_INTEGER indices: 3*m_numTriangles entries
+	btShortIntIndexTripletData	*m_3indices16;	//set for PHY_SHORT indices: m_numTriangles triplets
+	btCharIndexTripletData		*m_3indices8;	//set for PHY_UCHAR indices: m_numTriangles triplets
+
+	btShortIntIndexData			*m_indices16;//backwards compatibility
+
+	int                     m_numTriangles;//triangle count: m_indices32 holds 3*m_numTriangles entries, m_3indices16/m_3indices8 hold m_numTriangles triplets
+	int                     m_numVertices;
+};
+
+
+///do not change these serialization structures: any change requires an updated sBulletDNAstr/sBulletDNAstr64
+struct	btStridingMeshInterfaceData
+{
+	btMeshPartData	*m_meshPartsPtr;	//array of m_numMeshParts entries; 0 when there are no parts
+	btVector3FloatData	m_scaling;
+	int	m_numMeshParts;	//value of getNumSubParts() at serialization time
+	char m_padding[4];	//padding; not written by btStridingMeshInterface::serialize()
+};
+
+
+
+
+SIMD_FORCE_INLINE	int	btStridingMeshInterface::calculateSerializeBufferSize() const	//size of the fixed btStridingMeshInterfaceData record only; serialize() allocates the part/index/vertex arrays as separate chunks
+{
+	return sizeof(btStridingMeshInterfaceData);
+}
+
+
+
+#endif //BT_STRIDING_MESHINTERFACE_H
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btTetrahedronShape.h b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btTetrahedronShape.h
index 72e9f2328..b69209835 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btTetrahedronShape.h
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btTetrahedronShape.h
@@ -13,8 +13,8 @@ subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-#ifndef BU_SIMPLEX_1TO4_SHAPE
-#define BU_SIMPLEX_1TO4_SHAPE
+#ifndef BT_SIMPLEX_1TO4_SHAPE
+#define BT_SIMPLEX_1TO4_SHAPE
 
 
 #include "btPolyhedralConvexShape.h"
@@ -22,7 +22,7 @@ subject to the following restrictions:
 
 
 ///The btBU_Simplex1to4 implements tetrahedron, triangle, line, vertex collision shapes. In most cases it is better to use btConvexHullShape instead.
-class btBU_Simplex1to4 : public btPolyhedralConvexAabbCachingShape
+ATTRIBUTE_ALIGNED16(class) btBU_Simplex1to4 : public btPolyhedralConvexAabbCachingShape
 {
 protected:
 
@@ -30,6 +30,8 @@ protected:
 	btVector3	m_vertices[4];
 
 public:
+	BT_DECLARE_ALIGNED_ALLOCATOR();
+	
 	btBU_Simplex1to4();
 
 	btBU_Simplex1to4(const btVector3& pt0);
@@ -71,4 +73,4 @@ public:
 
 };
 
-#endif //BU_SIMPLEX_1TO4_SHAPE
+#endif //BT_SIMPLEX_1TO4_SHAPE
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btTriangleCallback.h b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btTriangleCallback.h
index 0499702b0..461c57f87 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btTriangleCallback.h
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btTriangleCallback.h
@@ -13,8 +13,8 @@ subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-#ifndef TRIANGLE_CALLBACK_H
-#define TRIANGLE_CALLBACK_H
+#ifndef BT_TRIANGLE_CALLBACK_H
+#define BT_TRIANGLE_CALLBACK_H
 
 #include "LinearMath/btVector3.h"
 
@@ -39,4 +39,4 @@ public:
 
 
 
-#endif //TRIANGLE_CALLBACK_H
+#endif //BT_TRIANGLE_CALLBACK_H
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btTriangleIndexVertexArray.h b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btTriangleIndexVertexArray.h
index c64ea6e70..9e1544e87 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btTriangleIndexVertexArray.h
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btTriangleIndexVertexArray.h
@@ -29,9 +29,11 @@ ATTRIBUTE_ALIGNED16( struct)	btIndexedMesh
 
    int                     m_numTriangles;
    const unsigned char *   m_triangleIndexBase;
+   // Size in byte of the indices for one triangle (3*sizeof(index_type) if the indices are tightly packed)
    int                     m_triangleIndexStride;
    int                     m_numVertices;
    const unsigned char *   m_vertexBase;
+   // Size of a vertex, in bytes
    int                     m_vertexStride;
 
    // The index type is set when adding an indexed mesh to the
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btTriangleInfoMap.h b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btTriangleInfoMap.h
new file mode 100644
index 000000000..17deef89d
--- /dev/null
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btTriangleInfoMap.h
@@ -0,0 +1,241 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2010 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef _BT_TRIANGLE_INFO_MAP_H
+#define _BT_TRIANGLE_INFO_MAP_H
+
+
+#include "LinearMath/btHashMap.h"
+#include "LinearMath/btSerializer.h"
+
+
+///for btTriangleInfo m_flags
+#define TRI_INFO_V0V1_CONVEX 1
+#define TRI_INFO_V1V2_CONVEX 2
+#define TRI_INFO_V2V0_CONVEX 4
+
+#define TRI_INFO_V0V1_SWAP_NORMALB 8
+#define TRI_INFO_V1V2_SWAP_NORMALB 16
+#define TRI_INFO_V2V0_SWAP_NORMALB 32
+
+
+///The btTriangleInfo structure stores information to adjust collision normals to avoid collisions against internal edges
+///it can be generated using btGenerateInternalEdgeInfo
+struct	btTriangleInfo
+{
+	btTriangleInfo()
+	{
+		m_edgeV0V1Angle = SIMD_2_PI;
+		m_edgeV1V2Angle = SIMD_2_PI;
+		m_edgeV2V0Angle = SIMD_2_PI;
+		m_flags=0;
+	}
+	///holds the TRI_INFO_* values defined above (presumably OR-ed together as bit flags); 0 after construction
+	int			m_flags;
+	///one angle per triangle edge (V0-V1, V1-V2, V2-V0); the constructor sets each to SIMD_2_PI
+	btScalar	m_edgeV0V1Angle;
+	btScalar	m_edgeV1V2Angle;
+	btScalar	m_edgeV2V0Angle;
+
+};
+
+typedef btHashMap<btHashInt,btTriangleInfo> btInternalTriangleInfoMap;
+
+
+///The btTriangleInfoMap stores edge angle information for some triangles. You can compute this information yourself or using btGenerateInternalEdgeInfo.
+struct	btTriangleInfoMap : public btInternalTriangleInfoMap
+{
+	btScalar	m_convexEpsilon;///used to determine if an edge or contact normal is convex, using the dot product
+	btScalar	m_planarEpsilon; ///used to determine if a triangle edge is planar with zero angle
+	btScalar	m_equalVertexThreshold; ///used to compute connectivity: if the distance between two vertices is smaller than m_equalVertexThreshold, they are considered to be 'shared'
+	btScalar	m_edgeDistanceThreshold; ///used to determine edge contacts: if the closest distance between a contact point and an edge is smaller than this distance threshold it is considered to "hit the edge"
+	btScalar	m_maxEdgeAngleThreshold; ///ignore edges that connect triangles at an angle larger than this m_maxEdgeAngleThreshold
+	btScalar	m_zeroAreaThreshold; ///used to determine if a triangle is degenerate (length squared of cross product of 2 triangle edges < threshold)
+	///NOTE: m_maxEdgeAngleThreshold has no counterpart in btTriangleInfoMapData, so serialize()/deSerialize() do not persist it
+	
+	btTriangleInfoMap()
+	{
+		m_convexEpsilon = 0.00f;
+		m_planarEpsilon = 0.0001f;
+		m_equalVertexThreshold = btScalar(0.0001)*btScalar(0.0001);
+		m_edgeDistanceThreshold = btScalar(0.1);
+		m_zeroAreaThreshold = btScalar(0.0001)*btScalar(0.0001);
+		m_maxEdgeAngleThreshold = SIMD_2_PI;
+	}
+	virtual ~btTriangleInfoMap() {}
+	///returns the size in bytes of btTriangleInfoMapData, the struct that serialize() fills
+	virtual	int	calculateSerializeBufferSize() const;
+
+	///fills the dataBuffer and returns the struct name (and 0 on failure)
+	virtual	const char*	serialize(void* dataBuffer, btSerializer* serializer) const;
+	///copies the thresholds and the hash table, next, value and key arrays from data into this map
+	void	deSerialize(struct btTriangleInfoMapData& data);
+
+};
+
+///serialized form of btTriangleInfo; those fields have to be float and not btScalar for the serialization to work properly
+struct	btTriangleInfoData
+{
+	int			m_flags;
+	float	m_edgeV0V1Angle;
+	float	m_edgeV1V2Angle;
+	float	m_edgeV2V0Angle;
+};
+///serialized form of btTriangleInfoMap: four array pointers, five thresholds and the four array sizes
+struct	btTriangleInfoMapData
+{
+	int					*m_hashTablePtr;	///m_hashTableSize ints taken from m_hashTable; 0 when that array is empty
+	int					*m_nextPtr;	///m_nextSize ints taken from m_next; 0 when that array is empty
+	btTriangleInfoData	*m_valueArrayPtr;	///m_numValues entries converted from m_valueArray; 0 when that array is empty
+	int					*m_keyArrayPtr;	///m_numKeys ints, each the getUid1() of an m_keyArray entry; 0 when that array is empty
+
+	float	m_convexEpsilon;
+	float	m_planarEpsilon;
+	float	m_equalVertexThreshold; 
+	float	m_edgeDistanceThreshold;
+	float	m_zeroAreaThreshold;
+
+	int		m_nextSize;
+	int		m_hashTableSize;
+	int		m_numValues;
+	int		m_numKeys;
+	char	m_padding[4];
+};
+
+SIMD_FORCE_INLINE	int	btTriangleInfoMap::calculateSerializeBufferSize() const
+{
+	return sizeof(btTriangleInfoMapData); //header struct only; serialize() allocates the arrays as separate chunks
+}
+
+///fills the dataBuffer (interpreted as btTriangleInfoMapData), writes each non-empty array as its own chunk, and returns the struct name
+SIMD_FORCE_INLINE	const char*	btTriangleInfoMap::serialize(void* dataBuffer, btSerializer* serializer) const
+{
+	btTriangleInfoMapData* tmapData = (btTriangleInfoMapData*) dataBuffer;
+	tmapData->m_convexEpsilon = (float)m_convexEpsilon;
+	tmapData->m_planarEpsilon = (float)m_planarEpsilon;
+	tmapData->m_equalVertexThreshold =(float) m_equalVertexThreshold;
+	tmapData->m_edgeDistanceThreshold = (float)m_edgeDistanceThreshold;
+	tmapData->m_zeroAreaThreshold = (float)m_zeroAreaThreshold;
+	//NOTE: m_maxEdgeAngleThreshold is not stored; btTriangleInfoMapData has no field for it
+	tmapData->m_hashTableSize = m_hashTable.size();
+
+	tmapData->m_hashTablePtr = tmapData->m_hashTableSize ? (int*)serializer->getUniquePointer((void*)&m_hashTable[0]) : 0;
+	if (tmapData->m_hashTablePtr)
+	{
+		//serialize an int buffer
+		int sz = sizeof(int);
+		int numElem = tmapData->m_hashTableSize;
+		btChunk* chunk = serializer->allocate(sz,numElem);
+		int* memPtr = (int*)chunk->m_oldPtr;
+		for (int i=0;i<numElem;i++,memPtr++)
+		{
+			*memPtr = m_hashTable[i];
+		}
+		serializer->finalizeChunk(chunk,"int",BT_ARRAY_CODE,(void*)&m_hashTable[0]);
+
+	}
+	//m_next is written the same way: one int per element
+	tmapData->m_nextSize = m_next.size();
+	tmapData->m_nextPtr = tmapData->m_nextSize? (int*)serializer->getUniquePointer((void*)&m_next[0]): 0;
+	if (tmapData->m_nextPtr)
+	{
+		int sz = sizeof(int);
+		int numElem = tmapData->m_nextSize;
+		btChunk* chunk = serializer->allocate(sz,numElem);
+		int* memPtr = (int*)chunk->m_oldPtr;
+		for (int i=0;i<numElem;i++,memPtr++)
+		{
+			*memPtr = m_next[i];
+		}
+		serializer->finalizeChunk(chunk,"int",BT_ARRAY_CODE,(void*)&m_next[0]);
+	}
+	//values are converted field by field into the float-based btTriangleInfoData
+	tmapData->m_numValues = m_valueArray.size();
+	tmapData->m_valueArrayPtr = tmapData->m_numValues ? (btTriangleInfoData*)serializer->getUniquePointer((void*)&m_valueArray[0]): 0;
+	if (tmapData->m_valueArrayPtr)
+	{
+		int sz = sizeof(btTriangleInfoData);
+		int numElem = tmapData->m_numValues;
+		btChunk* chunk = serializer->allocate(sz,numElem);
+		btTriangleInfoData* memPtr = (btTriangleInfoData*)chunk->m_oldPtr;
+		for (int i=0;i<numElem;i++,memPtr++)
+		{
+			memPtr->m_edgeV0V1Angle = (float)m_valueArray[i].m_edgeV0V1Angle;
+			memPtr->m_edgeV1V2Angle = (float)m_valueArray[i].m_edgeV1V2Angle;
+			memPtr->m_edgeV2V0Angle = (float)m_valueArray[i].m_edgeV2V0Angle;
+			memPtr->m_flags = m_valueArray[i].m_flags;
+		}
+		serializer->finalizeChunk(chunk,"btTriangleInfoData",BT_ARRAY_CODE,(void*) &m_valueArray[0]);
+	}
+	//keys are written as plain ints (getUid1 of each key)
+	tmapData->m_numKeys = m_keyArray.size();
+	tmapData->m_keyArrayPtr = tmapData->m_numKeys ? (int*)serializer->getUniquePointer((void*)&m_keyArray[0]) : 0;
+	if (tmapData->m_keyArrayPtr)
+	{
+		int sz = sizeof(int);
+		int numElem = tmapData->m_numKeys; //size the chunk by the key count, not m_numValues, so the loop never indexes past m_keyArray
+		btChunk* chunk = serializer->allocate(sz,numElem);
+		int* memPtr = (int*)chunk->m_oldPtr;
+		for (int i=0;i<numElem;i++,memPtr++)
+		{
+			*memPtr = m_keyArray[i].getUid1();
+		}
+		serializer->finalizeChunk(chunk,"int",BT_ARRAY_CODE,(void*) &m_keyArray[0]);
+
+	}
+	return "btTriangleInfoMapData";
+}
+
+
+
+///restores the thresholds and the hash table, next, value and key arrays of this map from tmapData
+SIMD_FORCE_INLINE	void	btTriangleInfoMap::deSerialize(btTriangleInfoMapData& tmapData )
+{
+	//m_maxEdgeAngleThreshold is not part of btTriangleInfoMapData, so it is left unchanged here
+
+	m_convexEpsilon = tmapData.m_convexEpsilon;
+	m_planarEpsilon = tmapData.m_planarEpsilon;
+	m_equalVertexThreshold = tmapData.m_equalVertexThreshold;
+	m_edgeDistanceThreshold = tmapData.m_edgeDistanceThreshold;
+	m_zeroAreaThreshold = tmapData.m_zeroAreaThreshold;
+	m_hashTable.resize(tmapData.m_hashTableSize);
+	int i =0;
+	for (i=0;i<tmapData.m_hashTableSize;i++)
+	{
+		m_hashTable[i] = tmapData.m_hashTablePtr[i];
+	}
+	m_next.resize(tmapData.m_nextSize);
+	for (i=0;i<tmapData.m_nextSize;i++)
+	{
+		m_next[i] = tmapData.m_nextPtr[i];
+	}
+	m_valueArray.resize(tmapData.m_numValues);
+	for (i=0;i<tmapData.m_numValues;i++)
+	{
+		m_valueArray[i].m_edgeV0V1Angle = tmapData.m_valueArrayPtr[i].m_edgeV0V1Angle;
+		m_valueArray[i].m_edgeV1V2Angle = tmapData.m_valueArrayPtr[i].m_edgeV1V2Angle;
+		m_valueArray[i].m_edgeV2V0Angle = tmapData.m_valueArrayPtr[i].m_edgeV2V0Angle;
+		m_valueArray[i].m_flags = tmapData.m_valueArrayPtr[i].m_flags;
+	}
+	//keys were stored as plain ints; each btHashInt is rebuilt through setUid1
+	m_keyArray.resize(tmapData.m_numKeys,btHashInt(0));
+	for (i=0;i<tmapData.m_numKeys;i++)
+	{
+		m_keyArray[i].setUid1(tmapData.m_keyArrayPtr[i]);
+	}
+}
+
+
+#endif //_BT_TRIANGLE_INFO_MAP_H
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btTriangleMesh.cpp b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btTriangleMesh.cpp
index b29e0f71e..51a2f8a07 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btTriangleMesh.cpp
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btTriangleMesh.cpp
@@ -138,3 +138,25 @@ int btTriangleMesh::getNumTriangles() const
 	}
 	return m_16bitIndices.size() / 3;
 }
+///calls reserve(numverts) on the vertex array selected by m_use4componentVertices
+void btTriangleMesh::preallocateVertices(int numverts)
+{
+	if (m_use4componentVertices)
+	{
+		m_4componentVertices.reserve(numverts);
+	} else
+	{
+		m_3componentVertices.reserve(numverts);
+	}
+}
+///calls reserve(numindices) on the index array selected by m_use32bitIndices
+void btTriangleMesh::preallocateIndices(int numindices)
+{
+	if (m_use32bitIndices)
+	{
+		m_32bitIndices.reserve(numindices);
+	} else
+	{
+		m_16bitIndices.reserve(numindices);
+	}
+}
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btTriangleMesh.h b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btTriangleMesh.h
index d2624fe18..29d1b5cda 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btTriangleMesh.h
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btTriangleMesh.h
@@ -13,8 +13,8 @@ subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-#ifndef TRIANGLE_MESH_H
-#define TRIANGLE_MESH_H
+#ifndef BT_TRIANGLE_MESH_H
+#define BT_TRIANGLE_MESH_H
 
 #include "btTriangleIndexVertexArray.h"
 #include "LinearMath/btVector3.h"
@@ -55,8 +55,8 @@ class btTriangleMesh : public btTriangleIndexVertexArray
 		
 		int getNumTriangles() const;
 
-		virtual void	preallocateVertices(int numverts){(void) numverts;}
-		virtual void	preallocateIndices(int numindices){(void) numindices;}
+		virtual void	preallocateVertices(int numverts);
+		virtual void	preallocateIndices(int numindices);
 
 		///findOrAddVertex is an internal method, use addTriangle instead
 		int		findOrAddVertex(const btVector3& vertex, bool removeDuplicateVertices);
@@ -65,5 +65,5 @@ class btTriangleMesh : public btTriangleIndexVertexArray
 		
 };
 
-#endif //TRIANGLE_MESH_H
+#endif //BT_TRIANGLE_MESH_H
 
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btTriangleMeshShape.cpp b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btTriangleMeshShape.cpp
index 774fd4e76..0e1795140 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btTriangleMeshShape.cpp
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btTriangleMeshShape.cpp
@@ -55,13 +55,9 @@ void btTriangleMeshShape::getAabb(const btTransform& trans,btVector3& aabbMin,bt
 
 	btVector3 center = trans(localCenter);
 
-	btVector3 extent = btVector3(abs_b[0].dot(localHalfExtents),
-		   abs_b[1].dot(localHalfExtents),
-		  abs_b[2].dot(localHalfExtents));
+    btVector3 extent = localHalfExtents.dot3(abs_b[0], abs_b[1], abs_b[2]);
 	aabbMin = center - extent;
 	aabbMax = center + extent;
-
-
 }
 
 void	btTriangleMeshShape::recalcLocalAabb()
@@ -207,3 +203,5 @@ btVector3 btTriangleMeshShape::localGetSupportingVertex(const btVector3& vec) co
 
 	return supportVertex;
 }
+
+
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btTriangleMeshShape.h b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btTriangleMeshShape.h
index 4bb14841f..453e58005 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btTriangleMeshShape.h
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btTriangleMeshShape.h
@@ -13,15 +13,15 @@ subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-#ifndef TRIANGLE_MESH_SHAPE_H
-#define TRIANGLE_MESH_SHAPE_H
+#ifndef BT_TRIANGLE_MESH_SHAPE_H
+#define BT_TRIANGLE_MESH_SHAPE_H
 
 #include "btConcaveShape.h"
 #include "btStridingMeshInterface.h"
 
 
 ///The btTriangleMeshShape is an internal concave triangle mesh interface. Don't use this class directly, use btBvhTriangleMeshShape instead.
-class btTriangleMeshShape : public btConcaveShape
+ATTRIBUTE_ALIGNED16(class) btTriangleMeshShape : public btConcaveShape
 {
 protected:
 	btVector3	m_localAabbMin;
@@ -33,6 +33,7 @@ protected:
 	btTriangleMeshShape(btStridingMeshInterface* meshInterface);
 
 public:
+	BT_DECLARE_ALIGNED_ALLOCATOR();
 
 	virtual ~btTriangleMeshShape();
 
@@ -79,7 +80,11 @@ public:
 	//debugging
 	virtual const char*	getName()const {return "TRIANGLEMESH";}
 
+	
 
 };
 
-#endif //TRIANGLE_MESH_SHAPE_H
+
+
+
+#endif //BT_TRIANGLE_MESH_SHAPE_H
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btTriangleShape.h b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btTriangleShape.h
index 847147cf6..a8a80f82f 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btTriangleShape.h
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btTriangleShape.h
@@ -13,8 +13,8 @@ subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-#ifndef OBB_TRIANGLE_MINKOWSKI_H
-#define OBB_TRIANGLE_MINKOWSKI_H
+#ifndef BT_OBB_TRIANGLE_MINKOWSKI_H
+#define BT_OBB_TRIANGLE_MINKOWSKI_H
 
 #include "btConvexShape.h"
 #include "btBoxShape.h"
@@ -25,6 +25,8 @@ ATTRIBUTE_ALIGNED16(class) btTriangleShape : public btPolyhedralConvexShape
 
 public:
 
+BT_DECLARE_ALIGNED_ALLOCATOR();
+
 	btVector3	m_vertices1[3];
 
 	virtual int getNumVertices() const
@@ -66,7 +68,7 @@ public:
 
 	btVector3 localGetSupportingVertexWithoutMargin(const btVector3& dir)const 
 	{
-		btVector3 dots(dir.dot(m_vertices1[0]), dir.dot(m_vertices1[1]), dir.dot(m_vertices1[2]));
+        btVector3 dots = dir.dot3(m_vertices1[0], m_vertices1[1], m_vertices1[2]);
 	  	return m_vertices1[dots.maxAxis()];
 
 	}
@@ -76,7 +78,7 @@ public:
 		for (int i=0;i<numVectors;i++)
 		{
 			const btVector3& dir = vectors[i];
-			btVector3 dots(dir.dot(m_vertices1[0]), dir.dot(m_vertices1[1]), dir.dot(m_vertices1[2]));
+            btVector3 dots = dir.dot3(m_vertices1[0], m_vertices1[1], m_vertices1[2]);
   			supportVerticesOut[i] = m_vertices1[dots.maxAxis()];
 		}
 
@@ -178,5 +180,5 @@ public:
 
 };
 
-#endif //OBB_TRIANGLE_MINKOWSKI_H
+#endif //BT_OBB_TRIANGLE_MINKOWSKI_H
 
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btUniformScalingShape.cpp b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btUniformScalingShape.cpp
index 8e86f6bf2..b148bbd99 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btUniformScalingShape.cpp
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btUniformScalingShape.cpp
@@ -64,25 +64,70 @@ void	btUniformScalingShape::calculateLocalInertia(btScalar mass,btVector3& inert
 
 
 	///getAabb's default implementation is brute force, expected derived classes to implement a fast dedicated version
-void btUniformScalingShape::getAabb(const btTransform& t,btVector3& aabbMin,btVector3& aabbMax) const
+void btUniformScalingShape::getAabb(const btTransform& trans,btVector3& aabbMin,btVector3& aabbMax) const
 {
-	m_childConvexShape->getAabb(t,aabbMin,aabbMax);
-	btVector3 aabbCenter = (aabbMax+aabbMin)*btScalar(0.5);
-	btVector3 scaledAabbHalfExtends = (aabbMax-aabbMin)*btScalar(0.5)*m_uniformScalingFactor;
-
-	aabbMin = aabbCenter - scaledAabbHalfExtends;
-	aabbMax = aabbCenter + scaledAabbHalfExtends;
+	getAabbSlow(trans,aabbMin,aabbMax);
 
 }
 
 void btUniformScalingShape::getAabbSlow(const btTransform& t,btVector3& aabbMin,btVector3& aabbMax) const
 {
-	m_childConvexShape->getAabbSlow(t,aabbMin,aabbMax);
-	btVector3 aabbCenter = (aabbMax+aabbMin)*btScalar(0.5);
-	btVector3 scaledAabbHalfExtends = (aabbMax-aabbMin)*btScalar(0.5)*m_uniformScalingFactor;
+#if 1
+	btVector3 _directions[] =
+	{
+		btVector3( 1.,  0.,  0.),
+		btVector3( 0.,  1.,  0.),
+		btVector3( 0.,  0.,  1.),
+		btVector3( -1., 0.,  0.),
+		btVector3( 0., -1.,  0.),
+		btVector3( 0.,  0., -1.)
+	};
+	
+	btVector3 _supporting[] =
+	{
+		btVector3( 0., 0., 0.),
+		btVector3( 0., 0., 0.),
+		btVector3( 0., 0., 0.),
+		btVector3( 0., 0., 0.),
+		btVector3( 0., 0., 0.),
+		btVector3( 0., 0., 0.)
+	};
 
-	aabbMin = aabbCenter - scaledAabbHalfExtends;
-	aabbMax = aabbCenter + scaledAabbHalfExtends;
+	for (int i=0;i<6;i++)
+	{
+		_directions[i] = _directions[i]*t.getBasis();
+	}
+	
+	batchedUnitVectorGetSupportingVertexWithoutMargin(_directions, _supporting, 6);
+	
+	btVector3 aabbMin1(0,0,0),aabbMax1(0,0,0);
+
+	for ( int i = 0; i < 3; ++i )
+	{
+		aabbMax1[i] = t(_supporting[i])[i];
+		aabbMin1[i] = t(_supporting[i + 3])[i];
+	}
+	btVector3 marginVec(getMargin(),getMargin(),getMargin());
+	aabbMin = aabbMin1-marginVec;
+	aabbMax = aabbMax1+marginVec;
+	
+#else
+
+	btScalar margin = getMargin();
+	for (int i=0;i<3;i++)
+	{
+		btVector3 vec(btScalar(0.),btScalar(0.),btScalar(0.));
+		vec[i] = btScalar(1.);
+		btVector3 sv = localGetSupportingVertex(vec*t.getBasis());
+		btVector3 tmp = t(sv);
+		aabbMax[i] = tmp[i]+margin;
+		vec[i] = btScalar(-1.);
+		sv = localGetSupportingVertex(vec*t.getBasis());
+		tmp = t(sv);
+		aabbMin[i] = tmp[i]-margin;
+	}
+
+#endif
 }
 
 void	btUniformScalingShape::setLocalScaling(const btVector3& scaling) 
diff --git a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btUniformScalingShape.h b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btUniformScalingShape.h
index cbf7e6fd3..a10f58d24 100644
--- a/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btUniformScalingShape.h
+++ b/Engine/lib/bullet/src/BulletCollision/CollisionShapes/btUniformScalingShape.h
@@ -21,7 +21,7 @@ subject to the following restrictions:
 
 ///The btUniformScalingShape allows to re-use uniform scaled instances of btConvexShape in a memory efficient way.
 ///Istead of using btUniformScalingShape, it is better to use the non-uniform setLocalScaling method on convex shapes that implement it.
-class btUniformScalingShape : public btConvexShape
+ATTRIBUTE_ALIGNED16(class) btUniformScalingShape : public btConvexShape
 {
 	btConvexShape*	m_childConvexShape;
 
@@ -29,6 +29,8 @@ class btUniformScalingShape : public btConvexShape
 	
 	public:
 	
+	BT_DECLARE_ALIGNED_ALLOCATOR();
+	
 	btUniformScalingShape(	btConvexShape* convexChildShape, btScalar uniformScalingFactor);
 	
 	virtual ~btUniformScalingShape();
diff --git a/Engine/lib/bullet/src/BulletCollision/Gimpact/btBoxCollision.h b/Engine/lib/bullet/src/BulletCollision/Gimpact/btBoxCollision.h
index 827a3c895..0a0357e5a 100644
--- a/Engine/lib/bullet/src/BulletCollision/Gimpact/btBoxCollision.h
+++ b/Engine/lib/bullet/src/BulletCollision/Gimpact/btBoxCollision.h
@@ -2,7 +2,7 @@
 #define BT_BOX_COLLISION_H_INCLUDED
 
 /*! \file gim_box_collision.h
-\author Francisco Len N�jera
+\author Francisco Leon Najera
 */
 /*
 This source file is part of GIMPACT Library.
@@ -218,9 +218,7 @@ public:
 
 	SIMD_FORCE_INLINE btVector3 transform(const btVector3 & point) const
 	{
-		return btVector3(m_R1to0[0].dot(point) + m_T1to0.x(),
-			m_R1to0[1].dot(point) + m_T1to0.y(),
-			m_R1to0[2].dot(point) + m_T1to0.z());
+        return point.dot3( m_R1to0[0], m_R1to0[1], m_R1to0[2] ) + m_T1to0;
 	}
 };
 
@@ -364,9 +362,9 @@ public:
 		// Compute new center
 		center = trans(center);
 
-		btVector3 textends(extends.dot(trans.getBasis().getRow(0).absolute()),
- 				 extends.dot(trans.getBasis().getRow(1).absolute()),
-				 extends.dot(trans.getBasis().getRow(2).absolute()));
+        btVector3 textends = extends.dot3(trans.getBasis().getRow(0).absolute(), 
+                                          trans.getBasis().getRow(1).absolute(), 
+                                          trans.getBasis().getRow(2).absolute());
 
 		m_min = center - textends;
 		m_max = center + textends;
@@ -381,10 +379,10 @@ public:
 		// Compute new center
 		center = trans.transform(center);
 
-		btVector3 textends(extends.dot(trans.m_R1to0.getRow(0).absolute()),
- 				 extends.dot(trans.m_R1to0.getRow(1).absolute()),
-				 extends.dot(trans.m_R1to0.getRow(2).absolute()));
-
+        btVector3 textends = extends.dot3(trans.m_R1to0.getRow(0).absolute(), 
+                                          trans.m_R1to0.getRow(1).absolute(), 
+                                          trans.m_R1to0.getRow(2).absolute());
+        
 		m_min = center - textends;
 		m_max = center + textends;
 	}
diff --git a/Engine/lib/bullet/src/BulletCollision/Gimpact/btClipPolygon.h b/Engine/lib/bullet/src/BulletCollision/Gimpact/btClipPolygon.h
index 5de391a75..de0a5231b 100644
--- a/Engine/lib/bullet/src/BulletCollision/Gimpact/btClipPolygon.h
+++ b/Engine/lib/bullet/src/BulletCollision/Gimpact/btClipPolygon.h
@@ -2,7 +2,7 @@
 #define BT_CLIP_POLYGON_H_INCLUDED
 
 /*! \file btClipPolygon.h
-\author Francisco Len N�jera
+\author Francisco Leon Najera
 */
 /*
 This source file is part of GIMPACT Library.
diff --git a/Engine/lib/bullet/src/BulletCollision/Gimpact/btContactProcessing.cpp b/Engine/lib/bullet/src/BulletCollision/Gimpact/btContactProcessing.cpp
index c3b697bdd..eed31d839 100644
--- a/Engine/lib/bullet/src/BulletCollision/Gimpact/btContactProcessing.cpp
+++ b/Engine/lib/bullet/src/BulletCollision/Gimpact/btContactProcessing.cpp
@@ -58,7 +58,7 @@ class CONTACT_KEY_TOKEN_COMP
 {
 	public:
 
-		bool operator() ( const CONTACT_KEY_TOKEN& a, const CONTACT_KEY_TOKEN& b )
+		bool operator() ( const CONTACT_KEY_TOKEN& a, const CONTACT_KEY_TOKEN& b ) const
 		{
 			return ( a < b );
 		}
diff --git a/Engine/lib/bullet/src/BulletCollision/Gimpact/btContactProcessing.h b/Engine/lib/bullet/src/BulletCollision/Gimpact/btContactProcessing.h
index 4b0b70293..0c66f8e10 100644
--- a/Engine/lib/bullet/src/BulletCollision/Gimpact/btContactProcessing.h
+++ b/Engine/lib/bullet/src/BulletCollision/Gimpact/btContactProcessing.h
@@ -2,7 +2,7 @@
 #define BT_CONTACT_H_INCLUDED
 
 /*! \file gim_contact.h
-\author Francisco Len N�jera
+\author Francisco Leon Najera
 */
 /*
 This source file is part of GIMPACT Library.
diff --git a/Engine/lib/bullet/src/BulletCollision/Gimpact/btGImpactBvh.cpp b/Engine/lib/bullet/src/BulletCollision/Gimpact/btGImpactBvh.cpp
index 38fed6b4f..863233163 100644
--- a/Engine/lib/bullet/src/BulletCollision/Gimpact/btGImpactBvh.cpp
+++ b/Engine/lib/bullet/src/BulletCollision/Gimpact/btGImpactBvh.cpp
@@ -1,5 +1,5 @@
 /*! \file gim_box_set.h
-\author Francisco Len N�jera
+\author Francisco Leon Najera
 */
 /*
 This source file is part of GIMPACT Library.
diff --git a/Engine/lib/bullet/src/BulletCollision/Gimpact/btGImpactBvh.h b/Engine/lib/bullet/src/BulletCollision/Gimpact/btGImpactBvh.h
index 074de4a46..6174ae97a 100644
--- a/Engine/lib/bullet/src/BulletCollision/Gimpact/btGImpactBvh.h
+++ b/Engine/lib/bullet/src/BulletCollision/Gimpact/btGImpactBvh.h
@@ -2,7 +2,7 @@
 #define GIM_BOX_SET_H_INCLUDED
 
 /*! \file gim_box_set.h
-\author Francisco Len N�jera
+\author Francisco Leon Najera
 */
 /*
 This source file is part of GIMPACT Library.
@@ -383,9 +383,9 @@ public:
 		return m_box_tree.get_node_pointer(index);
 	}
 
-
+#ifdef TRI_COLLISION_PROFILING
 	static float getAverageTreeCollisionTime();
-
+#endif //TRI_COLLISION_PROFILING
 
 	static void find_collision(btGImpactBvh * boxset1, const btTransform & trans1,
 		btGImpactBvh * boxset2, const btTransform & trans2,
diff --git a/Engine/lib/bullet/src/BulletCollision/Gimpact/btGImpactCollisionAlgorithm.cpp b/Engine/lib/bullet/src/BulletCollision/Gimpact/btGImpactCollisionAlgorithm.cpp
index e6da8f909..5997e443e 100644
--- a/Engine/lib/bullet/src/BulletCollision/Gimpact/btGImpactCollisionAlgorithm.cpp
+++ b/Engine/lib/bullet/src/BulletCollision/Gimpact/btGImpactCollisionAlgorithm.cpp
@@ -51,7 +51,7 @@ public:
 	}
 
 
-	void get_plane_equation_transformed(const btTransform & trans,btVector4 &equation)
+	void get_plane_equation_transformed(const btTransform & trans,btVector4 &equation) const 
 	{
 		equation[0] = trans.getBasis().getRow(0).dot(m_planeNormal);
 		equation[1] = trans.getBasis().getRow(1).dot(m_planeNormal);
@@ -89,7 +89,7 @@ Declared here due of insuficent space on Pool allocators
 class GIM_ShapeRetriever
 {
 public:
-	btGImpactShapeInterface * m_gim_shape;
+	const btGImpactShapeInterface * m_gim_shape;
 	btTriangleShapeEx m_trishape;
 	btTetrahedronShapeEx m_tetrashape;
 
@@ -98,7 +98,7 @@ public:
 	{
 	public:
 		GIM_ShapeRetriever * m_parent;
-		virtual btCollisionShape * getChildShape(int index)
+		virtual const btCollisionShape * getChildShape(int index)
 		{
 			return m_parent->m_gim_shape->getChildShape(index);
 		}
@@ -133,7 +133,7 @@ public:
 	TetraShapeRetriever  m_tetra_retriever;
 	ChildShapeRetriever * m_current_retriever;
 
-	GIM_ShapeRetriever(btGImpactShapeInterface * gim_shape)
+	GIM_ShapeRetriever(const btGImpactShapeInterface * gim_shape)
 	{
 		m_gim_shape = gim_shape;
 		//select retriever
@@ -153,7 +153,7 @@ public:
 		m_current_retriever->m_parent = this;
 	}
 
-	btCollisionShape * getChildShape(int index)
+	const btCollisionShape * getChildShape(int index)
 	{
 		return m_current_retriever->getChildShape(index);
 	}
@@ -193,8 +193,8 @@ float btGImpactCollisionAlgorithm::getAverageTriangleCollisionTime()
 
 
 
-btGImpactCollisionAlgorithm::btGImpactCollisionAlgorithm( const btCollisionAlgorithmConstructionInfo& ci, btCollisionObject* body0,btCollisionObject* body1)
-: btActivatingCollisionAlgorithm(ci,body0,body1)
+btGImpactCollisionAlgorithm::btGImpactCollisionAlgorithm( const btCollisionAlgorithmConstructionInfo& ci, const btCollisionObjectWrapper* body0Wrap,const btCollisionObjectWrapper* body1Wrap)
+: btActivatingCollisionAlgorithm(ci,body0Wrap,body1Wrap)
 {
 	m_manifoldPtr = NULL;
 	m_convex_algorithm = NULL;
@@ -209,71 +209,60 @@ btGImpactCollisionAlgorithm::~btGImpactCollisionAlgorithm()
 
 
 
-void btGImpactCollisionAlgorithm::addContactPoint(btCollisionObject * body0,
-				btCollisionObject * body1,
+void btGImpactCollisionAlgorithm::addContactPoint(const btCollisionObjectWrapper * body0Wrap,
+				const btCollisionObjectWrapper * body1Wrap,
 				const btVector3 & point,
 				const btVector3 & normal,
 				btScalar distance)
 {
 	m_resultOut->setShapeIdentifiersA(m_part0,m_triface0);
 	m_resultOut->setShapeIdentifiersB(m_part1,m_triface1);
-	checkManifold(body0,body1);
+	checkManifold(body0Wrap,body1Wrap);
 	m_resultOut->addContactPoint(normal,point,distance);
 }
 
 
 void btGImpactCollisionAlgorithm::shape_vs_shape_collision(
-					  btCollisionObject * body0,
-					  btCollisionObject * body1,
-					  btCollisionShape * shape0,
-					  btCollisionShape * shape1)
+					  const btCollisionObjectWrapper * body0Wrap,
+					  const btCollisionObjectWrapper* body1Wrap,
+					  const btCollisionShape * shape0,
+					  const btCollisionShape * shape1)
 {
 
-	btCollisionShape* tmpShape0 = body0->getCollisionShape();
-	btCollisionShape* tmpShape1 = body1->getCollisionShape();
-	
-	body0->internalSetTemporaryCollisionShape(shape0);
-	body1->internalSetTemporaryCollisionShape(shape1);
 
 	{
-		btCollisionAlgorithm* algor = newAlgorithm(body0,body1);
+		btCollisionObjectWrapper ob0(body0Wrap,shape0,body0Wrap->getCollisionObject(), body0Wrap->getWorldTransform());
+		btCollisionObjectWrapper ob1(body1Wrap,shape1,body1Wrap->getCollisionObject(),body1Wrap->getWorldTransform());
+
+		btCollisionAlgorithm* algor = newAlgorithm(&ob0,&ob1);
 		// post :	checkManifold is called
 
 		m_resultOut->setShapeIdentifiersA(m_part0,m_triface0);
 		m_resultOut->setShapeIdentifiersB(m_part1,m_triface1);
 
-		algor->processCollision(body0,body1,*m_dispatchInfo,m_resultOut);
+		algor->processCollision(&ob0,&ob1,*m_dispatchInfo,m_resultOut);
 
 		algor->~btCollisionAlgorithm();
 		m_dispatcher->freeCollisionAlgorithm(algor);
 	}
 
-	body0->internalSetTemporaryCollisionShape(tmpShape0);
-	body1->internalSetTemporaryCollisionShape(tmpShape1);
 }
 
 void btGImpactCollisionAlgorithm::convex_vs_convex_collision(
-					  btCollisionObject * body0,
-					  btCollisionObject * body1,
-					  btCollisionShape * shape0,
-					  btCollisionShape * shape1)
+					  const btCollisionObjectWrapper* body0Wrap,
+					  const btCollisionObjectWrapper* body1Wrap,
+					  const btCollisionShape* shape0,
+					  const btCollisionShape* shape1)
 {
 
-	btCollisionShape* tmpShape0 = body0->getCollisionShape();
-	btCollisionShape* tmpShape1 = body1->getCollisionShape();
-	
-	body0->internalSetTemporaryCollisionShape(shape0);
-	body1->internalSetTemporaryCollisionShape(shape1);
-
-
 	m_resultOut->setShapeIdentifiersA(m_part0,m_triface0);
 	m_resultOut->setShapeIdentifiersB(m_part1,m_triface1);
 
-	checkConvexAlgorithm(body0,body1);
-	m_convex_algorithm->processCollision(body0,body1,*m_dispatchInfo,m_resultOut);
+	btCollisionObjectWrapper ob0(body0Wrap,shape0,body0Wrap->getCollisionObject(),body0Wrap->getWorldTransform());
+	btCollisionObjectWrapper ob1(body1Wrap,shape1,body1Wrap->getCollisionObject(),body1Wrap->getWorldTransform());
+	checkConvexAlgorithm(&ob0,&ob1);
+	m_convex_algorithm->processCollision(&ob0,&ob1,*m_dispatchInfo,m_resultOut);
 
-	body0->internalSetTemporaryCollisionShape(tmpShape0);
-	body1->internalSetTemporaryCollisionShape(tmpShape1);
 
 }
 
@@ -283,8 +272,8 @@ void btGImpactCollisionAlgorithm::convex_vs_convex_collision(
 void btGImpactCollisionAlgorithm::gimpact_vs_gimpact_find_pairs(
 					  const btTransform & trans0,
 					  const btTransform & trans1,
-					  btGImpactShapeInterface * shape0,
-					  btGImpactShapeInterface * shape1,btPairSet & pairset)
+					  const btGImpactShapeInterface * shape0,
+					  const btGImpactShapeInterface * shape1,btPairSet & pairset)
 {
 	if(shape0->hasBoxSet() && shape1->hasBoxSet())
 	{
@@ -320,8 +309,8 @@ void btGImpactCollisionAlgorithm::gimpact_vs_gimpact_find_pairs(
 void btGImpactCollisionAlgorithm::gimpact_vs_shape_find_pairs(
 					  const btTransform & trans0,
 					  const btTransform & trans1,
-					  btGImpactShapeInterface * shape0,
-					  btCollisionShape * shape1,
+					  const btGImpactShapeInterface * shape0,
+					  const btCollisionShape * shape1,
 					  btAlignedObjectArray<int> & collided_primitives)
 {
 
@@ -359,10 +348,10 @@ void btGImpactCollisionAlgorithm::gimpact_vs_shape_find_pairs(
 }
 
 
-void btGImpactCollisionAlgorithm::collide_gjk_triangles(btCollisionObject * body0,
-				  btCollisionObject * body1,
-				  btGImpactMeshShapePart * shape0,
-				  btGImpactMeshShapePart * shape1,
+void btGImpactCollisionAlgorithm::collide_gjk_triangles(const btCollisionObjectWrapper * body0Wrap,
+				  const btCollisionObjectWrapper * body1Wrap,
+				  const btGImpactMeshShapePart * shape0,
+				  const btGImpactMeshShapePart * shape1,
 				  const int * pairs, int pair_count)
 {
 	btTriangleShapeEx tri0;
@@ -389,7 +378,7 @@ void btGImpactCollisionAlgorithm::collide_gjk_triangles(btCollisionObject * body
 		//collide two convex shapes
 		if(tri0.overlap_test_conservative(tri1))
 		{
-			convex_vs_convex_collision(body0,body1,&tri0,&tri1);
+			convex_vs_convex_collision(body0Wrap,body1Wrap,&tri0,&tri1);
 		}
 
 	}
@@ -398,14 +387,14 @@ void btGImpactCollisionAlgorithm::collide_gjk_triangles(btCollisionObject * body
 	shape1->unlockChildShapes();
 }
 
-void btGImpactCollisionAlgorithm::collide_sat_triangles(btCollisionObject * body0,
-					  btCollisionObject * body1,
-					  btGImpactMeshShapePart * shape0,
-					  btGImpactMeshShapePart * shape1,
+void btGImpactCollisionAlgorithm::collide_sat_triangles(const btCollisionObjectWrapper* body0Wrap,
+					  const btCollisionObjectWrapper* body1Wrap,
+					  const btGImpactMeshShapePart * shape0,
+					  const btGImpactMeshShapePart * shape1,
 					  const int * pairs, int pair_count)
 {
-	btTransform orgtrans0 = body0->getWorldTransform();
-	btTransform orgtrans1 = body1->getWorldTransform();
+	btTransform orgtrans0 = body0Wrap->getWorldTransform();
+	btTransform orgtrans1 = body1Wrap->getWorldTransform();
 
 	btPrimitiveTriangle ptri0;
 	btPrimitiveTriangle ptri1;
@@ -451,7 +440,7 @@ void btGImpactCollisionAlgorithm::collide_sat_triangles(btCollisionObject * body
 				while(j--)
 				{
 
-					addContactPoint(body0, body1,
+					addContactPoint(body0Wrap, body1Wrap,
 								contact_data.m_points[j],
 								contact_data.m_separating_normal,
 								-contact_data.m_penetration_depth);
@@ -472,20 +461,20 @@ void btGImpactCollisionAlgorithm::collide_sat_triangles(btCollisionObject * body
 
 
 void btGImpactCollisionAlgorithm::gimpact_vs_gimpact(
-						btCollisionObject * body0,
-					   	btCollisionObject * body1,
-					  	btGImpactShapeInterface * shape0,
-					  	btGImpactShapeInterface * shape1)
+						const btCollisionObjectWrapper* body0Wrap,
+					   	const btCollisionObjectWrapper * body1Wrap,
+					  	const btGImpactShapeInterface * shape0,
+					  	const btGImpactShapeInterface * shape1)
 {
 
 	if(shape0->getGImpactShapeType()==CONST_GIMPACT_TRIMESH_SHAPE)
 	{
-		btGImpactMeshShape * meshshape0 = static_cast<btGImpactMeshShape *>(shape0);
+		const btGImpactMeshShape * meshshape0 = static_cast<const btGImpactMeshShape *>(shape0);
 		m_part0 = meshshape0->getMeshPartCount();
 
 		while(m_part0--)
 		{
-			gimpact_vs_gimpact(body0,body1,meshshape0->getMeshPart(m_part0),shape1);
+			gimpact_vs_gimpact(body0Wrap,body1Wrap,meshshape0->getMeshPart(m_part0),shape1);
 		}
 
 		return;
@@ -493,13 +482,13 @@ void btGImpactCollisionAlgorithm::gimpact_vs_gimpact(
 
 	if(shape1->getGImpactShapeType()==CONST_GIMPACT_TRIMESH_SHAPE)
 	{
-		btGImpactMeshShape * meshshape1 = static_cast<btGImpactMeshShape *>(shape1);
+		const btGImpactMeshShape * meshshape1 = static_cast<const btGImpactMeshShape *>(shape1);
 		m_part1 = meshshape1->getMeshPartCount();
 
 		while(m_part1--)
 		{
 
-			gimpact_vs_gimpact(body0,body1,shape0,meshshape1->getMeshPart(m_part1));
+			gimpact_vs_gimpact(body0Wrap,body1Wrap,shape0,meshshape1->getMeshPart(m_part1));
 
 		}
 
@@ -507,8 +496,8 @@ void btGImpactCollisionAlgorithm::gimpact_vs_gimpact(
 	}
 
 
-	btTransform orgtrans0 = body0->getWorldTransform();
-	btTransform orgtrans1 = body1->getWorldTransform();
+	btTransform orgtrans0 = body0Wrap->getWorldTransform();
+	btTransform orgtrans1 = body1Wrap->getWorldTransform();
 
 	btPairSet pairset;
 
@@ -519,13 +508,13 @@ void btGImpactCollisionAlgorithm::gimpact_vs_gimpact(
 	if(shape0->getGImpactShapeType() == CONST_GIMPACT_TRIMESH_SHAPE_PART &&
 		shape1->getGImpactShapeType() == CONST_GIMPACT_TRIMESH_SHAPE_PART)
 	{
-		btGImpactMeshShapePart * shapepart0 = static_cast<btGImpactMeshShapePart * >(shape0);
-		btGImpactMeshShapePart * shapepart1 = static_cast<btGImpactMeshShapePart * >(shape1);
+		const btGImpactMeshShapePart * shapepart0 = static_cast<const btGImpactMeshShapePart * >(shape0);
+		const btGImpactMeshShapePart * shapepart1 = static_cast<const btGImpactMeshShapePart * >(shape1);
 		//specialized function
 		#ifdef BULLET_TRIANGLE_COLLISION
-		collide_gjk_triangles(body0,body1,shapepart0,shapepart1,&pairset[0].m_index1,pairset.size());
+		collide_gjk_triangles(body0Wrap,body1Wrap,shapepart0,shapepart1,&pairset[0].m_index1,pairset.size());
 		#else
-		collide_sat_triangles(body0,body1,shapepart0,shapepart1,&pairset[0].m_index1,pairset.size());
+		collide_sat_triangles(body0Wrap,body1Wrap,shapepart0,shapepart1,&pairset[0].m_index1,pairset.size());
 		#endif
 
 		return;
@@ -548,55 +537,49 @@ void btGImpactCollisionAlgorithm::gimpact_vs_gimpact(
 		GIM_PAIR * pair = &pairset[i];
 		m_triface0 = pair->m_index1;
 		m_triface1 = pair->m_index2;
-		btCollisionShape * colshape0 = retriever0.getChildShape(m_triface0);
-		btCollisionShape * colshape1 = retriever1.getChildShape(m_triface1);
+		const btCollisionShape * colshape0 = retriever0.getChildShape(m_triface0);
+		const btCollisionShape * colshape1 = retriever1.getChildShape(m_triface1);
+
+		btTransform tr0 = body0Wrap->getWorldTransform();
+		btTransform tr1 = body1Wrap->getWorldTransform();
 
 		if(child_has_transform0)
 		{
-			body0->setWorldTransform(orgtrans0*shape0->getChildTransform(m_triface0));
+			tr0 = orgtrans0*shape0->getChildTransform(m_triface0);
 		}
 
 		if(child_has_transform1)
 		{
-			body1->setWorldTransform(orgtrans1*shape1->getChildTransform(m_triface1));
+			tr1 = orgtrans1*shape1->getChildTransform(m_triface1);
 		}
 
+		btCollisionObjectWrapper ob0(body0Wrap,colshape0,body0Wrap->getCollisionObject(),tr0);
+		btCollisionObjectWrapper ob1(body1Wrap,colshape1,body1Wrap->getCollisionObject(),tr1);
+
 		//collide two convex shapes
-		convex_vs_convex_collision(body0,body1,colshape0,colshape1);
-
-
-		if(child_has_transform0)
-		{
-			body0->setWorldTransform(orgtrans0);
-		}
-
-		if(child_has_transform1)
-		{
-			body1->setWorldTransform(orgtrans1);
-		}
-
+		convex_vs_convex_collision(&ob0,&ob1,colshape0,colshape1);
 	}
 
 	shape0->unlockChildShapes();
 	shape1->unlockChildShapes();
 }
 
-void btGImpactCollisionAlgorithm::gimpact_vs_shape(btCollisionObject * body0,
-				  btCollisionObject * body1,
-				  btGImpactShapeInterface * shape0,
-				  btCollisionShape * shape1,bool swapped)
+void btGImpactCollisionAlgorithm::gimpact_vs_shape(const btCollisionObjectWrapper* body0Wrap,
+				  const btCollisionObjectWrapper * body1Wrap,
+				  const btGImpactShapeInterface * shape0,
+				  const btCollisionShape * shape1,bool swapped)
 {
 	if(shape0->getGImpactShapeType()==CONST_GIMPACT_TRIMESH_SHAPE)
 	{
-		btGImpactMeshShape * meshshape0 = static_cast<btGImpactMeshShape *>(shape0);
+		const btGImpactMeshShape * meshshape0 = static_cast<const btGImpactMeshShape *>(shape0);
 		int& part = swapped ? m_part1 : m_part0;
 		part = meshshape0->getMeshPartCount();
 
 		while(part--)
 		{
 
-			gimpact_vs_shape(body0,
-				  body1,
+			gimpact_vs_shape(body0Wrap,
+				  body1Wrap,
 				  meshshape0->getMeshPart(part),
 				  shape1,swapped);
 
@@ -609,9 +592,9 @@ void btGImpactCollisionAlgorithm::gimpact_vs_shape(btCollisionObject * body0,
 	if(shape0->getGImpactShapeType() == CONST_GIMPACT_TRIMESH_SHAPE_PART &&
 		shape1->getShapeType() == STATIC_PLANE_PROXYTYPE)
 	{
-		btGImpactMeshShapePart * shapepart = static_cast<btGImpactMeshShapePart *>(shape0);
-		btStaticPlaneShape * planeshape = static_cast<btStaticPlaneShape * >(shape1);
-		gimpacttrimeshpart_vs_plane_collision(body0,body1,shapepart,planeshape,swapped);
+		const btGImpactMeshShapePart * shapepart = static_cast<const btGImpactMeshShapePart *>(shape0);
+		const btStaticPlaneShape * planeshape = static_cast<const btStaticPlaneShape * >(shape1);
+		gimpacttrimeshpart_vs_plane_collision(body0Wrap,body1Wrap,shapepart,planeshape,swapped);
 		return;
 	}
 
@@ -621,21 +604,21 @@ void btGImpactCollisionAlgorithm::gimpact_vs_shape(btCollisionObject * body0,
 
 	if(shape1->isCompound())
 	{
-		btCompoundShape * compoundshape = static_cast<btCompoundShape *>(shape1);
-		gimpact_vs_compoundshape(body0,body1,shape0,compoundshape,swapped);
+		const btCompoundShape * compoundshape = static_cast<const btCompoundShape *>(shape1);
+		gimpact_vs_compoundshape(body0Wrap,body1Wrap,shape0,compoundshape,swapped);
 		return;
 	}
 	else if(shape1->isConcave())
 	{
-		btConcaveShape * concaveshape = static_cast<btConcaveShape *>(shape1);
-		gimpact_vs_concave(body0,body1,shape0,concaveshape,swapped);
+		const btConcaveShape * concaveshape = static_cast<const btConcaveShape *>(shape1);
+		gimpact_vs_concave(body0Wrap,body1Wrap,shape0,concaveshape,swapped);
 		return;
 	}
 
 
-	btTransform orgtrans0 = body0->getWorldTransform();
+	btTransform orgtrans0 = body0Wrap->getWorldTransform();
 
-	btTransform orgtrans1 = body1->getWorldTransform();
+	btTransform orgtrans1 = body1Wrap->getWorldTransform();
 
 	btAlignedObjectArray<int> collided_results;
 
@@ -662,27 +645,25 @@ void btGImpactCollisionAlgorithm::gimpact_vs_shape(btCollisionObject * body0,
         else
             m_triface0 = child_index;
 
-		btCollisionShape * colshape0 = retriever0.getChildShape(child_index);
+		const btCollisionShape * colshape0 = retriever0.getChildShape(child_index);
+		// tr0 is the transform given to the child wrapper: the body transform, or body*child when children carry their own transform
+		btTransform tr0 = body0Wrap->getWorldTransform();
 
 		if(child_has_transform0)
 		{
-			body0->setWorldTransform(orgtrans0*shape0->getChildTransform(child_index));
+			tr0 = orgtrans0*shape0->getChildTransform(child_index);
 		}
 
+		btCollisionObjectWrapper ob0(body0Wrap,colshape0,body0Wrap->getCollisionObject(),tr0); // pass tr0 (not the raw body transform) so the child transform is applied, as the removed setWorldTransform call did
+
 		//collide two shapes
 		if(swapped)
 		{
-			shape_vs_shape_collision(body1,body0,shape1,colshape0);
+			shape_vs_shape_collision(body1Wrap,&ob0,shape1,colshape0);
 		}
 		else
 		{
-			shape_vs_shape_collision(body0,body1,colshape0,shape1);
-		}
-
-		//restore transforms
-		if(child_has_transform0)
-		{
-			body0->setWorldTransform(orgtrans0);
+			shape_vs_shape_collision(&ob0,body1Wrap,colshape0,shape1);
 		}
 
 	}
@@ -691,44 +672,39 @@ void btGImpactCollisionAlgorithm::gimpact_vs_shape(btCollisionObject * body0,
 
 }
 
-void btGImpactCollisionAlgorithm::gimpact_vs_compoundshape(btCollisionObject * body0,
-				  btCollisionObject * body1,
-				  btGImpactShapeInterface * shape0,
-				  btCompoundShape * shape1,bool swapped)
+void btGImpactCollisionAlgorithm::gimpact_vs_compoundshape(const btCollisionObjectWrapper* body0Wrap,
+				  const btCollisionObjectWrapper* body1Wrap,
+				  const btGImpactShapeInterface * shape0,
+				  const btCompoundShape * shape1,bool swapped)
 {
-	btTransform orgtrans1 = body1->getWorldTransform();
+	btTransform orgtrans1 = body1Wrap->getWorldTransform();
 
 	int i = shape1->getNumChildShapes();
 	while(i--)
 	{
 
-		btCollisionShape * colshape1 = shape1->getChildShape(i);
+		const btCollisionShape * colshape1 = shape1->getChildShape(i);
 		btTransform childtrans1 = orgtrans1*shape1->getChildTransform(i);
 
-		body1->setWorldTransform(childtrans1);
-
+		btCollisionObjectWrapper ob1(body1Wrap,colshape1,body1Wrap->getCollisionObject(),childtrans1);
 		//collide child shape
-		gimpact_vs_shape(body0, body1,
+		gimpact_vs_shape(body0Wrap, &ob1,
 					  shape0,colshape1,swapped);
-
-
-		//restore transforms
-		body1->setWorldTransform(orgtrans1);
 	}
 }
 
 void btGImpactCollisionAlgorithm::gimpacttrimeshpart_vs_plane_collision(
-					  btCollisionObject * body0,
-					  btCollisionObject * body1,
-					  btGImpactMeshShapePart * shape0,
-					  btStaticPlaneShape * shape1,bool swapped)
+					  const btCollisionObjectWrapper * body0Wrap,
+					  const btCollisionObjectWrapper * body1Wrap,
+					  const btGImpactMeshShapePart * shape0,
+					  const btStaticPlaneShape * shape1,bool swapped)
 {
 
 
-	btTransform orgtrans0 = body0->getWorldTransform();
-	btTransform orgtrans1 = body1->getWorldTransform();
+	btTransform orgtrans0 = body0Wrap->getWorldTransform();
+	btTransform orgtrans1 = body1Wrap->getWorldTransform();
 
-	btPlaneShape * planeshape = static_cast<btPlaneShape *>(shape1);
+	const btPlaneShape * planeshape = static_cast<const btPlaneShape *>(shape1);
 	btVector4 plane;
 	planeshape->get_plane_equation_transformed(orgtrans1,plane);
 
@@ -757,14 +733,14 @@ void btGImpactCollisionAlgorithm::gimpacttrimeshpart_vs_plane_collision(
 		{
 			if(swapped)
 			{
-				addContactPoint(body1, body0,
+				addContactPoint(body1Wrap, body0Wrap,
 					vertex,
 					-plane,
 					distance);
 			}
 			else
 			{
-				addContactPoint(body0, body1,
+				addContactPoint(body0Wrap, body1Wrap,
 					vertex,
 					plane,
 					distance);
@@ -782,9 +758,9 @@ class btGImpactTriangleCallback: public btTriangleCallback
 {
 public:
 	btGImpactCollisionAlgorithm * algorithm;
-	btCollisionObject * body0;
-	btCollisionObject * body1;
-	btGImpactShapeInterface * gimpactshape0;
+	const btCollisionObjectWrapper * body0Wrap;
+	const btCollisionObjectWrapper * body1Wrap;
+	const btGImpactShapeInterface * gimpactshape0;
 	bool swapped;
 	btScalar margin;
 
@@ -803,7 +779,7 @@ public:
             algorithm->setFace1(triangleIndex);
         }
 		algorithm->gimpact_vs_shape(
-							body0,body1,gimpactshape0,&tri1,swapped);
+							body0Wrap,body1Wrap,gimpactshape0,&tri1,swapped);
 	}
 };
 
@@ -811,16 +787,16 @@ public:
 
 
 void btGImpactCollisionAlgorithm::gimpact_vs_concave(
-				  btCollisionObject * body0,
-				  btCollisionObject * body1,
-				  btGImpactShapeInterface * shape0,
-				  btConcaveShape * shape1,bool swapped)
+				  const btCollisionObjectWrapper* body0Wrap,
+				  const btCollisionObjectWrapper * body1Wrap,
+				  const btGImpactShapeInterface * shape0,
+				  const btConcaveShape * shape1,bool swapped)
 {
 	//create the callback
 	btGImpactTriangleCallback tricallback;
 	tricallback.algorithm = this;
-	tricallback.body0 = body0;
-	tricallback.body1 = body1;
+	tricallback.body0Wrap = body0Wrap;
+	tricallback.body1Wrap = body1Wrap;
 	tricallback.gimpactshape0 = shape0;
 	tricallback.swapped = swapped;
 	tricallback.margin = shape1->getMargin();
@@ -828,7 +804,7 @@ void btGImpactCollisionAlgorithm::gimpact_vs_concave(
 	//getting the trimesh AABB
 	btTransform gimpactInConcaveSpace;
 
-	gimpactInConcaveSpace = body1->getWorldTransform().inverse() * body0->getWorldTransform();
+	gimpactInConcaveSpace = body1Wrap->getWorldTransform().inverse() * body0Wrap->getWorldTransform();
 
 	btVector3 minAABB,maxAABB;
 	shape0->getAabb(gimpactInConcaveSpace,minAABB,maxAABB);
@@ -839,36 +815,36 @@ void btGImpactCollisionAlgorithm::gimpact_vs_concave(
 
 
 
-void btGImpactCollisionAlgorithm::processCollision (btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut)
+void btGImpactCollisionAlgorithm::processCollision (const btCollisionObjectWrapper* body0Wrap,const btCollisionObjectWrapper* body1Wrap,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut)
 {
     clearCache();
 
     m_resultOut = resultOut;
 	m_dispatchInfo = &dispatchInfo;
-    btGImpactShapeInterface * gimpactshape0;
-    btGImpactShapeInterface * gimpactshape1;
+    const btGImpactShapeInterface * gimpactshape0;
+    const btGImpactShapeInterface * gimpactshape1;
 
-	if (body0->getCollisionShape()->getShapeType()==GIMPACT_SHAPE_PROXYTYPE)
+	if (body0Wrap->getCollisionShape()->getShapeType()==GIMPACT_SHAPE_PROXYTYPE)
 	{
-		gimpactshape0 = static_cast<btGImpactShapeInterface *>(body0->getCollisionShape());
+		gimpactshape0 = static_cast<const btGImpactShapeInterface *>(body0Wrap->getCollisionShape());
 
-		if( body1->getCollisionShape()->getShapeType()==GIMPACT_SHAPE_PROXYTYPE )
+		if( body1Wrap->getCollisionShape()->getShapeType()==GIMPACT_SHAPE_PROXYTYPE )
 		{
-			gimpactshape1 = static_cast<btGImpactShapeInterface *>(body1->getCollisionShape());
+			gimpactshape1 = static_cast<const btGImpactShapeInterface *>(body1Wrap->getCollisionShape());
 
-			gimpact_vs_gimpact(body0,body1,gimpactshape0,gimpactshape1);
+			gimpact_vs_gimpact(body0Wrap,body1Wrap,gimpactshape0,gimpactshape1);
 		}
 		else
 		{
-			gimpact_vs_shape(body0,body1,gimpactshape0,body1->getCollisionShape(),false);
+			gimpact_vs_shape(body0Wrap,body1Wrap,gimpactshape0,body1Wrap->getCollisionShape(),false);
 		}
 
 	}
-	else if (body1->getCollisionShape()->getShapeType()==GIMPACT_SHAPE_PROXYTYPE )
+	else if (body1Wrap->getCollisionShape()->getShapeType()==GIMPACT_SHAPE_PROXYTYPE )
 	{
-		gimpactshape1 = static_cast<btGImpactShapeInterface *>(body1->getCollisionShape());
+		gimpactshape1 = static_cast<const btGImpactShapeInterface *>(body1Wrap->getCollisionShape());
 
-		gimpact_vs_shape(body1,body0,gimpactshape1,body0->getCollisionShape(),true);
+		gimpact_vs_shape(body1Wrap,body0Wrap,gimpactshape1,body0Wrap->getCollisionShape(),true);
 	}
 }
 
@@ -881,22 +857,24 @@ btScalar btGImpactCollisionAlgorithm::calculateTimeOfImpact(btCollisionObject* b
 
 ///////////////////////////////////// REGISTERING ALGORITHM //////////////////////////////////////////////
 
-btGImpactCollisionAlgorithm::CreateFunc g_gimpact_cf;
+
 
 //! Use this function for register the algorithm externally
 void btGImpactCollisionAlgorithm::registerAlgorithm(btCollisionDispatcher * dispatcher)
 {
 
+	static btGImpactCollisionAlgorithm::CreateFunc s_gimpact_cf; // function-local static replacing the former global g_gimpact_cf; its address is handed to every registration below
+
 	int i;
 
 	for ( i = 0;i < MAX_BROADPHASE_COLLISION_TYPES ;i++ )
 	{
-		dispatcher->registerCollisionCreateFunc(GIMPACT_SHAPE_PROXYTYPE,i ,&g_gimpact_cf);
+		dispatcher->registerCollisionCreateFunc(GIMPACT_SHAPE_PROXYTYPE,i ,&s_gimpact_cf);
 	}
 
 	for ( i = 0;i < MAX_BROADPHASE_COLLISION_TYPES ;i++ )
 	{
-		dispatcher->registerCollisionCreateFunc(i,GIMPACT_SHAPE_PROXYTYPE ,&g_gimpact_cf);
+		dispatcher->registerCollisionCreateFunc(i,GIMPACT_SHAPE_PROXYTYPE ,&s_gimpact_cf);
 	}
 
 }
diff --git a/Engine/lib/bullet/src/BulletCollision/Gimpact/btGImpactCollisionAlgorithm.h b/Engine/lib/bullet/src/BulletCollision/Gimpact/btGImpactCollisionAlgorithm.h
index 453472aa0..c01b1eee8 100644
--- a/Engine/lib/bullet/src/BulletCollision/Gimpact/btGImpactCollisionAlgorithm.h
+++ b/Engine/lib/bullet/src/BulletCollision/Gimpact/btGImpactCollisionAlgorithm.h
@@ -1,5 +1,5 @@
 /*! \file btGImpactShape.h
-\author Francisco Len N�jera
+\author Francisco Leon Najera
 */
 /*
 This source file is part of GIMPACT Library.
@@ -21,8 +21,8 @@ subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-#ifndef BVH_CONCAVE_COLLISION_ALGORITHM_H
-#define BVH_CONCAVE_COLLISION_ALGORITHM_H
+#ifndef BT_GIMPACT_BVH_CONCAVE_COLLISION_ALGORITHM_H
+#define BT_GIMPACT_BVH_CONCAVE_COLLISION_ALGORITHM_H
 
 #include "BulletCollision/CollisionDispatch/btActivatingCollisionAlgorithm.h"
 #include "BulletCollision/BroadphaseCollision/btDispatcher.h"
@@ -40,7 +40,7 @@ class btDispatcher;
 #include "BulletCollision/CollisionShapes/btCompoundShape.h"
 #include "BulletCollision/CollisionDispatch/btConvexConvexAlgorithm.h"
 #include "LinearMath/btIDebugDraw.h"
-
+#include "BulletCollision/CollisionDispatch/btCollisionObjectWrapper.h"
 
 
 //! Collision Algorithm for GImpact Shapes
@@ -65,7 +65,7 @@ protected:
 
 
 	//! Creates a new contact point
-	SIMD_FORCE_INLINE btPersistentManifold* newContactManifold(btCollisionObject* body0,btCollisionObject* body1)
+	SIMD_FORCE_INLINE btPersistentManifold* newContactManifold(const btCollisionObject* body0,const btCollisionObject* body1)
 	{
 		m_manifoldPtr = m_dispatcher->getNewManifold(body0,body1);
 		return m_manifoldPtr;
@@ -106,38 +106,38 @@ protected:
 
 
 	// Call before process collision
-	SIMD_FORCE_INLINE void checkManifold(btCollisionObject* body0,btCollisionObject* body1)
+	SIMD_FORCE_INLINE void checkManifold(const btCollisionObjectWrapper* body0Wrap,const btCollisionObjectWrapper* body1Wrap)
 	{
 		if(getLastManifold() == 0)
 		{
-			newContactManifold(body0,body1);
+			newContactManifold(body0Wrap->getCollisionObject(),body1Wrap->getCollisionObject());
 		}
 
 		m_resultOut->setPersistentManifold(getLastManifold());
 	}
 
 	// Call before process collision
-	SIMD_FORCE_INLINE btCollisionAlgorithm * newAlgorithm(btCollisionObject* body0,btCollisionObject* body1)
+	SIMD_FORCE_INLINE btCollisionAlgorithm * newAlgorithm(const btCollisionObjectWrapper* body0Wrap,const btCollisionObjectWrapper* body1Wrap)
 	{
-		checkManifold(body0,body1);
+		checkManifold(body0Wrap,body1Wrap);
 
 		btCollisionAlgorithm * convex_algorithm = m_dispatcher->findAlgorithm(
-				body0,body1,getLastManifold());
+				body0Wrap,body1Wrap,getLastManifold());
 		return convex_algorithm ;
 	}
 
 	// Call before process collision
-	SIMD_FORCE_INLINE void checkConvexAlgorithm(btCollisionObject* body0,btCollisionObject* body1)
+	SIMD_FORCE_INLINE void checkConvexAlgorithm(const btCollisionObjectWrapper* body0Wrap,const btCollisionObjectWrapper* body1Wrap)
 	{
 		if(m_convex_algorithm) return;
-		m_convex_algorithm = newAlgorithm(body0,body1);
+		m_convex_algorithm = newAlgorithm(body0Wrap,body1Wrap);
 	}
 
 
 
 
-	void addContactPoint(btCollisionObject * body0,
-					btCollisionObject * body1,
+	void addContactPoint(const btCollisionObjectWrapper * body0Wrap,
+					const btCollisionObjectWrapper * body1Wrap,
 					const btVector3 & point,
 					const btVector3 & normal,
 					btScalar distance);
@@ -145,62 +145,62 @@ protected:
 //! Collision routines
 //!@{
 
-	void collide_gjk_triangles(btCollisionObject * body0,
-				  btCollisionObject * body1,
-				  btGImpactMeshShapePart * shape0,
-				  btGImpactMeshShapePart * shape1,
+	void collide_gjk_triangles(const btCollisionObjectWrapper* body0Wrap,
+				  const btCollisionObjectWrapper* body1Wrap,
+				  const btGImpactMeshShapePart * shape0,
+				  const btGImpactMeshShapePart * shape1,
 				  const int * pairs, int pair_count);
 
-	void collide_sat_triangles(btCollisionObject * body0,
-					  btCollisionObject * body1,
-					  btGImpactMeshShapePart * shape0,
-					  btGImpactMeshShapePart * shape1,
+	void collide_sat_triangles(const btCollisionObjectWrapper* body0Wrap,
+					  const btCollisionObjectWrapper* body1Wrap,
+					  const btGImpactMeshShapePart * shape0,
+					  const btGImpactMeshShapePart * shape1,
 					  const int * pairs, int pair_count);
 
 
 
 
 	void shape_vs_shape_collision(
-					  btCollisionObject * body0,
-					  btCollisionObject * body1,
-					  btCollisionShape * shape0,
-					  btCollisionShape * shape1);
+					  const btCollisionObjectWrapper* body0,
+					  const btCollisionObjectWrapper* body1,
+					  const btCollisionShape * shape0,
+					  const btCollisionShape * shape1);
 
-	void convex_vs_convex_collision(btCollisionObject * body0,
-					  btCollisionObject * body1,
-					  btCollisionShape * shape0,
-					  btCollisionShape * shape1);
+	void convex_vs_convex_collision(const btCollisionObjectWrapper* body0Wrap,
+					  const btCollisionObjectWrapper* body1Wrap,
+					  const btCollisionShape* shape0,
+					  const btCollisionShape* shape1);
 
 
 
 	void gimpact_vs_gimpact_find_pairs(
 					  const btTransform & trans0,
 					  const btTransform & trans1,
-					  btGImpactShapeInterface * shape0,
-					  btGImpactShapeInterface * shape1,btPairSet & pairset);
+					  const btGImpactShapeInterface * shape0,
+					  const btGImpactShapeInterface * shape1,btPairSet & pairset);
 
 	void gimpact_vs_shape_find_pairs(
 					  const btTransform & trans0,
 					  const btTransform & trans1,
-					  btGImpactShapeInterface * shape0,
-					  btCollisionShape * shape1,
+					  const btGImpactShapeInterface * shape0,
+					  const btCollisionShape * shape1,
 					  btAlignedObjectArray<int> & collided_primitives);
 
 
 	void gimpacttrimeshpart_vs_plane_collision(
-					  btCollisionObject * body0,
-					  btCollisionObject * body1,
-					  btGImpactMeshShapePart * shape0,
-					  btStaticPlaneShape * shape1,bool swapped);
+					  const btCollisionObjectWrapper * body0Wrap,
+					  const btCollisionObjectWrapper * body1Wrap,
+					  const btGImpactMeshShapePart * shape0,
+					  const btStaticPlaneShape * shape1,bool swapped);
 
 
 public:
 
-	btGImpactCollisionAlgorithm( const btCollisionAlgorithmConstructionInfo& ci,btCollisionObject* body0,btCollisionObject* body1);
+	btGImpactCollisionAlgorithm( const btCollisionAlgorithmConstructionInfo& ci,const btCollisionObjectWrapper* body0Wrap,const btCollisionObjectWrapper* body1Wrap);
 
 	virtual ~btGImpactCollisionAlgorithm();
 
-	virtual void processCollision (btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
+	virtual void processCollision (const btCollisionObjectWrapper* body0Wrap,const btCollisionObjectWrapper* body1Wrap,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
 
 	btScalar	calculateTimeOfImpact(btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
 
@@ -213,22 +213,22 @@ public:
 
 	struct CreateFunc :public 	btCollisionAlgorithmCreateFunc
 	{
-		virtual	btCollisionAlgorithm* CreateCollisionAlgorithm(btCollisionAlgorithmConstructionInfo& ci, btCollisionObject* body0,btCollisionObject* body1)
+		virtual	btCollisionAlgorithm* CreateCollisionAlgorithm(btCollisionAlgorithmConstructionInfo& ci, const btCollisionObjectWrapper* body0Wrap,const btCollisionObjectWrapper* body1Wrap)
 		{
 			void* mem = ci.m_dispatcher1->allocateCollisionAlgorithm(sizeof(btGImpactCollisionAlgorithm));
-			return new(mem) btGImpactCollisionAlgorithm(ci,body0,body1);
+			return new(mem) btGImpactCollisionAlgorithm(ci,body0Wrap,body1Wrap);
 		}
 	};
 
 	//! Use this function for register the algorithm externally
 	static void registerAlgorithm(btCollisionDispatcher * dispatcher);
-
+#ifdef TRI_COLLISION_PROFILING
 	//! Gets the average time in miliseconds of tree collisions
 	static float getAverageTreeCollisionTime();
 
 	//! Gets the average time in miliseconds of triangle collisions
 	static float getAverageTriangleCollisionTime();
-
+#endif //TRI_COLLISION_PROFILING
 
 	//! Collides two gimpact shapes
 	/*!
@@ -236,26 +236,26 @@ public:
 	*/
 
 
-	void gimpact_vs_gimpact(btCollisionObject * body0,
-					  btCollisionObject * body1,
-					  btGImpactShapeInterface * shape0,
-					  btGImpactShapeInterface * shape1);
+	void gimpact_vs_gimpact(const btCollisionObjectWrapper* body0Wrap,
+					  const btCollisionObjectWrapper * body1Wrap,
+					  const btGImpactShapeInterface * shape0,
+					  const btGImpactShapeInterface * shape1);
 
-	void gimpact_vs_shape(btCollisionObject * body0,
-					  btCollisionObject * body1,
-					  btGImpactShapeInterface * shape0,
-					  btCollisionShape * shape1,bool swapped);
+	void gimpact_vs_shape(const btCollisionObjectWrapper* body0Wrap,
+					  const btCollisionObjectWrapper* body1Wrap,
+					  const btGImpactShapeInterface * shape0,
+					  const btCollisionShape * shape1,bool swapped);
 
-	void gimpact_vs_compoundshape(btCollisionObject * body0,
-					  btCollisionObject * body1,
-					  btGImpactShapeInterface * shape0,
-					  btCompoundShape * shape1,bool swapped);
+	void gimpact_vs_compoundshape(const btCollisionObjectWrapper * body0Wrap,
+					  const btCollisionObjectWrapper * body1Wrap,
+					  const btGImpactShapeInterface * shape0,
+					  const btCompoundShape * shape1,bool swapped);
 
 	void gimpact_vs_concave(
-					  btCollisionObject * body0,
-					  btCollisionObject * body1,
-					  btGImpactShapeInterface * shape0,
-					  btConcaveShape * shape1,bool swapped);
+					  const btCollisionObjectWrapper * body0Wrap,
+					  const btCollisionObjectWrapper * body1Wrap,
+					  const btGImpactShapeInterface * shape0,
+					  const btConcaveShape * shape1,bool swapped);
 
 
 
@@ -303,4 +303,4 @@ public:
 
 
 
-#endif //BVH_CONCAVE_COLLISION_ALGORITHM_H
+#endif //BT_GIMPACT_BVH_CONCAVE_COLLISION_ALGORITHM_H
diff --git a/Engine/lib/bullet/src/BulletCollision/Gimpact/btGImpactMassUtil.h b/Engine/lib/bullet/src/BulletCollision/Gimpact/btGImpactMassUtil.h
index 0a10f3cdb..2543aefcf 100644
--- a/Engine/lib/bullet/src/BulletCollision/Gimpact/btGImpactMassUtil.h
+++ b/Engine/lib/bullet/src/BulletCollision/Gimpact/btGImpactMassUtil.h
@@ -1,5 +1,5 @@
 /*! \file btGImpactMassUtil.h
-\author Francisco Len N�jera
+\author Francisco Leon Najera
 */
 /*
 This source file is part of GIMPACT Library.
diff --git a/Engine/lib/bullet/src/BulletCollision/Gimpact/btGImpactQuantizedBvh.cpp b/Engine/lib/bullet/src/BulletCollision/Gimpact/btGImpactQuantizedBvh.cpp
index ea1647a81..4528758c3 100644
--- a/Engine/lib/bullet/src/BulletCollision/Gimpact/btGImpactQuantizedBvh.cpp
+++ b/Engine/lib/bullet/src/BulletCollision/Gimpact/btGImpactQuantizedBvh.cpp
@@ -1,5 +1,5 @@
 /*! \file gim_box_set.h
-\author Francisco Len N�jera
+\author Francisco Leon Najera
 */
 /*
 This source file is part of GIMPACT Library.
@@ -384,7 +384,7 @@ bool btGImpactQuantizedBvh::rayQuery(
 
 
 SIMD_FORCE_INLINE bool _quantized_node_collision(
-	btGImpactQuantizedBvh * boxset0, btGImpactQuantizedBvh * boxset1,
+	const btGImpactQuantizedBvh * boxset0, const btGImpactQuantizedBvh * boxset1,
 	const BT_BOX_BOX_TRANSFORM_CACHE & trans_cache_1to0,
 	int node0 ,int node1, bool complete_primitive_tests)
 {
@@ -402,7 +402,7 @@ SIMD_FORCE_INLINE bool _quantized_node_collision(
 
 //stackless recursive collision routine
 static void _find_quantized_collision_pairs_recursive(
-	btGImpactQuantizedBvh * boxset0, btGImpactQuantizedBvh * boxset1,
+	const btGImpactQuantizedBvh * boxset0, const btGImpactQuantizedBvh * boxset1,
 	btPairSet * collision_pairs,
 	const BT_BOX_BOX_TRANSFORM_CACHE & trans_cache_1to0,
 	int node0, int node1, bool complete_primitive_tests)
@@ -501,8 +501,8 @@ static void _find_quantized_collision_pairs_recursive(
 }
 
 
-void btGImpactQuantizedBvh::find_collision(btGImpactQuantizedBvh * boxset0, const btTransform & trans0,
-		btGImpactQuantizedBvh * boxset1, const btTransform & trans1,
+void btGImpactQuantizedBvh::find_collision(const btGImpactQuantizedBvh * boxset0, const btTransform & trans0,
+		const btGImpactQuantizedBvh * boxset1, const btTransform & trans1,
 		btPairSet & collision_pairs)
 {
 
diff --git a/Engine/lib/bullet/src/BulletCollision/Gimpact/btGImpactQuantizedBvh.h b/Engine/lib/bullet/src/BulletCollision/Gimpact/btGImpactQuantizedBvh.h
index e9cccac75..e6e52fff4 100644
--- a/Engine/lib/bullet/src/BulletCollision/Gimpact/btGImpactQuantizedBvh.h
+++ b/Engine/lib/bullet/src/BulletCollision/Gimpact/btGImpactQuantizedBvh.h
@@ -2,7 +2,7 @@
 #define GIM_QUANTIZED_SET_H_INCLUDED
 
 /*! \file btGImpactQuantizedBvh.h
-\author Francisco Len N�jera
+\author Francisco Leon Najera
 */
 /*
 This source file is part of GIMPACT Library.
@@ -359,12 +359,12 @@ public:
 		return m_box_tree.get_node_pointer(index);
 	}
 
-
+#ifdef TRI_COLLISION_PROFILING
 	static float getAverageTreeCollisionTime();
+#endif //TRI_COLLISION_PROFILING
 
-
-	static void find_collision(btGImpactQuantizedBvh * boxset1, const btTransform & trans1,
-		btGImpactQuantizedBvh * boxset2, const btTransform & trans2,
+	static void find_collision(const btGImpactQuantizedBvh * boxset1, const btTransform & trans1,
+		const btGImpactQuantizedBvh * boxset2, const btTransform & trans2,
 		btPairSet & collision_pairs);
 };
 
diff --git a/Engine/lib/bullet/src/BulletCollision/Gimpact/btGImpactShape.cpp b/Engine/lib/bullet/src/BulletCollision/Gimpact/btGImpactShape.cpp
index da6a4dbfc..cceace55e 100644
--- a/Engine/lib/bullet/src/BulletCollision/Gimpact/btGImpactShape.cpp
+++ b/Engine/lib/bullet/src/BulletCollision/Gimpact/btGImpactShape.cpp
@@ -181,3 +181,23 @@ void btGImpactMeshShape::processAllTriangles(btTriangleCallback* callback,const
 		m_mesh_parts[i]->processAllTriangles(callback,aabbMin,aabbMax);
 	}
 }
+
+///Writes this mesh shape (base shape data, mesh interface, margin, local scaling, GImpact sub-type) into dataBuffer.
+///fills the dataBuffer and returns the struct name (and 0 on failure)
+const char*	btGImpactMeshShape::serialize(void* dataBuffer, btSerializer* serializer) const
+{
+	btGImpactMeshShapeData* trimeshData = (btGImpactMeshShapeData*) dataBuffer;
+	// NOTE(review): dataBuffer is reinterpreted without any check - presumably the caller sized it for btGImpactMeshShapeData; confirm
+	btCollisionShape::serialize(&trimeshData->m_collisionShapeData,serializer);
+	// the mesh interface serializes itself into its own sub-struct of the output
+	m_meshInterface->serialize(&trimeshData->m_meshInterface, serializer);
+	// margin is stored as float, converted explicitly from the member's type
+	trimeshData->m_collisionMargin = float(m_collisionMargin);
+
+	localScaling.serializeFloat(trimeshData->m_localScaling);
+
+	trimeshData->m_gimpactSubType = int(getGImpactShapeType());
+	// this implementation has no failure path: the struct name is always returned
+	return "btGImpactMeshShapeData";
+}
+
diff --git a/Engine/lib/bullet/src/BulletCollision/Gimpact/btGImpactShape.h b/Engine/lib/bullet/src/BulletCollision/Gimpact/btGImpactShape.h
index 598dc6d5fd..dbcd4d701 100644
--- a/Engine/lib/bullet/src/BulletCollision/Gimpact/btGImpactShape.h
+++ b/Engine/lib/bullet/src/BulletCollision/Gimpact/btGImpactShape.h
@@ -189,10 +189,10 @@ public:
 	//!@{
 
 	//! Base method for determinig which kind of GIMPACT shape we get
-	virtual eGIMPACT_SHAPE_TYPE getGImpactShapeType() = 0;
+	virtual eGIMPACT_SHAPE_TYPE getGImpactShapeType() const = 0 ;
 
 	//! gets boxset
-	SIMD_FORCE_INLINE btGImpactBoxSet * getBoxSet()
+	SIMD_FORCE_INLINE const btGImpactBoxSet * getBoxSet() const
 	{
 		return &m_box_set;
 	}
@@ -276,6 +276,7 @@ public:
 	//! virtual method for ray collision
 	virtual void rayTest(const btVector3& rayFrom, const btVector3& rayTo, btCollisionWorld::RayResultCallback& resultCallback)  const
 	{
+        (void) rayFrom; (void) rayTo; (void) resultCallback;
 	}
 
 	//! Function for retrieve triangles.
@@ -284,6 +285,7 @@ public:
 	*/
 	virtual void	processAllTriangles(btTriangleCallback* callback,const btVector3& aabbMin,const btVector3& aabbMax) const
 	{
+        (void) callback; (void) aabbMin; (void) aabbMax;
 	}
 
 	//!@}
@@ -307,6 +309,7 @@ public:
 
 
 		CompoundPrimitiveManager(const CompoundPrimitiveManager& compound)
+            : btPrimitiveManagerBase()
 		{
 			m_compoundShape = compound.m_compoundShape;
 		}
@@ -349,6 +352,7 @@ public:
 		virtual void get_primitive_triangle(int prim_index,btPrimitiveTriangle & triangle) const
 		{
 			btAssert(0);
+            (void) prim_index; (void) triangle;
 		}
 
 	};
@@ -365,6 +369,7 @@ public:
 
 	btGImpactCompoundShape(bool children_has_transform = true)
 	{
+        (void) children_has_transform;
 		m_primitive_manager.m_compoundShape = this;
 		m_box_set.setPrimitiveManager(&m_primitive_manager);
 	}
@@ -478,11 +483,13 @@ public:
 
 	virtual void getBulletTriangle(int prim_index,btTriangleShapeEx & triangle) const
 	{
+        (void) prim_index; (void) triangle;
 		btAssert(0);
 	}
 
 	virtual void getBulletTetrahedron(int prim_index,btTetrahedronShapeEx & tetrahedron) const
 	{
+        (void) prim_index; (void) tetrahedron;
 		btAssert(0);
 	}
 
@@ -495,7 +502,7 @@ public:
 		return "GImpactCompound";
 	}
 
-	virtual eGIMPACT_SHAPE_TYPE getGImpactShapeType()
+	virtual eGIMPACT_SHAPE_TYPE getGImpactShapeType() const
 	{
 		return CONST_GIMPACT_COMPOUND_SHAPE;
 	}
@@ -551,6 +558,7 @@ public:
 		}
 
  		TrimeshPrimitiveManager(const TrimeshPrimitiveManager & manager)
+            : btPrimitiveManagerBase()
 		{
 			m_meshInterface = manager.m_meshInterface;
 			m_part = manager.m_part;
@@ -631,7 +639,7 @@ public:
 		{
 			if(indicestype == PHY_SHORT)
 			{
-				short * s_indices = (short *)(indexbase + face_index*indexstride);
+				unsigned short * s_indices = (unsigned short *)(indexbase + face_index*indexstride);
 				i0 = s_indices[0];
 				i1 = s_indices[1];
 				i2 = s_indices[2];
@@ -748,6 +756,7 @@ public:
 	//! Gets the children
 	virtual btCollisionShape* getChildShape(int index)
 	{
+        (void) index;
 		btAssert(0);
 		return NULL;
 	}
@@ -757,6 +766,7 @@ public:
 	//! Gets the child
 	virtual const btCollisionShape* getChildShape(int index) const
 	{
+        (void) index;
 		btAssert(0);
 		return NULL;
 	}
@@ -764,6 +774,7 @@ public:
 	//! Gets the children transform
 	virtual btTransform	getChildTransform(int index) const
 	{
+        (void) index;
 		btAssert(0);
 		return btTransform();
 	}
@@ -774,6 +785,8 @@ public:
 	*/
 	virtual void setChildTransform(int index, const btTransform & transform)
 	{
+        (void) index;
+        (void) transform;
 		btAssert(0);
 	}
 
@@ -803,7 +816,7 @@ public:
 		return "GImpactMeshShapePart";
 	}
 
-	virtual eGIMPACT_SHAPE_TYPE getGImpactShapeType()
+	virtual eGIMPACT_SHAPE_TYPE getGImpactShapeType() const
 	{
 		return CONST_GIMPACT_TRIMESH_SHAPE_PART;
 	}
@@ -827,6 +840,8 @@ public:
 
 	virtual void getBulletTetrahedron(int prim_index,btTetrahedronShapeEx & tetrahedron) const
 	{
+        (void) prim_index;
+        (void) tetrahedron;
 		btAssert(0);
 	}
 
@@ -1038,11 +1053,13 @@ public:
 
 	virtual void getBulletTriangle(int prim_index,btTriangleShapeEx & triangle) const
 	{
+        (void) prim_index; (void) triangle;
 		btAssert(0);
 	}
 
 	virtual void getBulletTetrahedron(int prim_index,btTetrahedronShapeEx & tetrahedron) const
 	{
+        (void) prim_index; (void) tetrahedron;
 		btAssert(0);
 	}
 
@@ -1065,12 +1082,14 @@ public:
     */
     virtual void getChildAabb(int child_index,const btTransform& t,btVector3& aabbMin,btVector3& aabbMax) const
     {
+        (void) child_index; (void) t; (void) aabbMin; (void) aabbMax;
         btAssert(0);
     }
 
 	//! Gets the children
 	virtual btCollisionShape* getChildShape(int index)
 	{
+        (void) index;
 		btAssert(0);
 		return NULL;
 	}
@@ -1079,6 +1098,7 @@ public:
 	//! Gets the child
 	virtual const btCollisionShape* getChildShape(int index) const
 	{
+        (void) index;
 		btAssert(0);
 		return NULL;
 	}
@@ -1086,6 +1106,7 @@ public:
 	//! Gets the children transform
 	virtual btTransform	getChildTransform(int index) const
 	{
+        (void) index;
 		btAssert(0);
 		return btTransform();
 	}
@@ -1096,11 +1117,12 @@ public:
 	*/
 	virtual void setChildTransform(int index, const btTransform & transform)
 	{
+        (void) index; (void) transform;
 		btAssert(0);
 	}
 
 
-	virtual eGIMPACT_SHAPE_TYPE getGImpactShapeType()
+	virtual eGIMPACT_SHAPE_TYPE getGImpactShapeType() const
 	{
 		return CONST_GIMPACT_TRIMESH_SHAPE;
 	}
@@ -1118,7 +1140,32 @@ public:
 	It gives the triangles in local space
 	*/
 	virtual void	processAllTriangles(btTriangleCallback* callback,const btVector3& aabbMin,const btVector3& aabbMax) const;
+
+	virtual	int	calculateSerializeBufferSize() const;
+
+	///fills the dataBuffer and returns the struct name (and 0 on failure)
+	virtual	const char*	serialize(void* dataBuffer, btSerializer* serializer) const;
+
 };
 
+///Data struct filled by btGImpactMeshShape::serialize. Do not change these serialization structures: doing so requires an updated sBulletDNAstr/sBulletDNAstr64.
+struct	btGImpactMeshShapeData
+{
+	btCollisionShapeData	m_collisionShapeData;	// filled by btCollisionShape::serialize
+
+	btStridingMeshInterfaceData m_meshInterface;	// filled by the shape's mesh interface serialize call
+
+	btVector3FloatData	m_localScaling;	// local scaling, written via serializeFloat
+
+	float	m_collisionMargin;	// collision margin converted to float
+
+	int		m_gimpactSubType;	// getGImpactShapeType() result stored as int
+};
+///Returns the size in bytes of btGImpactMeshShapeData; defined here, after the struct, so that sizeof sees the complete type.
+SIMD_FORCE_INLINE	int	btGImpactMeshShape::calculateSerializeBufferSize() const
+{
+	return sizeof(btGImpactMeshShapeData);
+}
+
 
 #endif //GIMPACT_MESH_SHAPE_H
diff --git a/Engine/lib/bullet/src/BulletCollision/Gimpact/btGenericPoolAllocator.cpp b/Engine/lib/bullet/src/BulletCollision/Gimpact/btGenericPoolAllocator.cpp
index 956fa0430..5d07d1adb 100644
--- a/Engine/lib/bullet/src/BulletCollision/Gimpact/btGenericPoolAllocator.cpp
+++ b/Engine/lib/bullet/src/BulletCollision/Gimpact/btGenericPoolAllocator.cpp
@@ -1,5 +1,5 @@
 /*! \file btGenericPoolAllocator.cpp
-\author Francisco Len N�jera. email projectileman@yahoo.com
+\author Francisco Leon Najera. email projectileman@yahoo.com
 
 General purpose allocator class
 */
diff --git a/Engine/lib/bullet/src/BulletCollision/Gimpact/btGenericPoolAllocator.h b/Engine/lib/bullet/src/BulletCollision/Gimpact/btGenericPoolAllocator.h
index dc8143124..b46d85163 100644
--- a/Engine/lib/bullet/src/BulletCollision/Gimpact/btGenericPoolAllocator.h
+++ b/Engine/lib/bullet/src/BulletCollision/Gimpact/btGenericPoolAllocator.h
@@ -1,5 +1,5 @@
 /*! \file btGenericPoolAllocator.h
-\author Francisco Len N�jera. email projectileman@yahoo.com
+\author Francisco Leon Najera. email projectileman@yahoo.com
 
 General purpose allocator class
 */
diff --git a/Engine/lib/bullet/src/BulletCollision/Gimpact/btGeometryOperations.h b/Engine/lib/bullet/src/BulletCollision/Gimpact/btGeometryOperations.h
index bc5a416dd..60f06510a 100644
--- a/Engine/lib/bullet/src/BulletCollision/Gimpact/btGeometryOperations.h
+++ b/Engine/lib/bullet/src/BulletCollision/Gimpact/btGeometryOperations.h
@@ -2,7 +2,7 @@
 #define BT_BASIC_GEOMETRY_OPERATIONS_H_INCLUDED
 
 /*! \file btGeometryOperations.h
-*\author Francisco Len N�jera
+*\author Francisco Leon Najera
 
 */
 /*
diff --git a/Engine/lib/bullet/src/BulletCollision/Gimpact/btQuantization.h b/Engine/lib/bullet/src/BulletCollision/Gimpact/btQuantization.h
index 7faada61c..bd2633cfc 100644
--- a/Engine/lib/bullet/src/BulletCollision/Gimpact/btQuantization.h
+++ b/Engine/lib/bullet/src/BulletCollision/Gimpact/btQuantization.h
@@ -1,8 +1,8 @@
-#ifndef BT_QUANTIZATION_H_INCLUDED
-#define BT_QUANTIZATION_H_INCLUDED
+#ifndef BT_GIMPACT_QUANTIZATION_H_INCLUDED
+#define BT_GIMPACT_QUANTIZATION_H_INCLUDED
 
 /*! \file btQuantization.h
-*\author Francisco Len N�jera
+*\author Francisco Leon Najera
 
 */
 /*
@@ -85,4 +85,4 @@ SIMD_FORCE_INLINE btVector3 bt_unquantize(
 
 
 
-#endif // GIM_VECTOR_H_INCLUDED
+#endif // BT_GIMPACT_QUANTIZATION_H_INCLUDED
diff --git a/Engine/lib/bullet/src/BulletCollision/Gimpact/btTriangleShapeEx.cpp b/Engine/lib/bullet/src/BulletCollision/Gimpact/btTriangleShapeEx.cpp
index 78696277c..ca76cc54a 100644
--- a/Engine/lib/bullet/src/BulletCollision/Gimpact/btTriangleShapeEx.cpp
+++ b/Engine/lib/bullet/src/BulletCollision/Gimpact/btTriangleShapeEx.cpp
@@ -1,5 +1,5 @@
 /*! \file btGImpactTriangleShape.h
-\author Francisco Len N�jera
+\author Francisco Leon Najera
 */
 /*
 This source file is part of GIMPACT Library.
diff --git a/Engine/lib/bullet/src/BulletCollision/Gimpact/btTriangleShapeEx.h b/Engine/lib/bullet/src/BulletCollision/Gimpact/btTriangleShapeEx.h
index bbd6b630c..973c2ed12 100644
--- a/Engine/lib/bullet/src/BulletCollision/Gimpact/btTriangleShapeEx.h
+++ b/Engine/lib/bullet/src/BulletCollision/Gimpact/btTriangleShapeEx.h
@@ -1,5 +1,5 @@
 /*! \file btGImpactShape.h
-\author Francisco Len N�jera
+\author Francisco Leon Najera
 */
 /*
 This source file is part of GIMPACT Library.
@@ -22,8 +22,8 @@ subject to the following restrictions:
 */
 
 
-#ifndef TRIANGLE_SHAPE_EX_H
-#define TRIANGLE_SHAPE_EX_H
+#ifndef GIMPACT_TRIANGLE_SHAPE_EX_H
+#define GIMPACT_TRIANGLE_SHAPE_EX_H
 
 #include "BulletCollision/CollisionShapes/btCollisionShape.h"
 #include "BulletCollision/CollisionShapes/btTriangleShape.h"
@@ -177,4 +177,4 @@ public:
 };
 
 
-#endif //TRIANGLE_MESH_SHAPE_H
+#endif //GIMPACT_TRIANGLE_SHAPE_EX_H
diff --git a/Engine/lib/bullet/src/BulletCollision/Gimpact/gim_array.h b/Engine/lib/bullet/src/BulletCollision/Gimpact/gim_array.h
index c8161d252..27e6f32fc 100644
--- a/Engine/lib/bullet/src/BulletCollision/Gimpact/gim_array.h
+++ b/Engine/lib/bullet/src/BulletCollision/Gimpact/gim_array.h
@@ -1,7 +1,7 @@
 #ifndef GIM_ARRAY_H_INCLUDED
 #define GIM_ARRAY_H_INCLUDED
 /*! \file gim_array.h
-\author Francisco Len N�jera
+\author Francisco Leon Najera
 */
 /*
 -----------------------------------------------------------------------------
@@ -285,18 +285,16 @@ public:
 	    m_data[index] = obj;
 	}
 
-	inline void resize(GUINT size, bool call_constructor = true)
+	inline void resize(GUINT size, bool call_constructor = true, const T& fillData=T())
 	{
-
 	    if(size>m_size)
 	    {
             reserve(size);
             if(call_constructor)
             {
-            	T obj;
                 while(m_size<size)
                 {
-                    m_data[m_size] = obj;
+                    m_data[m_size] = fillData;
                     m_size++;
                 }
             }
diff --git a/Engine/lib/bullet/src/BulletCollision/Gimpact/gim_basic_geometry_operations.h b/Engine/lib/bullet/src/BulletCollision/Gimpact/gim_basic_geometry_operations.h
index 666abf791..915277404 100644
--- a/Engine/lib/bullet/src/BulletCollision/Gimpact/gim_basic_geometry_operations.h
+++ b/Engine/lib/bullet/src/BulletCollision/Gimpact/gim_basic_geometry_operations.h
@@ -2,7 +2,7 @@
 #define GIM_BASIC_GEOMETRY_OPERATIONS_H_INCLUDED
 
 /*! \file gim_basic_geometry_operations.h
-*\author Francisco Len N�jera
+*\author Francisco Leon Najera
 type independant geometry routines
 
 */
diff --git a/Engine/lib/bullet/src/BulletCollision/Gimpact/gim_bitset.h b/Engine/lib/bullet/src/BulletCollision/Gimpact/gim_bitset.h
index 322004a8d..7dee48a4c 100644
--- a/Engine/lib/bullet/src/BulletCollision/Gimpact/gim_bitset.h
+++ b/Engine/lib/bullet/src/BulletCollision/Gimpact/gim_bitset.h
@@ -1,7 +1,7 @@
 #ifndef GIM_BITSET_H_INCLUDED
 #define GIM_BITSET_H_INCLUDED
 /*! \file gim_bitset.h
-\author Francisco Len N�jera
+\author Francisco Leon Najera
 */
 /*
 -----------------------------------------------------------------------------
diff --git a/Engine/lib/bullet/src/BulletCollision/Gimpact/gim_box_collision.h b/Engine/lib/bullet/src/BulletCollision/Gimpact/gim_box_collision.h
index 0add5e4b9..9c572638a 100644
--- a/Engine/lib/bullet/src/BulletCollision/Gimpact/gim_box_collision.h
+++ b/Engine/lib/bullet/src/BulletCollision/Gimpact/gim_box_collision.h
@@ -2,7 +2,7 @@
 #define GIM_BOX_COLLISION_H_INCLUDED
 
 /*! \file gim_box_collision.h
-\author Francisco Len N�jera
+\author Francisco Leon Najera
 */
 /*
 -----------------------------------------------------------------------------
@@ -186,9 +186,7 @@ public:
 
 	SIMD_FORCE_INLINE btVector3 transform(const btVector3 & point)
 	{
-		return btVector3(m_R1to0[0].dot(point) + m_T1to0.x(),
-			m_R1to0[1].dot(point) + m_T1to0.y(),
-			m_R1to0[2].dot(point) + m_T1to0.z());
+        return point.dot3(m_R1to0[0], m_R1to0[1], m_R1to0[2]) + m_T1to0;
 	}
 };
 
@@ -332,10 +330,10 @@ public:
 		// Compute new center
 		center = trans(center);
 
-		btVector3 textends(extends.dot(trans.getBasis().getRow(0).absolute()),
- 				 extends.dot(trans.getBasis().getRow(1).absolute()),
-				 extends.dot(trans.getBasis().getRow(2).absolute()));
-
+        btVector3 textends = extends.dot3(trans.getBasis().getRow(0).absolute(), 
+                                          trans.getBasis().getRow(1).absolute(), 
+                                          trans.getBasis().getRow(2).absolute());
+        
 		m_min = center - textends;
 		m_max = center + textends;
 	}
diff --git a/Engine/lib/bullet/src/BulletCollision/Gimpact/gim_box_set.h b/Engine/lib/bullet/src/BulletCollision/Gimpact/gim_box_set.h
index 1058a0872..61d190a7d 100644
--- a/Engine/lib/bullet/src/BulletCollision/Gimpact/gim_box_set.h
+++ b/Engine/lib/bullet/src/BulletCollision/Gimpact/gim_box_set.h
@@ -2,7 +2,7 @@
 #define GIM_BOX_SET_H_INCLUDED
 
 /*! \file gim_box_set.h
-\author Francisco Len N�jera
+\author Francisco Leon Najera
 */
 /*
 -----------------------------------------------------------------------------
@@ -91,6 +91,7 @@ class GIM_PRIMITIVE_MANAGER_PROTOTYPE
 {
 public:
 
+	virtual ~GIM_PRIMITIVE_MANAGER_PROTOTYPE() {}
 	//! determines if this manager consist on only triangles, which special case will be optimized
 	virtual bool is_trimesh() = 0;
 	virtual GUINT get_primitive_count() = 0;
@@ -669,3 +670,5 @@ public:
 
 
 #endif // GIM_BOXPRUNING_H_INCLUDED
+
+
diff --git a/Engine/lib/bullet/src/BulletCollision/Gimpact/gim_clip_polygon.h b/Engine/lib/bullet/src/BulletCollision/Gimpact/gim_clip_polygon.h
index a91fd3aa4..e342459ce 100644
--- a/Engine/lib/bullet/src/BulletCollision/Gimpact/gim_clip_polygon.h
+++ b/Engine/lib/bullet/src/BulletCollision/Gimpact/gim_clip_polygon.h
@@ -2,7 +2,7 @@
 #define GIM_CLIP_POLYGON_H_INCLUDED
 
 /*! \file gim_tri_collision.h
-\author Francisco Len N�jera
+\author Francisco Leon Najera
 */
 /*
 -----------------------------------------------------------------------------
diff --git a/Engine/lib/bullet/src/BulletCollision/Gimpact/gim_contact.h b/Engine/lib/bullet/src/BulletCollision/Gimpact/gim_contact.h
index ad3e12298..5d9f8ef81 100644
--- a/Engine/lib/bullet/src/BulletCollision/Gimpact/gim_contact.h
+++ b/Engine/lib/bullet/src/BulletCollision/Gimpact/gim_contact.h
@@ -2,7 +2,7 @@
 #define GIM_CONTACT_H_INCLUDED
 
 /*! \file gim_contact.h
-\author Francisco Len N�jera
+\author Francisco Leon Najera
 */
 /*
 -----------------------------------------------------------------------------
diff --git a/Engine/lib/bullet/src/BulletCollision/Gimpact/gim_geom_types.h b/Engine/lib/bullet/src/BulletCollision/Gimpact/gim_geom_types.h
index 2fb1e3f9e..6b8f9ea6c 100644
--- a/Engine/lib/bullet/src/BulletCollision/Gimpact/gim_geom_types.h
+++ b/Engine/lib/bullet/src/BulletCollision/Gimpact/gim_geom_types.h
@@ -2,7 +2,7 @@
 #define GIM_GEOM_TYPES_H_INCLUDED
 
 /*! \file gim_geom_types.h
-\author Francisco Len N�jera
+\author Francisco Leon Najera
 */
 /*
 -----------------------------------------------------------------------------
diff --git a/Engine/lib/bullet/src/BulletCollision/Gimpact/gim_geometry.h b/Engine/lib/bullet/src/BulletCollision/Gimpact/gim_geometry.h
index 6cc416721..c67a6991c 100644
--- a/Engine/lib/bullet/src/BulletCollision/Gimpact/gim_geometry.h
+++ b/Engine/lib/bullet/src/BulletCollision/Gimpact/gim_geometry.h
@@ -2,7 +2,7 @@
 #define GIM_GEOMETRY_H_INCLUDED
 
 /*! \file gim_geometry.h
-\author Francisco Len N�jera
+\author Francisco Leon Najera
 */
 /*
 -----------------------------------------------------------------------------
diff --git a/Engine/lib/bullet/src/BulletCollision/Gimpact/gim_hash_table.h b/Engine/lib/bullet/src/BulletCollision/Gimpact/gim_hash_table.h
index 93c66f818..e4237c2c5 100644
--- a/Engine/lib/bullet/src/BulletCollision/Gimpact/gim_hash_table.h
+++ b/Engine/lib/bullet/src/BulletCollision/Gimpact/gim_hash_table.h
@@ -1,7 +1,7 @@
 #ifndef GIM_HASH_TABLE_H_INCLUDED
 #define GIM_HASH_TABLE_H_INCLUDED
 /*! \file gim_trimesh_data.h
-\author Francisco Len N�jera
+\author Francisco Leon Najera
 */
 /*
 -----------------------------------------------------------------------------
diff --git a/Engine/lib/bullet/src/BulletCollision/Gimpact/gim_linear_math.h b/Engine/lib/bullet/src/BulletCollision/Gimpact/gim_linear_math.h
index 0247d4e61..64f11b495 100644
--- a/Engine/lib/bullet/src/BulletCollision/Gimpact/gim_linear_math.h
+++ b/Engine/lib/bullet/src/BulletCollision/Gimpact/gim_linear_math.h
@@ -2,7 +2,7 @@
 #define GIM_LINEAR_H_INCLUDED
 
 /*! \file gim_linear_math.h
-*\author Francisco Len N�jera
+*\author Francisco Leon Najera
 Type Independant Vector and matrix operations.
 */
 /*
diff --git a/Engine/lib/bullet/src/BulletCollision/Gimpact/gim_math.h b/Engine/lib/bullet/src/BulletCollision/Gimpact/gim_math.h
index 8b9e6806e..939079e10 100644
--- a/Engine/lib/bullet/src/BulletCollision/Gimpact/gim_math.h
+++ b/Engine/lib/bullet/src/BulletCollision/Gimpact/gim_math.h
@@ -1,7 +1,7 @@
 #ifndef GIM_MATH_H_INCLUDED
 #define GIM_MATH_H_INCLUDED
 /*! \file gim_math.h
-\author Francisco Len N�jera
+\author Francisco Leon Najera
 */
 /*
 -----------------------------------------------------------------------------
diff --git a/Engine/lib/bullet/src/BulletCollision/Gimpact/gim_memory.h b/Engine/lib/bullet/src/BulletCollision/Gimpact/gim_memory.h
index ef396c807..e203888a1 100644
--- a/Engine/lib/bullet/src/BulletCollision/Gimpact/gim_memory.h
+++ b/Engine/lib/bullet/src/BulletCollision/Gimpact/gim_memory.h
@@ -1,7 +1,7 @@
 #ifndef GIM_MEMORY_H_INCLUDED
 #define GIM_MEMORY_H_INCLUDED
 /*! \file gim_memory.h
-\author Francisco Len N�jera
+\author Francisco Leon Najera
 */
 /*
 -----------------------------------------------------------------------------
@@ -117,7 +117,7 @@ void gim_free(void *ptr);
 
 
 
-#if defined (WIN32) && !defined(__MINGW32__) && !defined(__CYGWIN__)
+#if defined (_WIN32) && !defined(__MINGW32__) && !defined(__CYGWIN__)
     #define GIM_SIMD_MEMORY 1
 #endif
 
diff --git a/Engine/lib/bullet/src/BulletCollision/Gimpact/gim_radixsort.h b/Engine/lib/bullet/src/BulletCollision/Gimpact/gim_radixsort.h
index f7dadbbca..c246ef125 100644
--- a/Engine/lib/bullet/src/BulletCollision/Gimpact/gim_radixsort.h
+++ b/Engine/lib/bullet/src/BulletCollision/Gimpact/gim_radixsort.h
@@ -1,7 +1,7 @@
 #ifndef GIM_RADIXSORT_H_INCLUDED
 #define GIM_RADIXSORT_H_INCLUDED
 /*! \file gim_radixsort.h
-\author Francisco Len N�jera.
+\author Francisco Leon Najera.
 Based on the work of Michael Herf : "fast floating-point radix sort"
 Avaliable on http://www.stereopsis.com/radix.html
 */
diff --git a/Engine/lib/bullet/src/BulletCollision/Gimpact/gim_tri_collision.cpp b/Engine/lib/bullet/src/BulletCollision/Gimpact/gim_tri_collision.cpp
index 74d734146..f9727e1d5 100644
--- a/Engine/lib/bullet/src/BulletCollision/Gimpact/gim_tri_collision.cpp
+++ b/Engine/lib/bullet/src/BulletCollision/Gimpact/gim_tri_collision.cpp
@@ -1,6 +1,6 @@
 
 /*! \file gim_tri_collision.h
-\author Francisco Len N�jera
+\author Francisco Leon Najera
 */
 /*
 -----------------------------------------------------------------------------
diff --git a/Engine/lib/bullet/src/BulletCollision/Gimpact/gim_tri_collision.h b/Engine/lib/bullet/src/BulletCollision/Gimpact/gim_tri_collision.h
index 2d6e43a1a..5b552a1ed 100644
--- a/Engine/lib/bullet/src/BulletCollision/Gimpact/gim_tri_collision.h
+++ b/Engine/lib/bullet/src/BulletCollision/Gimpact/gim_tri_collision.h
@@ -2,7 +2,7 @@
 #define GIM_TRI_COLLISION_H_INCLUDED
 
 /*! \file gim_tri_collision.h
-\author Francisco Len N�jera
+\author Francisco Leon Najera
 */
 /*
 -----------------------------------------------------------------------------
diff --git a/Engine/lib/bullet/src/BulletCollision/Jamfile b/Engine/lib/bullet/src/BulletCollision/Jamfile
deleted file mode 100644
index 127e83f80..000000000
--- a/Engine/lib/bullet/src/BulletCollision/Jamfile
+++ /dev/null
@@ -1,14 +0,0 @@
-
-SubDir TOP src BulletCollision ;
-
-
-Description bulletcollision : "Bullet Collision Detection" ;
-Library bulletcollision :
-  [ Wildcard Gimpact : *.h *.cpp ]
-  [ Wildcard BroadphaseCollision : *.h *.cpp ] 
-  [ Wildcard CollisionDispatch : *.h *.cpp ] 
-  [ Wildcard CollisionShapes : *.h *.cpp ] 
-  [ Wildcard NarrowPhaseCollision : *.h *.cpp ] 
-;
-LibDepends bulletcollision : bulletmath  ;
-
diff --git a/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btContinuousConvexCollision.cpp b/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btContinuousConvexCollision.cpp
index 9ee83e7d5..940282f57 100644
--- a/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btContinuousConvexCollision.cpp
+++ b/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btContinuousConvexCollision.cpp
@@ -22,20 +22,71 @@ subject to the following restrictions:
 
 #include "btGjkPairDetector.h"
 #include "btPointCollector.h"
+#include "BulletCollision/CollisionShapes/btStaticPlaneShape.h"
 
 
 
 btContinuousConvexCollision::btContinuousConvexCollision ( const btConvexShape*	convexA,const btConvexShape*	convexB,btSimplexSolverInterface* simplexSolver, btConvexPenetrationDepthSolver* penetrationDepthSolver)
 :m_simplexSolver(simplexSolver),
 m_penetrationDepthSolver(penetrationDepthSolver),
-m_convexA(convexA),m_convexB(convexB)
+m_convexA(convexA),m_convexB1(convexB),m_planeShape(0)
 {
 }
 
+/// Convex-versus-static-plane variant: both solver pointers and m_convexB1 are set to null, which makes computeClosestPoints take its plane branch.
+btContinuousConvexCollision::btContinuousConvexCollision( const btConvexShape*	convexA,const btStaticPlaneShape*	plane)
+:m_simplexSolver(0),
+m_penetrationDepthSolver(0),
+m_convexA(convexA),m_convexB1(0),m_planeShape(plane)
+{
+}
+
+
 /// This maximum should not be necessary. It allows for untested/degenerate cases in production code.
 /// You don't want your game ever to lock-up.
 #define MAX_ITERATIONS 64
 
+void btContinuousConvexCollision::computeClosestPoints( const btTransform& transA, const btTransform& transB,btPointCollector& pointCollector)
+{	// Feeds pointCollector with the closest-point result for shape A at transA against object B at transB.
+	if (m_convexB1)
+	{
+		m_simplexSolver->reset();	// convex-vs-convex: reset the simplex solver before every GJK query
+		btGjkPairDetector gjk(m_convexA,m_convexB1,m_convexA->getShapeType(),m_convexB1->getShapeType(),m_convexA->getMargin(),m_convexB1->getMargin(),m_simplexSolver,m_penetrationDepthSolver);		
+		btGjkPairDetector::ClosestPointInput input;
+		input.m_transformA = transA;
+		input.m_transformB = transB;
+		gjk.getClosestPoints(input,pointCollector,0);
+	} else
+	{
+		//convex versus plane (taken when m_convexB1 is null; object B is m_planeShape)
+		const btConvexShape* convexShape = m_convexA;
+		const btStaticPlaneShape* planeShape = m_planeShape;
+		// Plane normal and constant as reported by the plane shape; used below against a point in the plane's frame.
+		const btVector3& planeNormal = planeShape->getPlaneNormal();
+		const btScalar& planeConstant = planeShape->getPlaneConstant();
+		// Relative transforms: inverse(transB)*transA and inverse(transA)*transB.
+		btTransform convexWorldTransform = transA;
+		btTransform convexInPlaneTrans;
+		convexInPlaneTrans= transB.inverse() * convexWorldTransform;
+		btTransform planeInConvex;
+		planeInConvex= convexWorldTransform.inverse() * transB;
+		// Support vertex of the convex, queried along the negated plane normal rotated by planeInConvex.
+		btVector3 vtx = convexShape->localGetSupportingVertex(planeInConvex.getBasis()*-planeNormal);
+		// Map that vertex through convexInPlaneTrans and evaluate normal.dot(p) - constant as its distance.
+		btVector3 vtxInPlane = convexInPlaneTrans(vtx);
+		btScalar distance = (planeNormal.dot(vtxInPlane) - planeConstant);
+		// Remove the normal component to land on the plane, then map point and normal through transB.
+		btVector3 vtxInPlaneProjected = vtxInPlane - distance*planeNormal;
+		btVector3 vtxInPlaneWorld = transB * vtxInPlaneProjected;
+		btVector3 normalOnSurfaceB = transB.getBasis() * planeNormal;
+		// Report (normal, point, distance) to the collector.
+		pointCollector.addContactPoint(
+			normalOnSurfaceB,
+			vtxInPlaneWorld,
+			distance);
+	}
+}
+
 bool	btContinuousConvexCollision::calcTimeOfImpact(
 				const btTransform& fromA,
 				const btTransform& toA,
@@ -44,7 +95,6 @@ bool	btContinuousConvexCollision::calcTimeOfImpact(
 				CastResult& result)
 {
 
-	m_simplexSolver->reset();
 
 	/// compute linear and angular velocity for this interval, to interpolate
 	btVector3 linVelA,angVelA,linVelB,angVelB;
@@ -53,7 +103,7 @@ bool	btContinuousConvexCollision::calcTimeOfImpact(
 
 
 	btScalar boundingRadiusA = m_convexA->getAngularMotionDisc();
-	btScalar boundingRadiusB = m_convexB->getAngularMotionDisc();
+	btScalar boundingRadiusB = m_convexB1?m_convexB1->getAngularMotionDisc():0.f;
 
 	btScalar maxAngularProjectedVelocity = angVelA.length() * boundingRadiusA + angVelB.length() * boundingRadiusB;
 	btVector3 relLinVel = (linVelB-linVelA);
@@ -64,7 +114,6 @@ bool	btContinuousConvexCollision::calcTimeOfImpact(
 		return false;
 
 
-	btScalar radius = btScalar(0.001);
 
 	btScalar lambda = btScalar(0.);
 	btVector3 v(1,0,0);
@@ -83,28 +132,14 @@ bool	btContinuousConvexCollision::calcTimeOfImpact(
 	//first solution, using GJK
 
 
-	btTransform identityTrans;
-	identityTrans.setIdentity();
-
-	btSphereShape	raySphere(btScalar(0.0));
-	raySphere.setMargin(btScalar(0.));
-
-
+	btScalar radius = 0.001f;
 //	result.drawCoordSystem(sphereTr);
 
 	btPointCollector	pointCollector1;
 
 	{
-		
-		btGjkPairDetector gjk(m_convexA,m_convexB,m_convexA->getShapeType(),m_convexB->getShapeType(),m_convexA->getMargin(),m_convexB->getMargin(),m_simplexSolver,m_penetrationDepthSolver);		
-		btGjkPairDetector::ClosestPointInput input;
 	
-		//we don't use margins during CCD
-	//	gjk.setIgnoreMargin(true);
-
-		input.m_transformA = fromA;
-		input.m_transformB = fromB;
-		gjk.getClosestPoints(input,pointCollector1,0);
+		computeClosestPoints(fromA,fromB,pointCollector1);
 
 		hasResult = pointCollector1.m_hasResult;
 		c = pointCollector1.m_pointInWorld;
@@ -113,11 +148,12 @@ bool	btContinuousConvexCollision::calcTimeOfImpact(
 	if (hasResult)
 	{
 		btScalar dist;
-		dist = pointCollector1.m_distance;
+		dist = pointCollector1.m_distance + result.m_allowedPenetration;
 		n = pointCollector1.m_normalOnBInWorld;
-
 		btScalar projectedLinearVelocity = relLinVel.dot(n);
-		
+		if ((projectedLinearVelocity+ maxAngularProjectedVelocity)<=SIMD_EPSILON)
+			return false;
+
 		//not close enough
 		while (dist > radius)
 		{
@@ -125,19 +161,10 @@ bool	btContinuousConvexCollision::calcTimeOfImpact(
 			{
 				result.m_debugDrawer->drawSphere(c,0.2f,btVector3(1,1,1));
 			}
-			numIter++;
-			if (numIter > maxIter)
-			{
-				return false; //todo: report a failure
-			}
 			btScalar dLambda = btScalar(0.);
 
 			projectedLinearVelocity = relLinVel.dot(n);
 
-			//calculate safe moving fraction from distance / (linear+rotational velocity)
-			
-			//btScalar clippedDist  = GEN_min(angularConservativeRadius,dist);
-			//btScalar clippedDist  = dist;
 			
 			//don't report time of impact for motion away from the contact normal (or causes minor penetration)
 			if ((projectedLinearVelocity+ maxAngularProjectedVelocity)<=SIMD_EPSILON)
@@ -182,37 +209,27 @@ bool	btContinuousConvexCollision::calcTimeOfImpact(
 			result.DebugDraw( lambda );
 
 			btPointCollector	pointCollector;
-			btGjkPairDetector gjk(m_convexA,m_convexB,m_simplexSolver,m_penetrationDepthSolver);
-			btGjkPairDetector::ClosestPointInput input;
-			input.m_transformA = interpolatedTransA;
-			input.m_transformB = interpolatedTransB;
-			gjk.getClosestPoints(input,pointCollector,0);
+			computeClosestPoints(interpolatedTransA,interpolatedTransB,pointCollector);
+
 			if (pointCollector.m_hasResult)
 			{
-				if (pointCollector.m_distance < btScalar(0.))
-				{
-					//degenerate ?!
-					result.m_fraction = lastLambda;
-					n = pointCollector.m_normalOnBInWorld;
-					result.m_normal=n;//.setValue(1,1,1);// = n;
-					result.m_hitPoint = pointCollector.m_pointInWorld;
-					return true;
-				}
+				dist = pointCollector.m_distance+result.m_allowedPenetration;
 				c = pointCollector.m_pointInWorld;		
 				n = pointCollector.m_normalOnBInWorld;
-				dist = pointCollector.m_distance;
 			} else
 			{
-				//??
+				result.reportFailure(-1, numIter);
 				return false;
 			}
-			
 
+			numIter++;
+			if (numIter > maxIter)
+			{
+				result.reportFailure(-2, numIter);
+				return false;
+			}
 		}
 	
-		if ((projectedLinearVelocity+ maxAngularProjectedVelocity)<=result.m_allowedPenetration)//SIMD_EPSILON)
-			return false;
-			
 		result.m_fraction = lambda;
 		result.m_normal = n;
 		result.m_hitPoint = c;
@@ -221,16 +238,5 @@ bool	btContinuousConvexCollision::calcTimeOfImpact(
 
 	return false;
 
-/*
-//todo:
-	//if movement away from normal, discard result
-	btVector3 move = transBLocalTo.getOrigin() - transBLocalFrom.getOrigin();
-	if (result.m_fraction < btScalar(1.))
-	{
-		if (move.dot(result.m_normal) <= btScalar(0.))
-		{
-		}
-	}
-*/
-
 }
+
diff --git a/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btContinuousConvexCollision.h b/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btContinuousConvexCollision.h
index 28c2b4d61..bdc0572f7 100644
--- a/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btContinuousConvexCollision.h
+++ b/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btContinuousConvexCollision.h
@@ -14,13 +14,14 @@ subject to the following restrictions:
 */
 
 
-#ifndef CONTINUOUS_COLLISION_CONVEX_CAST_H
-#define CONTINUOUS_COLLISION_CONVEX_CAST_H
+#ifndef BT_CONTINUOUS_COLLISION_CONVEX_CAST_H
+#define BT_CONTINUOUS_COLLISION_CONVEX_CAST_H
 
 #include "btConvexCast.h"
 #include "btSimplexSolverInterface.h"
 class btConvexPenetrationDepthSolver;
 class btConvexShape;
+class btStaticPlaneShape;
 
 /// btContinuousConvexCollision implements angular and linear time of impact for convex objects.
 /// Based on Brian Mirtich's Conservative Advancement idea (PhD thesis).
@@ -31,13 +32,18 @@ class btContinuousConvexCollision : public btConvexCast
 	btSimplexSolverInterface* m_simplexSolver;
 	btConvexPenetrationDepthSolver*	m_penetrationDepthSolver;
 	const btConvexShape*	m_convexA;
-	const btConvexShape*	m_convexB;
+	//second object is either a convex or a plane (code sharing)
+	const btConvexShape*	m_convexB1;
+	const btStaticPlaneShape*	m_planeShape;
 
+	void computeClosestPoints( const btTransform& transA, const btTransform& transB,struct btPointCollector& pointCollector);
 
 public:
 
 	btContinuousConvexCollision (const btConvexShape*	shapeA,const btConvexShape*	shapeB ,btSimplexSolverInterface* simplexSolver,btConvexPenetrationDepthSolver* penetrationDepthSolver);
 
+	btContinuousConvexCollision(const btConvexShape*	shapeA,const btStaticPlaneShape*	plane );
+
 	virtual bool	calcTimeOfImpact(
 				const btTransform& fromA,
 				const btTransform& toA,
@@ -48,5 +54,6 @@ public:
 
 };
 
-#endif //CONTINUOUS_COLLISION_CONVEX_CAST_H
+
+#endif //BT_CONTINUOUS_COLLISION_CONVEX_CAST_H
 
diff --git a/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btConvexCast.h b/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btConvexCast.h
index b0bce341e..bfd79d03b 100644
--- a/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btConvexCast.h
+++ b/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btConvexCast.h
@@ -14,8 +14,8 @@ subject to the following restrictions:
 */
 
 
-#ifndef CONVEX_CAST_H
-#define CONVEX_CAST_H
+#ifndef BT_CONVEX_CAST_H
+#define BT_CONVEX_CAST_H
 
 #include "LinearMath/btTransform.h"
 #include "LinearMath/btVector3.h"
@@ -39,7 +39,7 @@ public:
 				
 		virtual void	DebugDraw(btScalar	fraction) {(void)fraction;}
 		virtual void	drawCoordSystem(const btTransform& trans) {(void)trans;}
-
+		virtual void	reportFailure(int errNo, int numIterations) {(void)errNo;(void)numIterations;}
 		CastResult()
 			:m_fraction(btScalar(BT_LARGE_FLOAT)),
 			m_debugDrawer(0),
@@ -70,4 +70,4 @@ public:
 					CastResult& result) = 0;
 };
 
-#endif //CONVEX_CAST_H
+#endif //BT_CONVEX_CAST_H
diff --git a/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btConvexPenetrationDepthSolver.h b/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btConvexPenetrationDepthSolver.h
index 7e3fde8e2..72eb5aec4 100644
--- a/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btConvexPenetrationDepthSolver.h
+++ b/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btConvexPenetrationDepthSolver.h
@@ -14,8 +14,8 @@ subject to the following restrictions:
 */
 
 
-#ifndef __CONVEX_PENETRATION_DEPTH_H
-#define __CONVEX_PENETRATION_DEPTH_H
+#ifndef BT_CONVEX_PENETRATION_DEPTH_H
+#define BT_CONVEX_PENETRATION_DEPTH_H
 
 class btStackAlloc;
 class btVector3;
@@ -38,5 +38,5 @@ public:
 
 
 };
-#endif //CONVEX_PENETRATION_DEPTH_H
+#endif //BT_CONVEX_PENETRATION_DEPTH_H
 
diff --git a/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btDiscreteCollisionDetectorInterface.h b/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btDiscreteCollisionDetectorInterface.h
index bc711ad49..f958cc523 100644
--- a/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btDiscreteCollisionDetectorInterface.h
+++ b/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btDiscreteCollisionDetectorInterface.h
@@ -14,8 +14,9 @@ subject to the following restrictions:
 */
 
 
-#ifndef DISCRETE_COLLISION_DETECTOR1_INTERFACE_H
-#define DISCRETE_COLLISION_DETECTOR1_INTERFACE_H
+#ifndef BT_DISCRETE_COLLISION_DETECTOR1_INTERFACE_H
+#define BT_DISCRETE_COLLISION_DETECTOR1_INTERFACE_H
+
 #include "LinearMath/btTransform.h"
 #include "LinearMath/btVector3.h"
 class btStackAlloc;
@@ -86,4 +87,5 @@ struct btStorageResult : public btDiscreteCollisionDetectorInterface::Result
 		}
 };
 
-#endif //DISCRETE_COLLISION_DETECTOR_INTERFACE1_H
+#endif //BT_DISCRETE_COLLISION_DETECTOR1_INTERFACE_H
+
diff --git a/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btGjkConvexCast.h b/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btGjkConvexCast.h
index a977c9e83..6a42ee63b 100644
--- a/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btGjkConvexCast.h
+++ b/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btGjkConvexCast.h
@@ -15,8 +15,8 @@ subject to the following restrictions:
 
 
 
-#ifndef GJK_CONVEX_CAST_H
-#define GJK_CONVEX_CAST_H
+#ifndef BT_GJK_CONVEX_CAST_H
+#define BT_GJK_CONVEX_CAST_H
 
 #include "BulletCollision/CollisionShapes/btCollisionMargin.h"
 
@@ -47,4 +47,4 @@ public:
 
 };
 
-#endif //GJK_CONVEX_CAST_H
+#endif //BT_GJK_CONVEX_CAST_H
diff --git a/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btGjkEpa2.cpp b/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btGjkEpa2.cpp
index f74261d4b..3268f06c2 100644
--- a/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btGjkEpa2.cpp
+++ b/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btGjkEpa2.cpp
@@ -511,7 +511,6 @@ namespace gjkepa2_impl
 		{
 			btVector3	n;
 			btScalar	d;
-			btScalar	p;
 			sSV*		c[3];
 			sFace*		f[3];
 			sFace*		l[2];
@@ -657,7 +656,7 @@ namespace gjkepa2_impl
 										remove(m_hull,best);
 										append(m_stock,best);
 										best=findbest();
-										if(best->p>=outer.p) outer=*best;
+										outer=*best;
 									} else { m_status=eStatus::InvalidHull;break; }
 								} else { m_status=eStatus::AccuraryReached;break; }
 							} else { m_status=eStatus::OutOfVertices;break; }
@@ -696,6 +695,42 @@ namespace gjkepa2_impl
 				m_result.p[0]=1;	
 				return(m_status);
 			}
+			bool getedgedist(sFace* face, sSV* a, sSV* b, btScalar& dist) // Tests the origin against edge a->b of 'face'
+			{ // Returns true and writes 'dist' (distance from the origin to segment a-b) when the origin is outside the edge; otherwise returns false and leaves 'dist' unchanged
+				const btVector3 ba = b->w - a->w;
+				const btVector3 n_ab = btCross(ba, face->n); // Outward facing edge normal direction, on triangle plane
+				const btScalar a_dot_nab = btDot(a->w, n_ab); // Only care about the sign to determine inside/outside, so no normalization is required
+
+				if(a_dot_nab < 0)
+				{
+					// Outside of edge a->b
+					// The closest point of segment a-b to the origin is a, b, or an interior point of the edge
+					const btScalar ba_l2 = ba.length2();
+					const btScalar a_dot_ba = btDot(a->w, ba);
+					const btScalar b_dot_ba = btDot(b->w, ba);
+
+					if(a_dot_ba > 0)
+					{
+						// Origin projects before a on the edge line: use the distance to vertex a
+						dist = a->w.length();
+					}
+					else if(b_dot_ba < 0)
+					{
+						// Origin projects past b on the edge line: use the distance to vertex b
+						dist = b->w.length();
+					}
+					else
+					{
+						// Origin projects onto the edge: use the distance to the line through a and b, i.e. |a x b| / |b - a|
+						const btScalar a_dot_b = btDot(a->w, b->w);
+						dist = btSqrt(btMax((a->w.length2() * b->w.length2() - a_dot_b * a_dot_b) / ba_l2, (btScalar)0)); // btMax guards the square root against a negative argument
+					}
+
+					return true;
+				}
+
+				return false;
+			}
 			sFace*				newface(sSV* a,sSV* b,sSV* c,bool forced)
 			{
 				if(m_stock.root)
@@ -710,41 +745,48 @@ namespace gjkepa2_impl
 					face->n		=	btCross(b->w-a->w,c->w-a->w);
 					const btScalar	l=face->n.length();
 					const bool		v=l>EPA_ACCURACY;
-					face->p		=	btMin(btMin(
-						btDot(a->w,btCross(face->n,a->w-b->w)),
-						btDot(b->w,btCross(face->n,b->w-c->w))),
-						btDot(c->w,btCross(face->n,c->w-a->w)))	/
-						(v?l:1);
-					face->p		=	face->p>=-EPA_INSIDE_EPS?0:face->p;
+
 					if(v)
 					{
-						face->d		=	btDot(a->w,face->n)/l;
-						face->n		/=	l;
-						if(forced||(face->d>=-EPA_PLANE_EPS))
+						if(!(getedgedist(face, a, b, face->d) ||
+							 getedgedist(face, b, c, face->d) ||
+							 getedgedist(face, c, a, face->d)))
 						{
-							return(face);
-						} else m_status=eStatus::NonConvex;
-					} else m_status=eStatus::Degenerated;
-					remove(m_hull,face);
-					append(m_stock,face);
-					return(0);
+							// Origin projects to the interior of the triangle
+							// Use distance to triangle plane
+							face->d = btDot(a->w, face->n) / l;
+						}
+
+						face->n /= l;
+						if(forced || (face->d >= -EPA_PLANE_EPS))
+						{
+							return face;
+						}
+						else
+							m_status=eStatus::NonConvex;
+					}
+					else
+						m_status=eStatus::Degenerated;
+
+					remove(m_hull, face);
+					append(m_stock, face);
+					return 0;
+
 				}
-				m_status=m_stock.root?eStatus::OutOfVertices:eStatus::OutOfFaces;
-				return(0);
+				m_status = m_stock.root ? eStatus::OutOfVertices : eStatus::OutOfFaces;
+				return 0;
 			}
 			sFace*				findbest()
 			{
 				sFace*		minf=m_hull.root;
 				btScalar	mind=minf->d*minf->d;
-				btScalar	maxp=minf->p;
 				for(sFace* f=minf->l[1];f;f=f->l[1])
 				{
 					const btScalar	sqd=f->d*f->d;
-					if((f->p>=maxp)&&(sqd<mind))
+					if(sqd<mind)
 					{
 						minf=f;
 						mind=sqd;
-						maxp=f->p;
 					}
 				}
 				return(minf);
diff --git a/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btGjkEpa2.h b/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btGjkEpa2.h
index 2296527d7..ac501d5ec 100644
--- a/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btGjkEpa2.h
+++ b/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btGjkEpa2.h
@@ -22,8 +22,9 @@ misrepresented as being the original software.
 /*
 GJK-EPA collision solver by Nathanael Presson, 2008
 */
-#ifndef _68DA1F85_90B7_4bb0_A705_83B4040A75C6_
-#define _68DA1F85_90B7_4bb0_A705_83B4040A75C6_
+#ifndef BT_GJK_EPA2_H
+#define BT_GJK_EPA2_H
+
 #include "BulletCollision/CollisionShapes/btConvexShape.h"
 
 ///btGjkEpaSolver contributed under zlib by Nathanael Presson
@@ -70,4 +71,5 @@ static bool		SignedDistance(	const btConvexShape* shape0,const btTransform& wtrs
 
 };
 
-#endif
+#endif //BT_GJK_EPA2_H
+
diff --git a/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btGjkPairDetector.cpp b/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btGjkPairDetector.cpp
index 1a5619573..8af16b9cf 100644
--- a/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btGjkPairDetector.cpp
+++ b/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btGjkPairDetector.cpp
@@ -254,20 +254,21 @@ void btGjkPairDetector::getClosestPointsNonVirtual(const ClosestPointInput& inpu
 			}
 #endif //
 			
-			m_cachedSeparatingAxis = newCachedSeparatingAxis;
 
 			//redundant m_simplexSolver->compute_points(pointOnA, pointOnB);
 
 			//are we getting any closer ?
 			if (previousSquaredDistance - squaredDistance <= SIMD_EPSILON * previousSquaredDistance) 
 			{ 
-				m_simplexSolver->backup_closest(m_cachedSeparatingAxis);
+//				m_simplexSolver->backup_closest(m_cachedSeparatingAxis);
 				checkSimplex = true;
 				m_degenerateSimplex = 12;
 				
 				break;
 			}
 
+			m_cachedSeparatingAxis = newCachedSeparatingAxis;
+
 			  //degeneracy, this is typically due to invalid/uninitialized worldtransforms for a btCollisionObject   
               if (m_curIter++ > gGjkMaxIter)   
               {   
@@ -294,7 +295,7 @@ void btGjkPairDetector::getClosestPointsNonVirtual(const ClosestPointInput& inpu
 			if (!check)
 			{
 				//do we need this backup_closest here ?
-				m_simplexSolver->backup_closest(m_cachedSeparatingAxis);
+//				m_simplexSolver->backup_closest(m_cachedSeparatingAxis);
 				m_degenerateSimplex = 13;
 				break;
 			}
@@ -303,7 +304,7 @@ void btGjkPairDetector::getClosestPointsNonVirtual(const ClosestPointInput& inpu
 		if (checkSimplex)
 		{
 			m_simplexSolver->compute_points(pointOnA, pointOnB);
-			normalInB = pointOnA-pointOnB;
+			normalInB = m_cachedSeparatingAxis;
 			btScalar lenSqr =m_cachedSeparatingAxis.length2();
 			
 			//valid normal
diff --git a/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btGjkPairDetector.h b/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btGjkPairDetector.h
index cc6287c86..f0043b8b9 100644
--- a/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btGjkPairDetector.h
+++ b/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btGjkPairDetector.h
@@ -16,8 +16,8 @@ subject to the following restrictions:
 
 
 
-#ifndef GJK_PAIR_DETECTOR_H
-#define GJK_PAIR_DETECTOR_H
+#ifndef BT_GJK_PAIR_DETECTOR_H
+#define BT_GJK_PAIR_DETECTOR_H
 
 #include "btDiscreteCollisionDetectorInterface.h"
 #include "BulletCollision/CollisionShapes/btCollisionMargin.h"
@@ -63,12 +63,12 @@ public:
 	void	getClosestPointsNonVirtual(const ClosestPointInput& input,Result& output,class btIDebugDraw* debugDraw);
 	
 
-	void setMinkowskiA(btConvexShape* minkA)
+	void setMinkowskiA(const btConvexShape* minkA)
 	{
 		m_minkowskiA = minkA;
 	}
 
-	void setMinkowskiB(btConvexShape* minkB)
+	void setMinkowskiB(const btConvexShape* minkB)
 	{
 		m_minkowskiB = minkB;
 	}
@@ -100,4 +100,4 @@ public:
 
 };
 
-#endif //GJK_PAIR_DETECTOR_H
+#endif //BT_GJK_PAIR_DETECTOR_H
diff --git a/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btManifoldPoint.h b/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btManifoldPoint.h
index c7c981298..e40fb1d3d 100644
--- a/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btManifoldPoint.h
+++ b/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btManifoldPoint.h
@@ -13,13 +13,27 @@ subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-#ifndef MANIFOLD_CONTACT_POINT_H
-#define MANIFOLD_CONTACT_POINT_H
+#ifndef BT_MANIFOLD_CONTACT_POINT_H
+#define BT_MANIFOLD_CONTACT_POINT_H
 
 #include "LinearMath/btVector3.h"
 #include "LinearMath/btTransformUtil.h"
 
-
+#ifdef PFX_USE_FREE_VECTORMATH // use the PfxConstraintRow type from the physics_effects headers as btConstraintRow
+	#include "physics_effects/base_level/solver/pfx_constraint_row.h"
+typedef sce::PhysicsEffects::PfxConstraintRow btConstraintRow;
+#else // otherwise declare btConstraintRow locally and make PfxConstraintRow an alias for it
+	// Don't change the order of the following members (NOTE(review): presumably the layout must match sce::PhysicsEffects::PfxConstraintRow - confirm)
+	ATTRIBUTE_ALIGNED16(struct) btConstraintRow {
+		btScalar m_normal[3];
+		btScalar m_rhs;
+		btScalar m_jacDiagInv;
+		btScalar m_lowerLimit;
+		btScalar m_upperLimit;
+		btScalar m_accumImpulse;
+	};
+	typedef btConstraintRow PfxConstraintRow;
+#endif //PFX_USE_FREE_VECTORMATH
 
 
 
@@ -30,10 +44,14 @@ class btManifoldPoint
 		public:
 			btManifoldPoint()
 				:m_userPersistentData(0),
-				m_appliedImpulse(0.f),
 				m_lateralFrictionInitialized(false),
-				m_appliedImpulseLateral1(0.f),
+                m_appliedImpulse(0.f),
+                m_appliedImpulseLateral1(0.f),
 				m_appliedImpulseLateral2(0.f),
+				m_contactMotion1(0.f),
+				m_contactMotion2(0.f),
+				m_contactCFM1(0.f),
+				m_contactCFM2(0.f),
 				m_lifeTime(0)
 			{
 			}
@@ -46,16 +64,20 @@ class btManifoldPoint
 					m_normalWorldOnB( normal ), 
 					m_distance1( distance ),
 					m_combinedFriction(btScalar(0.)),
+					m_combinedRollingFriction(btScalar(0.)),
 					m_combinedRestitution(btScalar(0.)),
 					m_userPersistentData(0),
-					m_appliedImpulse(0.f),
 					m_lateralFrictionInitialized(false),
-					m_appliedImpulseLateral1(0.f),
+                    m_appliedImpulse(0.f),
+                    m_appliedImpulseLateral1(0.f),
 					m_appliedImpulseLateral2(0.f),
+					m_contactMotion1(0.f),
+					m_contactMotion2(0.f),
+					m_contactCFM1(0.f),
+					m_contactCFM2(0.f),
 					m_lifeTime(0)
 			{
 				
-					
 			}
 
 			
@@ -69,25 +91,34 @@ class btManifoldPoint
 		
 			btScalar	m_distance1;
 			btScalar	m_combinedFriction;
+			btScalar	m_combinedRollingFriction;
 			btScalar	m_combinedRestitution;
 
-         //BP mod, store contact triangles.
-         int	   m_partId0;
-         int      m_partId1;
-         int      m_index0;
-         int      m_index1;
+			//BP mod, store contact triangles.
+			int			m_partId0;
+			int			m_partId1;
+			int			m_index0;
+			int			m_index1;
 				
 			mutable void*	m_userPersistentData;
-			btScalar		m_appliedImpulse;
-
 			bool			m_lateralFrictionInitialized;
+
+			btScalar		m_appliedImpulse;
 			btScalar		m_appliedImpulseLateral1;
 			btScalar		m_appliedImpulseLateral2;
+			btScalar		m_contactMotion1;
+			btScalar		m_contactMotion2;
+			btScalar		m_contactCFM1;
+			btScalar		m_contactCFM2;
+
 			int				m_lifeTime;//lifetime of the contactpoint in frames
 			
 			btVector3		m_lateralFrictionDir1;
 			btVector3		m_lateralFrictionDir2;
 
+
+
+
 			btScalar getDistance() const
 			{
 				return m_distance1;
@@ -122,4 +153,4 @@ class btManifoldPoint
 
 	};
 
-#endif //MANIFOLD_CONTACT_POINT_H
+#endif //BT_MANIFOLD_CONTACT_POINT_H
diff --git a/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btMinkowskiPenetrationDepthSolver.cpp b/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btMinkowskiPenetrationDepthSolver.cpp
index 8b8238251..fe31f08d6 100644
--- a/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btMinkowskiPenetrationDepthSolver.cpp
+++ b/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btMinkowskiPenetrationDepthSolver.cpp
@@ -20,51 +20,6 @@ subject to the following restrictions:
 #include "BulletCollision/CollisionShapes/btConvexShape.h"
 
 #define NUM_UNITSPHERE_POINTS 42
-static btVector3	sPenetrationDirections[NUM_UNITSPHERE_POINTS+MAX_PREFERRED_PENETRATION_DIRECTIONS*2] = 
-{
-btVector3(btScalar(0.000000) , btScalar(-0.000000),btScalar(-1.000000)),
-btVector3(btScalar(0.723608) , btScalar(-0.525725),btScalar(-0.447219)),
-btVector3(btScalar(-0.276388) , btScalar(-0.850649),btScalar(-0.447219)),
-btVector3(btScalar(-0.894426) , btScalar(-0.000000),btScalar(-0.447216)),
-btVector3(btScalar(-0.276388) , btScalar(0.850649),btScalar(-0.447220)),
-btVector3(btScalar(0.723608) , btScalar(0.525725),btScalar(-0.447219)),
-btVector3(btScalar(0.276388) , btScalar(-0.850649),btScalar(0.447220)),
-btVector3(btScalar(-0.723608) , btScalar(-0.525725),btScalar(0.447219)),
-btVector3(btScalar(-0.723608) , btScalar(0.525725),btScalar(0.447219)),
-btVector3(btScalar(0.276388) , btScalar(0.850649),btScalar(0.447219)),
-btVector3(btScalar(0.894426) , btScalar(0.000000),btScalar(0.447216)),
-btVector3(btScalar(-0.000000) , btScalar(0.000000),btScalar(1.000000)),
-btVector3(btScalar(0.425323) , btScalar(-0.309011),btScalar(-0.850654)),
-btVector3(btScalar(-0.162456) , btScalar(-0.499995),btScalar(-0.850654)),
-btVector3(btScalar(0.262869) , btScalar(-0.809012),btScalar(-0.525738)),
-btVector3(btScalar(0.425323) , btScalar(0.309011),btScalar(-0.850654)),
-btVector3(btScalar(0.850648) , btScalar(-0.000000),btScalar(-0.525736)),
-btVector3(btScalar(-0.525730) , btScalar(-0.000000),btScalar(-0.850652)),
-btVector3(btScalar(-0.688190) , btScalar(-0.499997),btScalar(-0.525736)),
-btVector3(btScalar(-0.162456) , btScalar(0.499995),btScalar(-0.850654)),
-btVector3(btScalar(-0.688190) , btScalar(0.499997),btScalar(-0.525736)),
-btVector3(btScalar(0.262869) , btScalar(0.809012),btScalar(-0.525738)),
-btVector3(btScalar(0.951058) , btScalar(0.309013),btScalar(0.000000)),
-btVector3(btScalar(0.951058) , btScalar(-0.309013),btScalar(0.000000)),
-btVector3(btScalar(0.587786) , btScalar(-0.809017),btScalar(0.000000)),
-btVector3(btScalar(0.000000) , btScalar(-1.000000),btScalar(0.000000)),
-btVector3(btScalar(-0.587786) , btScalar(-0.809017),btScalar(0.000000)),
-btVector3(btScalar(-0.951058) , btScalar(-0.309013),btScalar(-0.000000)),
-btVector3(btScalar(-0.951058) , btScalar(0.309013),btScalar(-0.000000)),
-btVector3(btScalar(-0.587786) , btScalar(0.809017),btScalar(-0.000000)),
-btVector3(btScalar(-0.000000) , btScalar(1.000000),btScalar(-0.000000)),
-btVector3(btScalar(0.587786) , btScalar(0.809017),btScalar(-0.000000)),
-btVector3(btScalar(0.688190) , btScalar(-0.499997),btScalar(0.525736)),
-btVector3(btScalar(-0.262869) , btScalar(-0.809012),btScalar(0.525738)),
-btVector3(btScalar(-0.850648) , btScalar(0.000000),btScalar(0.525736)),
-btVector3(btScalar(-0.262869) , btScalar(0.809012),btScalar(0.525738)),
-btVector3(btScalar(0.688190) , btScalar(0.499997),btScalar(0.525736)),
-btVector3(btScalar(0.525730) , btScalar(0.000000),btScalar(0.850652)),
-btVector3(btScalar(0.162456) , btScalar(-0.499995),btScalar(0.850654)),
-btVector3(btScalar(-0.425323) , btScalar(-0.309011),btScalar(0.850654)),
-btVector3(btScalar(-0.425323) , btScalar(0.309011),btScalar(0.850654)),
-btVector3(btScalar(0.162456) , btScalar(0.499995),btScalar(0.850654))
-};
 
 
 bool btMinkowskiPenetrationDepthSolver::calcPenDepth(btSimplexSolverInterface& simplexSolver,
@@ -133,7 +88,7 @@ bool btMinkowskiPenetrationDepthSolver::calcPenDepth(btSimplexSolverInterface& s
 
 	for (i=0;i<numSampleDirections;i++)
 	{
-		btVector3 norm = sPenetrationDirections[i];
+		btVector3 norm = getPenetrationDirections()[i];
 		seperatingAxisInABatch[i] =  (-norm) * transA.getBasis() ;
 		seperatingAxisInBBatch[i] =  norm   * transB.getBasis() ;
 	}
@@ -147,7 +102,7 @@ bool btMinkowskiPenetrationDepthSolver::calcPenDepth(btSimplexSolverInterface& s
 				btVector3 norm;
 				convexA->getPreferredPenetrationDirection(i,norm);
 				norm  = transA.getBasis() * norm;
-				sPenetrationDirections[numSampleDirections] = norm;
+				getPenetrationDirections()[numSampleDirections] = norm;
 				seperatingAxisInABatch[numSampleDirections] = (-norm) * transA.getBasis();
 				seperatingAxisInBBatch[numSampleDirections] = norm * transB.getBasis();
 				numSampleDirections++;
@@ -164,7 +119,7 @@ bool btMinkowskiPenetrationDepthSolver::calcPenDepth(btSimplexSolverInterface& s
 				btVector3 norm;
 				convexB->getPreferredPenetrationDirection(i,norm);
 				norm  = transB.getBasis() * norm;
-				sPenetrationDirections[numSampleDirections] = norm;
+				getPenetrationDirections()[numSampleDirections] = norm;
 				seperatingAxisInABatch[numSampleDirections] = (-norm) * transA.getBasis();
 				seperatingAxisInBBatch[numSampleDirections] = norm * transB.getBasis();
 				numSampleDirections++;
@@ -180,7 +135,7 @@ bool btMinkowskiPenetrationDepthSolver::calcPenDepth(btSimplexSolverInterface& s
 
 	for (i=0;i<numSampleDirections;i++)
 	{
-		btVector3 norm = sPenetrationDirections[i];
+		btVector3 norm = getPenetrationDirections()[i];
 		if (check2d)
 		{
 			norm[2] = 0.f;
@@ -228,7 +183,7 @@ bool btMinkowskiPenetrationDepthSolver::calcPenDepth(btSimplexSolverInterface& s
 				btVector3 norm;
 				convexA->getPreferredPenetrationDirection(i,norm);
 				norm  = transA.getBasis() * norm;
-				sPenetrationDirections[numSampleDirections] = norm;
+				getPenetrationDirections()[numSampleDirections] = norm;
 				numSampleDirections++;
 			}
 		}
@@ -243,7 +198,7 @@ bool btMinkowskiPenetrationDepthSolver::calcPenDepth(btSimplexSolverInterface& s
 				btVector3 norm;
 				convexB->getPreferredPenetrationDirection(i,norm);
 				norm  = transB.getBasis() * norm;
-				sPenetrationDirections[numSampleDirections] = norm;
+				getPenetrationDirections()[numSampleDirections] = norm;
 				numSampleDirections++;
 			}
 		}
@@ -252,7 +207,7 @@ bool btMinkowskiPenetrationDepthSolver::calcPenDepth(btSimplexSolverInterface& s
 
 	for (int i=0;i<numSampleDirections;i++)
 	{
-		const btVector3& norm = sPenetrationDirections[i];
+		const btVector3& norm = getPenetrationDirections()[i];
 		seperatingAxisInA = (-norm)* transA.getBasis();
 		seperatingAxisInB = norm* transB.getBasis();
 		pInA = convexA->localGetSupportVertexWithoutMarginNonVirtual(seperatingAxisInA);
@@ -353,5 +308,55 @@ bool btMinkowskiPenetrationDepthSolver::calcPenDepth(btSimplexSolverInterface& s
 	return res.m_hasResult;
 }
 
+btVector3*	btMinkowskiPenetrationDepthSolver::getPenetrationDirections() // Returns the writable table of sample directions read and written by calcPenDepth
+{ // The table is a function-local static, so every call returns a pointer to the same array
+	static btVector3	sPenetrationDirections[NUM_UNITSPHERE_POINTS+MAX_PREFERRED_PENETRATION_DIRECTIONS*2] = // 42 entries are initialized below; calcPenDepth also stores preferred penetration directions in this table (presumably in the extra slots - confirm)
+	{
+	btVector3(btScalar(0.000000) , btScalar(-0.000000),btScalar(-1.000000)),
+	btVector3(btScalar(0.723608) , btScalar(-0.525725),btScalar(-0.447219)),
+	btVector3(btScalar(-0.276388) , btScalar(-0.850649),btScalar(-0.447219)),
+	btVector3(btScalar(-0.894426) , btScalar(-0.000000),btScalar(-0.447216)),
+	btVector3(btScalar(-0.276388) , btScalar(0.850649),btScalar(-0.447220)),
+	btVector3(btScalar(0.723608) , btScalar(0.525725),btScalar(-0.447219)),
+	btVector3(btScalar(0.276388) , btScalar(-0.850649),btScalar(0.447220)),
+	btVector3(btScalar(-0.723608) , btScalar(-0.525725),btScalar(0.447219)),
+	btVector3(btScalar(-0.723608) , btScalar(0.525725),btScalar(0.447219)),
+	btVector3(btScalar(0.276388) , btScalar(0.850649),btScalar(0.447219)),
+	btVector3(btScalar(0.894426) , btScalar(0.000000),btScalar(0.447216)),
+	btVector3(btScalar(-0.000000) , btScalar(0.000000),btScalar(1.000000)),
+	btVector3(btScalar(0.425323) , btScalar(-0.309011),btScalar(-0.850654)),
+	btVector3(btScalar(-0.162456) , btScalar(-0.499995),btScalar(-0.850654)),
+	btVector3(btScalar(0.262869) , btScalar(-0.809012),btScalar(-0.525738)),
+	btVector3(btScalar(0.425323) , btScalar(0.309011),btScalar(-0.850654)),
+	btVector3(btScalar(0.850648) , btScalar(-0.000000),btScalar(-0.525736)),
+	btVector3(btScalar(-0.525730) , btScalar(-0.000000),btScalar(-0.850652)),
+	btVector3(btScalar(-0.688190) , btScalar(-0.499997),btScalar(-0.525736)),
+	btVector3(btScalar(-0.162456) , btScalar(0.499995),btScalar(-0.850654)),
+	btVector3(btScalar(-0.688190) , btScalar(0.499997),btScalar(-0.525736)),
+	btVector3(btScalar(0.262869) , btScalar(0.809012),btScalar(-0.525738)),
+	btVector3(btScalar(0.951058) , btScalar(0.309013),btScalar(0.000000)),
+	btVector3(btScalar(0.951058) , btScalar(-0.309013),btScalar(0.000000)),
+	btVector3(btScalar(0.587786) , btScalar(-0.809017),btScalar(0.000000)),
+	btVector3(btScalar(0.000000) , btScalar(-1.000000),btScalar(0.000000)),
+	btVector3(btScalar(-0.587786) , btScalar(-0.809017),btScalar(0.000000)),
+	btVector3(btScalar(-0.951058) , btScalar(-0.309013),btScalar(-0.000000)),
+	btVector3(btScalar(-0.951058) , btScalar(0.309013),btScalar(-0.000000)),
+	btVector3(btScalar(-0.587786) , btScalar(0.809017),btScalar(-0.000000)),
+	btVector3(btScalar(-0.000000) , btScalar(1.000000),btScalar(-0.000000)),
+	btVector3(btScalar(0.587786) , btScalar(0.809017),btScalar(-0.000000)),
+	btVector3(btScalar(0.688190) , btScalar(-0.499997),btScalar(0.525736)),
+	btVector3(btScalar(-0.262869) , btScalar(-0.809012),btScalar(0.525738)),
+	btVector3(btScalar(-0.850648) , btScalar(0.000000),btScalar(0.525736)),
+	btVector3(btScalar(-0.262869) , btScalar(0.809012),btScalar(0.525738)),
+	btVector3(btScalar(0.688190) , btScalar(0.499997),btScalar(0.525736)),
+	btVector3(btScalar(0.525730) , btScalar(0.000000),btScalar(0.850652)),
+	btVector3(btScalar(0.162456) , btScalar(-0.499995),btScalar(0.850654)),
+	btVector3(btScalar(-0.425323) , btScalar(-0.309011),btScalar(0.850654)),
+	btVector3(btScalar(-0.425323) , btScalar(0.309011),btScalar(0.850654)),
+	btVector3(btScalar(0.162456) , btScalar(0.499995),btScalar(0.850654))
+	};
+	// NOTE(review): this is shared mutable state (calcPenDepth assigns into it), so concurrent callers may race - confirm
+	return sPenetrationDirections;
+}
 
 
diff --git a/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btMinkowskiPenetrationDepthSolver.h b/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btMinkowskiPenetrationDepthSolver.h
index 23cbd57ac..6a8fe52f3 100644
--- a/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btMinkowskiPenetrationDepthSolver.h
+++ b/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btMinkowskiPenetrationDepthSolver.h
@@ -13,8 +13,8 @@ subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-#ifndef MINKOWSKI_PENETRATION_DEPTH_SOLVER_H
-#define MINKOWSKI_PENETRATION_DEPTH_SOLVER_H
+#ifndef BT_MINKOWSKI_PENETRATION_DEPTH_SOLVER_H
+#define BT_MINKOWSKI_PENETRATION_DEPTH_SOLVER_H
 
 #include "btConvexPenetrationDepthSolver.h"
 
@@ -22,6 +22,10 @@ subject to the following restrictions:
 ///Implementation is based on sampling the depth using support mapping, and using GJK step to get the witness points.
 class btMinkowskiPenetrationDepthSolver : public btConvexPenetrationDepthSolver
 {
+protected:
+
+	static btVector3*	getPenetrationDirections();
+
 public:
 
 	virtual bool calcPenDepth( btSimplexSolverInterface& simplexSolver,
@@ -32,5 +36,5 @@ public:
 			);
 };
 
-#endif //MINKOWSKI_PENETRATION_DEPTH_SOLVER_H
+#endif //BT_MINKOWSKI_PENETRATION_DEPTH_SOLVER_H
 
diff --git a/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btPersistentManifold.cpp b/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btPersistentManifold.cpp
index 924a8af87..4d92e853d 100644
--- a/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btPersistentManifold.cpp
+++ b/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btPersistentManifold.cpp
@@ -21,7 +21,9 @@ subject to the following restrictions:
 btScalar					gContactBreakingThreshold = btScalar(0.02);
 ContactDestroyedCallback	gContactDestroyedCallback = 0;
 ContactProcessedCallback	gContactProcessedCallback = 0;
-
+///gContactCalcArea3Points will approximate the convex hull area using 3 points
+///when setting it to false, it will use 4 points to compute the area: it is more accurate but slower
+bool						gContactCalcArea3Points = true;
 
 
 btPersistentManifold::btPersistentManifold()
@@ -84,10 +86,28 @@ void btPersistentManifold::clearUserCache(btManifoldPoint& pt)
 	
 }
 
+static inline btScalar calcArea4Points(const btVector3 &p0,const btVector3 &p1,const btVector3 &p2,const btVector3 &p3)
+{
+	// Computes an area measure for the 4 given points: for each of the 3 ways to split them into two pairs, it crosses the two pair difference vectors and returns the largest result.
+	// NOTE: the returned value is a squared cross-product magnitude (length2), not an actual area.
+	btVector3 a[3],b[3];
+	a[0] = p0 - p1;
+	a[1] = p0 - p2;
+	a[2] = p0 - p3;
+	b[0] = p2 - p3;
+	b[1] = p1 - p3;
+	b[2] = p1 - p2;
+
+	//todo: The following 3 cross products can easily be optimized with SIMD.
+	btVector3 tmp0 = a[0].cross(b[0]);
+	btVector3 tmp1 = a[1].cross(b[1]);
+	btVector3 tmp2 = a[2].cross(b[2]);
+
+	return btMax(btMax(tmp0.length2(),tmp1.length2()),tmp2.length2());
+}
 
 int btPersistentManifold::sortCachedPoints(const btManifoldPoint& pt) 
 {
-
 		//calculate 4 possible cases areas, and take biggest area
 		//also need to keep 'deepest'
 		
@@ -106,6 +126,9 @@ int btPersistentManifold::sortCachedPoints(const btManifoldPoint& pt)
 #endif //KEEP_DEEPEST_POINT
 		
 		btScalar res0(btScalar(0.)),res1(btScalar(0.)),res2(btScalar(0.)),res3(btScalar(0.));
+
+	if (gContactCalcArea3Points)
+	{
 		if (maxPenetrationIndex != 0)
 		{
 			btVector3 a0 = pt.m_localPointA-m_pointCache[1].m_localPointA;
@@ -136,10 +159,29 @@ int btPersistentManifold::sortCachedPoints(const btManifoldPoint& pt)
 			btVector3 cross = a3.cross(b3);
 			res3 = cross.length2();
 		}
+	} 
+	else
+	{
+		if(maxPenetrationIndex != 0) {
+			res0 = calcArea4Points(pt.m_localPointA,m_pointCache[1].m_localPointA,m_pointCache[2].m_localPointA,m_pointCache[3].m_localPointA);
+		}
 
-		btVector4 maxvec(res0,res1,res2,res3);
-		int biggestarea = maxvec.closestAxis4();
-		return biggestarea;
+		if(maxPenetrationIndex != 1) {
+			res1 = calcArea4Points(pt.m_localPointA,m_pointCache[0].m_localPointA,m_pointCache[2].m_localPointA,m_pointCache[3].m_localPointA);
+		}
+
+		if(maxPenetrationIndex != 2) {
+			res2 = calcArea4Points(pt.m_localPointA,m_pointCache[0].m_localPointA,m_pointCache[1].m_localPointA,m_pointCache[3].m_localPointA);
+		}
+
+		if(maxPenetrationIndex != 3) {
+			res3 = calcArea4Points(pt.m_localPointA,m_pointCache[0].m_localPointA,m_pointCache[1].m_localPointA,m_pointCache[2].m_localPointA);
+		}
+	}
+	btVector4 maxvec(res0,res1,res2,res3);
+	int biggestarea = maxvec.closestAxis4();
+	return biggestarea;
+	
 }
 
 
@@ -163,10 +205,13 @@ int btPersistentManifold::getCacheEntry(const btManifoldPoint& newPoint) const
 	return nearestPoint;
 }
 
-int btPersistentManifold::addManifoldPoint(const btManifoldPoint& newPoint)
+int btPersistentManifold::addManifoldPoint(const btManifoldPoint& newPoint, bool isPredictive)
 {
-	btAssert(validContactDistance(newPoint));
-
+	if (!isPredictive)
+	{
+		btAssert(validContactDistance(newPoint));
+	}
+	
 	int insertIndex = getNumContacts();
 	if (insertIndex == MANIFOLD_CACHE_SIZE)
 	{
@@ -245,7 +290,7 @@ void btPersistentManifold::refreshContactPoints(const btTransform& trA,const btT
 			{
 				//contact point processed callback
 				if (gContactProcessedCallback)
-					(*gContactProcessedCallback)(manifoldPoint,m_body0,m_body1);
+					(*gContactProcessedCallback)(manifoldPoint,(void*)m_body0,(void*)m_body1);
 			}
 		}
 	}
diff --git a/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btPersistentManifold.h b/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btPersistentManifold.h
index 7a30f92c5..2ceaab750 100644
--- a/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btPersistentManifold.h
+++ b/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btPersistentManifold.h
@@ -13,13 +13,14 @@ subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-#ifndef PERSISTENT_MANIFOLD_H
-#define PERSISTENT_MANIFOLD_H
+#ifndef BT_PERSISTENT_MANIFOLD_H
+#define BT_PERSISTENT_MANIFOLD_H
 
 
 #include "LinearMath/btVector3.h"
 #include "LinearMath/btTransform.h"
 #include "btManifoldPoint.h"
+class btCollisionObject;
 #include "LinearMath/btAlignedAllocator.h"
 
 struct btCollisionResult;
@@ -30,12 +31,13 @@ extern btScalar gContactBreakingThreshold;
 typedef bool (*ContactDestroyedCallback)(void* userPersistentData);
 typedef bool (*ContactProcessedCallback)(btManifoldPoint& cp,void* body0,void* body1);
 extern ContactDestroyedCallback	gContactDestroyedCallback;
+extern ContactProcessedCallback gContactProcessedCallback;
 
-
+//the enum starts at 1024 to avoid type conflicts with btTypedConstraint
 enum btContactManifoldTypes
 {
-	BT_PERSISTENT_MANIFOLD_TYPE = 1,
-	MAX_CONTACT_MANIFOLD_TYPE
+	MIN_CONTACT_MANIFOLD_TYPE = 1024,
+	BT_PERSISTENT_MANIFOLD_TYPE
 };
 
 #define MANIFOLD_CACHE_SIZE 4
@@ -47,15 +49,18 @@ enum btContactManifoldTypes
 ///reduces the cache to 4 points, when more then 4 points are added, using following rules:
 ///the contact point with deepest penetration is always kept, and it tries to maximuze the area covered by the points
 ///note that some pairs of objects might have more then one contact manifold.
-ATTRIBUTE_ALIGNED16( class) btPersistentManifold : public btTypedObject
+
+
+ATTRIBUTE_ALIGNED128( class) btPersistentManifold : public btTypedObject
+//ATTRIBUTE_ALIGNED16( class) btPersistentManifold : public btTypedObject
 {
 
 	btManifoldPoint m_pointCache[MANIFOLD_CACHE_SIZE];
 
 	/// this two body pointers can point to the physics rigidbody class.
-	/// void* will allow any rigidbody class
-	void* m_body0;
-	void* m_body1;
+	const btCollisionObject* m_body0;
+	const btCollisionObject* m_body1;
+
 	int	m_cachedPoints;
 
 	btScalar	m_contactBreakingThreshold;
@@ -71,11 +76,14 @@ public:
 
 	BT_DECLARE_ALIGNED_ALLOCATOR();
 
+	int	m_companionIdA;
+	int	m_companionIdB;
+
 	int m_index1a;
 
 	btPersistentManifold();
 
-	btPersistentManifold(void* body0,void* body1,int , btScalar contactBreakingThreshold,btScalar contactProcessingThreshold)
+	btPersistentManifold(const btCollisionObject* body0,const btCollisionObject* body1,int , btScalar contactBreakingThreshold,btScalar contactProcessingThreshold)
 		: btTypedObject(BT_PERSISTENT_MANIFOLD_TYPE),
 	m_body0(body0),m_body1(body1),m_cachedPoints(0),
 		m_contactBreakingThreshold(contactBreakingThreshold),
@@ -83,13 +91,10 @@ public:
 	{
 	}
 
-	SIMD_FORCE_INLINE void* getBody0() { return m_body0;}
-	SIMD_FORCE_INLINE void* getBody1() { return m_body1;}
+	SIMD_FORCE_INLINE const btCollisionObject* getBody0() const { return m_body0;}
+	SIMD_FORCE_INLINE const btCollisionObject* getBody1() const { return m_body1;}
 
-	SIMD_FORCE_INLINE const void* getBody0() const { return m_body0;}
-	SIMD_FORCE_INLINE const void* getBody1() const { return m_body1;}
-
-	void	setBodies(void* body0,void* body1)
+	void	setBodies(const btCollisionObject* body0,const btCollisionObject* body1)
 	{
 		m_body0 = body0;
 		m_body1 = body1;
@@ -102,6 +107,12 @@ public:
 #endif //
 	
 	SIMD_FORCE_INLINE int	getNumContacts() const { return m_cachedPoints;}
+	/// the setNumContacts API is usually not used, except when you gather/fill all contacts manually
+	void setNumContacts(int cachedPoints)
+	{
+		m_cachedPoints = cachedPoints;
+	}
+
 
 	SIMD_FORCE_INLINE const btManifoldPoint& getContactPoint(int index) const
 	{
@@ -123,9 +134,22 @@ public:
 		return m_contactProcessingThreshold;
 	}
 	
+	void setContactBreakingThreshold(btScalar contactBreakingThreshold)
+	{
+		m_contactBreakingThreshold = contactBreakingThreshold;
+	}
+
+	void setContactProcessingThreshold(btScalar	contactProcessingThreshold)
+	{
+		m_contactProcessingThreshold = contactProcessingThreshold;
+	}
+	
+	
+
+
 	int getCacheEntry(const btManifoldPoint& newPoint) const;
 
-	int addManifoldPoint( const btManifoldPoint& newPoint);
+	int addManifoldPoint( const btManifoldPoint& newPoint, bool isPredictive=false);
 
 	void removeContactPoint (int index)
 	{
@@ -158,7 +182,10 @@ public:
 		btScalar	appliedImpulse = m_pointCache[insertIndex].m_appliedImpulse;
 		btScalar	appliedLateralImpulse1 = m_pointCache[insertIndex].m_appliedImpulseLateral1;
 		btScalar	appliedLateralImpulse2 = m_pointCache[insertIndex].m_appliedImpulseLateral2;
-				
+//		bool isLateralFrictionInitialized = m_pointCache[insertIndex].m_lateralFrictionInitialized;
+		
+		
+			
 		btAssert(lifeTime>=0);
 		void* cache = m_pointCache[insertIndex].m_userPersistentData;
 		
@@ -169,6 +196,11 @@ public:
 		m_pointCache[insertIndex].m_appliedImpulseLateral1 = appliedLateralImpulse1;
 		m_pointCache[insertIndex].m_appliedImpulseLateral2 = appliedLateralImpulse2;
 		
+		m_pointCache[insertIndex].m_appliedImpulse =  appliedImpulse;
+		m_pointCache[insertIndex].m_appliedImpulseLateral1 = appliedLateralImpulse1;
+		m_pointCache[insertIndex].m_appliedImpulseLateral2 = appliedLateralImpulse2;
+
+
 		m_pointCache[insertIndex].m_lifeTime = lifeTime;
 #else
 		clearUserCache(m_pointCache[insertIndex]);
@@ -177,6 +209,7 @@ public:
 #endif
 	}
 
+	
 	bool validContactDistance(const btManifoldPoint& pt) const
 	{
 		return pt.m_distance1 <= getContactBreakingThreshold();
@@ -204,4 +237,4 @@ public:
 
 
 
-#endif //PERSISTENT_MANIFOLD_H
+#endif //BT_PERSISTENT_MANIFOLD_H
diff --git a/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btPointCollector.h b/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btPointCollector.h
index 6ca60548e..18da17101 100644
--- a/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btPointCollector.h
+++ b/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btPointCollector.h
@@ -13,8 +13,8 @@ subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-#ifndef POINT_COLLECTOR_H
-#define POINT_COLLECTOR_H
+#ifndef BT_POINT_COLLECTOR_H
+#define BT_POINT_COLLECTOR_H
 
 #include "btDiscreteCollisionDetectorInterface.h"
 
@@ -60,5 +60,5 @@ struct btPointCollector : public btDiscreteCollisionDetectorInterface::Result
 	}
 };
 
-#endif //POINT_COLLECTOR_H
+#endif //BT_POINT_COLLECTOR_H
 
diff --git a/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btPolyhedralContactClipping.cpp b/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btPolyhedralContactClipping.cpp
new file mode 100644
index 000000000..b08205e84
--- /dev/null
+++ b/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btPolyhedralContactClipping.cpp
@@ -0,0 +1,570 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2011 Advanced Micro Devices, Inc.  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+///This file was written by Erwin Coumans
+///Separating axis test based on work from Pierre Terdiman
+///And contact clipping based on work from Simon Hobbs
+
+
+#include "btPolyhedralContactClipping.h"
+#include "BulletCollision/CollisionShapes/btConvexPolyhedron.h"
+
+#include <float.h> //for FLT_MAX
+
+int gExpectedNbTests=0;	// incremented for every candidate axis in findSeparatingAxis, only when TEST_INTERNAL_OBJECTS is defined
+int gActualNbTests = 0;	// incremented for every axis that is not skipped by the TestInternalObjects pre-check (same build flag)
+bool gUseInternalObject = true;	// when false, the TestInternalObjects pre-check is bypassed
+
+// Clips a face to the back of a plane.
+//  pVtxIn         polygon to clip, as an ordered vertex list
+//  ppVtxOut       receives the clipped polygon; vertices are appended, the array is not cleared first
+//  planeNormalWS  plane normal
+//  planeEqWS      plane offset: the signed distance of a point v is planeNormalWS.dot(v)+planeEqWS
+// Only the part of the polygon with negative signed distance is emitted.
+void btPolyhedralContactClipping::clipFace(const btVertexArray& pVtxIn, btVertexArray& ppVtxOut, const btVector3& planeNormalWS,btScalar planeEqWS)
+{
+	const int vertexCount = pVtxIn.size();
+	// fewer than two vertices cannot form an edge, so nothing is emitted
+	if (vertexCount < 2)
+	{
+		return;
+	}
+
+	// begin with the closing edge, whose start is the last vertex of the list
+	btVector3 prevVertex = pVtxIn[vertexCount-1];
+	btScalar prevDist = planeNormalWS.dot(prevVertex)+planeEqWS;
+
+	// visit every edge prevVertex -> curVertex of the closed polygon
+	for (int idx = 0; idx < vertexCount; ++idx)
+	{
+		const btVector3 curVertex = pVtxIn[idx];
+		const btScalar curDist = planeNormalWS.dot(curVertex)+planeEqWS;
+
+		// classify both ends of the edge against the plane
+		const bool prevBehind = (prevDist<0);
+		const bool curBehind = (curDist<0);
+
+		if (prevBehind != curBehind)
+		{
+			// the edge crosses the plane, so output the intersection first
+			const btScalar fraction = btScalar(prevDist * 1.f/(prevDist - curDist));
+			ppVtxOut.push_back(prevVertex.lerp(curVertex,fraction));
+		}
+
+		if (curBehind)
+		{
+			// the end of the edge is behind the plane, so it is kept as well
+			ppVtxOut.push_back(curVertex);
+		}
+
+		// the end of this edge is the start of the next one
+		prevVertex = curVertex;
+		prevDist = curDist;
+	}
+}
+
+
+static bool TestSepAxis(const btConvexPolyhedron& hullA, const btConvexPolyhedron& hullB, const btTransform& transA,const btTransform& transB, const btVector3& sep_axis, btScalar& depth, btVector3& witnessPointA, btVector3& witnessPointB)
+{
+	// Projects both hulls onto sep_axis. Returns false when the two intervals are disjoint;
+	// otherwise writes the smaller overlap to depth, fills both witness points and returns true.
+	btScalar minA,maxA;
+	btVector3 lowPointA,highPointA;
+	hullA.project(transA,sep_axis, minA, maxA,lowPointA,highPointA);
+
+	btScalar minB,maxB;
+	btVector3 lowPointB,highPointB;
+	hullB.project(transB, sep_axis, minB, maxB,lowPointB,highPointB);
+
+	// disjoint intervals: the hulls do not overlap along this axis
+	const bool separated = (maxA<minB) || (maxB<minA);
+	if (separated)
+	{
+		return false;
+	}
+
+	const btScalar overlapHighA = maxA - minB;
+	btAssert(overlapHighA>=0.0f);
+	const btScalar overlapHighB = maxB - minA;
+	btAssert(overlapHighB>=0.0f);
+
+	// keep the smaller overlap and the witness points project() reported for the interval ends that bound it
+	const bool useHighA = overlapHighA<overlapHighB;
+	depth = useHighA ? overlapHighA : overlapHighB;
+	witnessPointA = useHighA ? highPointA : lowPointA;
+	witnessPointB = useHighA ? lowPointB : highPointB;
+
+	return true;
+}
+
+
+
+static int gActualSATPairTests=0;	// incremented once per findSeparatingAxis call; nothing in this file reads it
+
+inline bool IsAlmostZero(const btVector3& v)
+{
+	// True when no component magnitude exceeds 1e-6. btFabs follows btScalar's precision; fabsf would truncate the components to float in double-precision builds.
+	return !(btFabs(v.x())>1e-6 || btFabs(v.y())>1e-6 || btFabs(v.z())>1e-6);
+}
+
+#ifdef TEST_INTERNAL_OBJECTS
+
+inline void BoxSupport(const btScalar extents[3], const btScalar sv[3], btScalar p[3])
+{
+	// Writes to p, per axis, the box extent carrying the sign of the direction sv:
+	// a negative direction component selects -extent, anything else selects +extent.
+	for (int axis=0; axis<3; ++axis)
+	{
+		const bool negativeDir = sv[axis] < 0.0f;
+		p[axis] = negativeDir ? -extents[axis] : extents[axis];
+	}
+}
+
+void InverseTransformPoint3x3(btVector3& out, const btVector3& in, const btTransform& tr)
+{
+	// out[k] = basis[0][k]*in.x + basis[1][k]*in.y + basis[2][k]*in.z, using only tr's basis (the origin of tr is not read).
+	const btMatrix3x3& basis = tr.getBasis();
+	const btVector3& row0 = basis[0];
+	const btVector3& row1 = basis[1];
+	const btVector3& row2 = basis[2];
+	const btScalar ix = in.x(), iy = in.y(), iz = in.z();
+
+	out.setValue(	row0.x()*ix + row1.x()*iy + row2.x()*iz,
+			row0.y()*ix + row1.y()*iy + row2.y()*iz,
+			row0.z()*ix + row1.z()*iy + row2.z()*iz);
+}
+
+ bool TestInternalObjects( const btTransform& trans0, const btTransform& trans1, const btVector3& delta_c, const btVector3& axis, const btConvexPolyhedron& convex0, const btConvexPolyhedron& convex1, btScalar dmin)
+{
+	// Estimates the overlap depth along 'axis' from each hull's m_extents box and m_radius,
+	// and returns false when that estimate is larger than dmin (true otherwise).
+
+	// hull 0: axis passed through InverseTransformPoint3x3, and the box corner BoxSupport picks for it
+	btVector3 axisLocal0;
+	InverseTransformPoint3x3(axisLocal0, axis,trans0);
+	btScalar corner0[3];
+	BoxSupport(convex0.m_extents, axisLocal0, corner0);
+	const btScalar boxReach0 = corner0[0]*axisLocal0.x() + corner0[1]*axisLocal0.y() + corner0[2]*axisLocal0.z();
+
+	// hull 1: same computation
+	btVector3 axisLocal1;
+	InverseTransformPoint3x3(axisLocal1, axis,trans1);
+	btScalar corner1[3];
+	BoxSupport(convex1.m_extents, axisLocal1, corner1);
+	const btScalar boxReach1 = corner1[0]*axisLocal1.x() + corner1[1]*axisLocal1.y() + corner1[2]*axisLocal1.z();
+
+	// per hull, take the larger of the box projection and the stored m_radius
+	const btScalar reach0 = boxReach0>convex0.m_radius ? boxReach0 : convex0.m_radius;
+	const btScalar reach1 = boxReach1>convex1.m_radius ? boxReach1 : convex1.m_radius;
+	const btScalar reachSum = reach1 + reach0;
+
+	const btScalar centerOffset = delta_c.dot(axis);
+	const btScalar depthPlus = reachSum + centerOffset;
+	const btScalar depthMinus = reachSum - centerOffset;
+	return !((depthPlus<depthMinus ? depthPlus:depthMinus)>dmin);
+}
+#endif //TEST_INTERNAL_OBJECTS
+
+ 
+ 
+ SIMD_FORCE_INLINE void btSegmentsClosestPoints(
+	btVector3& ptsVector,
+	btVector3& offsetA,
+	btVector3& offsetB,
+	btScalar& tA, btScalar& tB,
+	const btVector3& translation,
+	const btVector3& dirA, btScalar hlenA,
+	const btVector3& dirB, btScalar hlenB )
+{
+	// Computes the parameters tA and tB of the closest points along dirA and dirB.
+	// Outputs:
+	//   tA, tB            parameters along dirA / dirB; tB is clamped to [-hlenB,hlenB], tA to [-hlenA,hlenA] whenever it is computed from the inputs
+	//   offsetA, offsetB  dirA*tA and dirB*tB (closest points relative to the segment centers)
+	//   ptsVector         translation - offsetA + offsetB
+	// NOTE(review): the 1 - dot*dot denominator suggests dirA and dirB are expected to be unit length - confirm with callers.
+
+	const btScalar dirDot = btDot(dirA,dirB);
+	const btScalar transOnA = btDot(dirA,translation);
+	const btScalar transOnB = btDot(dirB,translation);
+
+	const btScalar denom = 1.0f - dirDot * dirDot;
+
+	// parameter on A; a zero denominator falls back to tA = 0 without clamping
+	if ( denom != 0.0f )
+	{
+		tA = ( transOnA - transOnB * dirDot ) / denom;
+		// keep tA inside [-hlenA,hlenA]
+		tA = ( tA < -hlenA ) ? -hlenA : ( ( tA > hlenA ) ? hlenA : tA );
+	}
+	else
+	{
+		tA = 0.0f;
+	}
+
+	// parameter on B that corresponds to the current tA
+	tB = tA * dirDot - transOnB;
+
+	// when tB leaves its range, pin it to the violated bound and re-derive tA from it
+	const bool belowB = ( tB < -hlenB );
+	const bool aboveB = !belowB && ( tB > hlenB );
+	if ( belowB || aboveB )
+	{
+		tB = belowB ? -hlenB : hlenB;
+		tA = tB * dirDot + transOnA;
+		// the re-derived tA is clamped the same way
+		tA = ( tA < -hlenA ) ? -hlenA : ( ( tA > hlenA ) ? hlenA : tA );
+	}
+
+	// compute the closest points relative to segment centers.
+	offsetA = dirA * tA;
+	offsetB = dirB * tB;
+
+	ptsVector = translation - offsetA + offsetB;
+}
+
+
+
+bool btPolyhedralContactClipping::findSeparatingAxis(	const btConvexPolyhedron& hullA, const btConvexPolyhedron& hullB, const btTransform& transA,const btTransform& transB, btVector3& sep, btDiscreteCollisionDetectorInterface::Result& resultOut)
+{
+	gActualSATPairTests++;
+	// Separating-axis search over A's face normals, B's face normals and edge-edge cross products: returns false as soon as TestSepAxis finds the hulls disjoint along an axis; otherwise returns true with the smallest-overlap axis in sep, flipped so that DeltaC2.dot(sep) is not negative.
+//#ifdef TEST_INTERNAL_OBJECTS
+	const btVector3 c0 = transA * hullA.m_localCenter;
+	const btVector3 c1 = transB * hullB.m_localCenter;
+	const btVector3 DeltaC2 = c0 - c1;
+//#endif
+	// If the winning axis came from an edge pair, one contact point may also be added to resultOut (see the edgeA/edgeB block below). sep is only written when some axis is accepted.
+	btScalar dmin = FLT_MAX;	// smallest overlap depth found so far
+	int curPlaneTests=0;	// counter: incremented below but never read in this function
+
+	int numFacesA = hullA.m_faces.size();
+	// Test normals from hullA
+	for(int i=0;i<numFacesA;i++)
+	{
+		const btVector3 Normal(hullA.m_faces[i].m_plane[0], hullA.m_faces[i].m_plane[1], hullA.m_faces[i].m_plane[2]);
+		btVector3 faceANormalWS = transA.getBasis() * Normal;
+		if (DeltaC2.dot(faceANormalWS)<0)
+			faceANormalWS*=-1.f;
+
+		curPlaneTests++;
+#ifdef TEST_INTERNAL_OBJECTS
+		gExpectedNbTests++;
+		if(gUseInternalObject && !TestInternalObjects(transA,transB, DeltaC2, faceANormalWS, hullA, hullB, dmin))
+			continue;
+		gActualNbTests++;
+#endif
+
+		btScalar d;
+		btVector3 wA,wB;
+		if(!TestSepAxis( hullA, hullB, transA,transB, faceANormalWS, d,wA,wB))
+			return false;	// the projections are disjoint along this axis
+
+		if(d<dmin)
+		{
+			dmin = d;
+			sep = faceANormalWS;
+		}
+	}
+
+	int numFacesB = hullB.m_faces.size();
+	// Test normals from hullB
+	for(int i=0;i<numFacesB;i++)
+	{
+		const btVector3 Normal(hullB.m_faces[i].m_plane[0], hullB.m_faces[i].m_plane[1], hullB.m_faces[i].m_plane[2]);
+		btVector3 WorldNormal = transB.getBasis() * Normal;
+		if (DeltaC2.dot(WorldNormal)<0)
+			WorldNormal *=-1.f;
+
+		curPlaneTests++;
+#ifdef TEST_INTERNAL_OBJECTS
+		gExpectedNbTests++;
+		if(gUseInternalObject && !TestInternalObjects(transA,transB,DeltaC2, WorldNormal, hullA, hullB, dmin))
+			continue;
+		gActualNbTests++;
+#endif
+
+		btScalar d;
+		btVector3 wA,wB;
+		if(!TestSepAxis(hullA, hullB,transA,transB, WorldNormal,d,wA,wB))
+			return false;
+
+		if(d<dmin)
+		{
+			dmin = d;
+			sep = WorldNormal;
+		}
+	}
+
+	btVector3 edgeAstart,edgeAend,edgeBstart,edgeBend;	// declared but not used below
+	int edgeA=-1;
+	int edgeB=-1;
+	btVector3 worldEdgeA;
+	btVector3 worldEdgeB;
+	btVector3 witnessPointA,witnessPointB;
+	// edgeA/edgeB stay -1 unless an edge-edge axis has a smaller overlap than every face axis accepted above
+
+	int curEdgeEdge = 0;
+	// Test edges
+	for(int e0=0;e0<hullA.m_uniqueEdges.size();e0++)
+	{
+		const btVector3 edge0 = hullA.m_uniqueEdges[e0];
+		const btVector3 WorldEdge0 = transA.getBasis() * edge0;
+		for(int e1=0;e1<hullB.m_uniqueEdges.size();e1++)
+		{
+			const btVector3 edge1 = hullB.m_uniqueEdges[e1];
+			const btVector3 WorldEdge1 = transB.getBasis() * edge1;
+
+			btVector3 Cross = WorldEdge0.cross(WorldEdge1);
+			curEdgeEdge++;
+			if(!IsAlmostZero(Cross))
+			{
+				Cross = Cross.normalize();
+				if (DeltaC2.dot(Cross)<0)
+					Cross *= -1.f;
+
+
+#ifdef TEST_INTERNAL_OBJECTS
+				gExpectedNbTests++;
+				if(gUseInternalObject && !TestInternalObjects(transA,transB,DeltaC2, Cross, hullA, hullB, dmin))
+					continue;
+				gActualNbTests++;
+#endif
+
+				btScalar dist;
+				btVector3 wA,wB;
+				if(!TestSepAxis( hullA, hullB, transA,transB, Cross, dist,wA,wB))
+					return false;
+
+				if(dist<dmin)
+				{
+					dmin = dist;
+					sep = Cross;
+					edgeA=e0;
+					edgeB=e1;
+					worldEdgeA = WorldEdge0;
+					worldEdgeB = WorldEdge1;
+					witnessPointA=wA;
+					witnessPointB=wB;
+				}
+			}
+		}
+
+	}
+
+	if (edgeA>=0&&edgeB>=0)
+	{
+//		printf("edge-edge\n");
+		//add an edge-edge contact
+
+		btVector3 ptsVector;
+		btVector3 offsetA;
+		btVector3 offsetB;
+		btScalar tA;
+		btScalar tB;
+
+		btVector3 translation = witnessPointB-witnessPointA;
+
+		btVector3 dirA = worldEdgeA;
+		btVector3 dirB = worldEdgeB;
+		// NOTE(review): huge half-lengths, presumably so the clamping in btSegmentsClosestPoints never triggers - confirm
+		btScalar hlenB = 1e30f;
+		btScalar hlenA = 1e30f;
+
+		btSegmentsClosestPoints(ptsVector,offsetA,offsetB,tA,tB,
+			translation,
+			dirA, hlenA,
+			dirB,hlenB);
+
+		btScalar nlSqrt = ptsVector.length2();	// squared length, despite the name
+		if (nlSqrt>SIMD_EPSILON)
+		{
+			btScalar nl = btSqrt(nlSqrt);
+			ptsVector *= 1.f/nl;
+			if (ptsVector.dot(DeltaC2)<0.f)
+			{
+				ptsVector*=-1.f;
+			}
+			btVector3 ptOnB = witnessPointB + offsetB;
+			btScalar distance = nl;
+			resultOut.addContactPoint(ptsVector, ptOnB,-distance);
+		}
+
+	}
+
+
+	if((DeltaC2.dot(sep))<0.0f)
+		sep = -sep;
+
+	return true;
+}
+
+void	btPolyhedralContactClipping::clipFaceAgainstHull(const btVector3& separatingNormal, const btConvexPolyhedron& hullA,  const btTransform& transA, btVertexArray& worldVertsB1, const btScalar minDist, btScalar maxDist,btDiscreteCollisionDetectorInterface::Result& resultOut)
+{
+	// Clips the polygon worldVertsB1 against one face of hullA and reports the surviving vertices to resultOut.
+	//  separatingNormal  selects the witness face of hullA and is the normal handed to every reported contact
+	//  worldVertsB1      polygon to clip; it doubles as scratch storage, so its contents are overwritten
+	//  minDist           depths at or below this value are raised to it
+	//  maxDist           vertices whose depth exceeds this value are not reported
+	btVertexArray worldVertsB2;
+	btVertexArray* pVtxIn = &worldVertsB1;
+	btVertexArray* pVtxOut = &worldVertsB2;
+	pVtxOut->reserve(pVtxIn->size());
+
+	// witness face: the face of hullA whose transformed normal has the smallest dot product with separatingNormal
+	int closestFaceA=-1;
+	{
+		btScalar dmin = FLT_MAX;
+		for(int face=0;face<hullA.m_faces.size();face++)
+		{
+			const btVector3 Normal(hullA.m_faces[face].m_plane[0], hullA.m_faces[face].m_plane[1], hullA.m_faces[face].m_plane[2]);
+			const btVector3 faceANormalWS = transA.getBasis() * Normal;
+
+			btScalar d = faceANormalWS.dot(separatingNormal);
+			if (d < dmin)
+			{
+				dmin = d;
+				closestFaceA = face;
+			}
+		}
+	}
+	if (closestFaceA<0)
+		return;
+
+	const btFace& polyA = hullA.m_faces[closestFaceA];
+
+	// clip the polygon against one side plane per edge of the witness face (built from the edge and the face normal unless BLA1 is defined)
+	int numVerticesA = polyA.m_indices.size();
+	for(int e0=0;e0<numVerticesA;e0++)
+	{
+		const btVector3& a = hullA.m_vertices[polyA.m_indices[e0]];
+		const btVector3& b = hullA.m_vertices[polyA.m_indices[(e0+1)%numVerticesA]];
+		const btVector3 edge0 = a - b;
+		const btVector3 WorldEdge0 = transA.getBasis() * edge0;
+		btVector3 worldPlaneAnormal1 = transA.getBasis()* btVector3(polyA.m_plane[0],polyA.m_plane[1],polyA.m_plane[2]);
+
+		btVector3 planeNormalWS1 = -WorldEdge0.cross(worldPlaneAnormal1);//.cross(WorldEdge0);
+		btVector3 worldA1 = transA*a;
+		btScalar planeEqWS1 = -worldA1.dot(planeNormalWS1);
+
+#ifdef BLA1
+		int otherFace = polyA.m_connectedFaces[e0];
+		btVector3 localPlaneNormal (hullA.m_faces[otherFace].m_plane[0],hullA.m_faces[otherFace].m_plane[1],hullA.m_faces[otherFace].m_plane[2]);
+		btScalar localPlaneEq = hullA.m_faces[otherFace].m_plane[3];
+
+		btVector3 planeNormalWS = transA.getBasis()*localPlaneNormal;
+		btScalar planeEqWS=localPlaneEq-planeNormalWS.dot(transA.getOrigin());
+#else 
+		btVector3 planeNormalWS = planeNormalWS1;
+		btScalar planeEqWS=planeEqWS1;
+#endif
+		//clip face, then swap the buffers so the result becomes the input of the next plane
+		clipFace(*pVtxIn, *pVtxOut,planeNormalWS,planeEqWS);
+		btSwap(pVtxIn,pVtxOut);
+		pVtxOut->resize(0);
+	}
+
+//#define ONLY_REPORT_DEEPEST_POINT
+#ifdef ONLY_REPORT_DEEPEST_POINT
+	btVector3 point;
+	btScalar curMaxDist = maxDist;
+#endif
+	// only keep points that are behind the witness face
+	{
+		btVector3 localPlaneNormal (polyA.m_plane[0],polyA.m_plane[1],polyA.m_plane[2]);
+		btScalar localPlaneEq = polyA.m_plane[3];
+		btVector3 planeNormalWS = transA.getBasis()*localPlaneNormal;
+		btScalar planeEqWS=localPlaneEq-planeNormalWS.dot(transA.getOrigin());
+		for (int i=0;i<pVtxIn->size();i++)
+		{
+			btVector3 vtx = pVtxIn->at(i);
+			btScalar depth = planeNormalWS.dot(vtx)+planeEqWS;
+			if (depth <=minDist)
+			{
+//				printf("clamped: depth=%f to minDist=%f\n",depth,minDist);
+				depth = minDist;
+			}
+
+			if (depth <=maxDist)
+			{
+#ifdef ONLY_REPORT_DEEPEST_POINT
+				if (depth < curMaxDist) { curMaxDist = depth; point = vtx; }	// remember only the deepest (smallest depth) accepted vertex
+#else
+#if 0
+				if (depth<-3)
+				{
+					printf("error in btPolyhedralContactClipping depth = %f\n", depth);
+					printf("likely wrong separatingNormal passed in\n");
+				} 
+#endif				
+				resultOut.addContactPoint(separatingNormal,vtx,depth);
+#endif
+			}
+		}
+	}
+#ifdef ONLY_REPORT_DEEPEST_POINT
+	if (curMaxDist<maxDist)
+	{
+		resultOut.addContactPoint(separatingNormal,point,curMaxDist);
+	}
+#endif //ONLY_REPORT_DEEPEST_POINT
+
+}
+
+
+
+
+
+void	btPolyhedralContactClipping::clipHullAgainstHull(const btVector3& separatingNormal1, const btConvexPolyhedron& hullA, const btConvexPolyhedron& hullB, const btTransform& transA,const btTransform& transB, const btScalar minDist, btScalar maxDist,btDiscreteCollisionDetectorInterface::Result& resultOut)
+{
+	// Picks the face of hullB whose transformed normal has the largest dot product with the
+	// normalized separating normal, transforms its vertices by transB and clips that polygon
+	// against hullA via clipFaceAgainstHull, which reports the contacts to resultOut.
+	//  separatingNormal1  separating direction; it is normalized here before use
+	//  minDist, maxDist   forwarded unchanged to clipFaceAgainstHull
+	const btVector3 separatingNormal = separatingNormal1.normalized();
+
+	int closestFaceB=-1;
+	btScalar dmax = -FLT_MAX;
+	for(int face=0;face<hullB.m_faces.size();face++)
+	{
+		const btVector3 Normal(hullB.m_faces[face].m_plane[0], hullB.m_faces[face].m_plane[1], hullB.m_faces[face].m_plane[2]);
+		const btVector3 WorldNormal = transB.getBasis() * Normal;
+		const btScalar d = WorldNormal.dot(separatingNormal);
+		if (d > dmax)
+		{
+			dmax = d;
+			closestFaceB = face;
+		}
+	}
+
+	// no face was selected (e.g. hullB has no faces): bail out before m_faces is indexed with -1
+	if (closestFaceB<0)
+	{
+		return;
+	}
+
+	// polygon of the selected face, transformed by transB
+	btVertexArray worldVertsB1;
+	const btFace& polyB = hullB.m_faces[closestFaceB];
+	const int numVertices = polyB.m_indices.size();
+	for(int e0=0;e0<numVertices;e0++)
+	{
+		const btVector3& b = hullB.m_vertices[polyB.m_indices[e0]];
+		worldVertsB1.push_back(transB*b);
+	}
+
+	clipFaceAgainstHull(separatingNormal, hullA, transA,worldVertsB1, minDist, maxDist,resultOut);
+}
diff --git a/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btPolyhedralContactClipping.h b/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btPolyhedralContactClipping.h
new file mode 100644
index 000000000..b87bd4f32
--- /dev/null
+++ b/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btPolyhedralContactClipping.h
@@ -0,0 +1,46 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2011 Advanced Micro Devices, Inc.  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+///This file was written by Erwin Coumans
+
+
+#ifndef BT_POLYHEDRAL_CONTACT_CLIPPING_H
+#define BT_POLYHEDRAL_CONTACT_CLIPPING_H
+
+
+#include "LinearMath/btAlignedObjectArray.h"
+#include "LinearMath/btTransform.h"
+#include "btDiscreteCollisionDetectorInterface.h"
+
+class btConvexPolyhedron;
+
+typedef btAlignedObjectArray<btVector3> btVertexArray;
+
+///btPolyhedralContactClipping groups static helpers for pairs of convex polyhedra: a separating-axis query, and polygon clipping that reports contact points through btDiscreteCollisionDetectorInterface::Result
+struct btPolyhedralContactClipping
+{
+	static void clipHullAgainstHull(const btVector3& separatingNormal, const btConvexPolyhedron& hullA, const btConvexPolyhedron& hullB, const btTransform& transA,const btTransform& transB, const btScalar minDist, btScalar maxDist, btDiscreteCollisionDetectorInterface::Result& resultOut);
+	static void	clipFaceAgainstHull(const btVector3& separatingNormal, const btConvexPolyhedron& hullA,  const btTransform& transA, btVertexArray& worldVertsB1, const btScalar minDist, btScalar maxDist,btDiscreteCollisionDetectorInterface::Result& resultOut);
+	///findSeparatingAxis returns false when it finds an axis along which the two hulls do not overlap; otherwise it returns true and writes the chosen axis to sep
+	static bool findSeparatingAxis(	const btConvexPolyhedron& hullA, const btConvexPolyhedron& hullB, const btTransform& transA,const btTransform& transB, btVector3& sep, btDiscreteCollisionDetectorInterface::Result& resultOut);
+
+	///the clipFace method is used internally; it clips a face to the back of a plane and appends the result to ppVtxOut
+	static void clipFace(const btVertexArray& pVtxIn, btVertexArray& ppVtxOut, const btVector3& planeNormalWS,btScalar planeEqWS);
+
+};
+
+#endif // BT_POLYHEDRAL_CONTACT_CLIPPING_H
+
diff --git a/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btRaycastCallback.cpp b/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btRaycastCallback.cpp
index cdb1d2244..786efd182 100644
--- a/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btRaycastCallback.cpp
+++ b/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btRaycastCallback.cpp
@@ -57,12 +57,13 @@ void btTriangleRaycastCallback::processTriangle(btVector3* triangle,int partId,
 	{
 		return ; // same sign
 	}
-   //@BP Mod - Backface filtering
-   if (((m_flags & kF_FilterBackfaces) != 0) && (dist_a > btScalar(0.0)))
-   {
-      // Backface, skip check
-      return;
-   }
+
+	if (((m_flags & kF_FilterBackfaces) != 0) && (dist_a <= btScalar(0.0)))
+	{
+		// Backface, skip check
+		return;
+	}
+
 	
 	const btScalar proj_length=dist_a-dist_b;
 	const btScalar distance = (dist_a)/(proj_length);
@@ -97,18 +98,18 @@ void btTriangleRaycastCallback::processTriangle(btVector3* triangle,int partId,
 					
 					if ( (btScalar)(cp2.dot(triangleNormal)) >=edge_tolerance) 
 					{
-                  //@BP Mod
-                  // Triangle normal isn't normalized
+					  //@BP Mod
+					  // Triangle normal isn't normalized
 				      triangleNormal.normalize();
 
-                  //@BP Mod - Allow for unflipped normal when raycasting against backfaces
-                  if (((m_flags & kF_KeepUnflippedNormal) != 0) || (dist_a <= btScalar(0.0)))
+					 //@BP Mod - Allow for unflipped normal when raycasting against backfaces
+						if (((m_flags & kF_KeepUnflippedNormal) == 0) && (dist_a <= btScalar(0.0)))
 						{
 							m_hitFraction = reportHit(-triangleNormal,distance,partId,triangleIndex);
 						}
 						else
 						{
-                     m_hitFraction = reportHit(triangleNormal,distance,partId,triangleIndex);
+							m_hitFraction = reportHit(triangleNormal,distance,partId,triangleIndex);
 						}
 					}
 				}
@@ -124,8 +125,9 @@ btTriangleConvexcastCallback::btTriangleConvexcastCallback (const btConvexShape*
 	m_convexShapeFrom = convexShapeFrom;
 	m_convexShapeTo = convexShapeTo;
 	m_triangleToWorld = triangleToWorld;
-	m_hitFraction = 1.0;
-    m_triangleCollisionMargin = triangleCollisionMargin;
+	m_hitFraction = 1.0f;
+	m_triangleCollisionMargin = triangleCollisionMargin;
+	m_allowedPenetration = 0.f;
 }
 
 void
@@ -148,6 +150,7 @@ btTriangleConvexcastCallback::processTriangle (btVector3* triangle, int partId,
 	
 	btConvexCast::CastResult castResult;
 	castResult.m_fraction = btScalar(1.);
+	castResult.m_allowedPenetration = m_allowedPenetration;
 	if (convexCaster.calcTimeOfImpact(m_convexShapeFrom,m_convexShapeTo,m_triangleToWorld, m_triangleToWorld, castResult))
 	{
 		//add hit
diff --git a/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btRaycastCallback.h b/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btRaycastCallback.h
index 3a1ab388c..f012889a7 100644
--- a/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btRaycastCallback.h
+++ b/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btRaycastCallback.h
@@ -13,8 +13,8 @@ subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-#ifndef RAYCAST_TRI_CALLBACK_H
-#define RAYCAST_TRI_CALLBACK_H
+#ifndef BT_RAYCAST_TRI_CALLBACK_H
+#define BT_RAYCAST_TRI_CALLBACK_H
 
 #include "BulletCollision/CollisionShapes/btTriangleCallback.h"
 #include "LinearMath/btTransform.h"
@@ -58,7 +58,8 @@ public:
 	btTransform m_convexShapeTo;
 	btTransform m_triangleToWorld;
 	btScalar m_hitFraction;
-    btScalar m_triangleCollisionMargin;
+	btScalar m_triangleCollisionMargin;
+	btScalar m_allowedPenetration;
 
 	btTriangleConvexcastCallback (const btConvexShape* convexShape, const btTransform& convexShapeFrom, const btTransform& convexShapeTo, const btTransform& triangleToWorld, const btScalar triangleCollisionMargin);
 
@@ -67,5 +68,5 @@ public:
 	virtual btScalar reportHit (const btVector3& hitNormalLocal, const btVector3& hitPointLocal, btScalar hitFraction, int partId, int triangleIndex) = 0;
 };
 
-#endif //RAYCAST_TRI_CALLBACK_H
+#endif //BT_RAYCAST_TRI_CALLBACK_H
 
diff --git a/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btSimplexSolverInterface.h b/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btSimplexSolverInterface.h
index 823b4e715..da8a13914 100644
--- a/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btSimplexSolverInterface.h
+++ b/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btSimplexSolverInterface.h
@@ -15,8 +15,8 @@ subject to the following restrictions:
 
 
 
-#ifndef SIMPLEX_SOLVER_INTERFACE_H
-#define SIMPLEX_SOLVER_INTERFACE_H
+#ifndef BT_SIMPLEX_SOLVER_INTERFACE_H
+#define BT_SIMPLEX_SOLVER_INTERFACE_H
 
 #include "LinearMath/btVector3.h"
 
@@ -59,5 +59,5 @@ class btSimplexSolverInterface
 
 };
 #endif
-#endif //SIMPLEX_SOLVER_INTERFACE_H
+#endif //BT_SIMPLEX_SOLVER_INTERFACE_H
 
diff --git a/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btSubSimplexConvexCast.cpp b/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btSubSimplexConvexCast.cpp
index 4c709a8c3..18eb662de 100644
--- a/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btSubSimplexConvexCast.cpp
+++ b/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btSubSimplexConvexCast.cpp
@@ -114,7 +114,10 @@ bool	btSubsimplexConvexCast::calcTimeOfImpact(
 				hasResult = true;
 			}
 		} 
-		m_simplexSolver->addVertex( w, supVertexA , supVertexB);
+		///Just like regular GJK, only add the vertex if it isn't already in (or close to) the current simplex; a duplicate vertex would lead to divisions by zero, NaN, etc.
+		if (!m_simplexSolver->inSimplex(w))
+			m_simplexSolver->addVertex( w, supVertexA , supVertexB);
+
 		if (m_simplexSolver->closest(v))
 		{
 			dist2 = v.length2();
diff --git a/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btSubSimplexConvexCast.h b/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btSubSimplexConvexCast.h
index 05662db5d..6c8127983 100644
--- a/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btSubSimplexConvexCast.h
+++ b/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btSubSimplexConvexCast.h
@@ -14,8 +14,8 @@ subject to the following restrictions:
 */
 
 
-#ifndef SUBSIMPLEX_CONVEX_CAST_H
-#define SUBSIMPLEX_CONVEX_CAST_H
+#ifndef BT_SUBSIMPLEX_CONVEX_CAST_H
+#define BT_SUBSIMPLEX_CONVEX_CAST_H
 
 #include "btConvexCast.h"
 #include "btSimplexSolverInterface.h"
@@ -47,4 +47,4 @@ public:
 
 };
 
-#endif //SUBSIMPLEX_CONVEX_CAST_H
+#endif //BT_SUBSIMPLEX_CONVEX_CAST_H
diff --git a/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btVoronoiSimplexSolver.cpp b/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btVoronoiSimplexSolver.cpp
index b07173330..a775198ab 100644
--- a/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btVoronoiSimplexSolver.cpp
+++ b/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btVoronoiSimplexSolver.cpp
@@ -289,7 +289,11 @@ bool btVoronoiSimplexSolver::inSimplex(const btVector3& w)
 	//w is in the current (reduced) simplex
 	for (i=0;i<numverts;i++)
 	{
+#ifdef BT_USE_EQUAL_VERTEX_THRESHOLD
+		if ( m_simplexVectorW[i].distance2(w) <= m_equalVertexThreshold)
+#else
 		if (m_simplexVectorW[i] == w)
+#endif
 			found = true;
 	}
 
diff --git a/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btVoronoiSimplexSolver.h b/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btVoronoiSimplexSolver.h
index d3162d9fb..2f389e27e 100644
--- a/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btVoronoiSimplexSolver.h
+++ b/Engine/lib/bullet/src/BulletCollision/NarrowPhaseCollision/btVoronoiSimplexSolver.h
@@ -15,8 +15,8 @@ subject to the following restrictions:
 
 
 
-#ifndef btVoronoiSimplexSolver_H
-#define btVoronoiSimplexSolver_H
+#ifndef BT_VORONOI_SIMPLEX_SOLVER_H
+#define BT_VORONOI_SIMPLEX_SOLVER_H
 
 #include "btSimplexSolverInterface.h"
 
@@ -24,6 +24,11 @@ subject to the following restrictions:
 
 #define VORONOI_SIMPLEX_MAX_VERTS 5
 
+///disable next define, or use defaultCollisionConfiguration->getSimplexSolver()->setEqualVertexThreshold(0.f) to disable/configure
+#define BT_USE_EQUAL_VERTEX_THRESHOLD
+#define VORONOI_DEFAULT_EQUAL_VERTEX_THRESHOLD 0.0001f
+
+
 struct btUsageBitfield{
 	btUsageBitfield()
 	{
@@ -87,13 +92,15 @@ struct	btSubSimplexClosestResult
 /// btVoronoiSimplexSolver is an implementation of the closest point distance algorithm from a 1-4 points simplex to the origin.
 /// Can be used with GJK, as an alternative to Johnson distance algorithm.
 #ifdef NO_VIRTUAL_INTERFACE
-class btVoronoiSimplexSolver
+ATTRIBUTE_ALIGNED16(class) btVoronoiSimplexSolver
 #else
-class btVoronoiSimplexSolver : public btSimplexSolverInterface
+ATTRIBUTE_ALIGNED16(class) btVoronoiSimplexSolver : public btSimplexSolverInterface
 #endif
 {
 public:
 
+	BT_DECLARE_ALIGNED_ALLOCATOR();
+
 	int	m_numVertices;
 
 	btVector3	m_simplexVectorW[VORONOI_SIMPLEX_MAX_VERTS];
@@ -106,8 +113,11 @@ public:
 	btVector3	m_cachedP2;
 	btVector3	m_cachedV;
 	btVector3	m_lastW;
+	
+	btScalar	m_equalVertexThreshold;
 	bool		m_cachedValidClosest;
 
+
 	btSubSimplexClosestResult m_cachedBC;
 
 	bool	m_needsUpdate;
@@ -122,10 +132,23 @@ public:
 
 public:
 
+	btVoronoiSimplexSolver() // default constructor: the initializer list sets only the equal-vertex threshold
+		:  m_equalVertexThreshold(VORONOI_DEFAULT_EQUAL_VERTEX_THRESHOLD) // 0.0001f unless changed later through setEqualVertexThreshold()
+	{ // NOTE(review): no other member is initialized here; presumably reset() must run before first use -- confirm
+	}
 	 void reset();
 
 	 void addVertex(const btVector3& w, const btVector3& p, const btVector3& q);
 
+	 void	setEqualVertexThreshold(btScalar threshold) // sets the tolerance inSimplex() uses to treat a candidate vertex as already present
+	 {
+		 m_equalVertexThreshold = threshold; // compared against a squared distance (distance2) when BT_USE_EQUAL_VERTEX_THRESHOLD is defined
+	 }
+
+	 btScalar	getEqualVertexThreshold() const // returns the tolerance currently stored in m_equalVertexThreshold
+	 {
+		 return m_equalVertexThreshold;
+	 }
 
 	 bool closest(btVector3& v);
 
@@ -154,4 +177,5 @@ public:
 
 };
 
-#endif //VoronoiSimplexSolver
+#endif //BT_VORONOI_SIMPLEX_SOLVER_H
+
diff --git a/Engine/lib/bullet/src/BulletCollision/ibmsdk/Makefile b/Engine/lib/bullet/src/BulletCollision/ibmsdk/Makefile
deleted file mode 100644
index 8a7878326..000000000
--- a/Engine/lib/bullet/src/BulletCollision/ibmsdk/Makefile
+++ /dev/null
@@ -1,112 +0,0 @@
-#### Source code Dirs
-VPATH =				\
-../BroadphaseCollision		\
-../CollisionDispatch		\
-../NarrowPhaseCollision		\
-../CollisionShapes
-
-ROOT = ../../..
-
-#### Library
-LIBRARY_ppu = bulletcollision.a
-
-#### Compiler flags
-CPPFLAGS        = 		\
--DUSE_LIBSPE2	\
--I../BroadphaseCollision		\
--I../CollisionDispath		\
--I../NarrowPhaseCollision		\
--I../CollisionShapes		\
--I$(ROOT)/src/			\
--I$(SDKINC)
-
-#### Optimization level flags
-#CC_OPT_LEVEL =  $(CC_OPT_LEVEL_DEBUG)
-CC_OPT_LEVEL =  -O3
-
-##### Objects to be archived in lib
-
-OBJS = 						\
-btAxisSweep3.o					\
-btQuantizedBvh.o				\
-btBroadphaseProxy.o				\
-btCollisionAlgorithm.o				\
-btDispatcher.o					\
-btDbvtBroadphase.o				\
-btDbvt.o					\
-btOverlappingPairCache.o			\
-btSimpleBroadphase.o				\
-btContinuousConvexCollision.o			\
-btConvexCast.o					\
-btGjkConvexCast.o				\
-btGjkEpa2.o					\
-btGjkEpaPenetrationDepthSolver.o		\
-btGjkPairDetector.o				\
-btDefaultCollisionConfiguration.o		\
-btMinkowskiPenetrationDepthSolver.o		\
-btPersistentManifold.o				\
-btRaycastCallback.o				\
-btSubSimplexConvexCast.o			\
-btVoronoiSimplexSolver.o			\
-btCollisionDispatcher.o				\
-btCollisionObject.o				\
-btCollisionWorld.o				\
-btCompoundCollisionAlgorithm.o			\
-btBoxBoxCollisionAlgorithm.o			\
-btBoxBoxDetector.o				\
-btConvexPlaneCollisionAlgorithm.o		\
-btConvexConcaveCollisionAlgorithm.o		\
-btConvexConvexAlgorithm.o			\
-btDefaultCollisionConfiguration.o		\
-btEmptyCollisionAlgorithm.o			\
-btManifoldResult.o				\
-btSimulationIslandManager.o			\
-btSphereBoxCollisionAlgorithm.o			\
-btSphereSphereCollisionAlgorithm.o		\
-btSphereTriangleCollisionAlgorithm.o		\
-btActivatingCollisionAlgorithm.o		\
-btUnionFind.o					\
-SphereTriangleDetector.o			\
-btBoxShape.o					\
-btBvhTriangleMeshShape.o			\
-btCapsuleShape.o				\
-btCollisionShape.o				\
-btCompoundShape.o				\
-btConcaveShape.o				\
-btConeShape.o					\
-btConvexHullShape.o				\
-btConvexShape.o					\
-btConvexInternalShape.o				\
-btConvexTriangleMeshShape.o			\
-btCylinderShape.o				\
-btEmptyShape.o					\
-btHeightfieldTerrainShape.o			\
-btMinkowskiSumShape.o				\
-btMultiSphereShape.o				\
-btOptimizedBvh.o				\
-btPolyhedralConvexShape.o			\
-btSphereShape.o					\
-btStaticPlaneShape.o				\
-btStridingMeshInterface.o			\
-btTetrahedronShape.o				\
-btTriangleBuffer.o				\
-btTriangleCallback.o				\
-btTriangleIndexVertexArray.o			\
-btTriangleMesh.o				\
-btTriangleMeshShape.o				\
-btUniformScalingShape.o
-
-#### Install directories
-INSTALL_DIR	= $(ROOT)/lib/ibmsdk
-INSTALL_FILES	= $(LIBRARY_ppu)
-
-IBM_CELLSDK_VERSION := $(shell if [ -d /opt/cell ]; then echo "3.0"; fi)
-
-ifeq ("$(IBM_CELLSDK_VERSION)","3.0")
-        CELL_TOP ?= /opt/cell/sdk
-        include $(CELL_TOP)/buildutils/make.footer
-else
-        CELL_TOP ?= /opt/ibm/cell-sdk/prototype
-        include $(CELL_TOP)/make.footer
-endif
-
diff --git a/Engine/lib/bullet/src/BulletCollision/premake4.lua b/Engine/lib/bullet/src/BulletCollision/premake4.lua
new file mode 100644
index 000000000..9bc0a9e60
--- /dev/null
+++ b/Engine/lib/bullet/src/BulletCollision/premake4.lua
@@ -0,0 +1,11 @@
+	project "BulletCollision" -- premake4 project definition for the BulletCollision library
+		
+	kind "StaticLib" -- build as a static library
+	targetdir "../../lib" -- output directory, relative to this script
+	includedirs { -- header search paths
+		"..", -- parent directory of this script
+	}
+	files { -- sources and headers, matched recursively from this directory
+		"**.cpp",
+		"**.h"
+	}
\ No newline at end of file
diff --git a/Engine/lib/bullet/src/BulletDynamics/CMakeLists.txt b/Engine/lib/bullet/src/BulletDynamics/CMakeLists.txt
index e4fd766a3..100cd7a88 100644
--- a/Engine/lib/bullet/src/BulletDynamics/CMakeLists.txt
+++ b/Engine/lib/bullet/src/BulletDynamics/CMakeLists.txt
@@ -1,4 +1,4 @@
-INCLUDE_DIRECTORIES( ${BULLET_PHYSICS_SOURCE_DIR}/src } )
+INCLUDE_DIRECTORIES( ${BULLET_PHYSICS_SOURCE_DIR}/src  )
 
 
 
@@ -6,6 +6,7 @@ SET(BulletDynamics_SRCS
 	Character/btKinematicCharacterController.cpp
 	ConstraintSolver/btConeTwistConstraint.cpp
 	ConstraintSolver/btContactConstraint.cpp
+	ConstraintSolver/btGearConstraint.cpp
 	ConstraintSolver/btGeneric6DofConstraint.cpp
 	ConstraintSolver/btGeneric6DofSpringConstraint.cpp
 	ConstraintSolver/btHinge2Constraint.cpp
@@ -16,7 +17,6 @@ SET(BulletDynamics_SRCS
 	ConstraintSolver/btSolve2LinearConstraint.cpp
 	ConstraintSolver/btTypedConstraint.cpp
 	ConstraintSolver/btUniversalConstraint.cpp
-	Dynamics/btContinuousDynamicsWorld.cpp
 	Dynamics/btDiscreteDynamicsWorld.cpp
 	Dynamics/btRigidBody.cpp
 	Dynamics/btSimpleDynamicsWorld.cpp
@@ -34,6 +34,7 @@ SET(ConstraintSolver_HDRS
 	ConstraintSolver/btConstraintSolver.h
 	ConstraintSolver/btContactConstraint.h
 	ConstraintSolver/btContactSolverInfo.h
+	ConstraintSolver/btGearConstraint.h
 	ConstraintSolver/btGeneric6DofConstraint.h
 	ConstraintSolver/btGeneric6DofSpringConstraint.h
 	ConstraintSolver/btHinge2Constraint.h
@@ -50,7 +51,6 @@ SET(ConstraintSolver_HDRS
 )
 SET(Dynamics_HDRS
 	Dynamics/btActionInterface.h
-	Dynamics/btContinuousDynamicsWorld.h
 	Dynamics/btDiscreteDynamicsWorld.h
 	Dynamics/btDynamicsWorld.h
 	Dynamics/btSimpleDynamicsWorld.h
@@ -85,23 +85,30 @@ IF (BUILD_SHARED_LIBS)
 	TARGET_LINK_LIBRARIES(BulletDynamics BulletCollision LinearMath)
 ENDIF (BUILD_SHARED_LIBS)
 
-IF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5)
-	IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
-		INSTALL(TARGETS BulletDynamics DESTINATION .)
-	ELSE (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
-	INSTALL(TARGETS BulletDynamics DESTINATION lib)
-	INSTALL(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} DESTINATION include FILES_MATCHING PATTERN "*.h")
-	ENDIF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
-ENDIF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5)
+IF (INSTALL_LIBS) # everything below (install rules and framework packaging) is skipped unless INSTALL_LIBS is set
+	IF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES) # ...and is also skipped when INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES is set
+		IF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5) # NOTE(review): GREATER compares "major.minor" as a real number, not as a version; VERSION_GREATER may be what is intended -- confirm
+			IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK) # framework build: install the target at the root of the install prefix
+				INSTALL(TARGETS BulletDynamics DESTINATION .)
+			ELSE (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK) # regular build: library goes to lib${LIB_SUFFIX}, headers to ${INCLUDE_INSTALL_DIR}
+				INSTALL(TARGETS BulletDynamics DESTINATION lib${LIB_SUFFIX})
+				INSTALL(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+DESTINATION ${INCLUDE_INSTALL_DIR} FILES_MATCHING PATTERN "*.h"  PATTERN
+".svn" EXCLUDE PATTERN "CMakeFiles" EXCLUDE)
+				INSTALL(FILES ../btBulletDynamicsCommon.h
+DESTINATION ${INCLUDE_INSTALL_DIR}/BulletDynamics)
+			ENDIF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
+		ENDIF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5)
 
-IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
-	SET_TARGET_PROPERTIES(BulletDynamics PROPERTIES FRAMEWORK true)
-	
-	SET_TARGET_PROPERTIES(BulletDynamics PROPERTIES PUBLIC_HEADER "${Root_HDRS}")
-	# Have to list out sub-directories manually:
-	SET_PROPERTY(SOURCE ${ConstraintSolver_HDRS} PROPERTY MACOSX_PACKAGE_LOCATION Headers/ConstraintSolver)
-	SET_PROPERTY(SOURCE ${Dynamics_HDRS} PROPERTY MACOSX_PACKAGE_LOCATION Headers/Dynamics)
-	SET_PROPERTY(SOURCE ${Vehicle_HDRS} PROPERTY MACOSX_PACKAGE_LOCATION Headers/Vehicle)
-	SET_PROPERTY(SOURCE ${Character_HDRS} PROPERTY MACOSX_PACKAGE_LOCATION Headers/Character)
-		
-ENDIF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
+		IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK) # NOTE(review): now nested under IF (INSTALL_LIBS), so the framework properties are only applied when installing -- confirm this is intended
+			SET_TARGET_PROPERTIES(BulletDynamics PROPERTIES FRAMEWORK true)
+			SET_TARGET_PROPERTIES(BulletDynamics PROPERTIES PUBLIC_HEADER "${Root_HDRS}")
+			# Have to list out sub-directories manually:
+			SET_PROPERTY(SOURCE ${ConstraintSolver_HDRS} PROPERTY MACOSX_PACKAGE_LOCATION Headers/ConstraintSolver)
+			SET_PROPERTY(SOURCE ${Dynamics_HDRS} PROPERTY MACOSX_PACKAGE_LOCATION Headers/Dynamics)
+			SET_PROPERTY(SOURCE ${Vehicle_HDRS} PROPERTY MACOSX_PACKAGE_LOCATION Headers/Vehicle)
+			SET_PROPERTY(SOURCE ${Character_HDRS} PROPERTY MACOSX_PACKAGE_LOCATION Headers/Character)
+				
+		ENDIF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
+	ENDIF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES)
+ENDIF (INSTALL_LIBS)
diff --git a/Engine/lib/bullet/src/BulletDynamics/Character/btCharacterControllerInterface.h b/Engine/lib/bullet/src/BulletDynamics/Character/btCharacterControllerInterface.h
index 19373daa2..c81813c92 100644
--- a/Engine/lib/bullet/src/BulletDynamics/Character/btCharacterControllerInterface.h
+++ b/Engine/lib/bullet/src/BulletDynamics/Character/btCharacterControllerInterface.h
@@ -13,8 +13,8 @@ subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-#ifndef CHARACTER_CONTROLLER_INTERFACE_H
-#define CHARACTER_CONTROLLER_INTERFACE_H
+#ifndef BT_CHARACTER_CONTROLLER_INTERFACE_H
+#define BT_CHARACTER_CONTROLLER_INTERFACE_H
 
 #include "LinearMath/btVector3.h"
 #include "BulletDynamics/Dynamics/btActionInterface.h"
@@ -42,4 +42,5 @@ public:
 	virtual bool	onGround () const = 0;
 };
 
-#endif
+#endif //BT_CHARACTER_CONTROLLER_INTERFACE_H
+
diff --git a/Engine/lib/bullet/src/BulletDynamics/Character/btKinematicCharacterController.cpp b/Engine/lib/bullet/src/BulletDynamics/Character/btKinematicCharacterController.cpp
index 4a2d6089e..3b9a7f14c 100644
--- a/Engine/lib/bullet/src/BulletDynamics/Character/btKinematicCharacterController.cpp
+++ b/Engine/lib/bullet/src/BulletDynamics/Character/btKinematicCharacterController.cpp
@@ -23,8 +23,6 @@ subject to the following restrictions:
 #include "LinearMath/btDefaultMotionState.h"
 #include "btKinematicCharacterController.h"
 
-static btVector3 upAxisDirection[3] = { btVector3(1.0f, 0.0f, 0.0f), btVector3(0.0f, 1.0f, 0.0f), btVector3(0.0f, 0.0f, 1.0f) };
-
 
 // static helper method
 static btVector3
@@ -66,20 +64,40 @@ protected:
 class btKinematicClosestNotMeConvexResultCallback : public btCollisionWorld::ClosestConvexResultCallback
 {
 public:
-	btKinematicClosestNotMeConvexResultCallback (btCollisionObject* me) : btCollisionWorld::ClosestConvexResultCallback(btVector3(0.0, 0.0, 0.0), btVector3(0.0, 0.0, 0.0))
+	btKinematicClosestNotMeConvexResultCallback (btCollisionObject* me, const btVector3& up, btScalar minSlopeDot) // me: object whose hits are ignored; hits with up.dot(world normal) < minSlopeDot are ignored too
+	: btCollisionWorld::ClosestConvexResultCallback(btVector3(0.0, 0.0, 0.0), btVector3(0.0, 0.0, 0.0))
+	, m_me(me)
+	, m_up(up)
+	, m_minSlopeDot(minSlopeDot)
 	{
-		m_me = me;
 	}
 
 	virtual btScalar addSingleResult(btCollisionWorld::LocalConvexResult& convexResult,bool normalInWorldSpace)
 	{
 		if (convexResult.m_hitCollisionObject == m_me)
-			return 1.0;
+			return btScalar(1.0); // hit on m_me: returned without forwarding to the base class
+
+		btVector3 hitNormalWorld;
+		if (normalInWorldSpace)
+		{
+			hitNormalWorld = convexResult.m_hitNormalLocal; // despite the member name, the caller says this normal is already in world space
+		} else
+		{
+			///need to transform normal into worldspace
+			hitNormalWorld = convexResult.m_hitCollisionObject->getWorldTransform().getBasis()*convexResult.m_hitNormalLocal;
+		}
+
+		btScalar dotUp = m_up.dot(hitNormalWorld); // m_up is used exactly as passed in (not normalized here), so dotUp scales with its length
+		if (dotUp < m_minSlopeDot) {
+			return btScalar(1.0); // normal points too far away from m_up: skip this hit without forwarding it to the base class
+		}
 
 		return ClosestConvexResultCallback::addSingleResult (convexResult, normalInWorldSpace);
 	}
 protected:
 	btCollisionObject* m_me;
+	const btVector3 m_up; // reference direction the world-space hit normal is compared against
+	btScalar m_minSlopeDot; // minimum accepted value of m_up.dot(hitNormalWorld)
 };
 
 /*
@@ -112,7 +130,7 @@ btVector3 btKinematicCharacterController::perpindicularComponent (const btVector
 btKinematicCharacterController::btKinematicCharacterController (btPairCachingGhostObject* ghostObject,btConvexShape* convexShape,btScalar stepHeight, int upAxis)
 {
 	m_upAxis = upAxis;
-	m_addedMargin = 0.02f;
+	m_addedMargin = 0.02;
 	m_walkDirection.setValue(0,0,0);
 	m_useGhostObjectSweepTest = true;
 	m_ghostObject = ghostObject;
@@ -121,6 +139,14 @@ btKinematicCharacterController::btKinematicCharacterController (btPairCachingGho
 	m_convexShape=convexShape;	
 	m_useWalkDirection = true;	// use walk direction by default, legacy behavior
 	m_velocityTimeInterval = 0.0;
+	m_verticalVelocity = 0.0;
+	m_verticalOffset = 0.0;
+	m_gravity = 9.8 * 3 ; // 3G acceleration.
+	m_fallSpeed = 55.0; // Terminal velocity of a sky diver in m/s.
+	m_jumpSpeed = 10.0; // ?
+	m_wasOnGround = false;
+	m_wasJumping = false;
+	setMaxSlope(btRadians(45.0));
 }
 
 btKinematicCharacterController::~btKinematicCharacterController ()
@@ -134,7 +160,21 @@ btPairCachingGhostObject* btKinematicCharacterController::getGhostObject()
 
 bool btKinematicCharacterController::recoverFromPenetration ( btCollisionWorld* collisionWorld)
 {
+	// Here we must refresh the overlapping paircache as the penetrating movement itself or the
+	// previous recovery iteration might have used setWorldTransform and pushed us into an object
+	// that is not in the previous cache contents from the last timestep, as will happen if we
+	// are pushed into a new AABB overlap. Unhandled this means the next convex sweep gets stuck.
+	//
+	// Do this by calling the broadphase's setAabb with the moved AABB, this will update the broadphase
+	// paircache and the ghostobject's internal paircache at the same time.    /BW
 
+	btVector3 minAabb, maxAabb;
+	m_convexShape->getAabb(m_ghostObject->getWorldTransform(), minAabb,maxAabb);
+	collisionWorld->getBroadphase()->setAabb(m_ghostObject->getBroadphaseHandle(), 
+						 minAabb, 
+						 maxAabb, 
+						 collisionWorld->getDispatcher());
+						 
 	bool penetration = false;
 
 	collisionWorld->getDispatcher()->dispatchAllCollisionPairs(m_ghostObject->getOverlappingPairCache(), collisionWorld->getDispatchInfo(), collisionWorld->getDispatcher());
@@ -160,18 +200,20 @@ bool btKinematicCharacterController::recoverFromPenetration ( btCollisionWorld*
 			{
 				const btManifoldPoint&pt = manifold->getContactPoint(p);
 
-				if (pt.getDistance() < 0.0)
+				btScalar dist = pt.getDistance();
+
+				if (dist < 0.0)
 				{
-					if (pt.getDistance() < maxPen)
+					if (dist < maxPen)
 					{
-						maxPen = pt.getDistance();
+						maxPen = dist;
 						m_touchingNormal = pt.m_normalWorldOnB * directionSign;//??
 
 					}
-					m_currentPosition += pt.m_normalWorldOnB * directionSign * pt.getDistance() * btScalar(0.2);
+					m_currentPosition += pt.m_normalWorldOnB * directionSign * dist * btScalar(0.2);
 					penetration = true;
 				} else {
-					//printf("touching %f\n", pt.getDistance());
+					//printf("touching %f\n", dist);
 				}
 			}
 			
@@ -189,16 +231,16 @@ void btKinematicCharacterController::stepUp ( btCollisionWorld* world)
 {
 	// phase 1: up
 	btTransform start, end;
-	m_targetPosition = m_currentPosition + upAxisDirection[m_upAxis] * m_stepHeight;
+	m_targetPosition = m_currentPosition + getUpAxisDirections()[m_upAxis] * (m_stepHeight + (m_verticalOffset > 0.f?m_verticalOffset:0.f));
 
 	start.setIdentity ();
 	end.setIdentity ();
 
 	/* FIXME: Handle penetration properly */
-	start.setOrigin (m_currentPosition + upAxisDirection[m_upAxis] * btScalar(0.1f));
+	start.setOrigin (m_currentPosition + getUpAxisDirections()[m_upAxis] * (m_convexShape->getMargin() + m_addedMargin));
 	end.setOrigin (m_targetPosition);
 
-	btKinematicClosestNotMeConvexResultCallback callback (m_ghostObject);
+	btKinematicClosestNotMeConvexResultCallback callback (m_ghostObject, -getUpAxisDirections()[m_upAxis], btScalar(0.7071));
 	callback.m_collisionFilterGroup = getGhostObject()->getBroadphaseHandle()->m_collisionFilterGroup;
 	callback.m_collisionFilterMask = getGhostObject()->getBroadphaseHandle()->m_collisionFilterMask;
 	
@@ -213,9 +255,15 @@ void btKinematicCharacterController::stepUp ( btCollisionWorld* world)
 	
 	if (callback.hasHit())
 	{
-		// we moved up only a fraction of the step height
-		m_currentStepOffset = m_stepHeight * callback.m_closestHitFraction;
-		m_currentPosition.setInterpolate3 (m_currentPosition, m_targetPosition, callback.m_closestHitFraction);
+		// Only modify the position if the hit was a slope and not a wall or ceiling.
+		if(callback.m_hitNormalWorld.dot(getUpAxisDirections()[m_upAxis]) > 0.0)
+		{
+			// we moved up only a fraction of the step height
+			m_currentStepOffset = m_stepHeight * callback.m_closestHitFraction;
+			m_currentPosition.setInterpolate3 (m_currentPosition, m_targetPosition, callback.m_closestHitFraction);
+		}
+		m_verticalVelocity = 0.0;
+		m_verticalOffset = 0.0;
 	} else {
 		m_currentStepOffset = m_stepHeight;
 		m_currentPosition = m_targetPosition;
@@ -265,6 +313,7 @@ void btKinematicCharacterController::stepForwardAndStrafe ( btCollisionWorld* co
 	// phase 2: forward and strafe
 	btTransform start, end;
 	m_targetPosition = m_currentPosition + walkMove;
+
 	start.setIdentity ();
 	end.setIdentity ();
 	
@@ -275,7 +324,9 @@ void btKinematicCharacterController::stepForwardAndStrafe ( btCollisionWorld* co
 	if (m_touchingContact)
 	{
 		if (m_normalizedDirection.dot(m_touchingNormal) > btScalar(0.0))
+		{
 			updateTargetPositionBasedOnCollision (m_touchingNormal);
+		}
 	}
 
 	int maxIter = 10;
@@ -284,8 +335,9 @@ void btKinematicCharacterController::stepForwardAndStrafe ( btCollisionWorld* co
 	{
 		start.setOrigin (m_currentPosition);
 		end.setOrigin (m_targetPosition);
+		btVector3 sweepDirNegative(m_currentPosition - m_targetPosition);
 
-		btKinematicClosestNotMeConvexResultCallback callback (m_ghostObject);
+		btKinematicClosestNotMeConvexResultCallback callback (m_ghostObject, sweepDirNegative, btScalar(0.0));
 		callback.m_collisionFilterGroup = getGhostObject()->getBroadphaseHandle()->m_collisionFilterGroup;
 		callback.m_collisionFilterMask = getGhostObject()->getBroadphaseHandle()->m_collisionFilterMask;
 
@@ -310,18 +362,10 @@ void btKinematicCharacterController::stepForwardAndStrafe ( btCollisionWorld* co
 		if (callback.hasHit())
 		{	
 			// we moved only a fraction
-			btScalar hitDistance = (callback.m_hitPointWorld - m_currentPosition).length();
-			if (hitDistance<0.f)
-			{
-//				printf("neg dist?\n");
-			}
+			btScalar hitDistance;
+			hitDistance = (callback.m_hitPointWorld - m_currentPosition).length();
 
-			/* If the distance is farther than the collision margin, move */
-			if (hitDistance > m_addedMargin)
-			{
-//				printf("callback.m_closestHitFraction=%f\n",callback.m_closestHitFraction);
-				m_currentPosition.setInterpolate3 (m_currentPosition, m_targetPosition, callback.m_closestHitFraction);
-			}
+//			m_currentPosition.setInterpolate3 (m_currentPosition, m_targetPosition, callback.m_closestHitFraction);
 
 			updateTargetPositionBasedOnCollision (callback.m_hitNormalWorld);
 			btVector3 currentDir = m_targetPosition - m_currentPosition;
@@ -339,6 +383,7 @@ void btKinematicCharacterController::stepForwardAndStrafe ( btCollisionWorld* co
 //				printf("currentDir: don't normalize a zero vector\n");
 				break;
 			}
+
 		} else {
 			// we moved whole way
 			m_currentPosition = m_targetPosition;
@@ -355,9 +400,21 @@ void btKinematicCharacterController::stepDown ( btCollisionWorld* collisionWorld
 	btTransform start, end;
 
 	// phase 3: down
-	btVector3 step_drop = upAxisDirection[m_upAxis] * m_currentStepOffset;
-	btVector3 gravity_drop = upAxisDirection[m_upAxis] * m_stepHeight; 
-	m_targetPosition -= (step_drop + gravity_drop);
+	/*btScalar additionalDownStep = (m_wasOnGround && !onGround()) ? m_stepHeight : 0.0;
+	btVector3 step_drop = getUpAxisDirections()[m_upAxis] * (m_currentStepOffset + additionalDownStep);
+	btScalar downVelocity = (additionalDownStep == 0.0 && m_verticalVelocity<0.0?-m_verticalVelocity:0.0) * dt;
+	btVector3 gravity_drop = getUpAxisDirections()[m_upAxis] * downVelocity; 
+	m_targetPosition -= (step_drop + gravity_drop);*/
+
+	btScalar downVelocity = (m_verticalVelocity<0.f?-m_verticalVelocity:0.f) * dt;
+	if(downVelocity > 0.0 && downVelocity < m_stepHeight
+		&& (m_wasOnGround || !m_wasJumping))
+	{
+		downVelocity = m_stepHeight;
+	}
+
+	btVector3 step_drop = getUpAxisDirections()[m_upAxis] * (m_currentStepOffset + downVelocity);
+	m_targetPosition -= step_drop;
 
 	start.setIdentity ();
 	end.setIdentity ();
@@ -365,7 +422,7 @@ void btKinematicCharacterController::stepDown ( btCollisionWorld* collisionWorld
 	start.setOrigin (m_currentPosition);
 	end.setOrigin (m_targetPosition);
 
-	btKinematicClosestNotMeConvexResultCallback callback (m_ghostObject);
+	btKinematicClosestNotMeConvexResultCallback callback (m_ghostObject, getUpAxisDirections()[m_upAxis], m_maxSlopeCosine);
 	callback.m_collisionFilterGroup = getGhostObject()->getBroadphaseHandle()->m_collisionFilterGroup;
 	callback.m_collisionFilterMask = getGhostObject()->getBroadphaseHandle()->m_collisionFilterMask;
 	
@@ -381,6 +438,9 @@ void btKinematicCharacterController::stepDown ( btCollisionWorld* collisionWorld
 	{
 		// we dropped a fraction of the height -> hit floor
 		m_currentPosition.setInterpolate3 (m_currentPosition, m_targetPosition, callback.m_closestHitFraction);
+		m_verticalVelocity = 0.0;
+		m_verticalOffset = 0.0;
+		m_wasJumping = false;
 	} else {
 		// we dropped the full height
 		
@@ -411,7 +471,7 @@ btScalar timeInterval
 //	printf("setVelocity!\n");
 //	printf("  interval: %f\n", timeInterval);
 //	printf("  velocity: (%f, %f, %f)\n",
-//	    velocity.x(), velocity.y(), velocity.z());
+//		 velocity.x(), velocity.y(), velocity.z());
 
 	m_useWalkDirection = false;
 	m_walkDirection = velocity;
@@ -445,7 +505,7 @@ void btKinematicCharacterController::preStep (  btCollisionWorld* collisionWorld
 		m_touchingContact = true;
 		if (numPenetrationLoops > 4)
 		{
-//			printf("character could not recover from penetration = %d\n", numPenetrationLoops);
+			//printf("character could not recover from penetration = %d\n", numPenetrationLoops);
 			break;
 		}
 	}
@@ -457,6 +517,8 @@ void btKinematicCharacterController::preStep (  btCollisionWorld* collisionWorld
 	
 }
 
+#include <stdio.h>
+
 void btKinematicCharacterController::playerStep (  btCollisionWorld* collisionWorld, btScalar dt)
 {
 //	printf("playerStep(): ");
@@ -468,6 +530,21 @@ void btKinematicCharacterController::playerStep (  btCollisionWorld* collisionWo
 		return;		// no motion
 	}
 
+	m_wasOnGround = onGround();
+
+	// Update fall velocity.
+	m_verticalVelocity -= m_gravity * dt;
+	if(m_verticalVelocity > 0.0 && m_verticalVelocity > m_jumpSpeed)
+	{
+		m_verticalVelocity = m_jumpSpeed;
+	}
+	if(m_verticalVelocity < 0.0 && btFabs(m_verticalVelocity) > btFabs(m_fallSpeed))
+	{
+		m_verticalVelocity = -btFabs(m_fallSpeed);
+	}
+	m_verticalOffset = m_verticalVelocity * dt;
+
+
 	btTransform xform;
 	xform = m_ghostObject->getWorldTransform ();
 
@@ -481,13 +558,13 @@ void btKinematicCharacterController::playerStep (  btCollisionWorld* collisionWo
 		//printf("  time: %f", m_velocityTimeInterval);
 		// still have some time left for moving!
 		btScalar dtMoving =
-		   (dt < m_velocityTimeInterval) ? dt : m_velocityTimeInterval;
+			(dt < m_velocityTimeInterval) ? dt : m_velocityTimeInterval;
 		m_velocityTimeInterval -= dt;
 
 		// how far will we move while we are moving?
 		btVector3 move = m_walkDirection * dtMoving;
 
-		// printf("  dtMoving: %f", dtMoving);
+		//printf("  dtMoving: %f", dtMoving);
 
 		// okay, step
 		stepForwardAndStrafe(collisionWorld, move);
@@ -525,6 +602,9 @@ void btKinematicCharacterController::jump ()
 	if (!canJump())
 		return;
 
+	m_verticalVelocity = m_jumpSpeed;
+	m_wasJumping = true;
+
 #if 0
 	currently no jumping.
 	btTransform xform;
@@ -536,12 +616,40 @@ void btKinematicCharacterController::jump ()
 #endif
 }
 
+void btKinematicCharacterController::setGravity(btScalar gravity) // stores the value that playerStep() multiplies by dt and subtracts from the vertical velocity
+{
+	m_gravity = gravity;
+}
+
+btScalar btKinematicCharacterController::getGravity() const // returns the value currently stored in m_gravity
+{
+	return m_gravity;
+}
+
+void btKinematicCharacterController::setMaxSlope(btScalar slopeRadians) // slopeRadians: maximum slope angle, in radians
+{
+	m_maxSlopeRadians = slopeRadians; // kept so getMaxSlope() can return the exact value that was set
+	m_maxSlopeCosine = btCos(slopeRadians); // cached cosine; stepDown() passes it to its sweep callback as the minimum normal dot product
+}
+
+btScalar btKinematicCharacterController::getMaxSlope() const // returns the angle (radians) last passed to setMaxSlope()
+{
+	return m_maxSlopeRadians;
+}
+
 bool btKinematicCharacterController::onGround () const
 {
-	return true;
+	return m_verticalVelocity == 0.0 && m_verticalOffset == 0.0; // true only while both are exactly zero; stepUp()/stepDown() reset them to 0.0 when their sweep reports a hit
 }
 
 
-void	btKinematicCharacterController::debugDraw(btIDebugDraw* debugDrawer)
+btVector3* btKinematicCharacterController::getUpAxisDirections() // returns a 3-element array of unit axes (X, Y, Z); callers index it with m_upAxis
+{
+	static btVector3 sUpAxisDirection[3] = { btVector3(1.0f, 0.0f, 0.0f), btVector3(0.0f, 1.0f, 0.0f), btVector3(0.0f, 0.0f, 1.0f) }; // function-local static replacing the former file-scope upAxisDirection array
+	
+	return sUpAxisDirection; // non-const pointer to the shared static array
+}
+
+void btKinematicCharacterController::debugDraw(btIDebugDraw* debugDrawer) // empty body: debugDrawer is unused and nothing is drawn
 {
 }
diff --git a/Engine/lib/bullet/src/BulletDynamics/Character/btKinematicCharacterController.h b/Engine/lib/bullet/src/BulletDynamics/Character/btKinematicCharacterController.h
index 4fc56c056..8ec63735c 100644
--- a/Engine/lib/bullet/src/BulletDynamics/Character/btKinematicCharacterController.h
+++ b/Engine/lib/bullet/src/BulletDynamics/Character/btKinematicCharacterController.h
@@ -13,8 +13,9 @@ subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-#ifndef KINEMATIC_CHARACTER_CONTROLLER_H
-#define KINEMATIC_CHARACTER_CONTROLLER_H
+
+#ifndef BT_KINEMATIC_CHARACTER_CONTROLLER_H
+#define BT_KINEMATIC_CHARACTER_CONTROLLER_H
 
 #include "LinearMath/btVector3.h"
 
@@ -24,6 +25,7 @@ subject to the following restrictions:
 
 
 class btCollisionShape;
+class btConvexShape;
 class btRigidBody;
 class btCollisionWorld;
 class btCollisionDispatcher;
@@ -32,17 +34,23 @@ class btPairCachingGhostObject;
 ///btKinematicCharacterController is an object that supports a sliding motion in a world.
 ///It uses a ghost object and convex sweep test to test for upcoming collisions. This is combined with discrete collision detection to recover from penetrations.
 ///Interaction between btKinematicCharacterController and dynamic rigid bodies needs to be explicity implemented by the user.
-class btKinematicCharacterController : public btCharacterControllerInterface
+ATTRIBUTE_ALIGNED16(class) btKinematicCharacterController : public btCharacterControllerInterface
 {
 protected:
+
 	btScalar m_halfHeight;
 	
 	btPairCachingGhostObject* m_ghostObject;
 	btConvexShape*	m_convexShape;//is also in m_ghostObject, but it needs to be convex, so we store it here to avoid upcast
 	
+	btScalar m_verticalVelocity;
+	btScalar m_verticalOffset;
 	btScalar m_fallSpeed;
 	btScalar m_jumpSpeed;
 	btScalar m_maxJumpHeight;
+	btScalar m_maxSlopeRadians; // Slope angle that is set (used for returning the exact value)
+	btScalar m_maxSlopeCosine;  // Cosine equivalent of m_maxSlopeRadians (calculated once when set, for optimization)
+	btScalar m_gravity;
 
 	btScalar m_turnAngle;
 	
@@ -65,11 +73,15 @@ protected:
 	bool m_touchingContact;
 	btVector3 m_touchingNormal;
 
+	bool  m_wasOnGround;
+	bool  m_wasJumping;
 	bool	m_useGhostObjectSweepTest;
 	bool	m_useWalkDirection;
-	float	m_velocityTimeInterval;
+	btScalar	m_velocityTimeInterval;
 	int m_upAxis;
-	
+
+	static btVector3* getUpAxisDirections();
+
 	btVector3 computeReflectionDirection (const btVector3& direction, const btVector3& normal);
 	btVector3 parallelComponent (const btVector3& direction, const btVector3& normal);
 	btVector3 perpindicularComponent (const btVector3& direction, const btVector3& normal);
@@ -80,6 +92,9 @@ protected:
 	void stepForwardAndStrafe (btCollisionWorld* collisionWorld, const btVector3& walkMove);
 	void stepDown (btCollisionWorld* collisionWorld, btScalar dt);
 public:
+
+	BT_DECLARE_ALIGNED_ALLOCATOR();
+
 	btKinematicCharacterController (btPairCachingGhostObject* ghostObject,btConvexShape* convexShape,btScalar stepHeight, int upAxis = 1);
 	~btKinematicCharacterController ();
 	
@@ -105,14 +120,14 @@ public:
 
 	/// This should probably be called setPositionIncrementPerSimulatorStep.
 	/// This is neither a direction nor a velocity, but the amount to
-	///   increment the position each simulation iteration, regardless
-	///   of dt.
+	///	increment the position each simulation iteration, regardless
+	///	of dt.
 	/// This call will reset any velocity set by setVelocityForTimeInterval().
 	virtual void	setWalkDirection(const btVector3& walkDirection);
 
 	/// Caller provides a velocity with which the character should move for
-	///   the given time period.  After the time period, velocity is reset
-	///   to zero.
+	///	the given time period.  After the time period, velocity is reset
+	///	to zero.
 	/// This call will reset any walk direction set by setWalkDirection().
 	/// Negative time intervals will result in no motion.
 	virtual void setVelocityForTimeInterval(const btVector3& velocity,
@@ -128,8 +143,17 @@ public:
 	void setJumpSpeed (btScalar jumpSpeed);
 	void setMaxJumpHeight (btScalar maxJumpHeight);
 	bool canJump () const;
+
 	void jump ();
 
+	void setGravity(btScalar gravity);
+	btScalar getGravity() const;
+
+	/// The max slope determines the maximum angle that the controller can walk up.
+	/// The slope angle is measured in radians.
+	void setMaxSlope(btScalar slopeRadians);
+	btScalar getMaxSlope() const;
+
 	btPairCachingGhostObject* getGhostObject();
 	void	setUseGhostSweepTest(bool useGhostObjectSweepTest)
 	{
@@ -139,4 +163,4 @@ public:
 	bool onGround () const;
 };
 
-#endif // KINEMATIC_CHARACTER_CONTROLLER_H
+#endif // BT_KINEMATIC_CHARACTER_CONTROLLER_H
diff --git a/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btConeTwistConstraint.cpp b/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btConeTwistConstraint.cpp
index 03a851219..45589275e 100644
--- a/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btConeTwistConstraint.cpp
+++ b/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btConeTwistConstraint.cpp
@@ -36,11 +36,6 @@ SIMD_FORCE_INLINE btScalar computeAngularImpulseDenominator(const btVector3& axi
 }
 
 
-btConeTwistConstraint::btConeTwistConstraint()
-:btTypedConstraint(CONETWIST_CONSTRAINT_TYPE),
-m_useSolveConstraintObsolete(CONETWIST_USE_OBSOLETE_SOLVER)
-{
-}
 
 
 btConeTwistConstraint::btConeTwistConstraint(btRigidBody& rbA,btRigidBody& rbB, 
@@ -73,6 +68,10 @@ void btConeTwistConstraint::init()
 	setLimit(btScalar(BT_LARGE_FLOAT), btScalar(BT_LARGE_FLOAT), btScalar(BT_LARGE_FLOAT));
 	m_damping = btScalar(0.01);
 	m_fixThresh = CONETWIST_DEF_FIX_THRESH;
+	m_flags = 0;
+	m_linCFM = btScalar(0.f);
+	m_linERP = btScalar(0.7f);
+	m_angCFM = btScalar(0.f);
 }
 
 
@@ -145,13 +144,18 @@ void btConeTwistConstraint::getInfo2NonVirtual (btConstraintInfo2* info,const bt
 		a2.getSkewSymmetricMatrix(angular0,angular1,angular2);
 	}
     // set right hand side
-    btScalar k = info->fps * info->erp;
+	btScalar linERP = (m_flags & BT_CONETWIST_FLAGS_LIN_ERP) ? m_linERP : info->erp;
+    btScalar k = info->fps * linERP;
     int j;
 	for (j=0; j<3; j++)
     {
         info->m_constraintError[j*info->rowskip] = k * (a2[j] + transB.getOrigin()[j] - a1[j] - transA.getOrigin()[j]);
 		info->m_lowerLimit[j*info->rowskip] = -SIMD_INFINITY;
 		info->m_upperLimit[j*info->rowskip] = SIMD_INFINITY;
+		if(m_flags & BT_CONETWIST_FLAGS_LIN_CFM)
+		{
+			info->cfm[j*info->rowskip] = m_linCFM;
+		}
     }
 	int row = 3;
     int srow = row * info->rowskip;
@@ -200,7 +204,10 @@ void btConeTwistConstraint::getInfo2NonVirtual (btConstraintInfo2* info,const bt
 			btScalar k = info->fps * m_biasFactor;
 
 			info->m_constraintError[srow] = k * m_swingCorrection;
-			info->cfm[srow] = 0.0f;
+			if(m_flags & BT_CONETWIST_FLAGS_ANG_CFM)
+			{
+				info->cfm[srow] = m_angCFM;
+			}
 			// m_swingCorrection is always positive or 0
 			info->m_lowerLimit[srow] = 0;
 			info->m_upperLimit[srow] = SIMD_INFINITY;
@@ -220,7 +227,10 @@ void btConeTwistConstraint::getInfo2NonVirtual (btConstraintInfo2* info,const bt
 		J2[srow+2] = -ax1[2];
 		btScalar k = info->fps * m_biasFactor;
 		info->m_constraintError[srow] = k * m_twistCorrection;
-		info->cfm[srow] = 0.0f;
+		if(m_flags & BT_CONETWIST_FLAGS_ANG_CFM)
+		{
+			info->cfm[srow] = m_angCFM;
+		}
 		if(m_twistSpan > 0.0f)
 		{
 
@@ -311,9 +321,9 @@ void	btConeTwistConstraint::solveConstraintObsolete(btSolverBody& bodyA,btSolver
 			btVector3 rel_pos2 = pivotBInW - m_rbB.getCenterOfMassPosition();
 
 			btVector3 vel1;
-			bodyA.getVelocityInLocalPointObsolete(rel_pos1,vel1);
+			bodyA.internalGetVelocityInLocalPointObsolete(rel_pos1,vel1);
 			btVector3 vel2;
-			bodyB.getVelocityInLocalPointObsolete(rel_pos2,vel2);
+			bodyB.internalGetVelocityInLocalPointObsolete(rel_pos2,vel2);
 			btVector3 vel = vel1 - vel2;
 
 			for (int i=0;i<3;i++)
@@ -330,8 +340,8 @@ void	btConeTwistConstraint::solveConstraintObsolete(btSolverBody& bodyA,btSolver
 				
 				btVector3 ftorqueAxis1 = rel_pos1.cross(normal);
 				btVector3 ftorqueAxis2 = rel_pos2.cross(normal);
-				bodyA.applyImpulse(normal*m_rbA.getInvMass(), m_rbA.getInvInertiaTensorWorld()*ftorqueAxis1,impulse);
-				bodyB.applyImpulse(normal*m_rbB.getInvMass(), m_rbB.getInvInertiaTensorWorld()*ftorqueAxis2,-impulse);
+				bodyA.internalApplyImpulse(normal*m_rbA.getInvMass(), m_rbA.getInvInertiaTensorWorld()*ftorqueAxis1,impulse);
+				bodyB.internalApplyImpulse(normal*m_rbB.getInvMass(), m_rbB.getInvInertiaTensorWorld()*ftorqueAxis2,-impulse);
 		
 			}
 		}
@@ -342,8 +352,8 @@ void	btConeTwistConstraint::solveConstraintObsolete(btSolverBody& bodyA,btSolver
 			// compute current and predicted transforms
 			btTransform trACur = m_rbA.getCenterOfMassTransform();
 			btTransform trBCur = m_rbB.getCenterOfMassTransform();
-			btVector3 omegaA; bodyA.getAngularVelocity(omegaA);
-			btVector3 omegaB; bodyB.getAngularVelocity(omegaB);
+			btVector3 omegaA; bodyA.internalGetAngularVelocity(omegaA);
+			btVector3 omegaB; bodyB.internalGetAngularVelocity(omegaB);
 			btTransform trAPred; trAPred.setIdentity(); 
 			btVector3 zerovec(0,0,0);
 			btTransformUtil::integrateTransform(
@@ -417,15 +427,15 @@ void	btConeTwistConstraint::solveConstraintObsolete(btSolverBody& bodyA,btSolver
 				btScalar  impulseMag  = impulse.length();
 				btVector3 impulseAxis =  impulse / impulseMag;
 
-				bodyA.applyImpulse(btVector3(0,0,0), m_rbA.getInvInertiaTensorWorld()*impulseAxis, impulseMag);
-				bodyB.applyImpulse(btVector3(0,0,0), m_rbB.getInvInertiaTensorWorld()*impulseAxis, -impulseMag);
+				bodyA.internalApplyImpulse(btVector3(0,0,0), m_rbA.getInvInertiaTensorWorld()*impulseAxis, impulseMag);
+				bodyB.internalApplyImpulse(btVector3(0,0,0), m_rbB.getInvInertiaTensorWorld()*impulseAxis, -impulseMag);
 
 			}
 		}
 		else if (m_damping > SIMD_EPSILON) // no motor: do a little damping
 		{
-			btVector3 angVelA; bodyA.getAngularVelocity(angVelA);
-			btVector3 angVelB; bodyB.getAngularVelocity(angVelB);
+			btVector3 angVelA; bodyA.internalGetAngularVelocity(angVelA);
+			btVector3 angVelB; bodyB.internalGetAngularVelocity(angVelB);
 			btVector3 relVel = angVelB - angVelA;
 			if (relVel.length2() > SIMD_EPSILON)
 			{
@@ -437,8 +447,8 @@ void	btConeTwistConstraint::solveConstraintObsolete(btSolverBody& bodyA,btSolver
 
 				btScalar  impulseMag  = impulse.length();
 				btVector3 impulseAxis = impulse / impulseMag;
-				bodyA.applyImpulse(btVector3(0,0,0), m_rbA.getInvInertiaTensorWorld()*impulseAxis, impulseMag);
-				bodyB.applyImpulse(btVector3(0,0,0), m_rbB.getInvInertiaTensorWorld()*impulseAxis, -impulseMag);
+				bodyA.internalApplyImpulse(btVector3(0,0,0), m_rbA.getInvInertiaTensorWorld()*impulseAxis, impulseMag);
+				bodyB.internalApplyImpulse(btVector3(0,0,0), m_rbB.getInvInertiaTensorWorld()*impulseAxis, -impulseMag);
 			}
 		}
 
@@ -446,9 +456,9 @@ void	btConeTwistConstraint::solveConstraintObsolete(btSolverBody& bodyA,btSolver
 		{
 			///solve angular part
 			btVector3 angVelA;
-			bodyA.getAngularVelocity(angVelA);
+			bodyA.internalGetAngularVelocity(angVelA);
 			btVector3 angVelB;
-			bodyB.getAngularVelocity(angVelB);
+			bodyB.internalGetAngularVelocity(angVelB);
 
 			// solve swing limit
 			if (m_solveSwingLimit)
@@ -477,8 +487,8 @@ void	btConeTwistConstraint::solveConstraintObsolete(btSolverBody& bodyA,btSolver
 				impulseMag = impulse.length();
 				btVector3 noTwistSwingAxis = impulse / impulseMag;
 
-				bodyA.applyImpulse(btVector3(0,0,0), m_rbA.getInvInertiaTensorWorld()*noTwistSwingAxis, impulseMag);
-				bodyB.applyImpulse(btVector3(0,0,0), m_rbB.getInvInertiaTensorWorld()*noTwistSwingAxis, -impulseMag);
+				bodyA.internalApplyImpulse(btVector3(0,0,0), m_rbA.getInvInertiaTensorWorld()*noTwistSwingAxis, impulseMag);
+				bodyB.internalApplyImpulse(btVector3(0,0,0), m_rbB.getInvInertiaTensorWorld()*noTwistSwingAxis, -impulseMag);
 			}
 
 
@@ -496,10 +506,10 @@ void	btConeTwistConstraint::solveConstraintObsolete(btSolverBody& bodyA,btSolver
 				m_accTwistLimitImpulse = btMax(m_accTwistLimitImpulse + impulseMag, btScalar(0.0) );
 				impulseMag = m_accTwistLimitImpulse - temp;
 
-				btVector3 impulse = m_twistAxis * impulseMag;
+		//		btVector3 impulse = m_twistAxis * impulseMag;
 
-				bodyA.applyImpulse(btVector3(0,0,0), m_rbA.getInvInertiaTensorWorld()*m_twistAxis,impulseMag);
-				bodyB.applyImpulse(btVector3(0,0,0), m_rbB.getInvInertiaTensorWorld()*m_twistAxis,-impulseMag);
+				bodyA.internalApplyImpulse(btVector3(0,0,0), m_rbA.getInvInertiaTensorWorld()*m_twistAxis,impulseMag);
+				bodyB.internalApplyImpulse(btVector3(0,0,0), m_rbB.getInvInertiaTensorWorld()*m_twistAxis,-impulseMag);
 			}		
 		}
 	}
@@ -628,6 +638,11 @@ void btConeTwistConstraint::calcAngleInfo2(const btTransform& transA, const btTr
 		btTransform trDeltaAB = trB * trPose * trA.inverse();
 		btQuaternion qDeltaAB = trDeltaAB.getRotation();
 		btVector3 swingAxis = 	btVector3(qDeltaAB.x(), qDeltaAB.y(), qDeltaAB.z());
+		btScalar swingAxisLen2 = swingAxis.length2(); // btScalar rather than float: no narrowing when btScalar is double
+		if(btFuzzyZero(swingAxisLen2))
+		{
+		   return; // zero-length swing axis: leave before it is copied to m_swingAxis and normalized
+		}
 		m_swingAxis = swingAxis;
 		m_swingAxis.normalize();
 		m_swingCorrection = qDeltaAB.getAngle();
@@ -813,12 +828,11 @@ void btConeTwistConstraint::computeConeLimitInfo(const btQuaternion& qCone,
 	{
 		vSwingAxis = btVector3(qCone.x(), qCone.y(), qCone.z());
 		vSwingAxis.normalize();
-		if (fabs(vSwingAxis.x()) > SIMD_EPSILON)
-		{
-			// non-zero twist?! this should never happen.
-			int wtf = 0; wtf = wtf;
-		}
-
+#if 0
+        // non-zero twist?! this should never happen. (Check is disabled; parentheses balanced so it compiles if re-enabled.)
+       btAssert(fabs(vSwingAxis.x()) <= SIMD_EPSILON);
+#endif
+        
 		// Compute limit for given swing. tricky:
 		// Given a swing axis, we're looking for the intersection with the bounding cone ellipse.
 		// (Since we're dealing with angles, this ellipse is embedded on the surface of a sphere.)
@@ -862,8 +876,10 @@ void btConeTwistConstraint::computeConeLimitInfo(const btQuaternion& qCone,
 	else if (swingAngle < 0)
 	{
 		// this should never happen!
-		int wtf = 0; wtf = wtf;
-	}
+#if 0
+        btAssert(0);
+#endif
+ 	}
 }
 
 btVector3 btConeTwistConstraint::GetPointForAngle(btScalar fAngleInRadians, btScalar fLength) const
@@ -908,13 +924,15 @@ void btConeTwistConstraint::computeTwistLimitInfo(const btQuaternion& qTwist,
 
 	if (twistAngle > SIMD_PI) // long way around. flip quat and recalculate.
 	{
-		qMinTwist = operator-(qTwist);
+		qMinTwist = -(qTwist);
 		twistAngle = qMinTwist.getAngle();
 	}
 	if (twistAngle < 0)
 	{
 		// this should never happen
-		int wtf = 0; wtf = wtf;			
+#if 0
+        btAssert(0);
+#endif
 	}
 
 	vTwistAxis = btVector3(qMinTwist.x(), qMinTwist.y(), qMinTwist.z());
@@ -961,10 +979,10 @@ void btConeTwistConstraint::setMotorTarget(const btQuaternion &q)
 {
 	btTransform trACur = m_rbA.getCenterOfMassTransform();
 	btTransform trBCur = m_rbB.getCenterOfMassTransform();
-	btTransform trABCur = trBCur.inverse() * trACur;
-	btQuaternion qABCur = trABCur.getRotation();
-	btTransform trConstraintCur = (trBCur * m_rbBFrame).inverse() * (trACur * m_rbAFrame);
-	btQuaternion qConstraintCur = trConstraintCur.getRotation();
+//	btTransform trABCur = trBCur.inverse() * trACur;
+//	btQuaternion qABCur = trABCur.getRotation();
+//	btTransform trConstraintCur = (trBCur * m_rbBFrame).inverse() * (trACur * m_rbAFrame);
+	//btQuaternion qConstraintCur = trConstraintCur.getRotation();
 
 	btQuaternion qConstraint = m_rbBFrame.getRotation().inverse() * q * m_rbAFrame.getRotation();
 	setMotorTargetInConstraintSpace(qConstraint);
@@ -1021,6 +1039,97 @@ void btConeTwistConstraint::setMotorTargetInConstraintSpace(const btQuaternion &
 	}
 }
 
+///override the default global value of a parameter (such as ERP or CFM), optionally provide the axis (0..5). 
+///If no axis is provided, it uses the default axis for this constraint.
+void btConeTwistConstraint::setParam(int num, btScalar value, int axis)
+{
+	switch(num)
+	{
+		case BT_CONSTRAINT_ERP :
+		case BT_CONSTRAINT_STOP_ERP :
+			if((axis >= 0) && (axis < 3)) 
+			{
+				m_linERP = value;
+				m_flags |= BT_CONETWIST_FLAGS_LIN_ERP;
+			}
+			else
+			{
+				m_biasFactor = value;
+			}
+			break;
+		case BT_CONSTRAINT_CFM :
+		case BT_CONSTRAINT_STOP_CFM :
+			if((axis >= 0) && (axis < 3)) 
+			{
+				m_linCFM = value;
+				m_flags |= BT_CONETWIST_FLAGS_LIN_CFM;
+			}
+			else
+			{
+				m_angCFM = value;
+				m_flags |= BT_CONETWIST_FLAGS_ANG_CFM;
+			}
+			break;
+		default:
+			btAssertConstrParams(0);
+			break;
+	}
+}
+
+///return the local value of parameter
+btScalar btConeTwistConstraint::getParam(int num, int axis) const 
+{
+	btScalar retVal = 0;
+	switch(num)
+	{
+		case BT_CONSTRAINT_ERP :
+		case BT_CONSTRAINT_STOP_ERP :
+			if((axis >= 0) && (axis < 3)) 
+			{
+				btAssertConstrParams(m_flags & BT_CONETWIST_FLAGS_LIN_ERP);
+				retVal = m_linERP;
+			}
+			else if((axis >= 3) && (axis < 6)) 
+			{
+				retVal = m_biasFactor;
+			}
+			else
+			{
+				btAssertConstrParams(0);
+			}
+			break;
+		case BT_CONSTRAINT_CFM :
+		case BT_CONSTRAINT_STOP_CFM :
+			if((axis >= 0) && (axis < 3)) 
+			{
+				btAssertConstrParams(m_flags & BT_CONETWIST_FLAGS_LIN_CFM);
+				retVal = m_linCFM;
+			}
+			else if((axis >= 3) && (axis < 6)) 
+			{
+				btAssertConstrParams(m_flags & BT_CONETWIST_FLAGS_ANG_CFM);
+				retVal = m_angCFM;
+			}
+			else
+			{
+				btAssertConstrParams(0);
+			}
+			break;
+		default : 
+			btAssertConstrParams(0);
+	}
+	return retVal;
+}
 
 
+void btConeTwistConstraint::setFrames(const btTransform & frameA, const btTransform & frameB)
+{
+	m_rbAFrame = frameA;
+	m_rbBFrame = frameB;
+	buildJacobian();
+	//calculateTransforms();
+}
+
+ 
+
 
diff --git a/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btConeTwistConstraint.h b/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btConeTwistConstraint.h
index db1e061db..09c048bed 100644
--- a/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btConeTwistConstraint.h
+++ b/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btConeTwistConstraint.h
@@ -33,8 +33,8 @@ and swing 1 and 2 are along the z and y axes respectively.
 
 
 
-#ifndef CONETWISTCONSTRAINT_H
-#define CONETWISTCONSTRAINT_H
+#ifndef BT_CONETWISTCONSTRAINT_H
+#define BT_CONETWISTCONSTRAINT_H
 
 #include "LinearMath/btVector3.h"
 #include "btJacobianEntry.h"
@@ -42,9 +42,15 @@ and swing 1 and 2 are along the z and y axes respectively.
 
 class btRigidBody;
 
+enum btConeTwistFlags
+{
+	BT_CONETWIST_FLAGS_LIN_CFM = 1,
+	BT_CONETWIST_FLAGS_LIN_ERP = 2,
+	BT_CONETWIST_FLAGS_ANG_CFM = 4
+};
 
 ///btConeTwistConstraint can be used to simulate ragdoll joints (upper arm, leg etc)
-class btConeTwistConstraint : public btTypedConstraint
+ATTRIBUTE_ALIGNED16(class) btConeTwistConstraint : public btTypedConstraint
 {
 #ifdef IN_PARALLELL_SOLVER
 public:
@@ -99,14 +105,33 @@ public:
 	btScalar	 m_maxMotorImpulse;
 	btVector3	 m_accMotorImpulse;
 	
+	// parameters
+	int			m_flags;
+	btScalar	m_linCFM;
+	btScalar	m_linERP;
+	btScalar	m_angCFM;
+	
+protected:
+
+	void init();
+
+	void computeConeLimitInfo(const btQuaternion& qCone, // in
+		btScalar& swingAngle, btVector3& vSwingAxis, btScalar& swingLimit); // all outs
+
+	void computeTwistLimitInfo(const btQuaternion& qTwist, // in
+		btScalar& twistAngle, btVector3& vTwistAxis); // all outs
+
+	void adjustSwingAxisToUseEllipseNormal(btVector3& vSwingAxis) const;
+
+
 public:
 
+	BT_DECLARE_ALIGNED_ALLOCATOR();
+
 	btConeTwistConstraint(btRigidBody& rbA,btRigidBody& rbB,const btTransform& rbAFrame, const btTransform& rbBFrame);
 	
 	btConeTwistConstraint(btRigidBody& rbA,const btTransform& rbAFrame);
 
-	btConeTwistConstraint();
-
 	virtual void	buildJacobian();
 
 	virtual void getInfo1 (btConstraintInfo1* info);
@@ -119,8 +144,10 @@ public:
 
 	virtual	void	solveConstraintObsolete(btSolverBody& bodyA,btSolverBody& bodyB,btScalar	timeStep);
 
+    
 	void	updateRHS(btScalar	timeStep);
 
+
 	const btRigidBody& getRigidBodyA() const
 	{
 		return m_rbA;
@@ -221,7 +248,6 @@ public:
 	}
 	bool isPastSwingLimit() { return m_solveSwingLimit; }
 
-
 	void setDamping(btScalar damping) { m_damping = damping; }
 
 	void enableMotor(bool b) { m_bMotorEnabled = b; }
@@ -242,18 +268,82 @@ public:
 
 	btVector3 GetPointForAngle(btScalar fAngleInRadians, btScalar fLength) const;
 
+	///override the default global value of a parameter (such as ERP or CFM), optionally provide the axis (0..5). 
+	///If no axis is provided, it uses the default axis for this constraint.
+	virtual	void setParam(int num, btScalar value, int axis = -1);
+
+	virtual void setFrames(const btTransform& frameA, const btTransform& frameB);
+
+	const btTransform& getFrameOffsetA() const
+	{
+		return m_rbAFrame;
+	}
+
+	const btTransform& getFrameOffsetB() const
+	{
+		return m_rbBFrame;
+	}
 
 
-protected:
-	void init();
+	///return the local value of parameter
+	virtual	btScalar getParam(int num, int axis = -1) const;
 
-	void computeConeLimitInfo(const btQuaternion& qCone, // in
-		btScalar& swingAngle, btVector3& vSwingAxis, btScalar& swingLimit); // all outs
+	virtual	int	calculateSerializeBufferSize() const;
 
-	void computeTwistLimitInfo(const btQuaternion& qTwist, // in
-		btScalar& twistAngle, btVector3& vTwistAxis); // all outs
+	///fills the dataBuffer and returns the struct name (and 0 on failure)
+	virtual	const char*	serialize(void* dataBuffer, btSerializer* serializer) const;
 
-	void adjustSwingAxisToUseEllipseNormal(btVector3& vSwingAxis) const;
 };
 
-#endif //CONETWISTCONSTRAINT_H
+///do not change those serialization structures, it requires an updated sBulletDNAstr/sBulletDNAstr64
+struct	btConeTwistConstraintData
+{
+	btTypedConstraintData	m_typeConstraintData;
+	btTransformFloatData m_rbAFrame;
+	btTransformFloatData m_rbBFrame;
+
+	//limits
+	float	m_swingSpan1;
+	float	m_swingSpan2;
+	float	m_twistSpan;
+	float	m_limitSoftness;
+	float	m_biasFactor;
+	float	m_relaxationFactor;
+
+	float	m_damping;
+		
+	char m_pad[4];
+
+};
+	
+
+
+SIMD_FORCE_INLINE int	btConeTwistConstraint::calculateSerializeBufferSize() const
+{
+	return sizeof(btConeTwistConstraintData);
+
+}
+
+
+	///fills the dataBuffer and returns the struct name (and 0 on failure)
+SIMD_FORCE_INLINE const char*	btConeTwistConstraint::serialize(void* dataBuffer, btSerializer* serializer) const
+{
+	btConeTwistConstraintData* cone = (btConeTwistConstraintData*) dataBuffer;
+	btTypedConstraint::serialize(&cone->m_typeConstraintData,serializer);
+
+	m_rbAFrame.serializeFloat(cone->m_rbAFrame);
+	m_rbBFrame.serializeFloat(cone->m_rbBFrame);
+	
+	cone->m_swingSpan1 = float(m_swingSpan1);
+	cone->m_swingSpan2 = float(m_swingSpan2);
+	cone->m_twistSpan = float(m_twistSpan);
+	cone->m_limitSoftness = float(m_limitSoftness);
+	cone->m_biasFactor = float(m_biasFactor);
+	cone->m_relaxationFactor = float(m_relaxationFactor);
+	cone->m_damping = float(m_damping);
+
+	return "btConeTwistConstraintData";
+}
+
+
+#endif //BT_CONETWISTCONSTRAINT_H
diff --git a/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btConstraintSolver.h b/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btConstraintSolver.h
index 7a8e9c195..6f673102b 100644
--- a/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btConstraintSolver.h
+++ b/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btConstraintSolver.h
@@ -13,8 +13,8 @@ subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-#ifndef CONSTRAINT_SOLVER_H
-#define CONSTRAINT_SOLVER_H
+#ifndef BT_CONSTRAINT_SOLVER_H
+#define BT_CONSTRAINT_SOLVER_H
 
 #include "LinearMath/btScalar.h"
 
@@ -49,4 +49,4 @@ public:
 
 
 
-#endif //CONSTRAINT_SOLVER_H
+#endif //BT_CONSTRAINT_SOLVER_H
diff --git a/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btContactConstraint.cpp b/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btContactConstraint.cpp
index 72116c6ba..9d60d9957 100644
--- a/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btContactConstraint.cpp
+++ b/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btContactConstraint.cpp
@@ -22,10 +22,7 @@ subject to the following restrictions:
 #include "LinearMath/btMinMax.h"
 #include "BulletCollision/NarrowPhaseCollision/btManifoldPoint.h"
 
-btContactConstraint::btContactConstraint()
-:btTypedConstraint(CONTACT_CONSTRAINT_TYPE)
-{
-}
+
 
 btContactConstraint::btContactConstraint(btPersistentManifold* contactManifold,btRigidBody& rbA,btRigidBody& rbB)
 :btTypedConstraint(CONTACT_CONSTRAINT_TYPE,rbA,rbB),
@@ -59,10 +56,6 @@ void	btContactConstraint::buildJacobian()
 
 }
 
-void	btContactConstraint::solveConstraintObsolete(btSolverBody& bodyA,btSolverBody& bodyB,btScalar	timeStep)
-{
-
-}
 
 
 
@@ -75,9 +68,53 @@ void	btContactConstraint::solveConstraintObsolete(btSolverBody& bodyA,btSolverBo
 #include "LinearMath/btMinMax.h"
 #include "BulletCollision/NarrowPhaseCollision/btManifoldPoint.h"
 
-#define ASSERT2 btAssert
 
-#define USE_INTERNAL_APPLY_IMPULSE 1
+
+//response  between two dynamic objects without friction and no restitution, assuming 0 penetration depth
+btScalar resolveSingleCollision(
+        btRigidBody* body1,
+        btCollisionObject* colObj2,
+		const btVector3& contactPositionWorld,
+		const btVector3& contactNormalOnB,
+        const btContactSolverInfo& solverInfo,
+		btScalar distance)
+{
+	btRigidBody* body2 = btRigidBody::upcast(colObj2);
+    
+	
+    const btVector3& normal = contactNormalOnB;
+
+    btVector3 rel_pos1 = contactPositionWorld - body1->getWorldTransform().getOrigin(); 
+    btVector3 rel_pos2 = contactPositionWorld - colObj2->getWorldTransform().getOrigin();
+    
+    btVector3 vel1 = body1->getVelocityInLocalPoint(rel_pos1);
+	btVector3 vel2 = body2? body2->getVelocityInLocalPoint(rel_pos2) : btVector3(0,0,0);
+    btVector3 vel = vel1 - vel2;
+    btScalar rel_vel;
+    rel_vel = normal.dot(vel);
+    
+    btScalar combinedRestitution = 0.f;
+    btScalar restitution = combinedRestitution* -rel_vel;
+
+    btScalar positionalError = solverInfo.m_erp *-distance /solverInfo.m_timeStep ;
+    btScalar velocityError = -(1.0f + restitution) * rel_vel;// * damping;
+	btScalar denom0 = body1->computeImpulseDenominator(contactPositionWorld,normal);
+	btScalar denom1 = body2? body2->computeImpulseDenominator(contactPositionWorld,normal) : 0.f;
+	btScalar relaxation = 1.f;
+	btScalar jacDiagABInv = relaxation/(denom0+denom1);
+
+    btScalar penetrationImpulse = positionalError * jacDiagABInv;
+    btScalar velocityImpulse = velocityError * jacDiagABInv;
+
+    btScalar normalImpulse = penetrationImpulse+velocityImpulse;
+    normalImpulse = 0.f > normalImpulse ? 0.f: normalImpulse;
+
+	body1->applyImpulse(normal*(normalImpulse), rel_pos1);
+    if (body2)
+		body2->applyImpulse(-normal*(normalImpulse), rel_pos2);
+    
+    return normalImpulse;
+}
 
 
 //bilateral constraint between two dynamic objects
@@ -90,7 +127,7 @@ void resolveSingleBilateral(btRigidBody& body1, const btVector3& pos1,
 
 
 	btScalar normalLenSqr = normal.length2();
-	ASSERT2(btFabs(normalLenSqr) < btScalar(1.1));
+	btAssert(btFabs(normalLenSqr) < btScalar(1.1));
 	if (normalLenSqr > btScalar(1.1))
 	{
 		impulse = btScalar(0.);
diff --git a/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btContactConstraint.h b/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btContactConstraint.h
index 481b89e54..477c79d17 100644
--- a/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btContactConstraint.h
+++ b/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btContactConstraint.h
@@ -13,8 +13,8 @@ subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-#ifndef CONTACT_CONSTRAINT_H
-#define CONTACT_CONSTRAINT_H
+#ifndef BT_CONTACT_CONSTRAINT_H
+#define BT_CONTACT_CONSTRAINT_H
 
 #include "LinearMath/btVector3.h"
 #include "btJacobianEntry.h"
@@ -30,7 +30,6 @@ protected:
 
 public:
 
-	btContactConstraint();
 
 	btContactConstraint(btPersistentManifold* contactManifold,btRigidBody& rbA,btRigidBody& rbB);
 
@@ -55,11 +54,12 @@ public:
 	///obsolete methods
 	virtual void	buildJacobian();
 
-	///obsolete methods
-	virtual	void	solveConstraintObsolete(btSolverBody& bodyA,btSolverBody& bodyB,btScalar	timeStep);
 
 };
 
+///very basic collision resolution without friction
+btScalar resolveSingleCollision(btRigidBody* body1, class btCollisionObject* colObj2, const btVector3& contactPositionWorld,const btVector3& contactNormalOnB, const struct btContactSolverInfo& solverInfo,btScalar distance);
+
 
 ///resolveSingleBilateral is an obsolete methods used for vehicle friction between two dynamic objects
 void resolveSingleBilateral(btRigidBody& body1, const btVector3& pos1,
@@ -68,4 +68,4 @@ void resolveSingleBilateral(btRigidBody& body1, const btVector3& pos1,
 
 
 
-#endif //CONTACT_CONSTRAINT_H
+#endif //BT_CONTACT_CONSTRAINT_H
diff --git a/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btContactSolverInfo.h b/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btContactSolverInfo.h
index 3293c8de9..c07e9bbd8 100644
--- a/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btContactSolverInfo.h
+++ b/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btContactSolverInfo.h
@@ -13,21 +13,23 @@ subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-#ifndef CONTACT_SOLVER_INFO
-#define CONTACT_SOLVER_INFO
+#ifndef BT_CONTACT_SOLVER_INFO
+#define BT_CONTACT_SOLVER_INFO
+
+#include "LinearMath/btScalar.h"
 
 enum	btSolverMode
 {
 	SOLVER_RANDMIZE_ORDER = 1,
 	SOLVER_FRICTION_SEPARATE = 2,
 	SOLVER_USE_WARMSTARTING = 4,
-	SOLVER_USE_FRICTION_WARMSTARTING = 8,
 	SOLVER_USE_2_FRICTION_DIRECTIONS = 16,
 	SOLVER_ENABLE_FRICTION_DIRECTION_CACHING = 32,
 	SOLVER_DISABLE_VELOCITY_DEPENDENT_FRICTION_DIRECTION = 64,
 	SOLVER_CACHE_FRIENDLY = 128,
-	SOLVER_SIMD = 256,	//enabled for Windows, the solver innerloop is branchless SIMD, 40% faster than FPU/scalar version
-	SOLVER_CUDA = 512	//will be open sourced during Game Developers Conference 2009. Much faster.
+	SOLVER_SIMD = 256,
+	SOLVER_INTERLEAVE_CONTACT_AND_FRICTION_CONSTRAINTS = 512,
+	SOLVER_ALLOW_ZERO_LENGTH_FRICTION_DIRECTIONS = 1024
 };
 
 struct btContactSolverInfoData
@@ -35,7 +37,7 @@ struct btContactSolverInfoData
 	
 
 	btScalar	m_tau;
-	btScalar	m_damping;
+	btScalar	m_damping;//global non-contact constraint damping, can be locally overridden by constraints during 'getInfo2'.
 	btScalar	m_friction;
 	btScalar	m_timeStep;
 	btScalar	m_restitution;
@@ -47,11 +49,15 @@ struct btContactSolverInfoData
 	btScalar	m_globalCfm;//constraint force mixing
 	int			m_splitImpulse;
 	btScalar	m_splitImpulsePenetrationThreshold;
+	btScalar	m_splitImpulseTurnErp;
 	btScalar	m_linearSlop;
 	btScalar	m_warmstartingFactor;
 
 	int			m_solverMode;
 	int	m_restingContactRestitutionThreshold;
+	int			m_minimumSolverBatchSize;
+	btScalar	m_maxGyroscopicForce;
+	btScalar	m_singleAxisRollingFrictionThreshold;
 
 
 };
@@ -66,20 +72,88 @@ struct btContactSolverInfo : public btContactSolverInfoData
 		m_tau = btScalar(0.6);
 		m_damping = btScalar(1.0);
 		m_friction = btScalar(0.3);
+		m_timeStep = btScalar(1.f/60.f);
 		m_restitution = btScalar(0.);
 		m_maxErrorReduction = btScalar(20.);
 		m_numIterations = 10;
 		m_erp = btScalar(0.2);
-		m_erp2 = btScalar(0.1);
+		m_erp2 = btScalar(0.8);
 		m_globalCfm = btScalar(0.);
 		m_sor = btScalar(1.);
-		m_splitImpulse = false;
-		m_splitImpulsePenetrationThreshold = -0.02f;
+		m_splitImpulse = true;
+		m_splitImpulsePenetrationThreshold = -.04f;
+		m_splitImpulseTurnErp = 0.1f;
 		m_linearSlop = btScalar(0.0);
 		m_warmstartingFactor=btScalar(0.85);
+		//m_solverMode =  SOLVER_USE_WARMSTARTING |  SOLVER_SIMD | SOLVER_DISABLE_VELOCITY_DEPENDENT_FRICTION_DIRECTION|SOLVER_USE_2_FRICTION_DIRECTIONS|SOLVER_ENABLE_FRICTION_DIRECTION_CACHING;// | SOLVER_RANDMIZE_ORDER;
 		m_solverMode = SOLVER_USE_WARMSTARTING | SOLVER_SIMD;// | SOLVER_RANDMIZE_ORDER;
-		m_restingContactRestitutionThreshold = 2;//resting contact lifetime threshold to disable restitution
+		m_restingContactRestitutionThreshold = 2;//unused as of 2.81
+		m_minimumSolverBatchSize = 128; //try to combine islands until the amount of constraints reaches this limit
+		m_maxGyroscopicForce = 100.f; ///only used to clamp forces for bodies that have their BT_ENABLE_GYROPSCOPIC_FORCE flag set (using btRigidBody::setFlag)
+		m_singleAxisRollingFrictionThreshold = 1e30f;///if the velocity is above this threshold, it will use a single constraint row (axis), otherwise 3 rows.
 	}
 };
 
-#endif //CONTACT_SOLVER_INFO
+///do not change those serialization structures, it requires an updated sBulletDNAstr/sBulletDNAstr64
+struct btContactSolverInfoDoubleData
+{
+	double		m_tau;
+	double		m_damping;//global non-contact constraint damping, can be locally overridden by constraints during 'getInfo2'.
+	double		m_friction;
+	double		m_timeStep;
+	double		m_restitution;
+	double		m_maxErrorReduction;
+	double		m_sor;
+	double		m_erp;//used as Baumgarte factor
+	double		m_erp2;//used in Split Impulse
+	double		m_globalCfm;//constraint force mixing
+	double		m_splitImpulsePenetrationThreshold;
+	double		m_splitImpulseTurnErp;
+	double		m_linearSlop;
+	double		m_warmstartingFactor;
+	double		m_maxGyroscopicForce;
+	double		m_singleAxisRollingFrictionThreshold;
+
+	int			m_numIterations;
+	int			m_solverMode;
+	int			m_restingContactRestitutionThreshold;
+	int			m_minimumSolverBatchSize;
+	int			m_splitImpulse;
+	char		m_padding[4];
+
+};
+///do not change those serialization structures, it requires an updated sBulletDNAstr/sBulletDNAstr64
+struct btContactSolverInfoFloatData
+{
+	float		m_tau;
+	float		m_damping;//global non-contact constraint damping, can be locally overridden by constraints during 'getInfo2'.
+	float		m_friction;
+	float		m_timeStep;
+
+	float		m_restitution;
+	float		m_maxErrorReduction;
+	float		m_sor;
+	float		m_erp;//used as Baumgarte factor
+
+	float		m_erp2;//used in Split Impulse
+	float		m_globalCfm;//constraint force mixing
+	float		m_splitImpulsePenetrationThreshold;
+	float		m_splitImpulseTurnErp;
+
+	float		m_linearSlop;
+	float		m_warmstartingFactor;
+	float		m_maxGyroscopicForce;
+	float		m_singleAxisRollingFrictionThreshold;
+
+	int			m_numIterations;
+	int			m_solverMode;
+	int			m_restingContactRestitutionThreshold;
+	int			m_minimumSolverBatchSize;
+
+	int			m_splitImpulse;
+	char		m_padding[4];
+};
+
+
+
+#endif //BT_CONTACT_SOLVER_INFO
diff --git a/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btGearConstraint.cpp b/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btGearConstraint.cpp
new file mode 100644
index 000000000..bcd457b67
--- /dev/null
+++ b/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btGearConstraint.cpp
@@ -0,0 +1,54 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2012 Advanced Micro Devices, Inc.  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+/// Implemented by Erwin Coumans. The idea for the constraint comes from Dimitris Papavasiliou.
+
+#include "btGearConstraint.h"
+
+btGearConstraint::btGearConstraint(btRigidBody& rbA, btRigidBody& rbB, const btVector3& axisInA,const btVector3& axisInB, btScalar ratio)
+:btTypedConstraint(GEAR_CONSTRAINT_TYPE,rbA,rbB),
+m_axisInA(axisInA),
+m_axisInB(axisInB),
+m_useFrameA(false), m_ratio(ratio) // m_useFrameA was previously left uninitialised; initialisers follow declaration order
+{
+}
+
+btGearConstraint::~btGearConstraint ()
+{
+}
+
+void btGearConstraint::getInfo1 (btConstraintInfo1* info)
+{
+	info->m_numConstraintRows = 1;
+	info->nub = 1;
+}
+
+void btGearConstraint::getInfo2 (btConstraintInfo2* info)
+{
+	btVector3 globalAxisA, globalAxisB;
+
+	globalAxisA = m_rbA.getWorldTransform().getBasis()*this->m_axisInA;
+	globalAxisB = m_rbB.getWorldTransform().getBasis()*this->m_axisInB;
+
+	info->m_J1angularAxis[0] = globalAxisA[0];
+	info->m_J1angularAxis[1] = globalAxisA[1];
+	info->m_J1angularAxis[2] = globalAxisA[2];
+
+	info->m_J2angularAxis[0] = m_ratio*globalAxisB[0];
+	info->m_J2angularAxis[1] = m_ratio*globalAxisB[1];
+	info->m_J2angularAxis[2] = m_ratio*globalAxisB[2];
+
+}
+
diff --git a/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btGearConstraint.h b/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btGearConstraint.h
new file mode 100644
index 000000000..60f600948
--- /dev/null
+++ b/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btGearConstraint.h
@@ -0,0 +1,56 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2012 Advanced Micro Devices, Inc.  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+
+#ifndef BT_GEAR_CONSTRAINT_H
+#define BT_GEAR_CONSTRAINT_H
+
+#include "BulletDynamics/ConstraintSolver/btTypedConstraint.h"
+///The btGearConstraint will couple the angular velocity for two bodies around given local axis and ratio.
+///See Bullet/Demos/ConstraintDemo for an example use.
+class btGearConstraint : public btTypedConstraint
+{
+protected:
+	btVector3	m_axisInA;	// gear axis in body A's local frame
+	btVector3	m_axisInB;	// gear axis in body B's local frame
+	bool		m_useFrameA;	// NOTE(review): not read by getInfo1/getInfo2 or the inline members below - confirm whether it is still needed
+	btScalar	m_ratio;	// factor applied to body B's world-space axis when the Jacobian row is written
+
+public:
+	btGearConstraint(btRigidBody& rbA, btRigidBody& rbB, const btVector3& axisInA,const btVector3& axisInB, btScalar ratio=1.f);
+	virtual ~btGearConstraint ();
+
+	///internal method used by the constraint solver, don't use them directly
+	virtual void getInfo1 (btConstraintInfo1* info);
+
+	///internal method used by the constraint solver, don't use them directly
+	virtual void getInfo2 (btConstraintInfo2* info);
+	///no per-constraint parameters are supported here: always asserts and stores nothing
+	virtual	void	setParam(int num, btScalar value, int axis = -1) 
+	{
+		btAssert(0);
+	};
+
+	///no per-constraint parameters are supported here: always asserts, then returns 0
+	virtual	btScalar getParam(int num, int axis = -1) const 
+	{ 
+		btAssert(0);
+		return 0.f;
+	}
+
+};
+
+#endif //BT_GEAR_CONSTRAINT_H
diff --git a/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btGeneric6DofConstraint.cpp b/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btGeneric6DofConstraint.cpp
index b24f35615..d4b4a9ad4 100644
--- a/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btGeneric6DofConstraint.cpp
+++ b/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btGeneric6DofConstraint.cpp
@@ -28,14 +28,10 @@ http://gimpact.sf.net
 
 
 #define D6_USE_OBSOLETE_METHOD false
+#define D6_USE_FRAME_OFFSET true // initial value of m_useOffsetForConstraintFrame in both constructors in this file
+
 
 
-btGeneric6DofConstraint::btGeneric6DofConstraint()
-:btTypedConstraint(D6_CONSTRAINT_TYPE),
-m_useLinearReferenceFrameA(true),
-m_useSolveConstraintObsolete(D6_USE_OBSOLETE_METHOD)
-{
-}
 
 
 
@@ -44,13 +40,31 @@ btGeneric6DofConstraint::btGeneric6DofConstraint(btRigidBody& rbA, btRigidBody&
 , m_frameInA(frameInA)
 , m_frameInB(frameInB),
 m_useLinearReferenceFrameA(useLinearReferenceFrameA),
+m_useOffsetForConstraintFrame(D6_USE_FRAME_OFFSET),
+m_flags(0),
 m_useSolveConstraintObsolete(D6_USE_OBSOLETE_METHOD)
 {
-
+	calculateTransforms();
 }
 
 
 
+btGeneric6DofConstraint::btGeneric6DofConstraint(btRigidBody& rbB, const btTransform& frameInB, bool useLinearReferenceFrameB) // single-body form: body A is whatever getFixedBody() returns
+        : btTypedConstraint(D6_CONSTRAINT_TYPE, getFixedBody(), rbB),
+		m_frameInB(frameInB),
+		m_useLinearReferenceFrameA(useLinearReferenceFrameB), // the caller's "B" flag is stored in the ...FrameA member; no separate B member is set here
+		m_useOffsetForConstraintFrame(D6_USE_FRAME_OFFSET),
+		m_flags(0), // no per-axis ERP/CFM overrides yet; bits are set by setParam
+		m_useSolveConstraintObsolete(false)
+{
+	///not providing rigidbody A means implicitly using worldspace for body A
+	m_frameInA = rbB.getCenterOfMassTransform() * m_frameInB; // frame B mapped through rbB's center-of-mass transform as it is at construction time
+	calculateTransforms(); // no-argument overload (defined outside this view); presumably uses the bodies' current transforms - confirm
+}
+
+
+
+
 #define GENERIC_D6_DISABLE_WARMSTARTING 1
 
 
@@ -116,12 +130,20 @@ int btRotationalLimitMotor::testLimitValue(btScalar test_value)
 	{
 		m_currentLimit = 1;//low limit violation
 		m_currentLimitError =  test_value - m_loLimit;
+		if(m_currentLimitError>SIMD_PI) 
+			m_currentLimitError-=SIMD_2_PI;
+		else if(m_currentLimitError<-SIMD_PI) 
+			m_currentLimitError+=SIMD_2_PI;
 		return 1;
 	}
 	else if (test_value> m_hiLimit)
 	{
 		m_currentLimit = 2;//High limit violation
 		m_currentLimitError = test_value - m_hiLimit;
+		if(m_currentLimitError>SIMD_PI) 
+			m_currentLimitError-=SIMD_2_PI;
+		else if(m_currentLimitError<-SIMD_PI) 
+			m_currentLimitError+=SIMD_2_PI;
 		return 2;
 	};
 
@@ -134,7 +156,7 @@ int btRotationalLimitMotor::testLimitValue(btScalar test_value)
 
 btScalar btRotationalLimitMotor::solveAngularLimits(
 	btScalar timeStep,btVector3& axis,btScalar jacDiagABInv,
-	btRigidBody * body0, btSolverBody& bodyA, btRigidBody * body1, btSolverBody& bodyB)
+	btRigidBody * body0, btRigidBody * body1 )
 {
 	if (needApplyTorques()==false) return 0.0f;
 
@@ -144,7 +166,7 @@ btScalar btRotationalLimitMotor::solveAngularLimits(
 	//current error correction
 	if (m_currentLimit!=0)
 	{
-		target_velocity = -m_ERP*m_currentLimitError/(timeStep);
+		target_velocity = -m_stopERP*m_currentLimitError/(timeStep);
 		maxMotorForce = m_maxLimitForce;
 	}
 
@@ -152,10 +174,8 @@ btScalar btRotationalLimitMotor::solveAngularLimits(
 
 	// current velocity difference
 
-	btVector3 angVelA;
-	bodyA.getAngularVelocity(angVelA);
-	btVector3 angVelB;
-	bodyB.getAngularVelocity(angVelB);
+	btVector3 angVelA = body0->getAngularVelocity();
+	btVector3 angVelB = body1->getAngularVelocity();
 
 	btVector3 vel_diff;
 	vel_diff = angVelA-angVelB;
@@ -203,12 +223,8 @@ btScalar btRotationalLimitMotor::solveAngularLimits(
 
 	btVector3 motorImp = clippedMotorImpulse * axis;
 
-	//body0->applyTorqueImpulse(motorImp);
-	//body1->applyTorqueImpulse(-motorImp);
-
-	bodyA.applyImpulse(btVector3(0,0,0), body0->getInvInertiaTensorWorld()*axis,clippedMotorImpulse);
-	bodyB.applyImpulse(btVector3(0,0,0), body1->getInvInertiaTensorWorld()*axis,-clippedMotorImpulse);
-
+	body0->applyTorqueImpulse(motorImp);
+	body1->applyTorqueImpulse(-motorImp);
 
 	return clippedMotorImpulse;
 
@@ -257,8 +273,8 @@ int btTranslationalLimitMotor::testLimitValue(int limitIndex, btScalar test_valu
 btScalar btTranslationalLimitMotor::solveLinearAxis(
 	btScalar timeStep,
 	btScalar jacDiagABInv,
-	btRigidBody& body1,btSolverBody& bodyA,const btVector3 &pointInA,
-	btRigidBody& body2,btSolverBody& bodyB,const btVector3 &pointInB,
+	btRigidBody& body1,const btVector3 &pointInA,
+	btRigidBody& body2,const btVector3 &pointInB,
 	int limit_index,
 	const btVector3 & axis_normal_on_a,
 	const btVector3 & anchorPos)
@@ -270,10 +286,8 @@ btScalar btTranslationalLimitMotor::solveLinearAxis(
 	btVector3 rel_pos1 = anchorPos - body1.getCenterOfMassPosition();
 	btVector3 rel_pos2 = anchorPos - body2.getCenterOfMassPosition();
 
-	btVector3 vel1;
-	bodyA.getVelocityInLocalPointObsolete(rel_pos1,vel1);
-	btVector3 vel2;
-	bodyB.getVelocityInLocalPointObsolete(rel_pos2,vel2);
+	btVector3 vel1 = body1.getVelocityInLocalPoint(rel_pos1);
+	btVector3 vel2 = body2.getVelocityInLocalPoint(rel_pos2);
 	btVector3 vel = vel1 - vel2;
 
 	btScalar rel_vel = axis_normal_on_a.dot(vel);
@@ -326,16 +340,10 @@ btScalar btTranslationalLimitMotor::solveLinearAxis(
 	normalImpulse = m_accumulatedImpulse[limit_index] - oldNormalImpulse;
 
 	btVector3 impulse_vector = axis_normal_on_a * normalImpulse;
-	//body1.applyImpulse( impulse_vector, rel_pos1);
-	//body2.applyImpulse(-impulse_vector, rel_pos2);
-
-	btVector3 ftorqueAxis1 = rel_pos1.cross(axis_normal_on_a);
-	btVector3 ftorqueAxis2 = rel_pos2.cross(axis_normal_on_a);
-	bodyA.applyImpulse(axis_normal_on_a*body1.getInvMass(), body1.getInvInertiaTensorWorld()*ftorqueAxis1,normalImpulse);
-	bodyB.applyImpulse(axis_normal_on_a*body2.getInvMass(), body2.getInvInertiaTensorWorld()*ftorqueAxis2,-normalImpulse);
-
-
+	body1.applyImpulse( impulse_vector, rel_pos1);
+	body2.applyImpulse(-impulse_vector, rel_pos2);
 
+	
 
 	return normalImpulse;
 }
@@ -384,6 +392,22 @@ void btGeneric6DofConstraint::calculateTransforms(const btTransform& transA,cons
 	m_calculatedTransformB = transB * m_frameInB;
 	calculateLinearInfo();
 	calculateAngleInfo();
+	if(m_useOffsetForConstraintFrame)
+	{	//  get weight factors depending on masses
+		btScalar miA = getRigidBodyA().getInvMass();
+		btScalar miB = getRigidBodyB().getInvMass();
+		m_hasStaticBody = (miA < SIMD_EPSILON) || (miB < SIMD_EPSILON);
+		btScalar miS = miA + miB;
+		if(miS > btScalar(0.f))
+		{
+			m_factA = miB / miS;
+		}
+		else 
+		{
+			m_factA = btScalar(0.5f);
+		}
+		m_factB = btScalar(1.0f) - m_factA;
+	}
 }
 
 
@@ -544,43 +568,59 @@ void btGeneric6DofConstraint::getInfo1NonVirtual (btConstraintInfo1* info)
 
 void btGeneric6DofConstraint::getInfo2 (btConstraintInfo2* info)
 {
-	getInfo2NonVirtual(info,m_rbA.getCenterOfMassTransform(),m_rbB.getCenterOfMassTransform(), m_rbA.getLinearVelocity(),m_rbB.getLinearVelocity(),m_rbA.getAngularVelocity(), m_rbB.getAngularVelocity());
+	btAssert(!m_useSolveConstraintObsolete);
+	// read both bodies' current transforms and velocities once; the same values feed both limit passes
+	const btTransform& transA = m_rbA.getCenterOfMassTransform();
+	const btTransform& transB = m_rbB.getCenterOfMassTransform();
+	const btVector3& linVelA = m_rbA.getLinearVelocity();
+	const btVector3& linVelB = m_rbB.getLinearVelocity();
+	const btVector3& angVelA = m_rbA.getAngularVelocity();
+	const btVector3& angVelB = m_rbB.getAngularVelocity();
+	// whichever pass runs first starts at row 0; the row index it returns is where the second pass continues
+	if(m_useOffsetForConstraintFrame)
+	{ // for stability better to solve angular limits first
+		int row = setAngularLimits(info, 0,transA,transB,linVelA,linVelB,angVelA,angVelB);
+		setLinearLimits(info, row, transA,transB,linVelA,linVelB,angVelA,angVelB);
+	}
+	else
+	{ // leave old version for compatibility
+		int row = setLinearLimits(info, 0, transA,transB,linVelA,linVelB,angVelA,angVelB);
+		setAngularLimits(info, row,transA,transB,linVelA,linVelB,angVelA,angVelB);
+	}
+	// NOTE(review): unlike getInfo2NonVirtual, nothing here calls calculateTransforms() or testAngularLimitMotor() - presumably getInfo1 already did; confirm
 }
 
+
 void btGeneric6DofConstraint::getInfo2NonVirtual (btConstraintInfo2* info, const btTransform& transA,const btTransform& transB,const btVector3& linVelA,const btVector3& linVelB,const btVector3& angVelA,const btVector3& angVelB)
 {
+	// same row assembly as getInfo2, but driven by caller-supplied transforms/velocities and recalculating the cached transforms first
 	btAssert(!m_useSolveConstraintObsolete);
-
 	//prepare constraint
 	calculateTransforms(transA,transB);
-	
+	// run the angular limit test on all three axes after the transforms have been recalculated
 	int i;
-	//test linear limits
-	for(i = 0; i < 3; i++)
-	{
-		if(m_linearLimits.needApplyForce(i))
-		{
-	
-		}
-	}
-	//test angular limits
 	for (i=0;i<3 ;i++ )
 	{
-		if(testAngularLimitMotor(i))
-		{
-	
-		}
+		testAngularLimitMotor(i); // return value is discarded
 	}
 
-	int row = setLinearLimits(info,transA,transB,linVelA,linVelB,angVelA,angVelB);
-	setAngularLimits(info, row,transA,transB,linVelA,linVelB,angVelA,angVelB);
+	if(m_useOffsetForConstraintFrame)
+	{ // for stability better to solve angular limits first
+		int row = setAngularLimits(info, 0,transA,transB,linVelA,linVelB,angVelA,angVelB);
+		setLinearLimits(info, row, transA,transB,linVelA,linVelB,angVelA,angVelB);
+	}
+	else
+	{ // leave old version for compatibility
+		int row = setLinearLimits(info, 0, transA,transB,linVelA,linVelB,angVelA,angVelB);
+		setAngularLimits(info, row,transA,transB,linVelA,linVelB,angVelA,angVelB);
+	}
 }
 
 
 
-int btGeneric6DofConstraint::setLinearLimits(btConstraintInfo2* info,const btTransform& transA,const btTransform& transB,const btVector3& linVelA,const btVector3& linVelB,const btVector3& angVelA,const btVector3& angVelB)
+int btGeneric6DofConstraint::setLinearLimits(btConstraintInfo2* info, int row, const btTransform& transA,const btTransform& transB,const btVector3& linVelA,const btVector3& linVelB,const btVector3& angVelA,const btVector3& angVelB)
 {
-	int row = 0;
+//	int row = 0;
 	//solve linear limits
 	btRotationalLimitMotor limot;
 	for (int i=0;i<3 ;i++ )
@@ -593,7 +633,6 @@ int btGeneric6DofConstraint::setLinearLimits(btConstraintInfo2* info,const btTra
 			limot.m_currentLimitError  = m_linearLimits.m_currentLimitError[i];
 			limot.m_damping  = m_linearLimits.m_damping;
 			limot.m_enableMotor  = m_linearLimits.m_enableMotor[i];
-			limot.m_ERP  = m_linearLimits.m_restitution;
 			limot.m_hiLimit  = m_linearLimits.m_upperLimit[i];
 			limot.m_limitSoftness  = m_linearLimits.m_limitSoftness;
 			limot.m_loLimit  = m_linearLimits.m_lowerLimit[i];
@@ -601,9 +640,25 @@ int btGeneric6DofConstraint::setLinearLimits(btConstraintInfo2* info,const btTra
 			limot.m_maxMotorForce  = m_linearLimits.m_maxMotorForce[i];
 			limot.m_targetVelocity  = m_linearLimits.m_targetVelocity[i];
 			btVector3 axis = m_calculatedTransformA.getBasis().getColumn(i);
-			row += get_limit_motor_info2(&limot, 
-				transA,transB,linVelA,linVelB,angVelA,angVelB
-				, info, row, axis, 0);
+			int flags = m_flags >> (i * BT_6DOF_FLAGS_AXIS_SHIFT);
+			limot.m_normalCFM	= (flags & BT_6DOF_FLAGS_CFM_NORM) ? m_linearLimits.m_normalCFM[i] : info->cfm[0];
+			limot.m_stopCFM		= (flags & BT_6DOF_FLAGS_CFM_STOP) ? m_linearLimits.m_stopCFM[i] : info->cfm[0];
+			limot.m_stopERP		= (flags & BT_6DOF_FLAGS_ERP_STOP) ? m_linearLimits.m_stopERP[i] : info->erp;
+			if(m_useOffsetForConstraintFrame)
+			{
+				int indx1 = (i + 1) % 3;
+				int indx2 = (i + 2) % 3;
+				int rotAllowed = 1; // rotations around orthos to current axis
+				if(m_angularLimits[indx1].m_currentLimit && m_angularLimits[indx2].m_currentLimit)
+				{
+					rotAllowed = 0;
+				}
+				row += get_limit_motor_info2(&limot, transA,transB,linVelA,linVelB,angVelA,angVelB, info, row, axis, 0, rotAllowed);
+			}
+			else
+			{
+				row += get_limit_motor_info2(&limot, transA,transB,linVelA,linVelB,angVelA,angVelB, info, row, axis, 0);
+			}
 		}
 	}
 	return row;
@@ -621,10 +676,21 @@ int btGeneric6DofConstraint::setAngularLimits(btConstraintInfo2 *info, int row_o
 		if(d6constraint->getRotationalLimitMotor(i)->needApplyTorques())
 		{
 			btVector3 axis = d6constraint->getAxis(i);
-			row += get_limit_motor_info2(
-				d6constraint->getRotationalLimitMotor(i),
-				transA,transB,linVelA,linVelB,angVelA,angVelB,
-				info,row,axis,1);
+			int flags = m_flags >> ((i + 3) * BT_6DOF_FLAGS_AXIS_SHIFT);
+			if(!(flags & BT_6DOF_FLAGS_CFM_NORM))
+			{
+				m_angularLimits[i].m_normalCFM = info->cfm[0];
+			}
+			if(!(flags & BT_6DOF_FLAGS_CFM_STOP))
+			{
+				m_angularLimits[i].m_stopCFM = info->cfm[0];
+			}
+			if(!(flags & BT_6DOF_FLAGS_ERP_STOP))
+			{
+				m_angularLimits[i].m_stopERP = info->erp;
+			}
+			row += get_limit_motor_info2(d6constraint->getRotationalLimitMotor(i),
+												transA,transB,linVelA,linVelB,angVelA,angVelB, info,row,axis,1);
 		}
 	}
 
@@ -633,66 +699,6 @@ int btGeneric6DofConstraint::setAngularLimits(btConstraintInfo2 *info, int row_o
 
 
 
-void btGeneric6DofConstraint::solveConstraintObsolete(btSolverBody& bodyA,btSolverBody& bodyB,btScalar	timeStep)
-{
-	if (m_useSolveConstraintObsolete)
-	{
-
-
-		m_timeStep = timeStep;
-
-		//calculateTransforms();
-
-		int i;
-
-		// linear
-
-		btVector3 pointInA = m_calculatedTransformA.getOrigin();
-		btVector3 pointInB = m_calculatedTransformB.getOrigin();
-
-		btScalar jacDiagABInv;
-		btVector3 linear_axis;
-		for (i=0;i<3;i++)
-		{
-			if (m_linearLimits.isLimited(i))
-			{
-				jacDiagABInv = btScalar(1.) / m_jacLinear[i].getDiagonal();
-
-				if (m_useLinearReferenceFrameA)
-					linear_axis = m_calculatedTransformA.getBasis().getColumn(i);
-				else
-					linear_axis = m_calculatedTransformB.getBasis().getColumn(i);
-
-				m_linearLimits.solveLinearAxis(
-					m_timeStep,
-					jacDiagABInv,
-					m_rbA,bodyA,pointInA,
-					m_rbB,bodyB,pointInB,
-					i,linear_axis, m_AnchorPos);
-
-			}
-		}
-
-		// angular
-		btVector3 angular_axis;
-		btScalar angularJacDiagABInv;
-		for (i=0;i<3;i++)
-		{
-			if (m_angularLimits[i].needApplyTorques())
-			{
-
-				// get axis
-				angular_axis = getAxis(i);
-
-				angularJacDiagABInv = btScalar(1.) / m_jacAng[i].getDiagonal();
-
-				m_angularLimits[i].solveAngularLimits(m_timeStep,angular_axis,angularJacDiagABInv, &m_rbA,bodyA,&m_rbB,bodyB);
-			}
-		}
-	}
-}
-
-
 
 void	btGeneric6DofConstraint::updateRHS(btScalar	timeStep)
 {
@@ -701,6 +707,15 @@ void	btGeneric6DofConstraint::updateRHS(btScalar	timeStep)
 }
 
 
+void btGeneric6DofConstraint::setFrames(const btTransform& frameA, const btTransform& frameB) // replaces both stored constraint frames (m_frameInA / m_frameInB)
+{
+	m_frameInA = frameA;
+	m_frameInB = frameB;
+	buildJacobian(); // defined outside this view; invoked before the transforms are recalculated
+	calculateTransforms(); // no-argument overload; presumably recomputes the cached transforms from the new frames - confirm
+}
+
+
 
 btVector3 btGeneric6DofConstraint::getAxis(int axis_index) const
 {
@@ -758,7 +773,7 @@ void btGeneric6DofConstraint::calculateLinearInfo()
 int btGeneric6DofConstraint::get_limit_motor_info2(
 	btRotationalLimitMotor * limot,
 	const btTransform& transA,const btTransform& transB,const btVector3& linVelA,const btVector3& linVelB,const btVector3& angVelA,const btVector3& angVelB,
-	btConstraintInfo2 *info, int row, btVector3& ax1, int rotational)
+	btConstraintInfo2 *info, int row, btVector3& ax1, int rotational,int rotAllowed)
 {
     int srow = row * info->rowskip;
     int powered = limot->m_enableMotor;
@@ -778,18 +793,51 @@ int btGeneric6DofConstraint::get_limit_motor_info2(
         }
         if((!rotational))
         {
-			btVector3 ltd;	// Linear Torque Decoupling vector
-			btVector3 c = m_calculatedTransformB.getOrigin() - transA.getOrigin();
-			ltd = c.cross(ax1);
-            info->m_J1angularAxis[srow+0] = ltd[0];
-            info->m_J1angularAxis[srow+1] = ltd[1];
-            info->m_J1angularAxis[srow+2] = ltd[2];
+			if (m_useOffsetForConstraintFrame)
+			{
+				btVector3 tmpA, tmpB, relA, relB;
+				// get vector from bodyB to frameB in WCS
+				relB = m_calculatedTransformB.getOrigin() - transB.getOrigin();
+				// get its projection to constraint axis
+				btVector3 projB = ax1 * relB.dot(ax1);
+				// get vector directed from bodyB to constraint axis (and orthogonal to it)
+				btVector3 orthoB = relB - projB;
+				// same for bodyA
+				relA = m_calculatedTransformA.getOrigin() - transA.getOrigin();
+				btVector3 projA = ax1 * relA.dot(ax1);
+				btVector3 orthoA = relA - projA;
+				// get desired offset between frames A and B along constraint axis
+				btScalar desiredOffs = limot->m_currentPosition - limot->m_currentLimitError;
+				// desired vector from projection of center of bodyA to projection of center of bodyB to constraint axis
+				btVector3 totalDist = projA + ax1 * desiredOffs - projB;
+				// get offset vectors relA and relB
+				relA = orthoA + totalDist * m_factA;
+				relB = orthoB - totalDist * m_factB;
+				tmpA = relA.cross(ax1);
+				tmpB = relB.cross(ax1);
+				if(m_hasStaticBody && (!rotAllowed))
+				{
+					tmpA *= m_factA;
+					tmpB *= m_factB;
+				}
+				int i;
+				for (i=0; i<3; i++) info->m_J1angularAxis[srow+i] = tmpA[i];
+				for (i=0; i<3; i++) info->m_J2angularAxis[srow+i] = -tmpB[i];
+			} else
+			{
+				btVector3 ltd;	// Linear Torque Decoupling vector
+				btVector3 c = m_calculatedTransformB.getOrigin() - transA.getOrigin();
+				ltd = c.cross(ax1);
+				info->m_J1angularAxis[srow+0] = ltd[0];
+				info->m_J1angularAxis[srow+1] = ltd[1];
+				info->m_J1angularAxis[srow+2] = ltd[2];
 
-			c = m_calculatedTransformB.getOrigin() - transB.getOrigin();
-			ltd = -c.cross(ax1);
-			info->m_J2angularAxis[srow+0] = ltd[0];
-            info->m_J2angularAxis[srow+1] = ltd[1];
-            info->m_J2angularAxis[srow+2] = ltd[2];
+				c = m_calculatedTransformB.getOrigin() - transB.getOrigin();
+				ltd = -c.cross(ax1);
+				info->m_J2angularAxis[srow+0] = ltd[0];
+				info->m_J2angularAxis[srow+1] = ltd[1];
+				info->m_J2angularAxis[srow+2] = ltd[2];
+			}
         }
         // if we're limited low and high simultaneously, the joint motor is
         // ineffective
@@ -797,7 +845,7 @@ int btGeneric6DofConstraint::get_limit_motor_info2(
         info->m_constraintError[srow] = btScalar(0.f);
         if (powered)
         {
-            info->cfm[srow] = 0.0f;
+			info->cfm[srow] = limot->m_normalCFM;
             if(!limit)
             {
 				btScalar tag_vel = rotational ? limot->m_targetVelocity : -limot->m_targetVelocity;
@@ -806,7 +854,7 @@ int btGeneric6DofConstraint::get_limit_motor_info2(
 													limot->m_loLimit,
 													limot->m_hiLimit, 
 													tag_vel, 
-													info->fps * info->erp);
+													info->fps * limot->m_stopERP);
 				info->m_constraintError[srow] += mot_fact * limot->m_targetVelocity;
                 info->m_lowerLimit[srow] = -limot->m_maxMotorForce;
                 info->m_upperLimit[srow] = limot->m_maxMotorForce;
@@ -814,7 +862,7 @@ int btGeneric6DofConstraint::get_limit_motor_info2(
         }
         if(limit)
         {
-            btScalar k = info->fps * limot->m_ERP;
+            btScalar k = info->fps * limot->m_stopERP;
 			if(!rotational)
 			{
 				info->m_constraintError[srow] += k * limot->m_currentLimitError;
@@ -823,7 +871,7 @@ int btGeneric6DofConstraint::get_limit_motor_info2(
 			{
 				info->m_constraintError[srow] += -k * limot->m_currentLimitError;
 			}
-            info->cfm[srow] = 0.0f;
+			info->cfm[srow] = limot->m_stopCFM;
             if (limot->m_loLimit == limot->m_hiLimit)
             {   // limited low and high simultaneously
                 info->m_lowerLimit[srow] = -SIMD_INFINITY;
@@ -891,3 +939,126 @@ int btGeneric6DofConstraint::get_limit_motor_info2(
 
 
 
+
+
+	///Override the solver's global value of one parameter (stop ERP, stop CFM or normal CFM) for a single axis: 0..2 are the linear axes, 3..5 the angular axes.
+	///The value is stored and the matching per-axis bit is set in m_flags; any other axis or parameter id only trips btAssertConstrParams(0).
+void btGeneric6DofConstraint::setParam(int num, btScalar value, int axis)
+{
+	if((axis >= 0) && (axis < 3)) // linear axes: values live in m_linearLimits, indexed by axis
+	{
+		switch(num)
+		{
+			case BT_CONSTRAINT_STOP_ERP : 
+				m_linearLimits.m_stopERP[axis] = value;
+				m_flags |= BT_6DOF_FLAGS_ERP_STOP << (axis * BT_6DOF_FLAGS_AXIS_SHIFT);
+				break;
+			case BT_CONSTRAINT_STOP_CFM : 
+				m_linearLimits.m_stopCFM[axis] = value;
+				m_flags |= BT_6DOF_FLAGS_CFM_STOP << (axis * BT_6DOF_FLAGS_AXIS_SHIFT);
+				break;
+			case BT_CONSTRAINT_CFM : 
+				m_linearLimits.m_normalCFM[axis] = value;
+				m_flags |= BT_6DOF_FLAGS_CFM_NORM << (axis * BT_6DOF_FLAGS_AXIS_SHIFT);
+				break;
+			default : 
+				btAssertConstrParams(0);
+		}
+	}
+	else if((axis >=3) && (axis < 6)) // angular axes: values live in m_angularLimits[axis - 3]; flag bits are still shifted by the full axis number
+	{
+		switch(num)
+		{
+			case BT_CONSTRAINT_STOP_ERP : 
+				m_angularLimits[axis - 3].m_stopERP = value;
+				m_flags |= BT_6DOF_FLAGS_ERP_STOP << (axis * BT_6DOF_FLAGS_AXIS_SHIFT);
+				break;
+			case BT_CONSTRAINT_STOP_CFM : 
+				m_angularLimits[axis - 3].m_stopCFM = value;
+				m_flags |= BT_6DOF_FLAGS_CFM_STOP << (axis * BT_6DOF_FLAGS_AXIS_SHIFT);
+				break;
+			case BT_CONSTRAINT_CFM : 
+				m_angularLimits[axis - 3].m_normalCFM = value;
+				m_flags |= BT_6DOF_FLAGS_CFM_NORM << (axis * BT_6DOF_FLAGS_AXIS_SHIFT);
+				break;
+			default : 
+				btAssertConstrParams(0);
+		}
+	}
+	else // axis outside 0..5: nothing is stored
+	{
+		btAssertConstrParams(0);
+	}
+}
+
+	///Return the per-axis value stored for parameter num (axes 0..2 linear, 3..5 angular). Trips btAssertConstrParams if the matching m_flags bit was never set; the result stays 0 for an unknown axis or parameter id.
+btScalar btGeneric6DofConstraint::getParam(int num, int axis) const 
+{
+	btScalar retVal = 0;
+	if((axis >= 0) && (axis < 3)) // linear axes: read from m_linearLimits, indexed by axis
+	{
+		switch(num)
+		{
+			case BT_CONSTRAINT_STOP_ERP : 
+				btAssertConstrParams(m_flags & (BT_6DOF_FLAGS_ERP_STOP << (axis * BT_6DOF_FLAGS_AXIS_SHIFT)));
+				retVal = m_linearLimits.m_stopERP[axis];
+				break;
+			case BT_CONSTRAINT_STOP_CFM : 
+				btAssertConstrParams(m_flags & (BT_6DOF_FLAGS_CFM_STOP << (axis * BT_6DOF_FLAGS_AXIS_SHIFT)));
+				retVal = m_linearLimits.m_stopCFM[axis];
+				break;
+			case BT_CONSTRAINT_CFM : 
+				btAssertConstrParams(m_flags & (BT_6DOF_FLAGS_CFM_NORM << (axis * BT_6DOF_FLAGS_AXIS_SHIFT)));
+				retVal = m_linearLimits.m_normalCFM[axis];
+				break;
+			default : 
+				btAssertConstrParams(0);
+		}
+	}
+	else if((axis >=3) && (axis < 6)) // angular axes: read from m_angularLimits[axis - 3]; flag bits are still shifted by the full axis number
+	{
+		switch(num)
+		{
+			case BT_CONSTRAINT_STOP_ERP : 
+				btAssertConstrParams(m_flags & (BT_6DOF_FLAGS_ERP_STOP << (axis * BT_6DOF_FLAGS_AXIS_SHIFT)));
+				retVal = m_angularLimits[axis - 3].m_stopERP;
+				break;
+			case BT_CONSTRAINT_STOP_CFM : 
+				btAssertConstrParams(m_flags & (BT_6DOF_FLAGS_CFM_STOP << (axis * BT_6DOF_FLAGS_AXIS_SHIFT)));
+				retVal = m_angularLimits[axis - 3].m_stopCFM;
+				break;
+			case BT_CONSTRAINT_CFM : 
+				btAssertConstrParams(m_flags & (BT_6DOF_FLAGS_CFM_NORM << (axis * BT_6DOF_FLAGS_AXIS_SHIFT)));
+				retVal = m_angularLimits[axis - 3].m_normalCFM;
+				break;
+			default : 
+				btAssertConstrParams(0);
+		}
+	}
+	else // axis outside 0..5: retVal stays 0
+	{
+		btAssertConstrParams(0);
+	}
+	return retVal;
+}
+
+ 
+
+void btGeneric6DofConstraint::setAxis(const btVector3& axis1,const btVector3& axis2) // rebuilds m_frameInA and m_frameInB from two direction vectors: axis1 becomes Z, axis2 becomes Y
+{
+	btVector3 zAxis = axis1.normalized();
+	btVector3 yAxis = axis2.normalized();
+	btVector3 xAxis = yAxis.cross(zAxis); // we want right coordinate system
+	// NOTE(review): xAxis is not re-normalized and yAxis is not re-orthogonalized, so axis1 and axis2 are assumed perpendicular - confirm with callers
+	btTransform frameInW;
+	frameInW.setIdentity();
+	frameInW.getBasis().setValue(	xAxis[0], yAxis[0], zAxis[0],	
+	                                xAxis[1], yAxis[1], zAxis[1],
+	                               xAxis[2], yAxis[2], zAxis[2]);
+	// only the basis is overwritten; the origin stays as setIdentity() left it (assuming setValue takes its arguments row by row, the axes end up as columns)
+	// now get constraint frame in local coordinate systems
+	m_frameInA = m_rbA.getCenterOfMassTransform().inverse() * frameInW;
+	m_frameInB = m_rbB.getCenterOfMassTransform().inverse() * frameInW;
+	
+	calculateTransforms();
+}
diff --git a/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btGeneric6DofConstraint.h b/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btGeneric6DofConstraint.h
index 3d1936da3..0409f9537 100644
--- a/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btGeneric6DofConstraint.h
+++ b/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btGeneric6DofConstraint.h
@@ -24,8 +24,8 @@ http://gimpact.sf.net
 */
 
 
-#ifndef GENERIC_6DOF_CONSTRAINT_H
-#define GENERIC_6DOF_CONSTRAINT_H
+#ifndef BT_GENERIC_6DOF_CONSTRAINT_H
+#define BT_GENERIC_6DOF_CONSTRAINT_H
 
 #include "LinearMath/btVector3.h"
 #include "btJacobianEntry.h"
@@ -49,7 +49,9 @@ public:
     btScalar m_maxLimitForce;//!< max force on limit
     btScalar m_damping;//!< Damping.
     btScalar m_limitSoftness;//! Relaxation factor
-    btScalar m_ERP;//!< Error tolerance factor when joint is at limit
+    btScalar m_normalCFM;//!< Constraint force mixing factor
+    btScalar m_stopERP;//!< Error tolerance factor when joint is at limit
+    btScalar m_stopCFM;//!< Constraint force mixing factor when joint is at limit
     btScalar m_bounce;//!< restitution factor
     bool m_enableMotor;
 
@@ -71,7 +73,9 @@ public:
         m_maxLimitForce = 300.0f;
         m_loLimit = 1.0f;
         m_hiLimit = -1.0f;
-        m_ERP = 0.5f;
+		m_normalCFM = 0.f;
+		m_stopERP = 0.2f;
+		m_stopCFM = 0.f;
         m_bounce = 0.0f;
         m_damping = 1.0f;
         m_limitSoftness = 0.5f;
@@ -87,7 +91,9 @@ public:
         m_limitSoftness = limot.m_limitSoftness;
         m_loLimit = limot.m_loLimit;
         m_hiLimit = limot.m_hiLimit;
-        m_ERP = limot.m_ERP;
+		m_normalCFM = limot.m_normalCFM;
+		m_stopERP = limot.m_stopERP;
+		m_stopCFM =	limot.m_stopCFM;
         m_bounce = limot.m_bounce;
         m_currentLimit = limot.m_currentLimit;
         m_currentLimitError = limot.m_currentLimitError;
@@ -117,7 +123,7 @@ public:
 	int testLimitValue(btScalar test_value);
 
 	//! apply the correction impulses for two bodies
-    btScalar solveAngularLimits(btScalar timeStep,btVector3& axis, btScalar jacDiagABInv,btRigidBody * body0, btSolverBody& bodyA,btRigidBody * body1,btSolverBody& bodyB);
+    btScalar solveAngularLimits(btScalar timeStep,btVector3& axis, btScalar jacDiagABInv,btRigidBody * body0, btRigidBody * body1);
 
 };
 
@@ -134,6 +140,9 @@ public:
     btScalar	m_limitSoftness;//!< Softness for linear limit
     btScalar	m_damping;//!< Damping for linear limit
     btScalar	m_restitution;//! Bounce parameter for linear limit
+	btVector3	m_normalCFM;//!< Constraint force mixing factor
+    btVector3	m_stopERP;//!< Error tolerance factor when joint is at limit
+	btVector3	m_stopCFM;//!< Constraint force mixing factor when joint is at limit
     //!@}
 	bool		m_enableMotor[3];
     btVector3	m_targetVelocity;//!< target motor velocity
@@ -147,6 +156,9 @@ public:
     	m_lowerLimit.setValue(0.f,0.f,0.f);
     	m_upperLimit.setValue(0.f,0.f,0.f);
     	m_accumulatedImpulse.setValue(0.f,0.f,0.f);
+		m_normalCFM.setValue(0.f, 0.f, 0.f);
+		m_stopERP.setValue(0.2f, 0.2f, 0.2f);
+		m_stopCFM.setValue(0.f, 0.f, 0.f);
 
     	m_limitSoftness = 0.7f;
     	m_damping = btScalar(1.0f);
@@ -168,6 +180,10 @@ public:
     	m_limitSoftness = other.m_limitSoftness ;
     	m_damping = other.m_damping;
     	m_restitution = other.m_restitution;
+		m_normalCFM = other.m_normalCFM;
+		m_stopERP = other.m_stopERP;
+		m_stopCFM = other.m_stopCFM;
+
 		for(int i=0; i < 3; i++) 
 		{
 			m_enableMotor[i] = other.m_enableMotor[i];
@@ -198,8 +214,8 @@ public:
     btScalar solveLinearAxis(
     	btScalar timeStep,
         btScalar jacDiagABInv,
-        btRigidBody& body1,btSolverBody& bodyA,const btVector3 &pointInA,
-        btRigidBody& body2,btSolverBody& bodyB,const btVector3 &pointInB,
+        btRigidBody& body1,const btVector3 &pointInA,
+        btRigidBody& body2,const btVector3 &pointInB,
         int limit_index,
         const btVector3 & axis_normal_on_a,
 		const btVector3 & anchorPos);
@@ -207,6 +223,15 @@ public:
 
 };
 
+enum bt6DofFlags
+{
+	BT_6DOF_FLAGS_CFM_NORM = 1, // set by setParam(BT_CONSTRAINT_CFM): use the stored normal CFM instead of info->cfm[0]
+	BT_6DOF_FLAGS_CFM_STOP = 2, // set by setParam(BT_CONSTRAINT_STOP_CFM): use the stored stop CFM instead of info->cfm[0]
+	BT_6DOF_FLAGS_ERP_STOP = 4 // set by setParam(BT_CONSTRAINT_STOP_ERP): use the stored stop ERP instead of info->erp
+};
+#define BT_6DOF_FLAGS_AXIS_SHIFT 3 // bits per axis: axis n (0..2 linear, 3..5 angular) owns bits 3n..3n+2 of m_flags
+
+
 /// btGeneric6DofConstraint between two rigidbodies each with a pivotpoint that descibes the axis location in local space
 /*!
 btGeneric6DofConstraint can leave any of the 6 degree of freedom 'free' or 'locked'.
@@ -221,27 +246,29 @@ This brings support for limit parameters and motors. </li>
 
 <li> Angulars limits have these possible ranges:
 <table border=1 >
-<tr
-
+<tr>
 	<td><b>AXIS</b></td>
 	<td><b>MIN ANGLE</b></td>
 	<td><b>MAX ANGLE</b></td>
+</tr><tr>
 	<td>X</td>
-		<td>-PI</td>
-		<td>PI</td>
+	<td>-PI</td>
+	<td>PI</td>
+</tr><tr>
 	<td>Y</td>
-		<td>-PI/2</td>
-		<td>PI/2</td>
+	<td>-PI/2</td>
+	<td>PI/2</td>
+</tr><tr>
 	<td>Z</td>
-		<td>-PI/2</td>
-		<td>PI/2</td>
+	<td>-PI</td>
+	<td>PI</td>
 </tr>
 </table>
 </li>
 </ul>
 
 */
-class btGeneric6DofConstraint : public btTypedConstraint
+ATTRIBUTE_ALIGNED16(class) btGeneric6DofConstraint : public btTypedConstraint
 {
 protected:
 
@@ -278,11 +305,17 @@ protected:
     btVector3 m_calculatedAxisAngleDiff;
     btVector3 m_calculatedAxis[3];
     btVector3 m_calculatedLinearDiff;
+	btScalar	m_factA;
+	btScalar	m_factB;
+	bool		m_hasStaticBody;
     
 	btVector3 m_AnchorPos; // point betwen pivots of bodies A and B to solve linear axes
 
     bool	m_useLinearReferenceFrameA;
+	bool	m_useOffsetForConstraintFrame;
     
+	int		m_flags;
+
     //!@}
 
     btGeneric6DofConstraint&	operator=(btGeneric6DofConstraint&	other)
@@ -295,7 +328,7 @@ protected:
 
 	int setAngularLimits(btConstraintInfo2 *info, int row_offset,const btTransform& transA,const btTransform& transB,const btVector3& linVelA,const btVector3& linVelB,const btVector3& angVelA,const btVector3& angVelB);
 
-	int setLinearLimits(btConstraintInfo2 *info,const btTransform& transA,const btTransform& transB,const btVector3& linVelA,const btVector3& linVelB,const btVector3& angVelA,const btVector3& angVelB);
+	int setLinearLimits(btConstraintInfo2 *info, int row, const btTransform& transA,const btTransform& transB,const btVector3& linVelA,const btVector3& linVelB,const btVector3& angVelA,const btVector3& angVelB);
 
     void buildLinearJacobian(
         btJacobianEntry & jacLinear,const btVector3 & normalWorld,
@@ -313,13 +346,14 @@ protected:
 
 public:
 
+	BT_DECLARE_ALIGNED_ALLOCATOR();
+	
 	///for backwards compatibility during the transition to 'getInfo/getInfo2'
 	bool		m_useSolveConstraintObsolete;
 
     btGeneric6DofConstraint(btRigidBody& rbA, btRigidBody& rbB, const btTransform& frameInA, const btTransform& frameInB ,bool useLinearReferenceFrameA);
-
-    btGeneric6DofConstraint();
-
+    btGeneric6DofConstraint(btRigidBody& rbB, const btTransform& frameInB, bool useLinearReferenceFrameB);
+    
 	//! Calcs global transform of the offsets
 	/*!
 	Calcs the global transform for the joint offset for body A an B, and also calcs the agle differences between the bodies.
@@ -381,8 +415,6 @@ public:
 	void getInfo2NonVirtual (btConstraintInfo2* info,const btTransform& transA,const btTransform& transB,const btVector3& linVelA,const btVector3& linVelB,const btVector3& angVelA,const btVector3& angVelB);
 
 
-    virtual	void	solveConstraintObsolete(btSolverBody& bodyA,btSolverBody& bodyB,btScalar	timeStep);
-
     void	updateRHS(btScalar	timeStep);
 
 	//! Get the rotation axis in global coordinates
@@ -403,6 +435,7 @@ public:
 	*/
 	btScalar getRelativePivotPosition(int axis_index) const;
 
+	void setFrames(const btTransform & frameA, const btTransform & frameB);
 
 	//! Test angular limit.
 	/*!
@@ -416,10 +449,20 @@ public:
     	m_linearLimits.m_lowerLimit = linearLower;
     }
 
-    void	setLinearUpperLimit(const btVector3& linearUpper)
-    {
-    	m_linearLimits.m_upperLimit = linearUpper;
-    }
+	void	getLinearLowerLimit(btVector3& linearLower) const	// read-only: copies m_linearLimits.m_lowerLimit into linearLower
+	{
+		linearLower = m_linearLimits.m_lowerLimit;
+	}
+
+	void	setLinearUpperLimit(const btVector3& linearUpper)	// stores linearUpper as m_linearLimits.m_upperLimit
+	{
+		m_linearLimits.m_upperLimit = linearUpper;
+	}
+
+	void	getLinearUpperLimit(btVector3& linearUpper) const	// read-only: copies m_linearLimits.m_upperLimit into linearUpper
+	{
+		linearUpper = m_linearLimits.m_upperLimit;
+	}
 
     void	setAngularLowerLimit(const btVector3& angularLower)
     {
@@ -427,12 +470,24 @@ public:
 			m_angularLimits[i].m_loLimit = btNormalizeAngle(angularLower[i]);
     }
 
+	void	getAngularLowerLimit(btVector3& angularLower) const	// read-only: fills angularLower with the stored m_loLimit of each angular limit
+	{
+		for(int i = 0; i < 3; i++)
+			angularLower[i] = m_angularLimits[i].m_loLimit;
+	}
+
     void	setAngularUpperLimit(const btVector3& angularUpper)
     {
 		for(int i = 0; i < 3; i++)
 			m_angularLimits[i].m_hiLimit = btNormalizeAngle(angularUpper[i]);
     }
 
+	void	getAngularUpperLimit(btVector3& angularUpper) const	// read-only: fills angularUpper with the stored m_hiLimit of each angular limit
+	{
+		for(int i = 0; i < 3; i++)
+			angularUpper[i] = m_angularLimits[i].m_hiLimit;
+	}
+
 	//! Retrieves the angular limit informacion
     btRotationalLimitMotor * getRotationalLimitMotor(int index)
     {
@@ -483,10 +538,79 @@ public:
 
 	int get_limit_motor_info2(	btRotationalLimitMotor * limot,
 								const btTransform& transA,const btTransform& transB,const btVector3& linVelA,const btVector3& linVelB,const btVector3& angVelA,const btVector3& angVelB,
-								btConstraintInfo2 *info, int row, btVector3& ax1, int rotational);
+								btConstraintInfo2 *info, int row, btVector3& ax1, int rotational, int rotAllowed = false);
+
+	// access for UseFrameOffset
+	bool getUseFrameOffset() const { return m_useOffsetForConstraintFrame; }	// read-only: current value of the frame-offset flag
+	void setUseFrameOffset(bool frameOffsetOnOff) { m_useOffsetForConstraintFrame = frameOffsetOnOff; }
+
+	///override the default global value of a parameter (such as ERP or CFM), optionally provide the axis (0..5). 
+	///If no axis is provided, it uses the default axis for this constraint.
+	virtual	void setParam(int num, btScalar value, int axis = -1);
+	///return the local value of parameter
+	virtual	btScalar getParam(int num, int axis = -1) const;
+
+	void setAxis( const btVector3& axis1, const btVector3& axis2);
 
 
+	virtual	int	calculateSerializeBufferSize() const;
+
+	///fills the dataBuffer and returns the struct name (and 0 on failure)
+	virtual	const char*	serialize(void* dataBuffer, btSerializer* serializer) const;
+
+	
 };
 
+///do not change those serialization structures, it requires an updated sBulletDNAstr/sBulletDNAstr64
+struct btGeneric6DofConstraintData
+{
+	btTypedConstraintData	m_typeConstraintData;	// filled by btTypedConstraint::serialize
+	btTransformFloatData m_rbAFrame; // constraint frame, written from m_frameInA by serialize()
+	btTransformFloatData m_rbBFrame; // constraint frame, written from m_frameInB by serialize()
+	
+	btVector3FloatData	m_linearUpperLimit;	// from m_linearLimits.m_upperLimit
+	btVector3FloatData	m_linearLowerLimit;	// from m_linearLimits.m_lowerLimit
 
-#endif //GENERIC_6DOF_CONSTRAINT_H
+	btVector3FloatData	m_angularUpperLimit;	// from m_angularLimits[0..2].m_hiLimit
+	btVector3FloatData	m_angularLowerLimit;	// from m_angularLimits[0..2].m_loLimit
+	
+	int	m_useLinearReferenceFrameA;	// bool member stored as 1/0
+	int m_useOffsetForConstraintFrame;	// bool member stored as 1/0
+};
+
+SIMD_FORCE_INLINE	int	btGeneric6DofConstraint::calculateSerializeBufferSize() const
+{	// size of the one btGeneric6DofConstraintData record that serialize() fills
+	return static_cast<int>(sizeof(btGeneric6DofConstraintData));
+}
+
+	///Fills dataBuffer (viewed as btGeneric6DofConstraintData) and returns that struct's name.
+SIMD_FORCE_INLINE	const char*	btGeneric6DofConstraint::serialize(void* dataBuffer, btSerializer* serializer) const
+{
+	btGeneric6DofConstraintData* out = (btGeneric6DofConstraintData*)dataBuffer;
+	// the base class writes its own part first
+	btTypedConstraint::serialize(&out->m_typeConstraintData,serializer);
+
+	// both constraint frames, in float form
+	m_frameInA.serializeFloat(out->m_rbAFrame);
+	m_frameInB.serializeFloat(out->m_rbBFrame);
+
+	// linear and angular limits, one component per axis, cast to float
+	for (int axis=0;axis<3;axis++)
+	{
+		out->m_linearLowerLimit.m_floats[axis] = float(m_linearLimits.m_lowerLimit[axis]);
+		out->m_linearUpperLimit.m_floats[axis] = float(m_linearLimits.m_upperLimit[axis]);
+		out->m_angularLowerLimit.m_floats[axis] = float(m_angularLimits[axis].m_loLimit);
+		out->m_angularUpperLimit.m_floats[axis] = float(m_angularLimits[axis].m_hiLimit);
+	}
+
+	// boolean flags are written as 1/0
+	out->m_useLinearReferenceFrameA = m_useLinearReferenceFrameA? 1 : 0;
+	out->m_useOffsetForConstraintFrame = m_useOffsetForConstraintFrame ? 1 : 0;
+	return "btGeneric6DofConstraintData";
+}
+
+
+
+
+
+#endif //BT_GENERIC_6DOF_CONSTRAINT_H
diff --git a/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btGeneric6DofSpringConstraint.cpp b/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btGeneric6DofSpringConstraint.cpp
index 5dbf4692e..6f765884e 100644
--- a/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btGeneric6DofSpringConstraint.cpp
+++ b/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btGeneric6DofSpringConstraint.cpp
@@ -21,6 +21,21 @@ subject to the following restrictions:
 btGeneric6DofSpringConstraint::btGeneric6DofSpringConstraint(btRigidBody& rbA, btRigidBody& rbB, const btTransform& frameInA, const btTransform& frameInB ,bool useLinearReferenceFrameA)
 	: btGeneric6DofConstraint(rbA, rbB, frameInA, frameInB, useLinearReferenceFrameA)
 {
+    init();
+}
+
+
+btGeneric6DofSpringConstraint::btGeneric6DofSpringConstraint(btRigidBody& rbB, const btTransform& frameInB, bool useLinearReferenceFrameB)
+        : btGeneric6DofConstraint(rbB, frameInB, useLinearReferenceFrameB) // single-body form: forwards to the matching base constructor
+{
+    init(); // shared spring-state setup, same call as in the two-body constructor
+}
+
+
+void btGeneric6DofSpringConstraint::init()
+{
+	m_objectType = D6_SPRING_CONSTRAINT_TYPE;
+
 	for(int i = 0; i < 6; i++)
 	{
 		m_springEnabled[i] = false;
@@ -64,11 +79,13 @@ void btGeneric6DofSpringConstraint::setDamping(int index, btScalar damping)
 void btGeneric6DofSpringConstraint::setEquilibriumPoint()
 {
 	calculateTransforms();
-	for(int i = 0; i < 3; i++)
+	int i;
+
+	for( i = 0; i < 3; i++)
 	{
 		m_equilibriumPoint[i] = m_calculatedLinearDiff[i];
 	}
-	for(int i = 0; i < 3; i++)
+	for(i = 0; i < 3; i++)
 	{
 		m_equilibriumPoint[i + 3] = m_calculatedAxisAngleDiff[i];
 	}
@@ -86,17 +103,22 @@ void btGeneric6DofSpringConstraint::setEquilibriumPoint(int index)
 	}
 	else
 	{
-		m_equilibriumPoint[index + 3] = m_calculatedAxisAngleDiff[index];
+		m_equilibriumPoint[index] = m_calculatedAxisAngleDiff[index - 3];
 	}
 }
 
+void btGeneric6DofSpringConstraint::setEquilibriumPoint(int index, btScalar val) // sets one equilibrium entry to an explicit value
+{
+	btAssert((index >= 0) && (index < 6)); // index must name one of the six m_equilibriumPoint entries
+	m_equilibriumPoint[index] = val; // stored as given; unlike the other overloads, calculateTransforms() is not called
+}
 
 
 void btGeneric6DofSpringConstraint::internalUpdateSprings(btConstraintInfo2* info)
 {
 	// it is assumed that calculateTransforms() have been called before this call
 	int i;
-	btVector3 relVel = m_rbB.getLinearVelocity() - m_rbA.getLinearVelocity();
+	//btVector3 relVel = m_rbB.getLinearVelocity() - m_rbA.getLinearVelocity();
 	for(i = 0; i < 3; i++)
 	{
 		if(m_springEnabled[i])
@@ -140,5 +162,24 @@ void btGeneric6DofSpringConstraint::getInfo2(btConstraintInfo2* info)
 }
 
 
+void btGeneric6DofSpringConstraint::setAxis(const btVector3& axis1,const btVector3& axis2)
+{
+	// axis1 (normalized) is used as the frame's Z axis, axis2 (normalized) as its Y axis
+	btVector3 frameZ = axis1.normalized();
+	btVector3 frameY = axis2.normalized();
+	btVector3 frameX = frameY.cross(frameZ); // X = Y x Z, so the triad is right-handed
+
+	btTransform worldFrame; // constraint frame expressed in world coordinates
+	worldFrame.setIdentity();
+	worldFrame.getBasis().setValue(frameX[0], frameY[0], frameZ[0],
+	                               frameX[1], frameY[1], frameZ[1],
+	                               frameX[2], frameY[2], frameZ[2]);
+
+	// re-express the world frame relative to each body's center-of-mass transform
+	m_frameInA = m_rbA.getCenterOfMassTransform().inverse() * worldFrame;
+	m_frameInB = m_rbB.getCenterOfMassTransform().inverse() * worldFrame;
+	calculateTransforms();
+}
+
 
 
diff --git a/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btGeneric6DofSpringConstraint.h b/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btGeneric6DofSpringConstraint.h
index e0c1fc9ae..6fabb3036 100644
--- a/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btGeneric6DofSpringConstraint.h
+++ b/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btGeneric6DofSpringConstraint.h
@@ -13,8 +13,8 @@ subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-#ifndef GENERIC_6DOF_SPRING_CONSTRAINT_H
-#define GENERIC_6DOF_SPRING_CONSTRAINT_H
+#ifndef BT_GENERIC_6DOF_SPRING_CONSTRAINT_H
+#define BT_GENERIC_6DOF_SPRING_CONSTRAINT_H
 
 
 #include "LinearMath/btVector3.h"
@@ -32,23 +32,71 @@ subject to the following restrictions:
 /// 4 : rotation Y (2nd Euler rotational around new position of Y axis, range [-PI/2+epsilon, PI/2-epsilon] )
 /// 5 : rotation Z (1st Euler rotational around Z axis, range [-PI+epsilon, PI-epsilon] )
 
-class btGeneric6DofSpringConstraint : public btGeneric6DofConstraint
+ATTRIBUTE_ALIGNED16(class) btGeneric6DofSpringConstraint : public btGeneric6DofConstraint
 {
 protected:
 	bool		m_springEnabled[6];
 	btScalar	m_equilibriumPoint[6];
 	btScalar	m_springStiffness[6];
 	btScalar	m_springDamping[6]; // between 0 and 1 (1 == no damping)
+	void init();
 	void internalUpdateSprings(btConstraintInfo2* info);
 public: 
+	
+	BT_DECLARE_ALIGNED_ALLOCATOR();
+	
     btGeneric6DofSpringConstraint(btRigidBody& rbA, btRigidBody& rbB, const btTransform& frameInA, const btTransform& frameInB ,bool useLinearReferenceFrameA);
+    btGeneric6DofSpringConstraint(btRigidBody& rbB, const btTransform& frameInB, bool useLinearReferenceFrameB);
 	void enableSpring(int index, bool onOff);
 	void setStiffness(int index, btScalar stiffness);
 	void setDamping(int index, btScalar damping);
 	void setEquilibriumPoint(); // set the current constraint position/orientation as an equilibrium point for all DOF
 	void setEquilibriumPoint(int index);  // set the current constraint position/orientation as an equilibrium point for given DOF
+	void setEquilibriumPoint(int index, btScalar val);
+
+	virtual void setAxis( const btVector3& axis1, const btVector3& axis2);
+
 	virtual void getInfo2 (btConstraintInfo2* info);
+
+	virtual	int	calculateSerializeBufferSize() const;
+	///fills the dataBuffer and returns the struct name (and 0 on failure)
+	virtual	const char*	serialize(void* dataBuffer, btSerializer* serializer) const;
+
 };
 
-#endif // GENERIC_6DOF_SPRING_CONSTRAINT_H
+
+///do not change those serialization structures, it requires an updated sBulletDNAstr/sBulletDNAstr64
+struct btGeneric6DofSpringConstraintData
+{
+	btGeneric6DofConstraintData	m_6dofData;	// filled by btGeneric6DofConstraint::serialize
+	
+	int			m_springEnabled[6];	// 1/0 per entry of the class's bool m_springEnabled[6]
+	float		m_equilibriumPoint[6];	// float copy of the class's btScalar array of the same name
+	float		m_springStiffness[6];	// float copy of the class's btScalar array of the same name
+	float		m_springDamping[6];	// float copy of the class's btScalar array of the same name
+};
+
+SIMD_FORCE_INLINE	int	btGeneric6DofSpringConstraint::calculateSerializeBufferSize() const
+{	// size of the one btGeneric6DofSpringConstraintData record that serialize() fills
+	return static_cast<int>(sizeof(btGeneric6DofSpringConstraintData));
+}
+
+	///Fills dataBuffer (viewed as btGeneric6DofSpringConstraintData) and returns that struct's name.
+SIMD_FORCE_INLINE	const char*	btGeneric6DofSpringConstraint::serialize(void* dataBuffer, btSerializer* serializer) const
+{
+	btGeneric6DofSpringConstraintData* out = (btGeneric6DofSpringConstraintData*)dataBuffer;
+	btGeneric6DofConstraint::serialize(&out->m_6dofData,serializer); // base 6dof part
+
+	// spring state for each of the six entries: flag as 1/0, scalars cast to float
+	for (int slot=0;slot<6;slot++)
+	{
+		out->m_springEnabled[slot] = m_springEnabled[slot]? 1 : 0;
+		out->m_equilibriumPoint[slot] = (float)m_equilibriumPoint[slot];
+		out->m_springStiffness[slot] = (float)m_springStiffness[slot];
+		out->m_springDamping[slot] = (float)m_springDamping[slot];
+	}
+	return "btGeneric6DofSpringConstraintData";
+}
+
+#endif // BT_GENERIC_6DOF_SPRING_CONSTRAINT_H
 
diff --git a/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btHinge2Constraint.h b/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btHinge2Constraint.h
index 15fd4a014..9a0049869 100644
--- a/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btHinge2Constraint.h
+++ b/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btHinge2Constraint.h
@@ -13,8 +13,8 @@ subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-#ifndef HINGE2_CONSTRAINT_H
-#define HINGE2_CONSTRAINT_H
+#ifndef BT_HINGE2_CONSTRAINT_H
+#define BT_HINGE2_CONSTRAINT_H
 
 
 
@@ -29,13 +29,15 @@ subject to the following restrictions:
 // 2 rotational degrees of freedom, similar to Euler rotations around Z (axis 1) and X (axis 2)
 // 1 translational (along axis Z) with suspension spring
 
-class btHinge2Constraint : public btGeneric6DofSpringConstraint
+ATTRIBUTE_ALIGNED16(class) btHinge2Constraint : public btGeneric6DofSpringConstraint
 {
 protected:
 	btVector3	m_anchor;
 	btVector3	m_axis1;
 	btVector3	m_axis2;
 public:
+		BT_DECLARE_ALIGNED_ALLOCATOR();
+		
 	// constructor
 	// anchor, axis1 and axis2 are in world coordinate system
 	// axis1 must be orthogonal to axis2
@@ -54,5 +56,5 @@ public:
 
 
 
-#endif // HINGE2_CONSTRAINT_H
+#endif // BT_HINGE2_CONSTRAINT_H
 
diff --git a/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btHingeConstraint.cpp b/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btHingeConstraint.cpp
index c637231c4..20b35fb9c 100644
--- a/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btHingeConstraint.cpp
+++ b/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btHingeConstraint.cpp
@@ -23,29 +23,29 @@ subject to the following restrictions:
 
 
 
+//#define HINGE_USE_OBSOLETE_SOLVER false
 #define HINGE_USE_OBSOLETE_SOLVER false
 
+#define HINGE_USE_FRAME_OFFSET true
 
 #ifndef __SPU__
 
-btHingeConstraint::btHingeConstraint()
-: btTypedConstraint (HINGE_CONSTRAINT_TYPE),
-m_enableAngularMotor(false),
-m_useSolveConstraintObsolete(HINGE_USE_OBSOLETE_SOLVER),
-m_useReferenceFrameA(false)
-{
-	m_referenceSign = m_useReferenceFrameA ? btScalar(-1.f) : btScalar(1.f);
-}
+
 
 
 
 btHingeConstraint::btHingeConstraint(btRigidBody& rbA,btRigidBody& rbB, const btVector3& pivotInA,const btVector3& pivotInB,
-									 btVector3& axisInA,btVector3& axisInB, bool useReferenceFrameA)
+									 const btVector3& axisInA,const btVector3& axisInB, bool useReferenceFrameA)
 									 :btTypedConstraint(HINGE_CONSTRAINT_TYPE, rbA,rbB),
+#ifdef _BT_USE_CENTER_LIMIT_
+									 m_limit(),
+#endif
 									 m_angularOnly(false),
 									 m_enableAngularMotor(false),
 									 m_useSolveConstraintObsolete(HINGE_USE_OBSOLETE_SOLVER),
-									 m_useReferenceFrameA(useReferenceFrameA)
+									 m_useOffsetForConstraintFrame(HINGE_USE_FRAME_OFFSET),
+									 m_useReferenceFrameA(useReferenceFrameA),
+									 m_flags(0)
 {
 	m_rbAFrame.getOrigin() = pivotInA;
 	
@@ -78,6 +78,7 @@ btHingeConstraint::btHingeConstraint(btRigidBody& rbA,btRigidBody& rbB, const bt
 									rbAxisB1.getY(),rbAxisB2.getY(),axisInB.getY(),
 									rbAxisB1.getZ(),rbAxisB2.getZ(),axisInB.getZ() );
 	
+#ifndef	_BT_USE_CENTER_LIMIT_
 	//start with free
 	m_lowerLimit = btScalar(1.0f);
 	m_upperLimit = btScalar(-1.0f);
@@ -85,15 +86,22 @@ btHingeConstraint::btHingeConstraint(btRigidBody& rbA,btRigidBody& rbB, const bt
 	m_relaxationFactor = 1.0f;
 	m_limitSoftness = 0.9f;
 	m_solveLimit = false;
+#endif
 	m_referenceSign = m_useReferenceFrameA ? btScalar(-1.f) : btScalar(1.f);
 }
 
 
 
-btHingeConstraint::btHingeConstraint(btRigidBody& rbA,const btVector3& pivotInA,btVector3& axisInA, bool useReferenceFrameA)
-:btTypedConstraint(HINGE_CONSTRAINT_TYPE, rbA), m_angularOnly(false), m_enableAngularMotor(false), 
+btHingeConstraint::btHingeConstraint(btRigidBody& rbA,const btVector3& pivotInA,const btVector3& axisInA, bool useReferenceFrameA)
+:btTypedConstraint(HINGE_CONSTRAINT_TYPE, rbA),
+#ifdef _BT_USE_CENTER_LIMIT_
+m_limit(),
+#endif
+m_angularOnly(false), m_enableAngularMotor(false), 
 m_useSolveConstraintObsolete(HINGE_USE_OBSOLETE_SOLVER),
-m_useReferenceFrameA(useReferenceFrameA)
+m_useOffsetForConstraintFrame(HINGE_USE_FRAME_OFFSET),
+m_useReferenceFrameA(useReferenceFrameA),
+m_flags(0)
 {
 
 	// since no frame is given, assume this to be zero angle and just pick rb transform axis
@@ -118,6 +126,7 @@ m_useReferenceFrameA(useReferenceFrameA)
 									rbAxisB1.getY(),rbAxisB2.getY(),axisInB.getY(),
 									rbAxisB1.getZ(),rbAxisB2.getZ(),axisInB.getZ() );
 	
+#ifndef	_BT_USE_CENTER_LIMIT_
 	//start with free
 	m_lowerLimit = btScalar(1.0f);
 	m_upperLimit = btScalar(-1.0f);
@@ -125,6 +134,7 @@ m_useReferenceFrameA(useReferenceFrameA)
 	m_relaxationFactor = 1.0f;
 	m_limitSoftness = 0.9f;
 	m_solveLimit = false;
+#endif
 	m_referenceSign = m_useReferenceFrameA ? btScalar(-1.f) : btScalar(1.f);
 }
 
@@ -133,11 +143,17 @@ m_useReferenceFrameA(useReferenceFrameA)
 btHingeConstraint::btHingeConstraint(btRigidBody& rbA,btRigidBody& rbB, 
 								     const btTransform& rbAFrame, const btTransform& rbBFrame, bool useReferenceFrameA)
 :btTypedConstraint(HINGE_CONSTRAINT_TYPE, rbA,rbB),m_rbAFrame(rbAFrame),m_rbBFrame(rbBFrame),
+#ifdef _BT_USE_CENTER_LIMIT_
+m_limit(),
+#endif
 m_angularOnly(false),
 m_enableAngularMotor(false),
 m_useSolveConstraintObsolete(HINGE_USE_OBSOLETE_SOLVER),
-m_useReferenceFrameA(useReferenceFrameA)
+m_useOffsetForConstraintFrame(HINGE_USE_FRAME_OFFSET),
+m_useReferenceFrameA(useReferenceFrameA),
+m_flags(0)
 {
+#ifndef	_BT_USE_CENTER_LIMIT_
 	//start with free
 	m_lowerLimit = btScalar(1.0f);
 	m_upperLimit = btScalar(-1.0f);
@@ -145,6 +161,7 @@ m_useReferenceFrameA(useReferenceFrameA)
 	m_relaxationFactor = 1.0f;
 	m_limitSoftness = 0.9f;
 	m_solveLimit = false;
+#endif
 	m_referenceSign = m_useReferenceFrameA ? btScalar(-1.f) : btScalar(1.f);
 }			
 
@@ -152,15 +169,20 @@ m_useReferenceFrameA(useReferenceFrameA)
 
 btHingeConstraint::btHingeConstraint(btRigidBody& rbA, const btTransform& rbAFrame, bool useReferenceFrameA)
 :btTypedConstraint(HINGE_CONSTRAINT_TYPE, rbA),m_rbAFrame(rbAFrame),m_rbBFrame(rbAFrame),
+#ifdef _BT_USE_CENTER_LIMIT_
+m_limit(),
+#endif
 m_angularOnly(false),
 m_enableAngularMotor(false),
 m_useSolveConstraintObsolete(HINGE_USE_OBSOLETE_SOLVER),
-m_useReferenceFrameA(useReferenceFrameA)
+m_useOffsetForConstraintFrame(HINGE_USE_FRAME_OFFSET),
+m_useReferenceFrameA(useReferenceFrameA),
+m_flags(0)
 {
 	///not providing rigidbody B means implicitly using worldspace for body B
 
 	m_rbBFrame.getOrigin() = m_rbA.getCenterOfMassTransform()(m_rbAFrame.getOrigin());
-
+#ifndef	_BT_USE_CENTER_LIMIT_
 	//start with free
 	m_lowerLimit = btScalar(1.0f);
 	m_upperLimit = btScalar(-1.0f);
@@ -168,6 +190,7 @@ m_useReferenceFrameA(useReferenceFrameA)
 	m_relaxationFactor = 1.0f;
 	m_limitSoftness = 0.9f;
 	m_solveLimit = false;
+#endif
 	m_referenceSign = m_useReferenceFrameA ? btScalar(-1.f) : btScalar(1.f);
 }
 
@@ -222,7 +245,6 @@ void	btHingeConstraint::buildJacobian()
 		
 		btPlaneSpace1(m_rbAFrame.getBasis().getColumn(2),jointAxis0local,jointAxis1local);
 
-		getRigidBodyA().getCenterOfMassTransform().getBasis() * m_rbAFrame.getBasis().getColumn(2);
 		btVector3 jointAxis0 = getRigidBodyA().getCenterOfMassTransform().getBasis() * jointAxis0local;
 		btVector3 jointAxis1 = getRigidBodyA().getCenterOfMassTransform().getBasis() * jointAxis1local;
 		btVector3 hingeAxisWorld = getRigidBodyA().getCenterOfMassTransform().getBasis() * m_rbAFrame.getBasis().getColumn(2);
@@ -259,164 +281,6 @@ void	btHingeConstraint::buildJacobian()
 	}
 }
 
-void	btHingeConstraint::solveConstraintObsolete(btSolverBody& bodyA,btSolverBody& bodyB,btScalar	timeStep)
-{
-
-	///for backwards compatibility during the transition to 'getInfo/getInfo2'
-	if (m_useSolveConstraintObsolete)
-	{
-
-		btVector3 pivotAInW = m_rbA.getCenterOfMassTransform()*m_rbAFrame.getOrigin();
-		btVector3 pivotBInW = m_rbB.getCenterOfMassTransform()*m_rbBFrame.getOrigin();
-
-		btScalar tau = btScalar(0.3);
-
-		//linear part
-		if (!m_angularOnly)
-		{
-			btVector3 rel_pos1 = pivotAInW - m_rbA.getCenterOfMassPosition(); 
-			btVector3 rel_pos2 = pivotBInW - m_rbB.getCenterOfMassPosition();
-
-			btVector3 vel1,vel2;
-			bodyA.getVelocityInLocalPointObsolete(rel_pos1,vel1);
-			bodyB.getVelocityInLocalPointObsolete(rel_pos2,vel2);
-			btVector3 vel = vel1 - vel2;
-
-			for (int i=0;i<3;i++)
-			{		
-				const btVector3& normal = m_jac[i].m_linearJointAxis;
-				btScalar jacDiagABInv = btScalar(1.) / m_jac[i].getDiagonal();
-
-				btScalar rel_vel;
-				rel_vel = normal.dot(vel);
-				//positional error (zeroth order error)
-				btScalar depth = -(pivotAInW - pivotBInW).dot(normal); //this is the error projected on the normal
-				btScalar impulse = depth*tau/timeStep  * jacDiagABInv -  rel_vel * jacDiagABInv;
-				m_appliedImpulse += impulse;
-				btVector3 impulse_vector = normal * impulse;
-				btVector3 ftorqueAxis1 = rel_pos1.cross(normal);
-				btVector3 ftorqueAxis2 = rel_pos2.cross(normal);
-				bodyA.applyImpulse(normal*m_rbA.getInvMass(), m_rbA.getInvInertiaTensorWorld()*ftorqueAxis1,impulse);
-				bodyB.applyImpulse(normal*m_rbB.getInvMass(), m_rbB.getInvInertiaTensorWorld()*ftorqueAxis2,-impulse);
-			}
-		}
-
-		
-		{
-			///solve angular part
-
-			// get axes in world space
-			btVector3 axisA =  getRigidBodyA().getCenterOfMassTransform().getBasis() *  m_rbAFrame.getBasis().getColumn(2);
-			btVector3 axisB =  getRigidBodyB().getCenterOfMassTransform().getBasis() *  m_rbBFrame.getBasis().getColumn(2);
-
-			btVector3 angVelA;
-			bodyA.getAngularVelocity(angVelA);
-			btVector3 angVelB;
-			bodyB.getAngularVelocity(angVelB);
-
-			btVector3 angVelAroundHingeAxisA = axisA * axisA.dot(angVelA);
-			btVector3 angVelAroundHingeAxisB = axisB * axisB.dot(angVelB);
-
-			btVector3 angAorthog = angVelA - angVelAroundHingeAxisA;
-			btVector3 angBorthog = angVelB - angVelAroundHingeAxisB;
-			btVector3 velrelOrthog = angAorthog-angBorthog;
-			{
-				
-
-				//solve orthogonal angular velocity correction
-				//btScalar relaxation = btScalar(1.);
-				btScalar len = velrelOrthog.length();
-				if (len > btScalar(0.00001))
-				{
-					btVector3 normal = velrelOrthog.normalized();
-					btScalar denom = getRigidBodyA().computeAngularImpulseDenominator(normal) +
-						getRigidBodyB().computeAngularImpulseDenominator(normal);
-					// scale for mass and relaxation
-					//velrelOrthog *= (btScalar(1.)/denom) * m_relaxationFactor;
-
-					bodyA.applyImpulse(btVector3(0,0,0), m_rbA.getInvInertiaTensorWorld()*velrelOrthog,-(btScalar(1.)/denom));
-					bodyB.applyImpulse(btVector3(0,0,0), m_rbB.getInvInertiaTensorWorld()*velrelOrthog,(btScalar(1.)/denom));
-
-				}
-
-				//solve angular positional correction
-				btVector3 angularError =  axisA.cross(axisB) *(btScalar(1.)/timeStep);
-				btScalar len2 = angularError.length();
-				if (len2>btScalar(0.00001))
-				{
-					btVector3 normal2 = angularError.normalized();
-					btScalar denom2 = getRigidBodyA().computeAngularImpulseDenominator(normal2) +
-							getRigidBodyB().computeAngularImpulseDenominator(normal2);
-					//angularError *= (btScalar(1.)/denom2) * relaxation;
-					
-					bodyA.applyImpulse(btVector3(0,0,0), m_rbA.getInvInertiaTensorWorld()*angularError,(btScalar(1.)/denom2));
-					bodyB.applyImpulse(btVector3(0,0,0), m_rbB.getInvInertiaTensorWorld()*angularError,-(btScalar(1.)/denom2));
-
-				}
-				
-				
-
-
-
-				// solve limit
-				if (m_solveLimit)
-				{
-					btScalar amplitude = ( (angVelB - angVelA).dot( axisA )*m_relaxationFactor + m_correction* (btScalar(1.)/timeStep)*m_biasFactor  ) * m_limitSign;
-
-					btScalar impulseMag = amplitude * m_kHinge;
-
-					// Clamp the accumulated impulse
-					btScalar temp = m_accLimitImpulse;
-					m_accLimitImpulse = btMax(m_accLimitImpulse + impulseMag, btScalar(0) );
-					impulseMag = m_accLimitImpulse - temp;
-
-
-					
-					bodyA.applyImpulse(btVector3(0,0,0), m_rbA.getInvInertiaTensorWorld()*axisA,impulseMag * m_limitSign);
-					bodyB.applyImpulse(btVector3(0,0,0), m_rbB.getInvInertiaTensorWorld()*axisA,-(impulseMag * m_limitSign));
-
-				}
-			}
-
-			//apply motor
-			if (m_enableAngularMotor) 
-			{
-				//todo: add limits too
-				btVector3 angularLimit(0,0,0);
-
-				btVector3 velrel = angVelAroundHingeAxisA - angVelAroundHingeAxisB;
-				btScalar projRelVel = velrel.dot(axisA);
-
-				btScalar desiredMotorVel = m_motorTargetVelocity;
-				btScalar motor_relvel = desiredMotorVel - projRelVel;
-
-				btScalar unclippedMotorImpulse = m_kHinge * motor_relvel;;
-
-				// accumulated impulse clipping:
-				btScalar fMaxImpulse = m_maxMotorImpulse;
-				btScalar newAccImpulse = m_accMotorImpulse + unclippedMotorImpulse;
-				btScalar clippedMotorImpulse = unclippedMotorImpulse;
-				if (newAccImpulse > fMaxImpulse)
-				{
-					newAccImpulse = fMaxImpulse;
-					clippedMotorImpulse = newAccImpulse - m_accMotorImpulse;
-				}
-				else if (newAccImpulse < -fMaxImpulse)
-				{
-					newAccImpulse = -fMaxImpulse;
-					clippedMotorImpulse = newAccImpulse - m_accMotorImpulse;
-				}
-				m_accMotorImpulse += clippedMotorImpulse;
-			
-				bodyA.applyImpulse(btVector3(0,0,0), m_rbA.getInvInertiaTensorWorld()*axisA,clippedMotorImpulse);
-				bodyB.applyImpulse(btVector3(0,0,0), m_rbB.getInvInertiaTensorWorld()*axisA,-clippedMotorImpulse);
-				
-			}
-		}
-	}
-
-}
-
 
 #endif //__SPU__
 
@@ -461,7 +325,14 @@ void btHingeConstraint::getInfo1NonVirtual(btConstraintInfo1* info)
 
 void btHingeConstraint::getInfo2 (btConstraintInfo2* info)
 {
-	getInfo2Internal(info, m_rbA.getCenterOfMassTransform(),m_rbB.getCenterOfMassTransform(),m_rbA.getAngularVelocity(),m_rbB.getAngularVelocity());
+	if(m_useOffsetForConstraintFrame)
+	{
+		getInfo2InternalUsingFrameOffset(info, m_rbA.getCenterOfMassTransform(),m_rbB.getCenterOfMassTransform(),m_rbA.getAngularVelocity(),m_rbB.getAngularVelocity());
+	}
+	else
+	{
+		getInfo2Internal(info, m_rbA.getCenterOfMassTransform(),m_rbB.getCenterOfMassTransform(),m_rbA.getAngularVelocity(),m_rbB.getAngularVelocity());
+	}
 }
 
 
@@ -507,10 +378,13 @@ void btHingeConstraint::getInfo2Internal(btConstraintInfo2* info, const btTransf
 	}
 #endif //#if 0
 	// linear (all fixed)
-    info->m_J1linearAxis[0] = 1;
-    info->m_J1linearAxis[skip + 1] = 1;
-    info->m_J1linearAxis[2 * skip + 2] = 1;
-	
+
+	if (!m_angularOnly)
+	{
+		info->m_J1linearAxis[0] = 1;
+		info->m_J1linearAxis[skip + 1] = 1;
+		info->m_J1linearAxis[2 * skip + 2] = 1;
+	}	
 
 
 
@@ -532,10 +406,13 @@ void btHingeConstraint::getInfo2Internal(btConstraintInfo2* info, const btTransf
 	}
 	// linear RHS
     btScalar k = info->fps * info->erp;
-	for(i = 0; i < 3; i++)
-    {
-        info->m_constraintError[i * skip] = k * (pivotBInW[i] - pivotAInW[i]);
-    }
+	if (!m_angularOnly)
+	{
+		for(i = 0; i < 3; i++)
+		{
+			info->m_constraintError[i * skip] = k * (pivotBInW[i] - pivotAInW[i]);
+		}
+	}
 	// make rotations around X and Y equal
 	// the hinge axis should be the only unconstrained
 	// rotational axis, the angular velocity of the two bodies perpendicular to
@@ -592,8 +469,13 @@ void btHingeConstraint::getInfo2Internal(btConstraintInfo2* info, const btTransf
 	int limit = 0;
 	if(getSolveLimit())
 	{
-		limit_err = m_correction * m_referenceSign;
-		limit = (limit_err > btScalar(0.0)) ? 1 : 2;
+#ifdef	_BT_USE_CENTER_LIMIT_
+	limit_err = m_limit.getCorrection() * m_referenceSign;
+#else
+	limit_err = m_correction * m_referenceSign;
+#endif
+	limit = (limit_err > btScalar(0.0)) ? 1 : 2;
+
 	}
 	// if the hinge has joint limits or motor, add in the extra row
 	int powered = 0;
@@ -620,19 +502,26 @@ void btHingeConstraint::getInfo2Internal(btConstraintInfo2* info, const btTransf
 			powered = 0;
 		}
 		info->m_constraintError[srow] = btScalar(0.0f);
+		btScalar currERP = (m_flags & BT_HINGE_FLAGS_ERP_STOP) ? m_stopERP : info->erp;
 		if(powered)
 		{
-            info->cfm[srow] = btScalar(0.0); 
-			btScalar mot_fact = getMotorFactor(m_hingeAngle, lostop, histop, m_motorTargetVelocity, info->fps * info->erp);
+			if(m_flags & BT_HINGE_FLAGS_CFM_NORM)
+			{
+				info->cfm[srow] = m_normalCFM;
+			}
+			btScalar mot_fact = getMotorFactor(m_hingeAngle, lostop, histop, m_motorTargetVelocity, info->fps * currERP);
 			info->m_constraintError[srow] += mot_fact * m_motorTargetVelocity * m_referenceSign;
 			info->m_lowerLimit[srow] = - m_maxMotorImpulse;
 			info->m_upperLimit[srow] =   m_maxMotorImpulse;
 		}
 		if(limit)
 		{
-			k = info->fps * info->erp;
+			k = info->fps * currERP;
 			info->m_constraintError[srow] += k * limit_err;
-			info->cfm[srow] = btScalar(0.0);
+			if(m_flags & BT_HINGE_FLAGS_CFM_STOP)
+			{
+				info->cfm[srow] = m_stopCFM;
+			}
 			if(lostop == histop) 
 			{
 				// limited low and high simultaneously
@@ -650,7 +539,11 @@ void btHingeConstraint::getInfo2Internal(btConstraintInfo2* info, const btTransf
 				info->m_upperLimit[srow] = 0;
 			}
 			// bounce (we'll use slider parameter abs(1.0 - m_dampingLimAng) for that)
+#ifdef	_BT_USE_CENTER_LIMIT_
+			btScalar bounce = m_limit.getRelaxationFactor();
+#else
 			btScalar bounce = m_relaxationFactor;
+#endif
 			if(bounce > btScalar(0.0))
 			{
 				btScalar vel = angVelA.dot(ax1);
@@ -680,14 +573,22 @@ void btHingeConstraint::getInfo2Internal(btConstraintInfo2* info, const btTransf
 					}
 				}
 			}
+#ifdef	_BT_USE_CENTER_LIMIT_
+			info->m_constraintError[srow] *= m_limit.getBiasFactor();
+#else
 			info->m_constraintError[srow] *= m_biasFactor;
+#endif
 		} // if(limit)
 	} // if angular limit or powered
 }
 
 
-
-
+void btHingeConstraint::setFrames(const btTransform & frameA, const btTransform & frameB) // replaces both stored constraint frames
+{
+	m_rbAFrame = frameA; // new frame for body A
+	m_rbBFrame = frameB; // new frame for body B
+	buildJacobian(); // re-run now that the frames have changed
+}
 
 
 void	btHingeConstraint::updateRHS(btScalar	timeStep)
@@ -707,43 +608,20 @@ btScalar btHingeConstraint::getHingeAngle(const btTransform& transA,const btTran
 	const btVector3 refAxis0  = transA.getBasis() * m_rbAFrame.getBasis().getColumn(0);
 	const btVector3 refAxis1  = transA.getBasis() * m_rbAFrame.getBasis().getColumn(1);
 	const btVector3 swingAxis = transB.getBasis() * m_rbBFrame.getBasis().getColumn(1);
-	btScalar angle = btAtan2Fast(swingAxis.dot(refAxis0), swingAxis.dot(refAxis1));
+//	btScalar angle = btAtan2Fast(swingAxis.dot(refAxis0), swingAxis.dot(refAxis1));
+	btScalar angle = btAtan2(swingAxis.dot(refAxis0), swingAxis.dot(refAxis1));
 	return m_referenceSign * angle;
 }
 
 
-#if 0
-void btHingeConstraint::testLimit()
-{
-	// Compute limit information
-	m_hingeAngle = getHingeAngle();  
-	m_correction = btScalar(0.);
-	m_limitSign = btScalar(0.);
-	m_solveLimit = false;
-	if (m_lowerLimit <= m_upperLimit)
-	{
-		if (m_hingeAngle <= m_lowerLimit)
-		{
-			m_correction = (m_lowerLimit - m_hingeAngle);
-			m_limitSign = 1.0f;
-			m_solveLimit = true;
-		} 
-		else if (m_hingeAngle >= m_upperLimit)
-		{
-			m_correction = m_upperLimit - m_hingeAngle;
-			m_limitSign = -1.0f;
-			m_solveLimit = true;
-		}
-	}
-	return;
-}
-#else
-
 
 void btHingeConstraint::testLimit(const btTransform& transA,const btTransform& transB)
 {
 	// Compute limit information
 	m_hingeAngle = getHingeAngle(transA,transB);
+#ifdef	_BT_USE_CENTER_LIMIT_
+	m_limit.test(m_hingeAngle);
+#else
 	m_correction = btScalar(0.);
 	m_limitSign = btScalar(0.);
 	m_solveLimit = false;
@@ -763,9 +641,10 @@ void btHingeConstraint::testLimit(const btTransform& transA,const btTransform& t
 			m_solveLimit = true;
 		}
 	}
+#endif
 	return;
 }
-#endif
+
 
 static btVector3 vHinge(0, 0, btScalar(1));
 
@@ -785,7 +664,7 @@ void btHingeConstraint::setMotorTarget(const btQuaternion& qAinB, btScalar dt)
 	btScalar targetAngle = qHinge.getAngle();
 	if (targetAngle > SIMD_PI) // long way around. flip quat and recalculate.
 	{
-		qHinge = operator-(qHinge);
+		qHinge = -(qHinge);
 		targetAngle = qHinge.getAngle();
 	}
 	if (qHinge.getZ() < 0)
@@ -796,6 +675,9 @@ void btHingeConstraint::setMotorTarget(const btQuaternion& qAinB, btScalar dt)
 
 void btHingeConstraint::setMotorTarget(btScalar targetAngle, btScalar dt)
 {
+#ifdef	_BT_USE_CENTER_LIMIT_
+	m_limit.fit(targetAngle);
+#else
 	if (m_lowerLimit < m_upperLimit)
 	{
 		if (targetAngle < m_lowerLimit)
@@ -803,7 +685,7 @@ void btHingeConstraint::setMotorTarget(btScalar targetAngle, btScalar dt)
 		else if (targetAngle > m_upperLimit)
 			targetAngle = m_upperLimit;
 	}
-
+#endif
 	// compute angular velocity
 	btScalar curAngle  = getHingeAngle(m_rbA.getCenterOfMassTransform(),m_rbB.getCenterOfMassTransform());
 	btScalar dAngle = targetAngle - curAngle;
@@ -811,3 +693,342 @@ void btHingeConstraint::setMotorTarget(btScalar targetAngle, btScalar dt)
 }
 
 
+/// Fills the solver rows of 'info' for this hinge: rows 0-2 (pivot), rows 3-4 (axis alignment) and, when a limit or motor is active, row 5. Axis and lever arms are blended by the bodies' inverse masses.
+void btHingeConstraint::getInfo2InternalUsingFrameOffset(btConstraintInfo2* info, const btTransform& transA,const btTransform& transB,const btVector3& angVelA,const btVector3& angVelB)
+{
+	btAssert(!m_useSolveConstraintObsolete);
+	int i, s = info->rowskip;
+	// constraint frames of body A and body B in world space
+	btTransform trA = transA*m_rbAFrame;
+	btTransform trB = transB*m_rbBFrame;
+	// pivot point
+//	btVector3 pivotAInW = trA.getOrigin();
+//	btVector3 pivotBInW = trB.getOrigin();
+#if 1
+	// difference between frames in WCS
+	btVector3 ofs = trB.getOrigin() - trA.getOrigin();
+	// now get weight factors depending on masses
+	btScalar miA = getRigidBodyA().getInvMass();
+	btScalar miB = getRigidBodyB().getInvMass();
+	bool hasStaticBody = (miA < SIMD_EPSILON) || (miB < SIMD_EPSILON);
+	btScalar miS = miA + miB;
+	btScalar factA, factB;
+	if(miS > btScalar(0.f))
+	{
+		factA = miB / miS;
+	}
+	else 
+	{
+		factA = btScalar(0.5f);
+	}
+	factB = btScalar(1.0f) - factA;
+	// get the desired direction of hinge axis
+	// as weighted sum of Z-orthos of frameA and frameB in WCS
+	btVector3 ax1A = trA.getBasis().getColumn(2);
+	btVector3 ax1B = trB.getBasis().getColumn(2);
+	btVector3 ax1 = ax1A * factA + ax1B * factB;
+	ax1.normalize();
+	// fill first 3 rows 
+	// we want: velA + wA x relA == velB + wB x relB
+	btTransform bodyA_trans = transA;
+	btTransform bodyB_trans = transB;
+	int s0 = 0;
+	int s1 = s;
+	int s2 = s * 2;
+	int nrow = 2; // last filled row
+	btVector3 tmpA, tmpB, relA, relB, p, q;
+	// get vector from bodyB to frameB in WCS
+	relB = trB.getOrigin() - bodyB_trans.getOrigin();
+	// get its projection to hinge axis
+	btVector3 projB = ax1 * relB.dot(ax1);
+	// get vector directed from bodyB to hinge axis (and orthogonal to it)
+	btVector3 orthoB = relB - projB;
+	// same for bodyA
+	relA = trA.getOrigin() - bodyA_trans.getOrigin();
+	btVector3 projA = ax1 * relA.dot(ax1);
+	btVector3 orthoA = relA - projA;
+	btVector3 totalDist = projA - projB;
+	// get offset vectors relA and relB
+	relA = orthoA + totalDist * factA;
+	relB = orthoB - totalDist * factB;
+	// now choose average ortho to hinge axis
+	p = orthoB * factA + orthoA * factB;
+	btScalar len2 = p.length2();
+	if(len2 > SIMD_EPSILON)
+	{
+		p /= btSqrt(len2);
+	}
+	else
+	{
+		p = trA.getBasis().getColumn(1);
+	}
+	// make one more ortho
+	q = ax1.cross(p);
+	// fill three rows
+	tmpA = relA.cross(p);
+	tmpB = relB.cross(p);
+    for (i=0; i<3; i++) info->m_J1angularAxis[s0+i] = tmpA[i];
+    for (i=0; i<3; i++) info->m_J2angularAxis[s0+i] = -tmpB[i];
+	tmpA = relA.cross(q);
+	tmpB = relB.cross(q);
+	if(hasStaticBody && getSolveLimit())
+	{ // to make constraint between static and dynamic objects more rigid
+		// remove wA (or wB) from equation if angular limit is hit
+		tmpB *= factB;
+		tmpA *= factA;
+	}
+	for (i=0; i<3; i++) info->m_J1angularAxis[s1+i] = tmpA[i];
+    for (i=0; i<3; i++) info->m_J2angularAxis[s1+i] = -tmpB[i];
+	tmpA = relA.cross(ax1);
+	tmpB = relB.cross(ax1);
+	if(hasStaticBody)
+	{ // to make constraint between static and dynamic objects more rigid
+		// remove wA (or wB) from equation
+		tmpB *= factB;
+		tmpA *= factA;
+	}
+	for (i=0; i<3; i++) info->m_J1angularAxis[s2+i] = tmpA[i];
+    for (i=0; i<3; i++) info->m_J2angularAxis[s2+i] = -tmpB[i];
+
+	btScalar k = info->fps * info->erp;
+
+	if (!m_angularOnly)
+	{
+		for (i=0; i<3; i++) info->m_J1linearAxis[s0+i] = p[i];
+		for (i=0; i<3; i++) info->m_J1linearAxis[s1+i] = q[i];
+		for (i=0; i<3; i++) info->m_J1linearAxis[s2+i] = ax1[i];
+	
+	// compute three elements of right hand side (frame offset projected on p, q and ax1)
+	
+		btScalar rhs = k * p.dot(ofs);
+		info->m_constraintError[s0] = rhs;
+		rhs = k * q.dot(ofs);
+		info->m_constraintError[s1] = rhs;
+		rhs = k * ax1.dot(ofs);
+		info->m_constraintError[s2] = rhs;
+	}
+	// the hinge axis should be the only unconstrained
+	// rotational axis, the angular velocity of the two bodies perpendicular to
+	// the hinge axis should be equal. thus the constraint equations are
+	//    p*w1 - p*w2 = 0
+	//    q*w1 - q*w2 = 0
+	// where p and q are unit vectors normal to the hinge axis, and w1 and w2
+	// are the angular velocity vectors of the two bodies.
+	int s3 = 3 * s;
+	int s4 = 4 * s;
+	info->m_J1angularAxis[s3 + 0] = p[0];
+	info->m_J1angularAxis[s3 + 1] = p[1];
+	info->m_J1angularAxis[s3 + 2] = p[2];
+	info->m_J1angularAxis[s4 + 0] = q[0];
+	info->m_J1angularAxis[s4 + 1] = q[1];
+	info->m_J1angularAxis[s4 + 2] = q[2];
+
+	info->m_J2angularAxis[s3 + 0] = -p[0];
+	info->m_J2angularAxis[s3 + 1] = -p[1];
+	info->m_J2angularAxis[s3 + 2] = -p[2];
+	info->m_J2angularAxis[s4 + 0] = -q[0];
+	info->m_J2angularAxis[s4 + 1] = -q[1];
+	info->m_J2angularAxis[s4 + 2] = -q[2];
+	// compute the right hand side of the constraint equation. set relative
+	// body velocities along p and q to bring the hinge back into alignment.
+	// if ax1A,ax1B are the unit length hinge axes as computed from bodyA and
+	// bodyB, we need to rotate both bodies along the axis u = (ax1A x ax1B).
+	// if "theta" is the angle between ax1A and ax1B, we need an angular velocity
+	// along u to cover angle erp*theta in one step :
+	//   |angular_velocity| = angle/time = erp*theta / stepsize
+	//                      = (erp*fps) * theta
+	//    angular_velocity  = |angular_velocity| * (ax1A x ax1B) / |ax1A x ax1B|
+	//                      = (erp*fps) * theta * (ax1A x ax1B) / sin(theta)
+	// ...as ax1A and ax1B are unit length. if theta is smallish,
+	// theta ~= sin(theta), so
+	//    angular_velocity  = (erp*fps) * (ax1A x ax1B)
+	// ax1A x ax1B is orthogonal to the blended axis ax1, so we project the
+	// angular velocity to p and q to find the right hand side.
+	k = info->fps * info->erp;
+	btVector3 u = ax1A.cross(ax1B);
+	info->m_constraintError[s3] = k * u.dot(p);
+	info->m_constraintError[s4] = k * u.dot(q);
+#endif
+	// check angular limits
+	nrow = 4; // last filled row
+	int srow;
+	btScalar limit_err = btScalar(0.0);
+	int limit = 0;
+	if(getSolveLimit())
+	{
+#ifdef	_BT_USE_CENTER_LIMIT_
+	limit_err = m_limit.getCorrection() * m_referenceSign;
+#else
+	limit_err = m_correction * m_referenceSign;
+#endif
+	limit = (limit_err > btScalar(0.0)) ? 1 : 2; // 1 = low limit, 2 = high limit (see the branches below)
+
+	}
+	// if the hinge has joint limits or motor, add in the extra row
+	int powered = 0;
+	if(getEnableAngularMotor())
+	{
+		powered = 1;
+	}
+	if(limit || powered) 
+	{
+		nrow++;
+		srow = nrow * info->rowskip;
+		info->m_J1angularAxis[srow+0] = ax1[0];
+		info->m_J1angularAxis[srow+1] = ax1[1];
+		info->m_J1angularAxis[srow+2] = ax1[2];
+
+		info->m_J2angularAxis[srow+0] = -ax1[0];
+		info->m_J2angularAxis[srow+1] = -ax1[1];
+		info->m_J2angularAxis[srow+2] = -ax1[2];
+
+		btScalar lostop = getLowerLimit();
+		btScalar histop = getUpperLimit();
+		if(limit && (lostop == histop))
+		{  // the joint motor is ineffective
+			powered = 0;
+		}
+		info->m_constraintError[srow] = btScalar(0.0f);
+		btScalar currERP = (m_flags & BT_HINGE_FLAGS_ERP_STOP) ? m_stopERP : info->erp;
+		if(powered)
+		{
+			if(m_flags & BT_HINGE_FLAGS_CFM_NORM)
+			{
+				info->cfm[srow] = m_normalCFM;
+			}
+			btScalar mot_fact = getMotorFactor(m_hingeAngle, lostop, histop, m_motorTargetVelocity, info->fps * currERP);
+			info->m_constraintError[srow] += mot_fact * m_motorTargetVelocity * m_referenceSign;
+			info->m_lowerLimit[srow] = - m_maxMotorImpulse;
+			info->m_upperLimit[srow] =   m_maxMotorImpulse;
+		}
+		if(limit)
+		{
+			k = info->fps * currERP;
+			info->m_constraintError[srow] += k * limit_err;
+			if(m_flags & BT_HINGE_FLAGS_CFM_STOP)
+			{
+				info->cfm[srow] = m_stopCFM;
+			}
+			if(lostop == histop) 
+			{
+				// limited low and high simultaneously
+				info->m_lowerLimit[srow] = -SIMD_INFINITY;
+				info->m_upperLimit[srow] = SIMD_INFINITY;
+			}
+			else if(limit == 1) 
+			{ // low limit
+				info->m_lowerLimit[srow] = 0;
+				info->m_upperLimit[srow] = SIMD_INFINITY;
+			}
+			else 
+			{ // high limit
+				info->m_lowerLimit[srow] = -SIMD_INFINITY;
+				info->m_upperLimit[srow] = 0;
+			}
+			// bounce: the limit's relaxation factor scales the incoming relative velocity
+#ifdef	_BT_USE_CENTER_LIMIT_
+			btScalar bounce = m_limit.getRelaxationFactor();
+#else
+			btScalar bounce = m_relaxationFactor;
+#endif
+			if(bounce > btScalar(0.0))
+			{
+				btScalar vel = angVelA.dot(ax1);
+				vel -= angVelB.dot(ax1);
+				// only apply bounce if the velocity is incoming, and if the
+				// resulting c[] exceeds what we already have.
+				if(limit == 1)
+				{	// low limit
+					if(vel < 0)
+					{
+						btScalar newc = -bounce * vel;
+						if(newc > info->m_constraintError[srow])
+						{
+							info->m_constraintError[srow] = newc;
+						}
+					}
+				}
+				else
+				{	// high limit - all those computations are reversed
+					if(vel > 0)
+					{
+						btScalar newc = -bounce * vel;
+						if(newc < info->m_constraintError[srow])
+						{
+							info->m_constraintError[srow] = newc;
+						}
+					}
+				}
+			}
+#ifdef	_BT_USE_CENTER_LIMIT_
+			info->m_constraintError[srow] *= m_limit.getBiasFactor();
+#else
+			info->m_constraintError[srow] *= m_biasFactor;
+#endif
+		} // if(limit)
+	} // if angular limit or powered
+}
+
+
+///Override the global default of a solver parameter for this hinge: BT_CONSTRAINT_STOP_ERP, BT_CONSTRAINT_STOP_CFM or BT_CONSTRAINT_CFM.
+///Only axis -1 (the default) or 5 is accepted; any other axis or parameter goes to btAssertConstrParams(0) and stores nothing.
+void btHingeConstraint::setParam(int num, btScalar value, int axis)
+{
+	if((axis == -1) || (axis == 5))
+	{
+		switch(num)
+		{	
+			case BT_CONSTRAINT_STOP_ERP :
+				m_stopERP = value;
+				m_flags |= BT_HINGE_FLAGS_ERP_STOP; // remember that the value was overridden
+				break;
+			case BT_CONSTRAINT_STOP_CFM :
+				m_stopCFM = value;
+				m_flags |= BT_HINGE_FLAGS_CFM_STOP; // remember that the value was overridden
+				break;
+			case BT_CONSTRAINT_CFM :
+				m_normalCFM = value;
+				m_flags |= BT_HINGE_FLAGS_CFM_NORM; // remember that the value was overridden
+				break;
+			default : 
+				btAssertConstrParams(0);
+		}
+	}
+	else
+	{
+		btAssertConstrParams(0);
+	}
+}
+
+///Return the locally stored value of a parameter (btAssertConstrParams checks that it was overridden first); returns 0 for an unsupported axis or parameter.
+btScalar btHingeConstraint::getParam(int num, int axis) const 
+{
+	btScalar retVal = 0;
+	if((axis == -1) || (axis == 5))
+	{
+		switch(num)
+		{	
+			case BT_CONSTRAINT_STOP_ERP :
+				btAssertConstrParams(m_flags & BT_HINGE_FLAGS_ERP_STOP);
+				retVal = m_stopERP;
+				break;
+			case BT_CONSTRAINT_STOP_CFM :
+				btAssertConstrParams(m_flags & BT_HINGE_FLAGS_CFM_STOP);
+				retVal = m_stopCFM;
+				break;
+			case BT_CONSTRAINT_CFM :
+				btAssertConstrParams(m_flags & BT_HINGE_FLAGS_CFM_NORM);
+				retVal = m_normalCFM;
+				break;
+			default : 
+				btAssertConstrParams(0);
+		}
+	}
+	else
+	{
+		btAssertConstrParams(0);
+	}
+	return retVal;
+}
+
+
diff --git a/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btHingeConstraint.h b/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btHingeConstraint.h
index 270f3f85f..a7f2cca55 100644
--- a/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btHingeConstraint.h
+++ b/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btHingeConstraint.h
@@ -15,8 +15,11 @@ subject to the following restrictions:
 
 /* Hinge Constraint by Dirk Gregorius. Limits added by Marcus Hennix at Starbreeze Studios */
 
-#ifndef HINGECONSTRAINT_H
-#define HINGECONSTRAINT_H
+#ifndef BT_HINGECONSTRAINT_H
+#define BT_HINGECONSTRAINT_H
+
+#define _BT_USE_CENTER_LIMIT_ 1
+
 
 #include "LinearMath/btVector3.h"
 #include "btJacobianEntry.h"
@@ -24,6 +27,24 @@ subject to the following restrictions:
 
 class btRigidBody;
 
+#ifdef BT_USE_DOUBLE_PRECISION
+#define btHingeConstraintData	btHingeConstraintDoubleData
+#define btHingeConstraintDataName	"btHingeConstraintDoubleData"
+#else
+#define btHingeConstraintData	btHingeConstraintFloatData
+#define btHingeConstraintDataName	"btHingeConstraintFloatData"
+#endif //BT_USE_DOUBLE_PRECISION
+
+
+
+enum btHingeFlags // bit flags kept in btHingeConstraint::m_flags
+{
+	BT_HINGE_FLAGS_CFM_STOP = 1, // m_stopCFM was overridden via setParam(BT_CONSTRAINT_STOP_CFM)
+	BT_HINGE_FLAGS_ERP_STOP = 2, // m_stopERP was overridden via setParam(BT_CONSTRAINT_STOP_ERP)
+	BT_HINGE_FLAGS_CFM_NORM = 4 // m_normalCFM was overridden via setParam(BT_CONSTRAINT_CFM)
+};
+
+
 /// hinge constraint between two rigidbodies each with a pivotpoint that descibes the axis location in local space
 /// axis defines the orientation of the hinge axis
 ATTRIBUTE_ALIGNED16(class) btHingeConstraint : public btTypedConstraint
@@ -40,42 +61,55 @@ public:
 	btScalar	m_motorTargetVelocity;
 	btScalar	m_maxMotorImpulse;
 
-	btScalar	m_limitSoftness; 
-	btScalar	m_biasFactor; 
-	btScalar    m_relaxationFactor; 
-
-	btScalar    m_lowerLimit;	
-	btScalar    m_upperLimit;	
-	
-	btScalar	m_kHinge;
 
+#ifdef	_BT_USE_CENTER_LIMIT_
+	btAngularLimit	m_limit;
+#else
+	btScalar	m_lowerLimit;	
+	btScalar	m_upperLimit;	
 	btScalar	m_limitSign;
 	btScalar	m_correction;
 
+	btScalar	m_limitSoftness; 
+	btScalar	m_biasFactor; 
+	btScalar	m_relaxationFactor; 
+
+	bool		m_solveLimit;
+#endif
+
+	btScalar	m_kHinge;
+
+
 	btScalar	m_accLimitImpulse;
 	btScalar	m_hingeAngle;
-	btScalar    m_referenceSign;
+	btScalar	m_referenceSign;
 
 	bool		m_angularOnly;
 	bool		m_enableAngularMotor;
-	bool		m_solveLimit;
 	bool		m_useSolveConstraintObsolete;
+	bool		m_useOffsetForConstraintFrame;
 	bool		m_useReferenceFrameA;
 
 	btScalar	m_accMotorImpulse;
 
+	int			m_flags;
+	btScalar	m_normalCFM;
+	btScalar	m_stopCFM;
+	btScalar	m_stopERP;
+
 	
 public:
 
-	btHingeConstraint(btRigidBody& rbA,btRigidBody& rbB, const btVector3& pivotInA,const btVector3& pivotInB, btVector3& axisInA,btVector3& axisInB, bool useReferenceFrameA = false);
+	BT_DECLARE_ALIGNED_ALLOCATOR();
+	
+	btHingeConstraint(btRigidBody& rbA,btRigidBody& rbB, const btVector3& pivotInA,const btVector3& pivotInB, const btVector3& axisInA,const btVector3& axisInB, bool useReferenceFrameA = false);
 
-	btHingeConstraint(btRigidBody& rbA,const btVector3& pivotInA,btVector3& axisInA, bool useReferenceFrameA = false);
+	btHingeConstraint(btRigidBody& rbA,const btVector3& pivotInA,const btVector3& axisInA, bool useReferenceFrameA = false);
 	
 	btHingeConstraint(btRigidBody& rbA,btRigidBody& rbB, const btTransform& rbAFrame, const btTransform& rbBFrame, bool useReferenceFrameA = false);
 
 	btHingeConstraint(btRigidBody& rbA,const btTransform& rbAFrame, bool useReferenceFrameA = false);
 
-	btHingeConstraint();
 
 	virtual void	buildJacobian();
 
@@ -88,8 +122,8 @@ public:
 	void	getInfo2NonVirtual(btConstraintInfo2* info,const btTransform& transA,const btTransform& transB,const btVector3& angVelA,const btVector3& angVelB);
 
 	void	getInfo2Internal(btConstraintInfo2* info,const btTransform& transA,const btTransform& transB,const btVector3& angVelA,const btVector3& angVelB);
+	void	getInfo2InternalUsingFrameOffset(btConstraintInfo2* info,const btTransform& transA,const btTransform& transB,const btVector3& angVelA,const btVector3& angVelB);
 		
-	virtual	void	solveConstraintObsolete(btSolverBody& bodyA,btSolverBody& bodyB,btScalar	timeStep);
 
 	void	updateRHS(btScalar	timeStep);
 
@@ -110,7 +144,19 @@ public:
 	btRigidBody& getRigidBodyB()	
 	{		
 		return m_rbB;	
-	}	
+	}
+
+	btTransform& getFrameOffsetA() // mutable reference to m_rbAFrame, the constraint frame stored for body A
+	{
+	return m_rbAFrame;
+	}
+
+	btTransform& getFrameOffsetB() // mutable reference to m_rbBFrame, the constraint frame stored for body B
+	{
+		return m_rbBFrame;
+	}
+
+	void setFrames(const btTransform& frameA, const btTransform& frameB);
 	
 	void	setAngularOnly(bool angularOnly)
 	{
@@ -135,13 +181,15 @@ public:
 
 	void	setLimit(btScalar low,btScalar high,btScalar _softness = 0.9f, btScalar _biasFactor = 0.3f, btScalar _relaxationFactor = 1.0f)
 	{
+#ifdef	_BT_USE_CENTER_LIMIT_
+		m_limit.set(low, high, _softness, _biasFactor, _relaxationFactor);
+#else
 		m_lowerLimit = btNormalizeAngle(low);
 		m_upperLimit = btNormalizeAngle(high);
-
 		m_limitSoftness =  _softness;
 		m_biasFactor = _biasFactor;
 		m_relaxationFactor = _relaxationFactor;
-
+#endif
 	}
 
 	void	setAxis(btVector3& axisInA)
@@ -160,21 +208,31 @@ public:
 		btVector3 rbAxisB1 =  quatRotate(rotationArc,rbAxisA1);
 		btVector3 rbAxisB2 = axisInB.cross(rbAxisB1);
 
+		m_rbBFrame.getOrigin() = m_rbB.getCenterOfMassTransform().inverse()(m_rbA.getCenterOfMassTransform()(pivotInA));
 
-		m_rbBFrame.getOrigin() = m_rbA.getCenterOfMassTransform()(pivotInA);
 		m_rbBFrame.getBasis().setValue( rbAxisB1.getX(),rbAxisB2.getX(),axisInB.getX(),
 										rbAxisB1.getY(),rbAxisB2.getY(),axisInB.getY(),
 										rbAxisB1.getZ(),rbAxisB2.getZ(),axisInB.getZ() );
+		m_rbBFrame.getBasis() = m_rbB.getCenterOfMassTransform().getBasis().inverse() * m_rbBFrame.getBasis();
+
 	}
 
 	btScalar	getLowerLimit() const
 	{
-		return m_lowerLimit;
+#ifdef	_BT_USE_CENTER_LIMIT_
+	return m_limit.getLow();
+#else
+	return m_lowerLimit;
+#endif
 	}
 
 	btScalar	getUpperLimit() const
 	{
-		return m_upperLimit;
+#ifdef	_BT_USE_CENTER_LIMIT_
+	return m_limit.getHigh();
+#else		
+	return m_upperLimit;
+#endif
 	}
 
 
@@ -193,12 +251,20 @@ public:
 
 	inline int getSolveLimit()
 	{
-		return m_solveLimit;
+#ifdef	_BT_USE_CENTER_LIMIT_
+	return m_limit.isLimit();
+#else
+	return m_solveLimit;
+#endif
 	}
 
 	inline btScalar getLimitSign()
 	{
+#ifdef	_BT_USE_CENTER_LIMIT_
+	return m_limit.getSign();
+#else
 		return m_limitSign;
+#endif
 	}
 
 	inline bool getAngularOnly() 
@@ -217,7 +283,101 @@ public:
 	{ 
 		return m_maxMotorImpulse; 
 	}
+	// get/set m_useOffsetForConstraintFrame (presumably selects getInfo2InternalUsingFrameOffset - TODO confirm in getInfo2NonVirtual)
+	bool getUseFrameOffset() { return m_useOffsetForConstraintFrame; }
+	void setUseFrameOffset(bool frameOffsetOnOff) { m_useOffsetForConstraintFrame = frameOffsetOnOff; }
+
+
+	///override the default global value of a parameter (such as ERP or CFM), optionally provide the axis (0..5). 
+	///If no axis is provided, it uses the default axis for this constraint.
+	virtual	void	setParam(int num, btScalar value, int axis = -1);
+	///return the local value of parameter
+	virtual	btScalar getParam(int num, int axis = -1) const;
+
+	virtual	int	calculateSerializeBufferSize() const;
+
+	///fills the dataBuffer and returns the struct name (and 0 on failure)
+	virtual	const char*	serialize(void* dataBuffer, btSerializer* serializer) const;
+
 
 };
 
-#endif //HINGECONSTRAINT_H
+///Do not change these serialization structures: any change requires an updated sBulletDNAstr/sBulletDNAstr64. Note that the scalar members below stay float even in this double layout.
+struct	btHingeConstraintDoubleData
+{
+	btTypedConstraintData	m_typeConstraintData;
+	btTransformDoubleData m_rbAFrame; // constraint axes. Assumes z is hinge axis.
+	btTransformDoubleData m_rbBFrame;
+	int			m_useReferenceFrameA;
+	int			m_angularOnly;
+	int			m_enableAngularMotor;
+	float	m_motorTargetVelocity;
+	float	m_maxMotorImpulse;
+
+	float	m_lowerLimit;
+	float	m_upperLimit;
+	float	m_limitSoftness;
+	float	m_biasFactor;
+	float	m_relaxationFactor;
+
+};
+///Do not change these serialization structures: any change requires an updated sBulletDNAstr/sBulletDNAstr64.
+struct	btHingeConstraintFloatData
+{
+	btTypedConstraintData	m_typeConstraintData;
+	btTransformFloatData m_rbAFrame; // constraint axes. Assumes z is hinge axis.
+	btTransformFloatData m_rbBFrame;
+	int			m_useReferenceFrameA;
+	int			m_angularOnly;
+	
+	int			m_enableAngularMotor;
+	float	m_motorTargetVelocity;
+	float	m_maxMotorImpulse;
+
+	float	m_lowerLimit;
+	float	m_upperLimit;
+	float	m_limitSoftness;
+	float	m_biasFactor;
+	float	m_relaxationFactor;
+
+};
+
+
+///returns the byte size of the struct that serialize() fills (btHingeConstraintData for the active precision)
+SIMD_FORCE_INLINE	int	btHingeConstraint::calculateSerializeBufferSize() const
+{
+	return sizeof(btHingeConstraintData);
+}
+
+	///fills dataBuffer (cast to btHingeConstraintData) and returns the struct name; this implementation has no path that returns 0
+SIMD_FORCE_INLINE	const char*	btHingeConstraint::serialize(void* dataBuffer, btSerializer* serializer) const
+{
+	btHingeConstraintData* hingeData = (btHingeConstraintData*)dataBuffer;
+	btTypedConstraint::serialize(&hingeData->m_typeConstraintData,serializer);
+
+	m_rbAFrame.serialize(hingeData->m_rbAFrame);
+	m_rbBFrame.serialize(hingeData->m_rbBFrame);
+
+	hingeData->m_angularOnly = m_angularOnly;
+	hingeData->m_enableAngularMotor = m_enableAngularMotor;
+	hingeData->m_maxMotorImpulse = float(m_maxMotorImpulse);
+	hingeData->m_motorTargetVelocity = float(m_motorTargetVelocity);
+	hingeData->m_useReferenceFrameA = m_useReferenceFrameA;
+#ifdef	_BT_USE_CENTER_LIMIT_
+	hingeData->m_lowerLimit = float(m_limit.getLow());
+	hingeData->m_upperLimit = float(m_limit.getHigh());
+	hingeData->m_limitSoftness = float(m_limit.getSoftness());
+	hingeData->m_biasFactor = float(m_limit.getBiasFactor());
+	hingeData->m_relaxationFactor = float(m_limit.getRelaxationFactor());
+#else
+	hingeData->m_lowerLimit = float(m_lowerLimit);
+	hingeData->m_upperLimit = float(m_upperLimit);
+	hingeData->m_limitSoftness = float(m_limitSoftness);
+	hingeData->m_biasFactor = float(m_biasFactor);
+	hingeData->m_relaxationFactor = float(m_relaxationFactor);
+#endif
+
+	return btHingeConstraintDataName; // m_flags and the setParam overrides (m_normalCFM, m_stopCFM, m_stopERP) are not written
+}
+
+#endif //BT_HINGECONSTRAINT_H
diff --git a/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btJacobianEntry.h b/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btJacobianEntry.h
index 22a8af66b..125580d19 100644
--- a/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btJacobianEntry.h
+++ b/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btJacobianEntry.h
@@ -13,11 +13,10 @@ subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-#ifndef JACOBIAN_ENTRY_H
-#define JACOBIAN_ENTRY_H
+#ifndef BT_JACOBIAN_ENTRY_H
+#define BT_JACOBIAN_ENTRY_H
 
-#include "LinearMath/btVector3.h"
-#include "BulletDynamics/Dynamics/btRigidBody.h"
+#include "LinearMath/btMatrix3x3.h"
 
 
 //notes:
@@ -153,4 +152,4 @@ public:
 
 };
 
-#endif //JACOBIAN_ENTRY_H
+#endif //BT_JACOBIAN_ENTRY_H
diff --git a/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btPoint2PointConstraint.cpp b/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btPoint2PointConstraint.cpp
index 0c58b907d..11b2ec801 100644
--- a/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btPoint2PointConstraint.cpp
+++ b/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btPoint2PointConstraint.cpp
@@ -20,14 +20,11 @@ subject to the following restrictions:
 
 
 
-btPoint2PointConstraint::btPoint2PointConstraint()
-:btTypedConstraint(POINT2POINT_CONSTRAINT_TYPE),
-m_useSolveConstraintObsolete(false)
-{
-}
+
 
 btPoint2PointConstraint::btPoint2PointConstraint(btRigidBody& rbA,btRigidBody& rbB, const btVector3& pivotInA,const btVector3& pivotInB)
 :btTypedConstraint(POINT2POINT_CONSTRAINT_TYPE,rbA,rbB),m_pivotInA(pivotInA),m_pivotInB(pivotInB),
+m_flags(0),
 m_useSolveConstraintObsolete(false)
 {
 
@@ -36,6 +33,7 @@ m_useSolveConstraintObsolete(false)
 
 btPoint2PointConstraint::btPoint2PointConstraint(btRigidBody& rbA,const btVector3& pivotInA)
 :btTypedConstraint(POINT2POINT_CONSTRAINT_TYPE,rbA),m_pivotInA(pivotInA),m_pivotInB(rbA.getCenterOfMassTransform()(pivotInA)),
+m_flags(0),
 m_useSolveConstraintObsolete(false)
 {
 	
@@ -126,7 +124,7 @@ void btPoint2PointConstraint::getInfo2NonVirtual (btConstraintInfo2* info, const
 	btVector3 a2 = body1_trans.getBasis()*getPivotInB();
    
 	{
-		btVector3 a2n = -a2;
+	//	btVector3 a2n = -a2;
 		btVector3* angular0 = (btVector3*)(info->m_J2angularAxis);
 		btVector3* angular1 = (btVector3*)(info->m_J2angularAxis+info->rowskip);
 		btVector3* angular2 = (btVector3*)(info->m_J2angularAxis+2*info->rowskip);
@@ -136,14 +134,21 @@ void btPoint2PointConstraint::getInfo2NonVirtual (btConstraintInfo2* info, const
 
 
     // set right hand side
-    btScalar k = info->fps * info->erp;
+	btScalar currERP = (m_flags & BT_P2P_FLAGS_ERP) ? m_erp : info->erp;
+    btScalar k = info->fps * currERP;
     int j;
-
 	for (j=0; j<3; j++)
     {
-        info->m_constraintError[j*info->rowskip] = k * (a2[j] + body1_trans.getOrigin()[j] -                     a1[j] - body0_trans.getOrigin()[j]);
+        info->m_constraintError[j*info->rowskip] = k * (a2[j] + body1_trans.getOrigin()[j] - a1[j] - body0_trans.getOrigin()[j]);
 		//printf("info->m_constraintError[%d]=%f\n",j,info->m_constraintError[j]);
     }
+	if(m_flags & BT_P2P_FLAGS_CFM)
+	{
+		for (j=0; j<3; j++)
+		{
+			info->cfm[j*info->rowskip] = m_cfm;
+		}
+	}
 
 	btScalar impulseClamp = m_setting.m_impulseClamp;//
 	for (j=0; j<3; j++)
@@ -154,85 +159,11 @@ void btPoint2PointConstraint::getInfo2NonVirtual (btConstraintInfo2* info, const
 			info->m_upperLimit[j*info->rowskip] = impulseClamp;
 		}
 	}
+	info->m_damping = m_setting.m_damping;
 	
 }
 
 
-void	btPoint2PointConstraint::solveConstraintObsolete(btSolverBody& bodyA,btSolverBody& bodyB,btScalar	timeStep)
-{
-
-	if (m_useSolveConstraintObsolete)
-	{
-		btVector3 pivotAInW = m_rbA.getCenterOfMassTransform()*m_pivotInA;
-		btVector3 pivotBInW = m_rbB.getCenterOfMassTransform()*m_pivotInB;
-
-
-		btVector3 normal(0,0,0);
-		
-
-	//	btVector3 angvelA = m_rbA.getCenterOfMassTransform().getBasis().transpose() * m_rbA.getAngularVelocity();
-	//	btVector3 angvelB = m_rbB.getCenterOfMassTransform().getBasis().transpose() * m_rbB.getAngularVelocity();
-
-		for (int i=0;i<3;i++)
-		{		
-			normal[i] = 1;
-			btScalar jacDiagABInv = btScalar(1.) / m_jac[i].getDiagonal();
-
-			btVector3 rel_pos1 = pivotAInW - m_rbA.getCenterOfMassPosition(); 
-			btVector3 rel_pos2 = pivotBInW - m_rbB.getCenterOfMassPosition();
-			//this jacobian entry could be re-used for all iterations
-			
-			btVector3 vel1,vel2;
-			bodyA.getVelocityInLocalPointObsolete(rel_pos1,vel1);
-			bodyB.getVelocityInLocalPointObsolete(rel_pos2,vel2);
-			btVector3 vel = vel1 - vel2;
-			
-			btScalar rel_vel;
-			rel_vel = normal.dot(vel);
-
-		/*
-			//velocity error (first order error)
-			btScalar rel_vel = m_jac[i].getRelativeVelocity(m_rbA.getLinearVelocity(),angvelA,
-															m_rbB.getLinearVelocity(),angvelB);
-		*/
-		
-			//positional error (zeroth order error)
-			btScalar depth = -(pivotAInW - pivotBInW).dot(normal); //this is the error projected on the normal
-			
-			btScalar deltaImpulse = depth*m_setting.m_tau/timeStep  * jacDiagABInv -  m_setting.m_damping * rel_vel * jacDiagABInv;
-
-			btScalar impulseClamp = m_setting.m_impulseClamp;
-			
-			const btScalar sum = btScalar(m_appliedImpulse) + deltaImpulse;
-			if (sum < -impulseClamp)
-			{
-				deltaImpulse = -impulseClamp-m_appliedImpulse;
-				m_appliedImpulse = -impulseClamp;
-			}
-			else if (sum > impulseClamp) 
-			{
-				deltaImpulse = impulseClamp-m_appliedImpulse;
-				m_appliedImpulse = impulseClamp;
-			}
-			else
-			{
-				m_appliedImpulse = sum;
-			}
-
-			
-			btVector3 impulse_vector = normal * deltaImpulse;
-			
-			btVector3 ftorqueAxis1 = rel_pos1.cross(normal);
-			btVector3 ftorqueAxis2 = rel_pos2.cross(normal);
-			bodyA.applyImpulse(normal*m_rbA.getInvMass(), m_rbA.getInvInertiaTensorWorld()*ftorqueAxis1,deltaImpulse);
-			bodyB.applyImpulse(normal*m_rbB.getInvMass(), m_rbB.getInvInertiaTensorWorld()*ftorqueAxis2,-deltaImpulse);
-
-
-			normal[i] = 0;
-		}
-	}
-
-}
 
 void	btPoint2PointConstraint::updateRHS(btScalar	timeStep)
 {
@@ -240,3 +171,60 @@ void	btPoint2PointConstraint::updateRHS(btScalar	timeStep)
 
 }
 
+///Override the global ERP or CFM for this constraint: BT_CONSTRAINT_ERP/STOP_ERP both store m_erp, BT_CONSTRAINT_CFM/STOP_CFM both store m_cfm.
+///Only axis -1 (the default) is accepted; any other axis or parameter goes to btAssertConstrParams(0) and stores nothing.
+void btPoint2PointConstraint::setParam(int num, btScalar value, int axis)
+{
+	if(axis != -1)
+	{
+		btAssertConstrParams(0);
+	}
+	else
+	{
+		switch(num)
+		{
+			case BT_CONSTRAINT_ERP :
+			case BT_CONSTRAINT_STOP_ERP :
+				m_erp = value; 
+				m_flags |= BT_P2P_FLAGS_ERP; // remember that the value was overridden
+				break;
+			case BT_CONSTRAINT_CFM :
+			case BT_CONSTRAINT_STOP_CFM :
+				m_cfm = value; 
+				m_flags |= BT_P2P_FLAGS_CFM; // remember that the value was overridden
+				break;
+			default: 
+				btAssertConstrParams(0);
+		}
+	}
+}
+
+///Return the locally stored ERP/CFM (btAssertConstrParams checks that it was overridden first); returns SIMD_INFINITY for an unsupported axis or parameter.
+btScalar btPoint2PointConstraint::getParam(int num, int axis) const 
+{
+	btScalar retVal(SIMD_INFINITY);
+	if(axis != -1)
+	{
+		btAssertConstrParams(0);
+	}
+	else
+	{
+		switch(num)
+		{
+			case BT_CONSTRAINT_ERP :
+			case BT_CONSTRAINT_STOP_ERP :
+				btAssertConstrParams(m_flags & BT_P2P_FLAGS_ERP);
+				retVal = m_erp; 
+				break;
+			case BT_CONSTRAINT_CFM :
+			case BT_CONSTRAINT_STOP_CFM :
+				btAssertConstrParams(m_flags & BT_P2P_FLAGS_CFM);
+				retVal = m_cfm; 
+				break;
+			default: 
+				btAssertConstrParams(0);
+		}
+	}
+	return retVal;
+}
+	
diff --git a/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btPoint2PointConstraint.h b/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btPoint2PointConstraint.h
index dcc194068..1e13416df 100644
--- a/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btPoint2PointConstraint.h
+++ b/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btPoint2PointConstraint.h
@@ -13,8 +13,8 @@ subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-#ifndef POINT2POINTCONSTRAINT_H
-#define POINT2POINTCONSTRAINT_H
+#ifndef BT_POINT2POINTCONSTRAINT_H
+#define BT_POINT2POINTCONSTRAINT_H
 
 #include "LinearMath/btVector3.h"
 #include "btJacobianEntry.h"
@@ -22,6 +22,15 @@ subject to the following restrictions:
 
 class btRigidBody;
 
+
+#ifdef BT_USE_DOUBLE_PRECISION
+#define btPoint2PointConstraintData	btPoint2PointConstraintDoubleData
+#define btPoint2PointConstraintDataName	"btPoint2PointConstraintDoubleData"
+#else
+#define btPoint2PointConstraintData	btPoint2PointConstraintFloatData
+#define btPoint2PointConstraintDataName	"btPoint2PointConstraintFloatData"
+#endif //BT_USE_DOUBLE_PRECISION
+
 struct	btConstraintSetting
 {
 	btConstraintSetting()	:
@@ -35,6 +44,12 @@ struct	btConstraintSetting
 	btScalar		m_impulseClamp;
 };
 
+enum btPoint2PointFlags // bit flags kept in btPoint2PointConstraint::m_flags
+{
+	BT_P2P_FLAGS_ERP = 1, // m_erp was overridden via setParam and replaces info->erp
+	BT_P2P_FLAGS_CFM = 2 // m_cfm was overridden via setParam and is written to info->cfm
+};
+
 /// point to point constraint between two rigidbodies each with a pivotpoint that descibes the 'ballsocket' location in local space
 ATTRIBUTE_ALIGNED16(class) btPoint2PointConstraint : public btTypedConstraint
 {
@@ -46,10 +61,14 @@ public:
 	btVector3	m_pivotInA;
 	btVector3	m_pivotInB;
 	
-	
+	int			m_flags;
+	btScalar	m_erp;
+	btScalar	m_cfm;
 	
 public:
 
+	BT_DECLARE_ALIGNED_ALLOCATOR();
+
 	///for backwards compatibility during the transition to 'getInfo/getInfo2'
 	bool		m_useSolveConstraintObsolete;
 
@@ -59,7 +78,6 @@ public:
 
 	btPoint2PointConstraint(btRigidBody& rbA,const btVector3& pivotInA);
 
-	btPoint2PointConstraint();
 
 	virtual void	buildJacobian();
 
@@ -71,8 +89,6 @@ public:
 
 	void getInfo2NonVirtual (btConstraintInfo2* info, const btTransform& body0_trans, const btTransform& body1_trans);
 
-	virtual	void	solveConstraintObsolete(btSolverBody& bodyA,btSolverBody& bodyB,btScalar	timeStep);
-
 	void	updateRHS(btScalar	timeStep);
 
 	void	setPivotA(const btVector3& pivotA)
@@ -95,7 +111,53 @@ public:
 		return m_pivotInB;
 	}
 
+	///override the default global value of a parameter (such as ERP or CFM), optionally provide the axis (0..5). 
+	///If no axis is provided, it uses the default axis for this constraint.
+	virtual	void	setParam(int num, btScalar value, int axis = -1);
+	///return the local value of the parameter selected by num
+	virtual	btScalar getParam(int num, int axis = -1) const;
+
+	virtual	int	calculateSerializeBufferSize() const;
+
+	///fills the dataBuffer and returns the struct name (and 0 on failure)
+	virtual	const char*	serialize(void* dataBuffer, btSerializer* serializer) const;
+
 
 };
 
-#endif //POINT2POINTCONSTRAINT_H
+///Serialized layout of btPoint2PointConstraint with btVector3FloatData pivots (the btPoint2PointConstraintData alias when BT_USE_DOUBLE_PRECISION is not defined). Do not change these serialization structures: that requires an updated sBulletDNAstr/sBulletDNAstr64
+struct	btPoint2PointConstraintFloatData
+{
+	btTypedConstraintData	m_typeConstraintData;	//base-class part, written by btTypedConstraint::serialize
+	btVector3FloatData	m_pivotInA;	//written from btPoint2PointConstraint::m_pivotInA
+	btVector3FloatData	m_pivotInB;	//written from btPoint2PointConstraint::m_pivotInB
+};
+
+///Serialized layout of btPoint2PointConstraint with btVector3DoubleData pivots (the btPoint2PointConstraintData alias when BT_USE_DOUBLE_PRECISION is defined). Do not change these serialization structures: that requires an updated sBulletDNAstr/sBulletDNAstr64
+struct	btPoint2PointConstraintDoubleData
+{
+	btTypedConstraintData	m_typeConstraintData;	//base-class part, written by btTypedConstraint::serialize
+	btVector3DoubleData	m_pivotInA;	//written from btPoint2PointConstraint::m_pivotInA
+	btVector3DoubleData	m_pivotInB;	//written from btPoint2PointConstraint::m_pivotInB
+};
+
+///returns sizeof(btPoint2PointConstraintData), i.e. the size of the struct that serialize() writes through its dataBuffer pointer
+SIMD_FORCE_INLINE	int	btPoint2PointConstraint::calculateSerializeBufferSize() const
+{
+	return sizeof(btPoint2PointConstraintData);
+
+}
+
+	///fills the dataBuffer (cast to btPoint2PointConstraintData, so it must be large enough to hold that struct) and returns the struct name; the body below has no path that returns 0
+SIMD_FORCE_INLINE	const char*	btPoint2PointConstraint::serialize(void* dataBuffer, btSerializer* serializer) const
+{
+	btPoint2PointConstraintData* p2pData = (btPoint2PointConstraintData*)dataBuffer;
+
+	btTypedConstraint::serialize(&p2pData->m_typeConstraintData,serializer);	//base-class fields go into the embedded btTypedConstraintData
+	m_pivotInA.serialize(p2pData->m_pivotInA);
+	m_pivotInB.serialize(p2pData->m_pivotInB);
+
+	return btPoint2PointConstraintDataName;	//name string of the precision-specific struct selected by the macros at the top of this header
+}
+
+#endif //BT_POINT2POINTCONSTRAINT_H
diff --git a/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btSequentialImpulseConstraintSolver.cpp b/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btSequentialImpulseConstraintSolver.cpp
index 560981d52..89cf46cfe 100644
--- a/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btSequentialImpulseConstraintSolver.cpp
+++ b/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btSequentialImpulseConstraintSolver.cpp
@@ -18,24 +18,23 @@ subject to the following restrictions:
 
 #include "btSequentialImpulseConstraintSolver.h"
 #include "BulletCollision/NarrowPhaseCollision/btPersistentManifold.h"
-#include "BulletDynamics/Dynamics/btRigidBody.h"
-#include "btContactConstraint.h"
-#include "btSolve2LinearConstraint.h"
-#include "btContactSolverInfo.h"
+
 #include "LinearMath/btIDebugDraw.h"
-#include "btJacobianEntry.h"
+//#include "btJacobianEntry.h"
 #include "LinearMath/btMinMax.h"
 #include "BulletDynamics/ConstraintSolver/btTypedConstraint.h"
 #include <new>
 #include "LinearMath/btStackAlloc.h"
 #include "LinearMath/btQuickprof.h"
-#include "btSolverBody.h"
-#include "btSolverConstraint.h"
+//#include "btSolverBody.h"
+//#include "btSolverConstraint.h"
 #include "LinearMath/btAlignedObjectArray.h"
 #include <string.h> //for memset
 
 int		gNumSplitImpulseRecoveries = 0;
 
+#include "BulletDynamics/Dynamics/btRigidBody.h"
+
 btSequentialImpulseConstraintSolver::btSequentialImpulseConstraintSolver()
 :m_btSeed2(0)
 {
@@ -48,11 +47,11 @@ btSequentialImpulseConstraintSolver::~btSequentialImpulseConstraintSolver()
 
 #ifdef USE_SIMD
 #include <emmintrin.h>
-#define vec_splat(x, e) _mm_shuffle_ps(x, x, _MM_SHUFFLE(e,e,e,e))
-static inline __m128 _vmathVfDot3( __m128 vec0, __m128 vec1 )
+#define btVecSplat(x, e) _mm_shuffle_ps(x, x, _MM_SHUFFLE(e,e,e,e))	//copies lane e of x into all four lanes
+static inline __m128 btSimdDot3( __m128 vec0, __m128 vec1 )	//sum of the lane 0..2 products, replicated in every lane of the result
 {
 	__m128 result = _mm_mul_ps( vec0, vec1);
-	return _mm_add_ps( vec_splat( result, 0 ), _mm_add_ps( vec_splat( result, 1 ), vec_splat( result, 2 ) ) );
+	return _mm_add_ps( btVecSplat( result, 0 ), _mm_add_ps( btVecSplat( result, 1 ), btVecSplat( result, 2 ) ) );
 }
 #endif//USE_SIMD
 
@@ -64,8 +63,8 @@ void btSequentialImpulseConstraintSolver::resolveSingleConstraintRowGenericSIMD(
 	__m128	lowerLimit1 = _mm_set1_ps(c.m_lowerLimit);
 	__m128	upperLimit1 = _mm_set1_ps(c.m_upperLimit);
 	__m128 deltaImpulse = _mm_sub_ps(_mm_set1_ps(c.m_rhs), _mm_mul_ps(_mm_set1_ps(c.m_appliedImpulse),_mm_set1_ps(c.m_cfm)));
-	__m128 deltaVel1Dotn	=	_mm_add_ps(_vmathVfDot3(c.m_contactNormal.mVec128,body1.m_deltaLinearVelocity.mVec128), _vmathVfDot3(c.m_relpos1CrossNormal.mVec128,body1.m_deltaAngularVelocity.mVec128));
-	__m128 deltaVel2Dotn	=	_mm_sub_ps(_vmathVfDot3(c.m_relpos2CrossNormal.mVec128,body2.m_deltaAngularVelocity.mVec128),_vmathVfDot3((c.m_contactNormal).mVec128,body2.m_deltaLinearVelocity.mVec128));
+	__m128 deltaVel1Dotn	=	_mm_add_ps(btSimdDot3(c.m_contactNormal.mVec128,body1.internalGetDeltaLinearVelocity().mVec128), btSimdDot3(c.m_relpos1CrossNormal.mVec128,body1.internalGetDeltaAngularVelocity().mVec128));
+	__m128 deltaVel2Dotn	=	_mm_sub_ps(btSimdDot3(c.m_relpos2CrossNormal.mVec128,body2.internalGetDeltaAngularVelocity().mVec128),btSimdDot3((c.m_contactNormal).mVec128,body2.internalGetDeltaLinearVelocity().mVec128));
 	deltaImpulse	=	_mm_sub_ps(deltaImpulse,_mm_mul_ps(deltaVel1Dotn,_mm_set1_ps(c.m_jacDiagABInv)));
 	deltaImpulse	=	_mm_sub_ps(deltaImpulse,_mm_mul_ps(deltaVel2Dotn,_mm_set1_ps(c.m_jacDiagABInv)));
 	btSimdScalar sum = _mm_add_ps(cpAppliedImp,deltaImpulse);
@@ -78,13 +77,13 @@ void btSequentialImpulseConstraintSolver::resolveSingleConstraintRowGenericSIMD(
 	__m128 upperMinApplied = _mm_sub_ps(upperLimit1,cpAppliedImp);
 	deltaImpulse = _mm_or_ps( _mm_and_ps(resultUpperLess, deltaImpulse), _mm_andnot_ps(resultUpperLess, upperMinApplied) );
 	c.m_appliedImpulse = _mm_or_ps( _mm_and_ps(resultUpperLess, c.m_appliedImpulse), _mm_andnot_ps(resultUpperLess, upperLimit1) );
-	__m128	linearComponentA = _mm_mul_ps(c.m_contactNormal.mVec128,body1.m_invMass.mVec128);
-	__m128	linearComponentB = _mm_mul_ps((c.m_contactNormal).mVec128,body2.m_invMass.mVec128);
+	__m128	linearComponentA = _mm_mul_ps(c.m_contactNormal.mVec128,body1.internalGetInvMass().mVec128);
+	__m128	linearComponentB = _mm_mul_ps((c.m_contactNormal).mVec128,body2.internalGetInvMass().mVec128);
 	__m128 impulseMagnitude = deltaImpulse;
-	body1.m_deltaLinearVelocity.mVec128 = _mm_add_ps(body1.m_deltaLinearVelocity.mVec128,_mm_mul_ps(linearComponentA,impulseMagnitude));
-	body1.m_deltaAngularVelocity.mVec128 = _mm_add_ps(body1.m_deltaAngularVelocity.mVec128 ,_mm_mul_ps(c.m_angularComponentA.mVec128,impulseMagnitude));
-	body2.m_deltaLinearVelocity.mVec128 = _mm_sub_ps(body2.m_deltaLinearVelocity.mVec128,_mm_mul_ps(linearComponentB,impulseMagnitude));
-	body2.m_deltaAngularVelocity.mVec128 = _mm_add_ps(body2.m_deltaAngularVelocity.mVec128 ,_mm_mul_ps(c.m_angularComponentB.mVec128,impulseMagnitude));
+	body1.internalGetDeltaLinearVelocity().mVec128 = _mm_add_ps(body1.internalGetDeltaLinearVelocity().mVec128,_mm_mul_ps(linearComponentA,impulseMagnitude));
+	body1.internalGetDeltaAngularVelocity().mVec128 = _mm_add_ps(body1.internalGetDeltaAngularVelocity().mVec128 ,_mm_mul_ps(c.m_angularComponentA.mVec128,impulseMagnitude));
+	body2.internalGetDeltaLinearVelocity().mVec128 = _mm_sub_ps(body2.internalGetDeltaLinearVelocity().mVec128,_mm_mul_ps(linearComponentB,impulseMagnitude));
+	body2.internalGetDeltaAngularVelocity().mVec128 = _mm_add_ps(body2.internalGetDeltaAngularVelocity().mVec128 ,_mm_mul_ps(c.m_angularComponentB.mVec128,impulseMagnitude));
 #else
 	resolveSingleConstraintRowGeneric(body1,body2,c);
 #endif
@@ -94,8 +93,8 @@ void btSequentialImpulseConstraintSolver::resolveSingleConstraintRowGenericSIMD(
  void btSequentialImpulseConstraintSolver::resolveSingleConstraintRowGeneric(btSolverBody& body1,btSolverBody& body2,const btSolverConstraint& c)
 {
 	btScalar deltaImpulse = c.m_rhs-btScalar(c.m_appliedImpulse)*c.m_cfm;
-	const btScalar deltaVel1Dotn	=	c.m_contactNormal.dot(body1.m_deltaLinearVelocity) 	+ c.m_relpos1CrossNormal.dot(body1.m_deltaAngularVelocity);
-	const btScalar deltaVel2Dotn	=	-c.m_contactNormal.dot(body2.m_deltaLinearVelocity) + c.m_relpos2CrossNormal.dot(body2.m_deltaAngularVelocity);
+	const btScalar deltaVel1Dotn	=	c.m_contactNormal.dot(body1.internalGetDeltaLinearVelocity()) 	+ c.m_relpos1CrossNormal.dot(body1.internalGetDeltaAngularVelocity());
+	const btScalar deltaVel2Dotn	=	-c.m_contactNormal.dot(body2.internalGetDeltaLinearVelocity()) + c.m_relpos2CrossNormal.dot(body2.internalGetDeltaAngularVelocity());
 
 //	const btScalar delta_rel_vel	=	deltaVel1Dotn-deltaVel2Dotn;
 	deltaImpulse	-=	deltaVel1Dotn*c.m_jacDiagABInv;
@@ -116,8 +115,9 @@ void btSequentialImpulseConstraintSolver::resolveSingleConstraintRowGenericSIMD(
 	{
 		c.m_appliedImpulse = sum;
 	}
-		body1.applyImpulse(c.m_contactNormal*body1.m_invMass,c.m_angularComponentA,deltaImpulse);
-		body2.applyImpulse(-c.m_contactNormal*body2.m_invMass,c.m_angularComponentB,deltaImpulse);
+
+	body1.internalApplyImpulse(c.m_contactNormal*body1.internalGetInvMass(),c.m_angularComponentA,deltaImpulse);
+	body2.internalApplyImpulse(-c.m_contactNormal*body2.internalGetInvMass(),c.m_angularComponentB,deltaImpulse);
 }
 
  void btSequentialImpulseConstraintSolver::resolveSingleConstraintRowLowerLimitSIMD(btSolverBody& body1,btSolverBody& body2,const btSolverConstraint& c)
@@ -127,8 +127,8 @@ void btSequentialImpulseConstraintSolver::resolveSingleConstraintRowGenericSIMD(
 	__m128	lowerLimit1 = _mm_set1_ps(c.m_lowerLimit);
 	__m128	upperLimit1 = _mm_set1_ps(c.m_upperLimit);
 	__m128 deltaImpulse = _mm_sub_ps(_mm_set1_ps(c.m_rhs), _mm_mul_ps(_mm_set1_ps(c.m_appliedImpulse),_mm_set1_ps(c.m_cfm)));
-	__m128 deltaVel1Dotn	=	_mm_add_ps(_vmathVfDot3(c.m_contactNormal.mVec128,body1.m_deltaLinearVelocity.mVec128), _vmathVfDot3(c.m_relpos1CrossNormal.mVec128,body1.m_deltaAngularVelocity.mVec128));
-	__m128 deltaVel2Dotn	=	_mm_sub_ps(_vmathVfDot3(c.m_relpos2CrossNormal.mVec128,body2.m_deltaAngularVelocity.mVec128),_vmathVfDot3((c.m_contactNormal).mVec128,body2.m_deltaLinearVelocity.mVec128));
+	__m128 deltaVel1Dotn	=	_mm_add_ps(btSimdDot3(c.m_contactNormal.mVec128,body1.internalGetDeltaLinearVelocity().mVec128), btSimdDot3(c.m_relpos1CrossNormal.mVec128,body1.internalGetDeltaAngularVelocity().mVec128));
+	__m128 deltaVel2Dotn	=	_mm_sub_ps(btSimdDot3(c.m_relpos2CrossNormal.mVec128,body2.internalGetDeltaAngularVelocity().mVec128),btSimdDot3((c.m_contactNormal).mVec128,body2.internalGetDeltaLinearVelocity().mVec128));
 	deltaImpulse	=	_mm_sub_ps(deltaImpulse,_mm_mul_ps(deltaVel1Dotn,_mm_set1_ps(c.m_jacDiagABInv)));
 	deltaImpulse	=	_mm_sub_ps(deltaImpulse,_mm_mul_ps(deltaVel2Dotn,_mm_set1_ps(c.m_jacDiagABInv)));
 	btSimdScalar sum = _mm_add_ps(cpAppliedImp,deltaImpulse);
@@ -138,13 +138,13 @@ void btSequentialImpulseConstraintSolver::resolveSingleConstraintRowGenericSIMD(
 	__m128 lowMinApplied = _mm_sub_ps(lowerLimit1,cpAppliedImp);
 	deltaImpulse = _mm_or_ps( _mm_and_ps(resultLowerLess, lowMinApplied), _mm_andnot_ps(resultLowerLess, deltaImpulse) );
 	c.m_appliedImpulse = _mm_or_ps( _mm_and_ps(resultLowerLess, lowerLimit1), _mm_andnot_ps(resultLowerLess, sum) );
-	__m128	linearComponentA = _mm_mul_ps(c.m_contactNormal.mVec128,body1.m_invMass.mVec128);
-	__m128	linearComponentB = _mm_mul_ps((c.m_contactNormal).mVec128,body2.m_invMass.mVec128);
+	__m128	linearComponentA = _mm_mul_ps(c.m_contactNormal.mVec128,body1.internalGetInvMass().mVec128);
+	__m128	linearComponentB = _mm_mul_ps((c.m_contactNormal).mVec128,body2.internalGetInvMass().mVec128);
 	__m128 impulseMagnitude = deltaImpulse;
-	body1.m_deltaLinearVelocity.mVec128 = _mm_add_ps(body1.m_deltaLinearVelocity.mVec128,_mm_mul_ps(linearComponentA,impulseMagnitude));
-	body1.m_deltaAngularVelocity.mVec128 = _mm_add_ps(body1.m_deltaAngularVelocity.mVec128 ,_mm_mul_ps(c.m_angularComponentA.mVec128,impulseMagnitude));
-	body2.m_deltaLinearVelocity.mVec128 = _mm_sub_ps(body2.m_deltaLinearVelocity.mVec128,_mm_mul_ps(linearComponentB,impulseMagnitude));
-	body2.m_deltaAngularVelocity.mVec128 = _mm_add_ps(body2.m_deltaAngularVelocity.mVec128 ,_mm_mul_ps(c.m_angularComponentB.mVec128,impulseMagnitude));
+	body1.internalGetDeltaLinearVelocity().mVec128 = _mm_add_ps(body1.internalGetDeltaLinearVelocity().mVec128,_mm_mul_ps(linearComponentA,impulseMagnitude));
+	body1.internalGetDeltaAngularVelocity().mVec128 = _mm_add_ps(body1.internalGetDeltaAngularVelocity().mVec128 ,_mm_mul_ps(c.m_angularComponentA.mVec128,impulseMagnitude));
+	body2.internalGetDeltaLinearVelocity().mVec128 = _mm_sub_ps(body2.internalGetDeltaLinearVelocity().mVec128,_mm_mul_ps(linearComponentB,impulseMagnitude));
+	body2.internalGetDeltaAngularVelocity().mVec128 = _mm_add_ps(body2.internalGetDeltaAngularVelocity().mVec128 ,_mm_mul_ps(c.m_angularComponentB.mVec128,impulseMagnitude));
 #else
 	resolveSingleConstraintRowLowerLimit(body1,body2,c);
 #endif
@@ -154,8 +154,8 @@ void btSequentialImpulseConstraintSolver::resolveSingleConstraintRowGenericSIMD(
  void btSequentialImpulseConstraintSolver::resolveSingleConstraintRowLowerLimit(btSolverBody& body1,btSolverBody& body2,const btSolverConstraint& c)
 {
 	btScalar deltaImpulse = c.m_rhs-btScalar(c.m_appliedImpulse)*c.m_cfm;
-	const btScalar deltaVel1Dotn	=	c.m_contactNormal.dot(body1.m_deltaLinearVelocity) 	+ c.m_relpos1CrossNormal.dot(body1.m_deltaAngularVelocity);
-	const btScalar deltaVel2Dotn	=	-c.m_contactNormal.dot(body2.m_deltaLinearVelocity) + c.m_relpos2CrossNormal.dot(body2.m_deltaAngularVelocity);
+	const btScalar deltaVel1Dotn	=	c.m_contactNormal.dot(body1.internalGetDeltaLinearVelocity()) 	+ c.m_relpos1CrossNormal.dot(body1.internalGetDeltaAngularVelocity());
+	const btScalar deltaVel2Dotn	=	-c.m_contactNormal.dot(body2.internalGetDeltaLinearVelocity()) + c.m_relpos2CrossNormal.dot(body2.internalGetDeltaAngularVelocity());
 
 	deltaImpulse	-=	deltaVel1Dotn*c.m_jacDiagABInv;
 	deltaImpulse	-=	deltaVel2Dotn*c.m_jacDiagABInv;
@@ -169,8 +169,8 @@ void btSequentialImpulseConstraintSolver::resolveSingleConstraintRowGenericSIMD(
 	{
 		c.m_appliedImpulse = sum;
 	}
-	body1.applyImpulse(c.m_contactNormal*body1.m_invMass,c.m_angularComponentA,deltaImpulse);
-	body2.applyImpulse(-c.m_contactNormal*body2.m_invMass,c.m_angularComponentB,deltaImpulse);
+	body1.internalApplyImpulse(c.m_contactNormal*body1.internalGetInvMass(),c.m_angularComponentA,deltaImpulse);
+	body2.internalApplyImpulse(-c.m_contactNormal*body2.internalGetInvMass(),c.m_angularComponentB,deltaImpulse);
 }
 
 
@@ -183,8 +183,8 @@ void	btSequentialImpulseConstraintSolver::resolveSplitPenetrationImpulseCacheFri
         {
 			gNumSplitImpulseRecoveries++;
 			btScalar deltaImpulse = c.m_rhsPenetration-btScalar(c.m_appliedPushImpulse)*c.m_cfm;
-			const btScalar deltaVel1Dotn	=	c.m_contactNormal.dot(body1.m_pushVelocity) 	+ c.m_relpos1CrossNormal.dot(body1.m_turnVelocity);
-			const btScalar deltaVel2Dotn	=	-c.m_contactNormal.dot(body2.m_pushVelocity) + c.m_relpos2CrossNormal.dot(body2.m_turnVelocity);
+			const btScalar deltaVel1Dotn	=	c.m_contactNormal.dot(body1.internalGetPushVelocity()) 	+ c.m_relpos1CrossNormal.dot(body1.internalGetTurnVelocity());
+			const btScalar deltaVel2Dotn	=	-c.m_contactNormal.dot(body2.internalGetPushVelocity()) + c.m_relpos2CrossNormal.dot(body2.internalGetTurnVelocity());
 
 			deltaImpulse	-=	deltaVel1Dotn*c.m_jacDiagABInv;
 			deltaImpulse	-=	deltaVel2Dotn*c.m_jacDiagABInv;
@@ -198,8 +198,8 @@ void	btSequentialImpulseConstraintSolver::resolveSplitPenetrationImpulseCacheFri
 			{
 				c.m_appliedPushImpulse = sum;
 			}
-			body1.internalApplyPushImpulse(c.m_contactNormal*body1.m_invMass,c.m_angularComponentA,deltaImpulse);
-			body2.internalApplyPushImpulse(-c.m_contactNormal*body2.m_invMass,c.m_angularComponentB,deltaImpulse);
+			body1.internalApplyPushImpulse(c.m_contactNormal*body1.internalGetInvMass(),c.m_angularComponentA,deltaImpulse);
+			body2.internalApplyPushImpulse(-c.m_contactNormal*body2.internalGetInvMass(),c.m_angularComponentB,deltaImpulse);
         }
 }
 
@@ -215,8 +215,8 @@ void	btSequentialImpulseConstraintSolver::resolveSplitPenetrationImpulseCacheFri
 	__m128	lowerLimit1 = _mm_set1_ps(c.m_lowerLimit);
 	__m128	upperLimit1 = _mm_set1_ps(c.m_upperLimit);
 	__m128 deltaImpulse = _mm_sub_ps(_mm_set1_ps(c.m_rhsPenetration), _mm_mul_ps(_mm_set1_ps(c.m_appliedPushImpulse),_mm_set1_ps(c.m_cfm)));
-	__m128 deltaVel1Dotn	=	_mm_add_ps(_vmathVfDot3(c.m_contactNormal.mVec128,body1.m_pushVelocity.mVec128), _vmathVfDot3(c.m_relpos1CrossNormal.mVec128,body1.m_turnVelocity.mVec128));
-	__m128 deltaVel2Dotn	=	_mm_sub_ps(_vmathVfDot3(c.m_relpos2CrossNormal.mVec128,body2.m_turnVelocity.mVec128),_vmathVfDot3((c.m_contactNormal).mVec128,body2.m_pushVelocity.mVec128));
+	__m128 deltaVel1Dotn	=	_mm_add_ps(btSimdDot3(c.m_contactNormal.mVec128,body1.internalGetPushVelocity().mVec128), btSimdDot3(c.m_relpos1CrossNormal.mVec128,body1.internalGetTurnVelocity().mVec128));
+	__m128 deltaVel2Dotn	=	_mm_sub_ps(btSimdDot3(c.m_relpos2CrossNormal.mVec128,body2.internalGetTurnVelocity().mVec128),btSimdDot3((c.m_contactNormal).mVec128,body2.internalGetPushVelocity().mVec128));
 	deltaImpulse	=	_mm_sub_ps(deltaImpulse,_mm_mul_ps(deltaVel1Dotn,_mm_set1_ps(c.m_jacDiagABInv)));
 	deltaImpulse	=	_mm_sub_ps(deltaImpulse,_mm_mul_ps(deltaVel2Dotn,_mm_set1_ps(c.m_jacDiagABInv)));
 	btSimdScalar sum = _mm_add_ps(cpAppliedImp,deltaImpulse);
@@ -225,14 +225,14 @@ void	btSequentialImpulseConstraintSolver::resolveSplitPenetrationImpulseCacheFri
 	resultUpperLess = _mm_cmplt_ps(sum,upperLimit1);
 	__m128 lowMinApplied = _mm_sub_ps(lowerLimit1,cpAppliedImp);
 	deltaImpulse = _mm_or_ps( _mm_and_ps(resultLowerLess, lowMinApplied), _mm_andnot_ps(resultLowerLess, deltaImpulse) );
-	c.m_appliedImpulse = _mm_or_ps( _mm_and_ps(resultLowerLess, lowerLimit1), _mm_andnot_ps(resultLowerLess, sum) );
-	__m128	linearComponentA = _mm_mul_ps(c.m_contactNormal.mVec128,body1.m_invMass.mVec128);
-	__m128	linearComponentB = _mm_mul_ps((c.m_contactNormal).mVec128,body2.m_invMass.mVec128);
+	c.m_appliedPushImpulse = _mm_or_ps( _mm_and_ps(resultLowerLess, lowerLimit1), _mm_andnot_ps(resultLowerLess, sum) );
+	__m128	linearComponentA = _mm_mul_ps(c.m_contactNormal.mVec128,body1.internalGetInvMass().mVec128);
+	__m128	linearComponentB = _mm_mul_ps((c.m_contactNormal).mVec128,body2.internalGetInvMass().mVec128);
 	__m128 impulseMagnitude = deltaImpulse;
-	body1.m_pushVelocity.mVec128 = _mm_add_ps(body1.m_pushVelocity.mVec128,_mm_mul_ps(linearComponentA,impulseMagnitude));
-	body1.m_turnVelocity.mVec128 = _mm_add_ps(body1.m_turnVelocity.mVec128 ,_mm_mul_ps(c.m_angularComponentA.mVec128,impulseMagnitude));
-	body2.m_pushVelocity.mVec128 = _mm_sub_ps(body2.m_pushVelocity.mVec128,_mm_mul_ps(linearComponentB,impulseMagnitude));
-	body2.m_turnVelocity.mVec128 = _mm_add_ps(body2.m_turnVelocity.mVec128 ,_mm_mul_ps(c.m_angularComponentB.mVec128,impulseMagnitude));
+	body1.internalGetPushVelocity().mVec128 = _mm_add_ps(body1.internalGetPushVelocity().mVec128,_mm_mul_ps(linearComponentA,impulseMagnitude));
+	body1.internalGetTurnVelocity().mVec128 = _mm_add_ps(body1.internalGetTurnVelocity().mVec128 ,_mm_mul_ps(c.m_angularComponentA.mVec128,impulseMagnitude));
+	body2.internalGetPushVelocity().mVec128 = _mm_sub_ps(body2.internalGetPushVelocity().mVec128,_mm_mul_ps(linearComponentB,impulseMagnitude));
+	body2.internalGetTurnVelocity().mVec128 = _mm_add_ps(body2.internalGetTurnVelocity().mVec128 ,_mm_mul_ps(c.m_angularComponentB.mVec128,impulseMagnitude));
 #else
 	resolveSplitPenetrationImpulseCacheFriendly(body1,body2,c);
 #endif
@@ -280,30 +280,42 @@ int btSequentialImpulseConstraintSolver::btRandInt2 (int n)
 
 void	btSequentialImpulseConstraintSolver::initSolverBody(btSolverBody* solverBody, btCollisionObject* collisionObject)
 {
+	//zeroes the delta/push/turn velocities, then copies transform, inverse mass (scaled by the linear factor), factors and velocities from the rigid body, or neutral values when collisionObject is null or not a btRigidBody
 	btRigidBody* rb = collisionObject? btRigidBody::upcast(collisionObject) : 0;
 
-	solverBody->m_deltaLinearVelocity.setValue(0.f,0.f,0.f);
-	solverBody->m_deltaAngularVelocity.setValue(0.f,0.f,0.f);
-	solverBody->m_pushVelocity.setValue(0.f,0.f,0.f);
-	solverBody->m_turnVelocity.setValue(0.f,0.f,0.f);
+	solverBody->internalGetDeltaLinearVelocity().setValue(0.f,0.f,0.f);
+	solverBody->internalGetDeltaAngularVelocity().setValue(0.f,0.f,0.f);
+	solverBody->internalGetPushVelocity().setValue(0.f,0.f,0.f);
+	solverBody->internalGetTurnVelocity().setValue(0.f,0.f,0.f);
 
 	if (rb)
 	{
-		solverBody->m_invMass = btVector3(rb->getInvMass(),rb->getInvMass(),rb->getInvMass())*rb->getLinearFactor();
+		solverBody->m_worldTransform = rb->getWorldTransform();
+		solverBody->internalSetInvMass(btVector3(rb->getInvMass(),rb->getInvMass(),rb->getInvMass())*rb->getLinearFactor());
 		solverBody->m_originalBody = rb;
 		solverBody->m_angularFactor = rb->getAngularFactor();
+		solverBody->m_linearFactor = rb->getLinearFactor();
+		solverBody->m_linearVelocity = rb->getLinearVelocity();
+		solverBody->m_angularVelocity = rb->getAngularVelocity();
 	} else
 	{
-		solverBody->m_invMass.setValue(0,0,0);
+		solverBody->m_worldTransform.setIdentity();
+		solverBody->internalSetInvMass(btVector3(0,0,0));
 		solverBody->m_originalBody = 0;
 		solverBody->m_angularFactor.setValue(1,1,1);
+		solverBody->m_linearFactor.setValue(1,1,1);
+		solverBody->m_linearVelocity.setValue(0,0,0);
+		solverBody->m_angularVelocity.setValue(0,0,0);
 	}
+
+
 }
 
 
 
 
 
+
 btScalar btSequentialImpulseConstraintSolver::restitutionCurve(btScalar rel_vel, btScalar restitution)
 {
 	btScalar rest = restitution * -rel_vel;
@@ -312,10 +324,12 @@ btScalar btSequentialImpulseConstraintSolver::restitutionCurve(btScalar rel_vel,
 
 
 
-void	applyAnisotropicFriction(btCollisionObject* colObj,btVector3& frictionDirection);
-void	applyAnisotropicFriction(btCollisionObject* colObj,btVector3& frictionDirection)
+static void	applyAnisotropicFriction(btCollisionObject* colObj,btVector3& frictionDirection, int frictionMode);
+static void	applyAnisotropicFriction(btCollisionObject* colObj,btVector3& frictionDirection, int frictionMode)
 {
-	if (colObj && colObj->hasAnisotropicFriction())
+	
+
+	if (colObj && colObj->hasAnisotropicFriction(frictionMode))
 	{
 		// transform to local coordinates
 		btVector3 loc_lateral = frictionDirection * colObj->getWorldTransform().getBasis();
@@ -325,24 +339,25 @@ void	applyAnisotropicFriction(btCollisionObject* colObj,btVector3& frictionDirec
 		// ... and transform it back to global coordinates
 		frictionDirection = colObj->getWorldTransform().getBasis() * loc_lateral;
 	}
+
 }
 
 
 
-btSolverConstraint&	btSequentialImpulseConstraintSolver::addFrictionConstraint(const btVector3& normalAxis,int solverBodyIdA,int solverBodyIdB,int frictionIndex,btManifoldPoint& cp,const btVector3& rel_pos1,const btVector3& rel_pos2,btCollisionObject* colObj0,btCollisionObject* colObj1, btScalar relaxation)
+
+void btSequentialImpulseConstraintSolver::setupFrictionConstraint(btSolverConstraint& solverConstraint, const btVector3& normalAxis,int  solverBodyIdA,int solverBodyIdB,btManifoldPoint& cp,const btVector3& rel_pos1,const btVector3& rel_pos2,btCollisionObject* colObj0,btCollisionObject* colObj1, btScalar relaxation, btScalar desiredVelocity, btScalar cfmSlip)
 {
 
-
-	btRigidBody* body0=btRigidBody::upcast(colObj0);
-	btRigidBody* body1=btRigidBody::upcast(colObj1);
-
-	btSolverConstraint& solverConstraint = m_tmpSolverContactFrictionConstraintPool.expand();
-	memset(&solverConstraint,0xff,sizeof(btSolverConstraint));
+	
 	solverConstraint.m_contactNormal = normalAxis;
+	btSolverBody& solverBodyA = m_tmpSolverBodyPool[solverBodyIdA];
+	btSolverBody& solverBodyB = m_tmpSolverBodyPool[solverBodyIdB];
+
+	btRigidBody* body0 = m_tmpSolverBodyPool[solverBodyIdA].m_originalBody;
+	btRigidBody* body1 = m_tmpSolverBodyPool[solverBodyIdB].m_originalBody;
 
 	solverConstraint.m_solverBodyIdA = solverBodyIdA;
 	solverConstraint.m_solverBodyIdB = solverBodyIdB;
-	solverConstraint.m_frictionIndex = frictionIndex;
 
 	solverConstraint.m_friction = cp.m_combinedFriction;
 	solverConstraint.m_originalContactPoint = 0;
@@ -361,73 +376,158 @@ btSolverConstraint&	btSequentialImpulseConstraintSolver::addFrictionConstraint(c
 		solverConstraint.m_angularComponentB = body1 ? body1->getInvInertiaTensorWorld()*ftorqueAxis1*body1->getAngularFactor() : btVector3(0,0,0);
 	}
 
-#ifdef COMPUTE_IMPULSE_DENOM
-	btScalar denom0 = rb0->computeImpulseDenominator(pos1,solverConstraint.m_contactNormal);
-	btScalar denom1 = rb1->computeImpulseDenominator(pos2,solverConstraint.m_contactNormal);
-#else
-	btVector3 vec;
-	btScalar denom0 = 0.f;
-	btScalar denom1 = 0.f;
-	if (body0)
 	{
-		vec = ( solverConstraint.m_angularComponentA).cross(rel_pos1);
-		denom0 = body0->getInvMass() + normalAxis.dot(vec);
-	}
-	if (body1)
-	{
-		vec = ( -solverConstraint.m_angularComponentB).cross(rel_pos2);
-		denom1 = body1->getInvMass() + normalAxis.dot(vec);
+		btVector3 vec;
+		btScalar denom0 = 0.f;
+		btScalar denom1 = 0.f;
+		if (body0)
+		{
+			vec = ( solverConstraint.m_angularComponentA).cross(rel_pos1);
+			denom0 = body0->getInvMass() + normalAxis.dot(vec);
+		}
+		if (body1)
+		{
+			vec = ( -solverConstraint.m_angularComponentB).cross(rel_pos2);
+			denom1 = body1->getInvMass() + normalAxis.dot(vec);
+		}
+		btScalar denom = relaxation/(denom0+denom1);
+		solverConstraint.m_jacDiagABInv = denom;
 	}
 
-
-#endif //COMPUTE_IMPULSE_DENOM
-	btScalar denom = relaxation/(denom0+denom1);
-	solverConstraint.m_jacDiagABInv = denom;
-
-#ifdef _USE_JACOBIAN
-	solverConstraint.m_jac =  btJacobianEntry (
-		rel_pos1,rel_pos2,solverConstraint.m_contactNormal,
-		body0->getInvInertiaDiagLocal(),
-		body0->getInvMass(),
-		body1->getInvInertiaDiagLocal(),
-		body1->getInvMass());
-#endif //_USE_JACOBIAN
-
-
 	{
+		
+
 		btScalar rel_vel;
-		btScalar vel1Dotn = solverConstraint.m_contactNormal.dot(body0?body0->getLinearVelocity():btVector3(0,0,0)) 
-			+ solverConstraint.m_relpos1CrossNormal.dot(body0?body0->getAngularVelocity():btVector3(0,0,0));
-		btScalar vel2Dotn = -solverConstraint.m_contactNormal.dot(body1?body1->getLinearVelocity():btVector3(0,0,0)) 
-			+ solverConstraint.m_relpos2CrossNormal.dot(body1?body1->getAngularVelocity():btVector3(0,0,0));
+		btScalar vel1Dotn = solverConstraint.m_contactNormal.dot(body0?solverBodyA.m_linearVelocity:btVector3(0,0,0)) 
+			+ solverConstraint.m_relpos1CrossNormal.dot(body0?solverBodyA.m_angularVelocity:btVector3(0,0,0));
+		btScalar vel2Dotn = -solverConstraint.m_contactNormal.dot(body1?solverBodyB.m_linearVelocity:btVector3(0,0,0)) 
+			+ solverConstraint.m_relpos2CrossNormal.dot(body1?solverBodyB.m_angularVelocity:btVector3(0,0,0));
 
 		rel_vel = vel1Dotn+vel2Dotn;
 
 //		btScalar positionalError = 0.f;
 
-		btSimdScalar velocityError =  - rel_vel;
+		btSimdScalar velocityError =  desiredVelocity - rel_vel;
 		btSimdScalar	velocityImpulse = velocityError * btSimdScalar(solverConstraint.m_jacDiagABInv);
 		solverConstraint.m_rhs = velocityImpulse;
-		solverConstraint.m_cfm = 0.f;
+		solverConstraint.m_cfm = cfmSlip;
 		solverConstraint.m_lowerLimit = 0;
 		solverConstraint.m_upperLimit = 1e10f;
+		
 	}
+}
 
+btSolverConstraint&	btSequentialImpulseConstraintSolver::addFrictionConstraint(const btVector3& normalAxis,int solverBodyIdA,int solverBodyIdB,int frictionIndex,btManifoldPoint& cp,const btVector3& rel_pos1,const btVector3& rel_pos2,btCollisionObject* colObj0,btCollisionObject* colObj1, btScalar relaxation, btScalar desiredVelocity, btScalar cfmSlip)
+{	//appends a slot to m_tmpSolverContactFrictionConstraintPool, tags it with frictionIndex, fills it via setupFrictionConstraint and returns a reference to it
+	btSolverConstraint& solverConstraint = m_tmpSolverContactFrictionConstraintPool.expandNonInitializing();	//fields are assigned below and inside setupFrictionConstraint
+	solverConstraint.m_frictionIndex = frictionIndex;
+	setupFrictionConstraint(solverConstraint, normalAxis, solverBodyIdA, solverBodyIdB, cp, rel_pos1, rel_pos2, 
+							colObj0, colObj1, relaxation, desiredVelocity, cfmSlip);
 	return solverConstraint;
 }
 
+///Fills solverConstraint as an angular-only row about normalAxis1: the linear direction (m_contactNormal) is zero and m_friction comes from cp.m_combinedRollingFriction. rel_pos1, rel_pos2, colObj0, colObj1 and relaxation are accepted but not read in this body.
+void btSequentialImpulseConstraintSolver::setupRollingFrictionConstraint(	btSolverConstraint& solverConstraint, const btVector3& normalAxis1,int solverBodyIdA,int  solverBodyIdB,
+									btManifoldPoint& cp,const btVector3& rel_pos1,const btVector3& rel_pos2,
+									btCollisionObject* colObj0,btCollisionObject* colObj1, btScalar relaxation, 
+									btScalar desiredVelocity, btScalar cfmSlip)
+
+{
+	btVector3 normalAxis(0,0,0);
+	//the linear direction is zero, so the linear-velocity dot products further down contribute nothing
+
+	solverConstraint.m_contactNormal = normalAxis;
+	btSolverBody& solverBodyA = m_tmpSolverBodyPool[solverBodyIdA];
+	btSolverBody& solverBodyB = m_tmpSolverBodyPool[solverBodyIdB];
+
+	btRigidBody* body0 = m_tmpSolverBodyPool[solverBodyIdA].m_originalBody;
+	btRigidBody* body1 = m_tmpSolverBodyPool[solverBodyIdB].m_originalBody;
+
+	solverConstraint.m_solverBodyIdA = solverBodyIdA;
+	solverConstraint.m_solverBodyIdB = solverBodyIdB;
+
+	solverConstraint.m_friction = cp.m_combinedRollingFriction;
+	solverConstraint.m_originalContactPoint = 0;
+
+	solverConstraint.m_appliedImpulse = 0.f;
+	solverConstraint.m_appliedPushImpulse = 0.f;
+
+	{
+		btVector3 ftorqueAxis1 = -normalAxis1;
+		solverConstraint.m_relpos1CrossNormal = ftorqueAxis1;
+		solverConstraint.m_angularComponentA = body0 ? body0->getInvInertiaTensorWorld()*ftorqueAxis1*body0->getAngularFactor() : btVector3(0,0,0);
+	}
+	{
+		btVector3 ftorqueAxis1 = normalAxis1;
+		solverConstraint.m_relpos2CrossNormal = ftorqueAxis1;
+		solverConstraint.m_angularComponentB = body1 ? body1->getInvInertiaTensorWorld()*ftorqueAxis1*body1->getAngularFactor() : btVector3(0,0,0);
+	}
+
+	//m_jacDiagABInv = 1/sum of axis.(invInertia*axis) over both bodies; NOTE(review): sum is 0 (division by zero) when both m_originalBody pointers are null - confirm callers never pair two such bodies
+	{
+		btVector3 iMJaA = body0?body0->getInvInertiaTensorWorld()*solverConstraint.m_relpos1CrossNormal:btVector3(0,0,0);
+		btVector3 iMJaB = body1?body1->getInvInertiaTensorWorld()*solverConstraint.m_relpos2CrossNormal:btVector3(0,0,0);
+		btScalar sum = 0;
+		sum += iMJaA.dot(solverConstraint.m_relpos1CrossNormal);
+		sum += iMJaB.dot(solverConstraint.m_relpos2CrossNormal);
+		solverConstraint.m_jacDiagABInv = btScalar(1.)/sum;
+	}
+
+	{
+		//relative velocity along the row, read from the velocities cached in the solver bodies (zero for a side without an original body)
+
+		btScalar rel_vel;
+		btScalar vel1Dotn = solverConstraint.m_contactNormal.dot(body0?solverBodyA.m_linearVelocity:btVector3(0,0,0)) 
+			+ solverConstraint.m_relpos1CrossNormal.dot(body0?solverBodyA.m_angularVelocity:btVector3(0,0,0));
+		btScalar vel2Dotn = -solverConstraint.m_contactNormal.dot(body1?solverBodyB.m_linearVelocity:btVector3(0,0,0)) 
+			+ solverConstraint.m_relpos2CrossNormal.dot(body1?solverBodyB.m_angularVelocity:btVector3(0,0,0));
+
+		rel_vel = vel1Dotn+vel2Dotn;
+
+//		btScalar positionalError = 0.f;
+		//right-hand side: the impulse that would move rel_vel to desiredVelocity
+		btSimdScalar velocityError =  desiredVelocity - rel_vel;
+		btSimdScalar	velocityImpulse = velocityError * btSimdScalar(solverConstraint.m_jacDiagABInv);
+		solverConstraint.m_rhs = velocityImpulse;
+		solverConstraint.m_cfm = cfmSlip;
+		solverConstraint.m_lowerLimit = 0;
+		solverConstraint.m_upperLimit = 1e10f;
+		
+	}
+}
+
+
+
+
+
+
+
+///Appends a slot to m_tmpSolverContactRollingFrictionConstraintPool, tags it with frictionIndex, fills it via setupRollingFrictionConstraint and returns a reference to it
+btSolverConstraint&	btSequentialImpulseConstraintSolver::addRollingFrictionConstraint(const btVector3& normalAxis,int solverBodyIdA,int solverBodyIdB,int frictionIndex,btManifoldPoint& cp,const btVector3& rel_pos1,const btVector3& rel_pos2,btCollisionObject* colObj0,btCollisionObject* colObj1, btScalar relaxation, btScalar desiredVelocity, btScalar cfmSlip)
+{
+	btSolverConstraint& solverConstraint = m_tmpSolverContactRollingFrictionConstraintPool.expandNonInitializing();	//fields are assigned below and inside setupRollingFrictionConstraint
+	solverConstraint.m_frictionIndex = frictionIndex;
+	setupRollingFrictionConstraint(solverConstraint, normalAxis, solverBodyIdA, solverBodyIdB, cp, rel_pos1, rel_pos2, 
+							colObj0, colObj1, relaxation, desiredVelocity, cfmSlip);
+	return solverConstraint;
+}
+
+
 int	btSequentialImpulseConstraintSolver::getOrInitSolverBody(btCollisionObject& body)
 {
+
 	int solverBodyIdA = -1;
 
 	if (body.getCompanionId() >= 0)
 	{
 		//body has already been converted
 		solverBodyIdA = body.getCompanionId();
+        btAssert(solverBodyIdA < m_tmpSolverBodyPool.size());
 	} else
 	{
 		btRigidBody* rb = btRigidBody::upcast(&body);
-		if (rb && rb->getInvMass())
+		//convert both active and kinematic objects (for their velocity)
+		if (rb && (rb->getInvMass() || rb->isKinematicObject()))
 		{
 			solverBodyIdA = m_tmpSolverBodyPool.size();
 			btSolverBody& solverBody = m_tmpSolverBodyPool.expand();
@@ -438,71 +538,41 @@ int	btSequentialImpulseConstraintSolver::getOrInitSolverBody(btCollisionObject&
 			return 0;//assume first one is a fixed solver body
 		}
 	}
+
 	return solverBodyIdA;
+
 }
 #include <stdio.h>
 
 
-
-void	btSequentialImpulseConstraintSolver::convertContact(btPersistentManifold* manifold,const btContactSolverInfo& infoGlobal)
+void btSequentialImpulseConstraintSolver::setupContactConstraint(btSolverConstraint& solverConstraint, 
+																 int solverBodyIdA, int solverBodyIdB,
+																 btManifoldPoint& cp, const btContactSolverInfo& infoGlobal,
+																 btVector3& vel, btScalar& rel_vel, btScalar& relaxation,
+																 btVector3& rel_pos1, btVector3& rel_pos2)
 {
-	btCollisionObject* colObj0=0,*colObj1=0;
-
-	colObj0 = (btCollisionObject*)manifold->getBody0();
-	colObj1 = (btCollisionObject*)manifold->getBody1();
-
-	int solverBodyIdA=-1;
-	int solverBodyIdB=-1;
-
-	if (manifold->getNumContacts())
-	{
-		solverBodyIdA = getOrInitSolverBody(*colObj0);
-		solverBodyIdB = getOrInitSolverBody(*colObj1);
-	}
-
-	///avoid collision response between two static objects
-	if (!solverBodyIdA && !solverBodyIdB)
-		return;
-
-	btVector3 rel_pos1;
-	btVector3 rel_pos2;
-	btScalar relaxation;
-
-	for (int j=0;j<manifold->getNumContacts();j++)
-	{
-
-		btManifoldPoint& cp = manifold->getContactPoint(j);
-
-		if (cp.getDistance() <= manifold->getContactProcessingThreshold())
-		{
-
+			
 			const btVector3& pos1 = cp.getPositionWorldOnA();
 			const btVector3& pos2 = cp.getPositionWorldOnB();
 
-			rel_pos1 = pos1 - colObj0->getWorldTransform().getOrigin(); 
-			rel_pos2 = pos2 - colObj1->getWorldTransform().getOrigin();
+			btSolverBody* bodyA = &m_tmpSolverBodyPool[solverBodyIdA];
+			btSolverBody* bodyB = &m_tmpSolverBodyPool[solverBodyIdB];
 
+			btRigidBody* rb0 = bodyA->m_originalBody;
+			btRigidBody* rb1 = bodyB->m_originalBody;
+
+//			btVector3 rel_pos1 = pos1 - colObj0->getWorldTransform().getOrigin(); 
+//			btVector3 rel_pos2 = pos2 - colObj1->getWorldTransform().getOrigin();
+			rel_pos1 = pos1 - bodyA->getWorldTransform().getOrigin(); 
+			rel_pos2 = pos2 - bodyB->getWorldTransform().getOrigin();
 
 			relaxation = 1.f;
-			btScalar rel_vel;
-			btVector3 vel;
 
-			int frictionIndex = m_tmpSolverContactConstraintPool.size();
+			btVector3 torqueAxis0 = rel_pos1.cross(cp.m_normalWorldOnB);
+			solverConstraint.m_angularComponentA = rb0 ? rb0->getInvInertiaTensorWorld()*torqueAxis0*rb0->getAngularFactor() : btVector3(0,0,0);
+			btVector3 torqueAxis1 = rel_pos2.cross(cp.m_normalWorldOnB);		
+			solverConstraint.m_angularComponentB = rb1 ? rb1->getInvInertiaTensorWorld()*-torqueAxis1*rb1->getAngularFactor() : btVector3(0,0,0);
 
-			{
-				btSolverConstraint& solverConstraint = m_tmpSolverContactConstraintPool.expand();
-				btRigidBody* rb0 = btRigidBody::upcast(colObj0);
-				btRigidBody* rb1 = btRigidBody::upcast(colObj1);
-
-				solverConstraint.m_solverBodyIdA = solverBodyIdA;
-				solverConstraint.m_solverBodyIdB = solverBodyIdB;
-
-				solverConstraint.m_originalContactPoint = &cp;
-
-				btVector3 torqueAxis0 = rel_pos1.cross(cp.m_normalWorldOnB);
-				solverConstraint.m_angularComponentA = rb0 ? rb0->getInvInertiaTensorWorld()*torqueAxis0*rb0->getAngularFactor() : btVector3(0,0,0);
-				btVector3 torqueAxis1 = rel_pos2.cross(cp.m_normalWorldOnB);		
-				solverConstraint.m_angularComponentB = rb1 ? rb1->getInvInertiaTensorWorld()*-torqueAxis1*rb1->getAngularFactor() : btVector3(0,0,0);
 				{
 #ifdef COMPUTE_IMPULSE_DENOM
 					btScalar denom0 = rb0->computeImpulseDenominator(pos1,cp.m_normalWorldOnB);
@@ -528,29 +598,27 @@ void	btSequentialImpulseConstraintSolver::convertContact(btPersistentManifold* m
 				}
 
 				solverConstraint.m_contactNormal = cp.m_normalWorldOnB;
-				solverConstraint.m_relpos1CrossNormal = rel_pos1.cross(cp.m_normalWorldOnB);
-				solverConstraint.m_relpos2CrossNormal = rel_pos2.cross(-cp.m_normalWorldOnB);
-
-
-				btVector3 vel1 = rb0 ? rb0->getVelocityInLocalPoint(rel_pos1) : btVector3(0,0,0);
-				btVector3 vel2 = rb1 ? rb1->getVelocityInLocalPoint(rel_pos2) : btVector3(0,0,0);
-
-				vel  = vel1 - vel2;
-
-				rel_vel = cp.m_normalWorldOnB.dot(vel);
-
-				btScalar penetration = cp.getDistance()+infoGlobal.m_linearSlop;
-
-
-				solverConstraint.m_friction = cp.m_combinedFriction;
+				solverConstraint.m_relpos1CrossNormal = torqueAxis0;
+				solverConstraint.m_relpos2CrossNormal = -torqueAxis1;
 
 				btScalar restitution = 0.f;
+				btScalar penetration = cp.getDistance()+infoGlobal.m_linearSlop;
+
+				{
+					btVector3 vel1,vel2;
+
+					vel1 = rb0? rb0->getVelocityInLocalPoint(rel_pos1) : btVector3(0,0,0);
+					vel2 = rb1? rb1->getVelocityInLocalPoint(rel_pos2) : btVector3(0,0,0);
+
+	//			btVector3 vel2 = rb1 ? rb1->getVelocityInLocalPoint(rel_pos2) : btVector3(0,0,0);
+					vel  = vel1 - vel2;
+					rel_vel = cp.m_normalWorldOnB.dot(vel);
+
+					
+
+					solverConstraint.m_friction = cp.m_combinedFriction;
+
 				
-				if (cp.m_lifeTime>infoGlobal.m_restingContactRestitutionThreshold)
-				{
-					restitution = 0.f;
-				} else
-				{
 					restitution =  restitutionCurve(rel_vel, cp.m_combinedRestitution);
 					if (restitution <= btScalar(0.))
 					{
@@ -564,9 +632,9 @@ void	btSequentialImpulseConstraintSolver::convertContact(btPersistentManifold* m
 				{
 					solverConstraint.m_appliedImpulse = cp.m_appliedImpulse * infoGlobal.m_warmstartingFactor;
 					if (rb0)
-						m_tmpSolverBodyPool[solverConstraint.m_solverBodyIdA].applyImpulse(solverConstraint.m_contactNormal*rb0->getInvMass()*rb0->getLinearFactor(),solverConstraint.m_angularComponentA,solverConstraint.m_appliedImpulse);
+						bodyA->internalApplyImpulse(solverConstraint.m_contactNormal*bodyA->internalGetInvMass()*rb0->getLinearFactor(),solverConstraint.m_angularComponentA,solverConstraint.m_appliedImpulse);
 					if (rb1)
-						m_tmpSolverBodyPool[solverConstraint.m_solverBodyIdB].applyImpulse(solverConstraint.m_contactNormal*rb1->getInvMass()*rb1->getLinearFactor(),-solverConstraint.m_angularComponentB,-solverConstraint.m_appliedImpulse);
+						bodyB->internalApplyImpulse(solverConstraint.m_contactNormal*bodyB->internalGetInvMass()*rb1->getLinearFactor(),-solverConstraint.m_angularComponentB,-(btScalar)solverConstraint.m_appliedImpulse);
 				} else
 				{
 					solverConstraint.m_appliedImpulse = 0.f;
@@ -575,24 +643,41 @@ void	btSequentialImpulseConstraintSolver::convertContact(btPersistentManifold* m
 				solverConstraint.m_appliedPushImpulse = 0.f;
 
 				{
-					btScalar rel_vel;
-					btScalar vel1Dotn = solverConstraint.m_contactNormal.dot(rb0?rb0->getLinearVelocity():btVector3(0,0,0)) 
-						+ solverConstraint.m_relpos1CrossNormal.dot(rb0?rb0->getAngularVelocity():btVector3(0,0,0));
-					btScalar vel2Dotn = -solverConstraint.m_contactNormal.dot(rb1?rb1->getLinearVelocity():btVector3(0,0,0)) 
-						+ solverConstraint.m_relpos2CrossNormal.dot(rb1?rb1->getAngularVelocity():btVector3(0,0,0));
-
-					rel_vel = vel1Dotn+vel2Dotn;
+					btScalar vel1Dotn = solverConstraint.m_contactNormal.dot(rb0?bodyA->m_linearVelocity:btVector3(0,0,0)) 
+						+ solverConstraint.m_relpos1CrossNormal.dot(rb0?bodyA->m_angularVelocity:btVector3(0,0,0));
+					btScalar vel2Dotn = -solverConstraint.m_contactNormal.dot(rb1?bodyB->m_linearVelocity:btVector3(0,0,0)) 
+						+ solverConstraint.m_relpos2CrossNormal.dot(rb1?bodyB->m_angularVelocity:btVector3(0,0,0));
+					btScalar rel_vel = vel1Dotn+vel2Dotn;
 
 					btScalar positionalError = 0.f;
-					positionalError = -penetration * infoGlobal.m_erp/infoGlobal.m_timeStep;
 					btScalar	velocityError = restitution - rel_vel;// * damping;
+					
+
+					btScalar erp = infoGlobal.m_erp2;
+					if (!infoGlobal.m_splitImpulse || (penetration > infoGlobal.m_splitImpulsePenetrationThreshold))
+					{
+						erp = infoGlobal.m_erp;
+					}
+
+					if (penetration>0)
+					{
+						positionalError = 0;
+
+						velocityError -= penetration / infoGlobal.m_timeStep;
+					} else
+					{
+						positionalError = -penetration * erp/infoGlobal.m_timeStep;
+					}
+
 					btScalar  penetrationImpulse = positionalError*solverConstraint.m_jacDiagABInv;
 					btScalar velocityImpulse = velocityError *solverConstraint.m_jacDiagABInv;
+
 					if (!infoGlobal.m_splitImpulse || (penetration > infoGlobal.m_splitImpulsePenetrationThreshold))
 					{
 						//combine position and velocity into rhs
 						solverConstraint.m_rhs = penetrationImpulse+velocityImpulse;
 						solverConstraint.m_rhsPenetration = 0.f;
+
 					} else
 					{
 						//split position and velocity into rhs and m_rhsPenetration
@@ -605,122 +690,329 @@ void	btSequentialImpulseConstraintSolver::convertContact(btPersistentManifold* m
 				}
 
 
-				/////setup the friction constraints
+
+
+}
 
 
 
-				if (1)
-				{
-					solverConstraint.m_frictionIndex = m_tmpSolverContactFrictionConstraintPool.size();
-					if (!(infoGlobal.m_solverMode & SOLVER_ENABLE_FRICTION_DIRECTION_CACHING) || !cp.m_lateralFrictionInitialized)
-					{
-						cp.m_lateralFrictionDir1 = vel - cp.m_normalWorldOnB * rel_vel;
-						btScalar lat_rel_vel = cp.m_lateralFrictionDir1.length2();
-						if (!(infoGlobal.m_solverMode & SOLVER_DISABLE_VELOCITY_DEPENDENT_FRICTION_DIRECTION) && lat_rel_vel > SIMD_EPSILON)
-						{
-							cp.m_lateralFrictionDir1 /= btSqrt(lat_rel_vel);
-							if((infoGlobal.m_solverMode & SOLVER_USE_2_FRICTION_DIRECTIONS))
-							{
-								cp.m_lateralFrictionDir2 = cp.m_lateralFrictionDir1.cross(cp.m_normalWorldOnB);
-								cp.m_lateralFrictionDir2.normalize();//??
-								applyAnisotropicFriction(colObj0,cp.m_lateralFrictionDir2);
-								applyAnisotropicFriction(colObj1,cp.m_lateralFrictionDir2);
-								addFrictionConstraint(cp.m_lateralFrictionDir2,solverBodyIdA,solverBodyIdB,frictionIndex,cp,rel_pos1,rel_pos2,colObj0,colObj1, relaxation);
-							}
+void btSequentialImpulseConstraintSolver::setFrictionConstraintImpulse( btSolverConstraint& solverConstraint, 
+																		int solverBodyIdA, int solverBodyIdB,
+																 btManifoldPoint& cp, const btContactSolverInfo& infoGlobal)
+{
 
-							applyAnisotropicFriction(colObj0,cp.m_lateralFrictionDir1);
-							applyAnisotropicFriction(colObj1,cp.m_lateralFrictionDir1);
-							addFrictionConstraint(cp.m_lateralFrictionDir1,solverBodyIdA,solverBodyIdB,frictionIndex,cp,rel_pos1,rel_pos2,colObj0,colObj1, relaxation);
-							cp.m_lateralFrictionInitialized = true;
-						} else
-						{
-							//re-calculate friction direction every frame, todo: check if this is really needed
-							btPlaneSpace1(cp.m_normalWorldOnB,cp.m_lateralFrictionDir1,cp.m_lateralFrictionDir2);
-							if ((infoGlobal.m_solverMode & SOLVER_USE_2_FRICTION_DIRECTIONS))
-							{
-								applyAnisotropicFriction(colObj0,cp.m_lateralFrictionDir2);
-								applyAnisotropicFriction(colObj1,cp.m_lateralFrictionDir2);
-								addFrictionConstraint(cp.m_lateralFrictionDir2,solverBodyIdA,solverBodyIdB,frictionIndex,cp,rel_pos1,rel_pos2,colObj0,colObj1, relaxation);
-							}
+	btSolverBody* bodyA = &m_tmpSolverBodyPool[solverBodyIdA];
+	btSolverBody* bodyB = &m_tmpSolverBodyPool[solverBodyIdB];
 
-							applyAnisotropicFriction(colObj0,cp.m_lateralFrictionDir1);
-							applyAnisotropicFriction(colObj1,cp.m_lateralFrictionDir1);
-							addFrictionConstraint(cp.m_lateralFrictionDir1,solverBodyIdA,solverBodyIdB,frictionIndex,cp,rel_pos1,rel_pos2,colObj0,colObj1, relaxation);
-
-							cp.m_lateralFrictionInitialized = true;
-						}
-
-					} else
-					{
-						addFrictionConstraint(cp.m_lateralFrictionDir1,solverBodyIdA,solverBodyIdB,frictionIndex,cp,rel_pos1,rel_pos2,colObj0,colObj1, relaxation);
-						if ((infoGlobal.m_solverMode & SOLVER_USE_2_FRICTION_DIRECTIONS))
-							addFrictionConstraint(cp.m_lateralFrictionDir2,solverBodyIdA,solverBodyIdB,frictionIndex,cp,rel_pos1,rel_pos2,colObj0,colObj1, relaxation);
-					}
-
-					if (infoGlobal.m_solverMode & SOLVER_USE_FRICTION_WARMSTARTING)
-					{
-						{
-							btSolverConstraint& frictionConstraint1 = m_tmpSolverContactFrictionConstraintPool[solverConstraint.m_frictionIndex];
-							if (infoGlobal.m_solverMode & SOLVER_USE_WARMSTARTING)
-							{
-								frictionConstraint1.m_appliedImpulse = cp.m_appliedImpulseLateral1 * infoGlobal.m_warmstartingFactor;
-								if (rb0)
-									m_tmpSolverBodyPool[solverConstraint.m_solverBodyIdA].applyImpulse(frictionConstraint1.m_contactNormal*rb0->getInvMass()*rb0->getLinearFactor(),frictionConstraint1.m_angularComponentA,frictionConstraint1.m_appliedImpulse);
-								if (rb1)
-									m_tmpSolverBodyPool[solverConstraint.m_solverBodyIdB].applyImpulse(frictionConstraint1.m_contactNormal*rb1->getInvMass()*rb1->getLinearFactor(),-frictionConstraint1.m_angularComponentB,-frictionConstraint1.m_appliedImpulse);
-							} else
-							{
-								frictionConstraint1.m_appliedImpulse = 0.f;
-							}
-						}
-
-						if ((infoGlobal.m_solverMode & SOLVER_USE_2_FRICTION_DIRECTIONS))
-						{
-							btSolverConstraint& frictionConstraint2 = m_tmpSolverContactFrictionConstraintPool[solverConstraint.m_frictionIndex+1];
-							if (infoGlobal.m_solverMode & SOLVER_USE_WARMSTARTING)
-							{
-								frictionConstraint2.m_appliedImpulse = cp.m_appliedImpulseLateral2 * infoGlobal.m_warmstartingFactor;
-								if (rb0)
-									m_tmpSolverBodyPool[solverConstraint.m_solverBodyIdA].applyImpulse(frictionConstraint2.m_contactNormal*rb0->getInvMass(),frictionConstraint2.m_angularComponentA,frictionConstraint2.m_appliedImpulse);
-								if (rb1)
-									m_tmpSolverBodyPool[solverConstraint.m_solverBodyIdB].applyImpulse(frictionConstraint2.m_contactNormal*rb1->getInvMass(),-frictionConstraint2.m_angularComponentB,-frictionConstraint2.m_appliedImpulse);
-							} else
-							{
-								frictionConstraint2.m_appliedImpulse = 0.f;
-							}
-						}
-					} else
-					{
-						btSolverConstraint& frictionConstraint1 = m_tmpSolverContactFrictionConstraintPool[solverConstraint.m_frictionIndex];
-						frictionConstraint1.m_appliedImpulse = 0.f;
-						if ((infoGlobal.m_solverMode & SOLVER_USE_2_FRICTION_DIRECTIONS))
-						{
-							btSolverConstraint& frictionConstraint2 = m_tmpSolverContactFrictionConstraintPool[solverConstraint.m_frictionIndex+1];
-							frictionConstraint2.m_appliedImpulse = 0.f;
-						}
-					}
-				}
-			}
+	btRigidBody* rb0 = bodyA->m_originalBody;
+	btRigidBody* rb1 = bodyB->m_originalBody;
 
+	{
+		btSolverConstraint& frictionConstraint1 = m_tmpSolverContactFrictionConstraintPool[solverConstraint.m_frictionIndex];
+		if (infoGlobal.m_solverMode & SOLVER_USE_WARMSTARTING)
+		{
+			frictionConstraint1.m_appliedImpulse = cp.m_appliedImpulseLateral1 * infoGlobal.m_warmstartingFactor;
+			if (rb0)
+				bodyA->internalApplyImpulse(frictionConstraint1.m_contactNormal*rb0->getInvMass()*rb0->getLinearFactor(),frictionConstraint1.m_angularComponentA,frictionConstraint1.m_appliedImpulse);
+			if (rb1)
+				bodyB->internalApplyImpulse(frictionConstraint1.m_contactNormal*rb1->getInvMass()*rb1->getLinearFactor(),-frictionConstraint1.m_angularComponentB,-(btScalar)frictionConstraint1.m_appliedImpulse);
+		} else
+		{
+			frictionConstraint1.m_appliedImpulse = 0.f;
+		}
+	}
 
+	if ((infoGlobal.m_solverMode & SOLVER_USE_2_FRICTION_DIRECTIONS))
+	{
+		btSolverConstraint& frictionConstraint2 = m_tmpSolverContactFrictionConstraintPool[solverConstraint.m_frictionIndex+1];
+		if (infoGlobal.m_solverMode & SOLVER_USE_WARMSTARTING)
+		{
+			frictionConstraint2.m_appliedImpulse = cp.m_appliedImpulseLateral2  * infoGlobal.m_warmstartingFactor;
+			if (rb0)
+				bodyA->internalApplyImpulse(frictionConstraint2.m_contactNormal*rb0->getInvMass(),frictionConstraint2.m_angularComponentA,frictionConstraint2.m_appliedImpulse);
+			if (rb1)
+				bodyB->internalApplyImpulse(frictionConstraint2.m_contactNormal*rb1->getInvMass(),-frictionConstraint2.m_angularComponentB,-(btScalar)frictionConstraint2.m_appliedImpulse);
+		} else
+		{
+			frictionConstraint2.m_appliedImpulse = 0.f;
 		}
 	}
 }
 
 
-btScalar btSequentialImpulseConstraintSolver::solveGroupCacheFriendlySetup(btCollisionObject** /*bodies */,int /*numBodies */,btPersistentManifold** manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal,btIDebugDraw* debugDrawer,btStackAlloc* stackAlloc)
+///Converts one persistent manifold into solver rows: every contact point within the manifold's contact processing threshold gets a contact constraint
+///plus its friction constraints; rolling friction is set up for at most one contact point per manifold. Returns early when neither solver body has an original rigid body.
+void	btSequentialImpulseConstraintSolver::convertContact(btPersistentManifold* manifold,const btContactSolverInfo& infoGlobal)
+{
+	btCollisionObject* colObj0=0,*colObj1=0;
+
+	colObj0 = (btCollisionObject*)manifold->getBody0();
+	colObj1 = (btCollisionObject*)manifold->getBody1();
+
+	int solverBodyIdA = getOrInitSolverBody(*colObj0);
+	int solverBodyIdB = getOrInitSolverBody(*colObj1);
+
+//	btRigidBody* bodyA = btRigidBody::upcast(colObj0);
+//	btRigidBody* bodyB = btRigidBody::upcast(colObj1);
+
+	btSolverBody* solverBodyA = &m_tmpSolverBodyPool[solverBodyIdA];
+	btSolverBody* solverBodyB = &m_tmpSolverBodyPool[solverBodyIdB];
+
+	//NOTE(review): both pointers are addresses of pool elements, so the null checks below look purely defensive; the m_originalBody tests decide the early return
+
+	///avoid collision response between two static objects
+	if (!solverBodyA || (!solverBodyA->m_originalBody && (!solverBodyB || !solverBodyB->m_originalBody)))
+		return;
+
+	int rollingFriction=1;	//number of rolling friction setups still allowed for this manifold
+	for (int j=0;j<manifold->getNumContacts();j++)
+	{
+
+		btManifoldPoint& cp = manifold->getContactPoint(j);
+
+		if (cp.getDistance() <= manifold->getContactProcessingThreshold())
+		{
+			btVector3 rel_pos1;
+			btVector3 rel_pos2;
+			btScalar relaxation;
+			btScalar rel_vel;
+			btVector3 vel;
+
+			int frictionIndex = m_tmpSolverContactConstraintPool.size();	//index of the contact row created on the next line; handed to every friction row added for this point
+			btSolverConstraint& solverConstraint = m_tmpSolverContactConstraintPool.expandNonInitializing();
+//			btRigidBody* rb0 = btRigidBody::upcast(colObj0);
+//			btRigidBody* rb1 = btRigidBody::upcast(colObj1);
+			solverConstraint.m_solverBodyIdA = solverBodyIdA;
+			solverConstraint.m_solverBodyIdB = solverBodyIdB;
+
+			solverConstraint.m_originalContactPoint = &cp;
+
+			setupContactConstraint(solverConstraint, solverBodyIdA, solverBodyIdB, cp, infoGlobal, vel, rel_vel, relaxation, rel_pos1, rel_pos2);	//also fills vel, rel_vel, relaxation, rel_pos1 and rel_pos2 (passed by reference)
+
+//			const btVector3& pos1 = cp.getPositionWorldOnA();
+//			const btVector3& pos2 = cp.getPositionWorldOnB();
+
+			/////setup the friction constraints
+
+			solverConstraint.m_frictionIndex = m_tmpSolverContactFrictionConstraintPool.size();	//index of the first friction row that will belong to this contact
+
+			btVector3 angVelA,angVelB;
+			solverBodyA->getAngularVelocity(angVelA);
+			solverBodyB->getAngularVelocity(angVelB);			
+			btVector3 relAngVel = angVelB-angVelA;
+
+			if ((cp.m_combinedRollingFriction>0.f) && (rollingFriction>0))
+			{
+				//only a single rollingFriction per manifold
+				rollingFriction--;
+				if (relAngVel.length()>infoGlobal.m_singleAxisRollingFrictionThreshold)
+				{
+					relAngVel.normalize();
+					applyAnisotropicFriction(colObj0,relAngVel,btCollisionObject::CF_ANISOTROPIC_ROLLING_FRICTION);
+					applyAnisotropicFriction(colObj1,relAngVel,btCollisionObject::CF_ANISOTROPIC_ROLLING_FRICTION);
+					if (relAngVel.length()>0.001)
+						addRollingFrictionConstraint(relAngVel,solverBodyIdA,solverBodyIdB,frictionIndex,cp,rel_pos1,rel_pos2,colObj0,colObj1, relaxation);
+
+				} else
+				{
+					addRollingFrictionConstraint(cp.m_normalWorldOnB,solverBodyIdA,solverBodyIdB,frictionIndex,cp,rel_pos1,rel_pos2,colObj0,colObj1, relaxation);
+					btVector3 axis0,axis1;
+					btPlaneSpace1(cp.m_normalWorldOnB,axis0,axis1);
+					applyAnisotropicFriction(colObj0,axis0,btCollisionObject::CF_ANISOTROPIC_ROLLING_FRICTION);
+					applyAnisotropicFriction(colObj1,axis0,btCollisionObject::CF_ANISOTROPIC_ROLLING_FRICTION);
+					applyAnisotropicFriction(colObj0,axis1,btCollisionObject::CF_ANISOTROPIC_ROLLING_FRICTION);
+					applyAnisotropicFriction(colObj1,axis1,btCollisionObject::CF_ANISOTROPIC_ROLLING_FRICTION);
+					if (axis0.length()>0.001)
+						addRollingFrictionConstraint(axis0,solverBodyIdA,solverBodyIdB,frictionIndex,cp,rel_pos1,rel_pos2,colObj0,colObj1, relaxation);
+					if (axis1.length()>0.001)
+						addRollingFrictionConstraint(axis1,solverBodyIdA,solverBodyIdB,frictionIndex,cp,rel_pos1,rel_pos2,colObj0,colObj1, relaxation);
+		
+				}
+			}
+
+			///Bullet has several options to set the friction directions
+			///By default, each contact has only a single friction direction that is recomputed automatically every frame
+			///based on the relative linear velocity.
+			///If the relative velocity is zero, it will automatically compute a friction direction.
+			
+			///You can also enable two friction directions, using the SOLVER_USE_2_FRICTION_DIRECTIONS solver mode flag.
+			///In that case, the second friction direction will be orthogonal to both contact normal and first friction direction.
+			///
+			///If you choose SOLVER_DISABLE_VELOCITY_DEPENDENT_FRICTION_DIRECTION, then the friction will be independent from the relative projected velocity.
+			///
+			///The user can manually override the friction directions for certain contacts using a contact callback,
+			///and set cp.m_lateralFrictionInitialized to true (SOLVER_ENABLE_FRICTION_DIRECTION_CACHING must also be set in the solver mode, see the test below).
+			///In that case, you can set the target relative motion in each friction direction (cp.m_contactMotion1 and cp.m_contactMotion2);
+			///this will give a conveyor belt effect.
+			///
+			if (!(infoGlobal.m_solverMode & SOLVER_ENABLE_FRICTION_DIRECTION_CACHING) || !cp.m_lateralFrictionInitialized)
+			{
+				cp.m_lateralFrictionDir1 = vel - cp.m_normalWorldOnB * rel_vel;	//relative velocity with its component along the contact normal removed
+				btScalar lat_rel_vel = cp.m_lateralFrictionDir1.length2();	//squared length, compared against SIMD_EPSILON below
+				if (!(infoGlobal.m_solverMode & SOLVER_DISABLE_VELOCITY_DEPENDENT_FRICTION_DIRECTION) && lat_rel_vel > SIMD_EPSILON)
+				{
+					cp.m_lateralFrictionDir1 *= 1.f/btSqrt(lat_rel_vel);
+					if((infoGlobal.m_solverMode & SOLVER_USE_2_FRICTION_DIRECTIONS))
+					{
+						cp.m_lateralFrictionDir2 = cp.m_lateralFrictionDir1.cross(cp.m_normalWorldOnB);
+						cp.m_lateralFrictionDir2.normalize();//??
+						applyAnisotropicFriction(colObj0,cp.m_lateralFrictionDir2,btCollisionObject::CF_ANISOTROPIC_FRICTION);
+						applyAnisotropicFriction(colObj1,cp.m_lateralFrictionDir2,btCollisionObject::CF_ANISOTROPIC_FRICTION);
+						addFrictionConstraint(cp.m_lateralFrictionDir2,solverBodyIdA,solverBodyIdB,frictionIndex,cp,rel_pos1,rel_pos2,colObj0,colObj1, relaxation);
+
+					}
+
+					applyAnisotropicFriction(colObj0,cp.m_lateralFrictionDir1,btCollisionObject::CF_ANISOTROPIC_FRICTION);
+					applyAnisotropicFriction(colObj1,cp.m_lateralFrictionDir1,btCollisionObject::CF_ANISOTROPIC_FRICTION);
+					addFrictionConstraint(cp.m_lateralFrictionDir1,solverBodyIdA,solverBodyIdB,frictionIndex,cp,rel_pos1,rel_pos2,colObj0,colObj1, relaxation);
+
+				} else
+				{
+					btPlaneSpace1(cp.m_normalWorldOnB,cp.m_lateralFrictionDir1,cp.m_lateralFrictionDir2);
+
+					if ((infoGlobal.m_solverMode & SOLVER_USE_2_FRICTION_DIRECTIONS))
+					{
+						applyAnisotropicFriction(colObj0,cp.m_lateralFrictionDir2,btCollisionObject::CF_ANISOTROPIC_FRICTION);
+						applyAnisotropicFriction(colObj1,cp.m_lateralFrictionDir2,btCollisionObject::CF_ANISOTROPIC_FRICTION);
+						addFrictionConstraint(cp.m_lateralFrictionDir2,solverBodyIdA,solverBodyIdB,frictionIndex,cp,rel_pos1,rel_pos2,colObj0,colObj1, relaxation);
+					}
+
+					applyAnisotropicFriction(colObj0,cp.m_lateralFrictionDir1,btCollisionObject::CF_ANISOTROPIC_FRICTION);
+					applyAnisotropicFriction(colObj1,cp.m_lateralFrictionDir1,btCollisionObject::CF_ANISOTROPIC_FRICTION);
+					addFrictionConstraint(cp.m_lateralFrictionDir1,solverBodyIdA,solverBodyIdB,frictionIndex,cp,rel_pos1,rel_pos2,colObj0,colObj1, relaxation);
+
+					if ((infoGlobal.m_solverMode & SOLVER_USE_2_FRICTION_DIRECTIONS) && (infoGlobal.m_solverMode & SOLVER_DISABLE_VELOCITY_DEPENDENT_FRICTION_DIRECTION))
+					{
+						cp.m_lateralFrictionInitialized = true;
+					}
+				}
+
+			} else
+			{
+				addFrictionConstraint(cp.m_lateralFrictionDir1,solverBodyIdA,solverBodyIdB,frictionIndex,cp,rel_pos1,rel_pos2,colObj0,colObj1, relaxation,cp.m_contactMotion1, cp.m_contactCFM1);
+
+				if ((infoGlobal.m_solverMode & SOLVER_USE_2_FRICTION_DIRECTIONS))
+					addFrictionConstraint(cp.m_lateralFrictionDir2,solverBodyIdA,solverBodyIdB,frictionIndex,cp,rel_pos1,rel_pos2,colObj0,colObj1, relaxation, cp.m_contactMotion2, cp.m_contactCFM2);
+
+				setFrictionConstraintImpulse( solverConstraint, solverBodyIdA, solverBodyIdB, cp, infoGlobal);	//only this cached-direction branch warm-starts (or zeroes) the friction rows' applied impulses
+			}
+		
+
+			
+
+		}
+	}
+}
+
+btScalar btSequentialImpulseConstraintSolver::solveGroupCacheFriendlySetup(btCollisionObject** bodies, int numBodies, btPersistentManifold** manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal,btIDebugDraw* debugDrawer,btStackAlloc* stackAlloc)
 {
 	BT_PROFILE("solveGroupCacheFriendlySetup");
 	(void)stackAlloc;
 	(void)debugDrawer;
 
+	m_maxOverrideNumSolverIterations = 0;
 
-	if (!(numConstraints + numManifolds))
+#ifdef BT_DEBUG
+	 //make sure that dynamic bodies exist for all (enabled) constraints
+	for (int i=0;i<numConstraints;i++)
 	{
-		//		printf("empty\n");
-		return 0.f;
+		btTypedConstraint* constraint = constraints[i];
+		if (constraint->isEnabled())
+		{
+			if (!constraint->getRigidBodyA().isStaticOrKinematicObject())
+			{
+				bool found=false;
+				for (int b=0;b<numBodies;b++)
+				{
+                
+					if (&constraint->getRigidBodyA()==bodies[b])
+					{
+						found = true;
+						break;
+					}
+				}
+				btAssert(found);
+			}
+			if (!constraint->getRigidBodyB().isStaticOrKinematicObject())
+			{
+				bool found=false;
+				for (int b=0;b<numBodies;b++)
+				{
+					if (&constraint->getRigidBodyB()==bodies[b])
+					{
+						found = true;
+						break;
+					}
+				}
+				btAssert(found);
+			}
+		}
+	}
+    //make sure that dynamic bodies exist for all contact manifolds
+    for (int i=0;i<numManifolds;i++)
+    {
+        if (!manifoldPtr[i]->getBody0()->isStaticOrKinematicObject())
+        {
+            bool found=false;
+            for (int b=0;b<numBodies;b++)
+            {
+                
+                if (manifoldPtr[i]->getBody0()==bodies[b])
+                {
+                    found = true;
+                    break;
+                }
+            }
+            btAssert(found);
+        }
+        if (!manifoldPtr[i]->getBody1()->isStaticOrKinematicObject())
+        {
+            bool found=false;
+            for (int b=0;b<numBodies;b++)
+            {
+                if (manifoldPtr[i]->getBody1()==bodies[b])
+                {
+                    found = true;
+                    break;
+                }
+            }
+            btAssert(found);
+        }
+    }
+#endif //BT_DEBUG
+	
+	
+	for (int i = 0; i < numBodies; i++)
+	{
+		bodies[i]->setCompanionId(-1);
 	}
 
+
+	m_tmpSolverBodyPool.reserve(numBodies+1);
+	m_tmpSolverBodyPool.resize(0);
+
+	btSolverBody& fixedBody = m_tmpSolverBodyPool.expand();
+    initSolverBody(&fixedBody,0);
+
+	//convert all bodies
+
+	for (int i=0;i<numBodies;i++)
+	{
+		int bodyId = getOrInitSolverBody(*bodies[i]);
+		btRigidBody* body = btRigidBody::upcast(bodies[i]);
+		if (body && body->getInvMass())
+		{
+			btSolverBody& solverBody = m_tmpSolverBodyPool[bodyId];
+			btVector3 gyroForce (0,0,0);
+			if (body->getFlags()&BT_ENABLE_GYROPSCOPIC_FORCE)
+			{
+				gyroForce = body->computeGyroscopicForce(infoGlobal.m_maxGyroscopicForce);
+			}
+			solverBody.m_linearVelocity += body->getTotalForce()*body->getInvMass()*infoGlobal.m_timeStep;
+			solverBody.m_angularVelocity += (body->getTotalTorque()-gyroForce)*body->getInvInertiaTensorWorld()*infoGlobal.m_timeStep;
+		}
+	}
+	
 	if (1)
 	{
 		int j;
@@ -728,12 +1020,10 @@ btScalar btSequentialImpulseConstraintSolver::solveGroupCacheFriendlySetup(btCol
 		{
 			btTypedConstraint* constraint = constraints[j];
 			constraint->buildJacobian();
+			constraint->internalSetAppliedImpulse(0.0f);
 		}
 	}
 
-	btSolverBody& fixedBody = m_tmpSolverBodyPool.expand();
-	initSolverBody(&fixedBody,0);
-
 	//btRigidBody* rb0=0,*rb1=0;
 
 	//if (1)
@@ -743,15 +1033,34 @@ btScalar btSequentialImpulseConstraintSolver::solveGroupCacheFriendlySetup(btCol
 			int totalNumRows = 0;
 			int i;
 			
-			m_tmpConstraintSizesPool.resize(numConstraints);
+			m_tmpConstraintSizesPool.resizeNoInitialize(numConstraints);
 			//calculate the total number of contraint rows
 			for (i=0;i<numConstraints;i++)
 			{
 				btTypedConstraint::btConstraintInfo1& info1 = m_tmpConstraintSizesPool[i];
-				constraints[i]->getInfo1(&info1);
+				btJointFeedback* fb = constraints[i]->getJointFeedback();
+				if (fb)
+				{
+					fb->m_appliedForceBodyA.setZero();
+					fb->m_appliedTorqueBodyA.setZero();
+					fb->m_appliedForceBodyB.setZero();
+					fb->m_appliedTorqueBodyB.setZero();
+				}
+
+				if (constraints[i]->isEnabled())
+				{
+				}
+				if (constraints[i]->isEnabled())
+				{
+					constraints[i]->getInfo1(&info1);
+				} else
+				{
+					info1.m_numConstraintRows = 0;
+					info1.nub = 0;
+				}
 				totalNumRows += info1.m_numConstraintRows;
 			}
-			m_tmpSolverNonContactConstraintPool.resize(totalNumRows);
+			m_tmpSolverNonContactConstraintPool.resizeNoInitialize(totalNumRows);
 
 			
 			///setup the btSolverConstraints
@@ -767,35 +1076,44 @@ btScalar btSequentialImpulseConstraintSolver::solveGroupCacheFriendlySetup(btCol
 
 					btSolverConstraint* currentConstraintRow = &m_tmpSolverNonContactConstraintPool[currentRow];
 					btTypedConstraint* constraint = constraints[i];
-
-
-
 					btRigidBody& rbA = constraint->getRigidBodyA();
 					btRigidBody& rbB = constraint->getRigidBodyB();
 
-					int solverBodyIdA = getOrInitSolverBody(rbA);
-					int solverBodyIdB = getOrInitSolverBody(rbB);
+                    int solverBodyIdA = getOrInitSolverBody(rbA);
+                    int solverBodyIdB = getOrInitSolverBody(rbB);
+
+                    btSolverBody* bodyAPtr = &m_tmpSolverBodyPool[solverBodyIdA];
+                    btSolverBody* bodyBPtr = &m_tmpSolverBodyPool[solverBodyIdB];
+
+
+
+
+					int overrideNumSolverIterations = constraint->getOverrideNumSolverIterations() > 0 ? constraint->getOverrideNumSolverIterations() : infoGlobal.m_numIterations;
+					if (overrideNumSolverIterations>m_maxOverrideNumSolverIterations)
+						m_maxOverrideNumSolverIterations = overrideNumSolverIterations;
 
-					btSolverBody* bodyAPtr = &m_tmpSolverBodyPool[solverBodyIdA];
-					btSolverBody* bodyBPtr = &m_tmpSolverBodyPool[solverBodyIdB];
 
 					int j;
 					for ( j=0;j<info1.m_numConstraintRows;j++)
 					{
 						memset(&currentConstraintRow[j],0,sizeof(btSolverConstraint));
-						currentConstraintRow[j].m_lowerLimit = -FLT_MAX;
-						currentConstraintRow[j].m_upperLimit = FLT_MAX;
+						currentConstraintRow[j].m_lowerLimit = -SIMD_INFINITY;
+						currentConstraintRow[j].m_upperLimit = SIMD_INFINITY;
 						currentConstraintRow[j].m_appliedImpulse = 0.f;
 						currentConstraintRow[j].m_appliedPushImpulse = 0.f;
 						currentConstraintRow[j].m_solverBodyIdA = solverBodyIdA;
 						currentConstraintRow[j].m_solverBodyIdB = solverBodyIdB;
+						currentConstraintRow[j].m_overrideNumSolverIterations = overrideNumSolverIterations;
 					}
 
-					bodyAPtr->m_deltaLinearVelocity.setValue(0.f,0.f,0.f);
-					bodyAPtr->m_deltaAngularVelocity.setValue(0.f,0.f,0.f);
-					bodyBPtr->m_deltaLinearVelocity.setValue(0.f,0.f,0.f);
-					bodyBPtr->m_deltaAngularVelocity.setValue(0.f,0.f,0.f);
-
+					bodyAPtr->internalGetDeltaLinearVelocity().setValue(0.f,0.f,0.f);
+					bodyAPtr->internalGetDeltaAngularVelocity().setValue(0.f,0.f,0.f);
+					bodyAPtr->internalGetPushVelocity().setValue(0.f,0.f,0.f);
+					bodyAPtr->internalGetTurnVelocity().setValue(0.f,0.f,0.f);
+					bodyBPtr->internalGetDeltaLinearVelocity().setValue(0.f,0.f,0.f);
+					bodyBPtr->internalGetDeltaAngularVelocity().setValue(0.f,0.f,0.f);
+					bodyBPtr->internalGetPushVelocity().setValue(0.f,0.f,0.f);
+					bodyBPtr->internalGetTurnVelocity().setValue(0.f,0.f,0.f);
 
 
 					btTypedConstraint::btConstraintInfo2 info2;
@@ -807,8 +1125,10 @@ btScalar btSequentialImpulseConstraintSolver::solveGroupCacheFriendlySetup(btCol
 					info2.m_J2angularAxis = currentConstraintRow->m_relpos2CrossNormal;
 					info2.rowskip = sizeof(btSolverConstraint)/sizeof(btScalar);//check this
 					///the size of btSolverConstraint needs be a multiple of btScalar
-					btAssert(info2.rowskip*sizeof(btScalar)== sizeof(btSolverConstraint));
+		            btAssert(info2.rowskip*sizeof(btScalar)== sizeof(btSolverConstraint));
 					info2.m_constraintError = &currentConstraintRow->m_rhs;
+					currentConstraintRow->m_cfm = infoGlobal.m_globalCfm;
+					info2.m_damping = infoGlobal.m_damping;
 					info2.cfm = &currentConstraintRow->m_cfm;
 					info2.m_lowerLimit = &currentConstraintRow->m_lowerLimit;
 					info2.m_upperLimit = &currentConstraintRow->m_upperLimit;
@@ -820,6 +1140,18 @@ btScalar btSequentialImpulseConstraintSolver::solveGroupCacheFriendlySetup(btCol
 					{
 						btSolverConstraint& solverConstraint = currentConstraintRow[j];
 
+						if (solverConstraint.m_upperLimit>=constraints[i]->getBreakingImpulseThreshold())
+						{
+							solverConstraint.m_upperLimit = constraints[i]->getBreakingImpulseThreshold();
+						}
+
+						if (solverConstraint.m_lowerLimit<=-constraints[i]->getBreakingImpulseThreshold())
+						{
+							solverConstraint.m_lowerLimit = -constraints[i]->getBreakingImpulseThreshold();
+						}
+
+						solverConstraint.m_originalContactPoint = constraint;
+
 						{
 							const btVector3& ftorqueAxis1 = solverConstraint.m_relpos1CrossNormal;
 							solverConstraint.m_angularComponentA = constraint->getRigidBodyA().getInvInertiaTensorWorld()*ftorqueAxis1*constraint->getRigidBodyA().getAngularFactor();
@@ -839,8 +1171,9 @@ btScalar btSequentialImpulseConstraintSolver::solveGroupCacheFriendlySetup(btCol
 							sum += iMJaA.dot(solverConstraint.m_relpos1CrossNormal);
 							sum += iMJlB.dot(solverConstraint.m_contactNormal);
 							sum += iMJaB.dot(solverConstraint.m_relpos2CrossNormal);
-
-							solverConstraint.m_jacDiagABInv = btScalar(1.)/sum;
+							btScalar fsum = btFabs(sum);
+							btAssert(fsum > SIMD_EPSILON);
+							solverConstraint.m_jacDiagABInv = fsum>SIMD_EPSILON?btScalar(1.)/sum : 0.f;
 						}
 
 
@@ -855,7 +1188,7 @@ btScalar btSequentialImpulseConstraintSolver::solveGroupCacheFriendlySetup(btCol
 
 							btScalar restitution = 0.f;
 							btScalar positionalError = solverConstraint.m_rhs;//already filled in by getConstraintInfo2
-							btScalar	velocityError = restitution - rel_vel;// * damping;
+							btScalar	velocityError = restitution - rel_vel * info2.m_damping;
 							btScalar	penetrationImpulse = positionalError*solverConstraint.m_jacDiagABInv;
 							btScalar	velocityImpulse = velocityError *solverConstraint.m_jacDiagABInv;
 							solverConstraint.m_rhs = penetrationImpulse+velocityImpulse;
@@ -882,18 +1215,27 @@ btScalar btSequentialImpulseConstraintSolver::solveGroupCacheFriendlySetup(btCol
 		}
 	}
 
-	btContactSolverInfo info = infoGlobal;
-
+//	btContactSolverInfo info = infoGlobal;
 
 
+	int numNonContactPool = m_tmpSolverNonContactConstraintPool.size();
 	int numConstraintPool = m_tmpSolverContactConstraintPool.size();
 	int numFrictionPool = m_tmpSolverContactFrictionConstraintPool.size();
 
 	///@todo: use stack allocator for such temporarily memory, same for solver bodies/constraints
-	m_orderTmpConstraintPool.resize(numConstraintPool);
-	m_orderFrictionConstraintPool.resize(numFrictionPool);
+	m_orderNonContactConstraintPool.resizeNoInitialize(numNonContactPool);
+	if ((infoGlobal.m_solverMode & SOLVER_USE_2_FRICTION_DIRECTIONS))
+		m_orderTmpConstraintPool.resizeNoInitialize(numConstraintPool*2);
+	else
+		m_orderTmpConstraintPool.resizeNoInitialize(numConstraintPool);
+
+	m_orderFrictionConstraintPool.resizeNoInitialize(numFrictionPool);
 	{
 		int i;
+		for (i=0;i<numNonContactPool;i++)
+		{
+			m_orderNonContactConstraintPool[i] = i;
+		}
 		for (i=0;i<numConstraintPool;i++)
 		{
 			m_orderTmpConstraintPool[i] = i;
@@ -908,66 +1250,135 @@ btScalar btSequentialImpulseConstraintSolver::solveGroupCacheFriendlySetup(btCol
 
 }
 
-btScalar btSequentialImpulseConstraintSolver::solveGroupCacheFriendlyIterations(btCollisionObject** /*bodies */,int /*numBodies*/,btPersistentManifold** /*manifoldPtr*/, int /*numManifolds*/,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal,btIDebugDraw* /*debugDrawer*/,btStackAlloc* /*stackAlloc*/)
-{
-	BT_PROFILE("solveGroupCacheFriendlyIterations");
 
+btScalar btSequentialImpulseConstraintSolver::solveSingleIteration(int iteration, btCollisionObject** /*bodies */,int /*numBodies*/,btPersistentManifold** /*manifoldPtr*/, int /*numManifolds*/,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal,btIDebugDraw* /*debugDrawer*/,btStackAlloc* /*stackAlloc*/)
+{
+
+	int numNonContactPool = m_tmpSolverNonContactConstraintPool.size();
 	int numConstraintPool = m_tmpSolverContactConstraintPool.size();
 	int numFrictionPool = m_tmpSolverContactFrictionConstraintPool.size();
-
-	//should traverse the contacts random order...
-	int iteration;
+	
+	if (infoGlobal.m_solverMode & SOLVER_RANDMIZE_ORDER)
 	{
-		for ( iteration = 0;iteration<infoGlobal.m_numIterations;iteration++)
-		{			
+		if (1)			// replace the condition with ((iteration & 7) == 0) to reshuffle only on every 8th iteration (a bit less random)
+		{
 
-			int j;
-			if (infoGlobal.m_solverMode & SOLVER_RANDMIZE_ORDER)
-			{
-				if ((iteration & 7) == 0) {
-					for (j=0; j<numConstraintPool; ++j) {
-						int tmp = m_orderTmpConstraintPool[j];
-						int swapi = btRandInt2(j+1);
-						m_orderTmpConstraintPool[j] = m_orderTmpConstraintPool[swapi];
-						m_orderTmpConstraintPool[swapi] = tmp;
-					}
-
-					for (j=0; j<numFrictionPool; ++j) {
-						int tmp = m_orderFrictionConstraintPool[j];
-						int swapi = btRandInt2(j+1);
-						m_orderFrictionConstraintPool[j] = m_orderFrictionConstraintPool[swapi];
-						m_orderFrictionConstraintPool[swapi] = tmp;
-					}
-				}
+			for (int j=0; j<numNonContactPool; ++j) {
+				int tmp = m_orderNonContactConstraintPool[j];
+				int swapi = btRandInt2(j+1);
+				m_orderNonContactConstraintPool[j] = m_orderNonContactConstraintPool[swapi];
+				m_orderNonContactConstraintPool[swapi] = tmp;
 			}
 
-			if (infoGlobal.m_solverMode & SOLVER_SIMD)
+			//contact/friction constraints are not solved more than infoGlobal.m_numIterations times, so they are only reshuffled while they are still being solved
+			if (iteration< infoGlobal.m_numIterations)
 			{
-				///solve all joint constraints, using SIMD, if available
-				for (j=0;j<m_tmpSolverNonContactConstraintPool.size();j++)
-				{
-					btSolverConstraint& constraint = m_tmpSolverNonContactConstraintPool[j];
-					resolveSingleConstraintRowGenericSIMD(m_tmpSolverBodyPool[constraint.m_solverBodyIdA],m_tmpSolverBodyPool[constraint.m_solverBodyIdB],constraint);
+				for (int j=0; j<numConstraintPool; ++j) {
+					int tmp = m_orderTmpConstraintPool[j];
+					int swapi = btRandInt2(j+1);
+					m_orderTmpConstraintPool[j] = m_orderTmpConstraintPool[swapi];
+					m_orderTmpConstraintPool[swapi] = tmp;
 				}
 
-				for (j=0;j<numConstraints;j++)
-				{
-					int bodyAid = getOrInitSolverBody(constraints[j]->getRigidBodyA());
-					int bodyBid = getOrInitSolverBody(constraints[j]->getRigidBodyB());
-					btSolverBody& bodyA = m_tmpSolverBodyPool[bodyAid];
-					btSolverBody& bodyB = m_tmpSolverBodyPool[bodyBid];
-					constraints[j]->solveConstraintObsolete(bodyA,bodyB,infoGlobal.m_timeStep);
+				for (int j=0; j<numFrictionPool; ++j) {
+					int tmp = m_orderFrictionConstraintPool[j];
+					int swapi = btRandInt2(j+1);
+					m_orderFrictionConstraintPool[j] = m_orderFrictionConstraintPool[swapi];
+					m_orderFrictionConstraintPool[swapi] = tmp;
 				}
+			}
+		}
+	}
 
-				///solve all contact constraints using SIMD, if available
+	if (infoGlobal.m_solverMode & SOLVER_SIMD)
+	{
+		///solve all joint constraints, using SIMD, if available
+		for (int j=0;j<m_tmpSolverNonContactConstraintPool.size();j++)
+		{
+			btSolverConstraint& constraint = m_tmpSolverNonContactConstraintPool[m_orderNonContactConstraintPool[j]];
+			if (iteration < constraint.m_overrideNumSolverIterations)
+				resolveSingleConstraintRowGenericSIMD(m_tmpSolverBodyPool[constraint.m_solverBodyIdA],m_tmpSolverBodyPool[constraint.m_solverBodyIdB],constraint);
+		}
+
+		if (iteration< infoGlobal.m_numIterations)
+		{
+			for (int j=0;j<numConstraints;j++)
+			{
+                if (constraints[j]->isEnabled())
+                {
+                    int bodyAid = getOrInitSolverBody(constraints[j]->getRigidBodyA());
+                    int bodyBid = getOrInitSolverBody(constraints[j]->getRigidBodyB());
+                    btSolverBody& bodyA = m_tmpSolverBodyPool[bodyAid];
+                    btSolverBody& bodyB = m_tmpSolverBodyPool[bodyBid];
+                    constraints[j]->solveConstraintObsolete(bodyA,bodyB,infoGlobal.m_timeStep);
+                }
+			}
+
+			///solve all contact constraints using SIMD, if available
+			if (infoGlobal.m_solverMode & SOLVER_INTERLEAVE_CONTACT_AND_FRICTION_CONSTRAINTS)
+			{
 				int numPoolConstraints = m_tmpSolverContactConstraintPool.size();
+				int multiplier = (infoGlobal.m_solverMode & SOLVER_USE_2_FRICTION_DIRECTIONS)? 2 : 1;
+
+				for (int c=0;c<numPoolConstraints;c++)
+				{
+					btScalar totalImpulse =0;
+
+					{
+						const btSolverConstraint& solveManifold = m_tmpSolverContactConstraintPool[m_orderTmpConstraintPool[c]];
+						resolveSingleConstraintRowLowerLimitSIMD(m_tmpSolverBodyPool[solveManifold.m_solverBodyIdA],m_tmpSolverBodyPool[solveManifold.m_solverBodyIdB],solveManifold);
+						totalImpulse = solveManifold.m_appliedImpulse;
+					}
+					bool applyFriction = true;
+					if (applyFriction)
+					{
+						{
+
+							btSolverConstraint& solveManifold = m_tmpSolverContactFrictionConstraintPool[m_orderFrictionConstraintPool[c*multiplier]];
+
+							if (totalImpulse>btScalar(0))
+							{
+								solveManifold.m_lowerLimit = -(solveManifold.m_friction*totalImpulse);
+								solveManifold.m_upperLimit = solveManifold.m_friction*totalImpulse;
+
+								resolveSingleConstraintRowGenericSIMD(m_tmpSolverBodyPool[solveManifold.m_solverBodyIdA],m_tmpSolverBodyPool[solveManifold.m_solverBodyIdB],solveManifold);
+							}
+						}
+
+						if (infoGlobal.m_solverMode & SOLVER_USE_2_FRICTION_DIRECTIONS)
+						{
+
+							btSolverConstraint& solveManifold = m_tmpSolverContactFrictionConstraintPool[m_orderFrictionConstraintPool[c*multiplier+1]];
+				
+							if (totalImpulse>btScalar(0))
+							{
+								solveManifold.m_lowerLimit = -(solveManifold.m_friction*totalImpulse);
+								solveManifold.m_upperLimit = solveManifold.m_friction*totalImpulse;
+
+								resolveSingleConstraintRowGenericSIMD(m_tmpSolverBodyPool[solveManifold.m_solverBodyIdA],m_tmpSolverBodyPool[solveManifold.m_solverBodyIdB],solveManifold);
+							}
+						}
+					}
+				}
+
+			}
+			else//SOLVER_INTERLEAVE_CONTACT_AND_FRICTION_CONSTRAINTS
+			{
+				//solve the friction constraints after all contact constraints, don't interleave them
+				int numPoolConstraints = m_tmpSolverContactConstraintPool.size();
+				int j;
+
 				for (j=0;j<numPoolConstraints;j++)
 				{
 					const btSolverConstraint& solveManifold = m_tmpSolverContactConstraintPool[m_orderTmpConstraintPool[j]];
 					resolveSingleConstraintRowLowerLimitSIMD(m_tmpSolverBodyPool[solveManifold.m_solverBodyIdA],m_tmpSolverBodyPool[solveManifold.m_solverBodyIdB],solveManifold);
 
 				}
+		
+				
+
 				///solve all friction constraints, using SIMD, if available
+
 				int numFrictionPoolConstraints = m_tmpSolverContactFrictionConstraintPool.size();
 				for (j=0;j<numFrictionPoolConstraints;j++)
 				{
@@ -979,166 +1390,258 @@ btScalar btSequentialImpulseConstraintSolver::solveGroupCacheFriendlyIterations(
 						solveManifold.m_lowerLimit = -(solveManifold.m_friction*totalImpulse);
 						solveManifold.m_upperLimit = solveManifold.m_friction*totalImpulse;
 
-						resolveSingleConstraintRowGenericSIMD(m_tmpSolverBodyPool[solveManifold.m_solverBodyIdA],	m_tmpSolverBodyPool[solveManifold.m_solverBodyIdB],solveManifold);
+						resolveSingleConstraintRowGenericSIMD(m_tmpSolverBodyPool[solveManifold.m_solverBodyIdA],m_tmpSolverBodyPool[solveManifold.m_solverBodyIdB],solveManifold);
 					}
 				}
-			} else
-			{
 
-				///solve all joint constraints
-				for (j=0;j<m_tmpSolverNonContactConstraintPool.size();j++)
+				
+				int numRollingFrictionPoolConstraints = m_tmpSolverContactRollingFrictionConstraintPool.size();
+				for (j=0;j<numRollingFrictionPoolConstraints;j++)
 				{
-					btSolverConstraint& constraint = m_tmpSolverNonContactConstraintPool[j];
-					resolveSingleConstraintRowGeneric(m_tmpSolverBodyPool[constraint.m_solverBodyIdA],m_tmpSolverBodyPool[constraint.m_solverBodyIdB],constraint);
-				}
-
-				for (j=0;j<numConstraints;j++)
-				{
-					int bodyAid = getOrInitSolverBody(constraints[j]->getRigidBodyA());
-					int bodyBid = getOrInitSolverBody(constraints[j]->getRigidBodyB());
-					btSolverBody& bodyA = m_tmpSolverBodyPool[bodyAid];
-					btSolverBody& bodyB = m_tmpSolverBodyPool[bodyBid];
-
-					constraints[j]->solveConstraintObsolete(bodyA,bodyB,infoGlobal.m_timeStep);
-				}
-
-				///solve all contact constraints
-				int numPoolConstraints = m_tmpSolverContactConstraintPool.size();
-				for (j=0;j<numPoolConstraints;j++)
-				{
-					const btSolverConstraint& solveManifold = m_tmpSolverContactConstraintPool[m_orderTmpConstraintPool[j]];
-					resolveSingleConstraintRowLowerLimit(m_tmpSolverBodyPool[solveManifold.m_solverBodyIdA],m_tmpSolverBodyPool[solveManifold.m_solverBodyIdB],solveManifold);
-				}
-				///solve all friction constraints
-				int numFrictionPoolConstraints = m_tmpSolverContactFrictionConstraintPool.size();
-				for (j=0;j<numFrictionPoolConstraints;j++)
-				{
-					btSolverConstraint& solveManifold = m_tmpSolverContactFrictionConstraintPool[m_orderFrictionConstraintPool[j]];
-					btScalar totalImpulse = m_tmpSolverContactConstraintPool[solveManifold.m_frictionIndex].m_appliedImpulse;
 
+					btSolverConstraint& rollingFrictionConstraint = m_tmpSolverContactRollingFrictionConstraintPool[j];
+					btScalar totalImpulse = m_tmpSolverContactConstraintPool[rollingFrictionConstraint.m_frictionIndex].m_appliedImpulse;
 					if (totalImpulse>btScalar(0))
 					{
-						solveManifold.m_lowerLimit = -(solveManifold.m_friction*totalImpulse);
-						solveManifold.m_upperLimit = solveManifold.m_friction*totalImpulse;
+						btScalar rollingFrictionMagnitude = rollingFrictionConstraint.m_friction*totalImpulse;
+						if (rollingFrictionMagnitude>rollingFrictionConstraint.m_friction)
+							rollingFrictionMagnitude = rollingFrictionConstraint.m_friction;
 
-						resolveSingleConstraintRowGeneric(m_tmpSolverBodyPool[solveManifold.m_solverBodyIdA],							m_tmpSolverBodyPool[solveManifold.m_solverBodyIdB],solveManifold);
+						rollingFrictionConstraint.m_lowerLimit = -rollingFrictionMagnitude;
+						rollingFrictionConstraint.m_upperLimit = rollingFrictionMagnitude;
+
+						resolveSingleConstraintRowGenericSIMD(m_tmpSolverBodyPool[rollingFrictionConstraint.m_solverBodyIdA],m_tmpSolverBodyPool[rollingFrictionConstraint.m_solverBodyIdB],rollingFrictionConstraint);
 					}
 				}
-			}
+				
 
+			}			
+		}
+	} else
+	{
+		//non-SIMD version
+		///solve all joint constraints
+		for (int j=0;j<m_tmpSolverNonContactConstraintPool.size();j++)
+		{
+			btSolverConstraint& constraint = m_tmpSolverNonContactConstraintPool[m_orderNonContactConstraintPool[j]];
+			if (iteration < constraint.m_overrideNumSolverIterations)
+				resolveSingleConstraintRowGeneric(m_tmpSolverBodyPool[constraint.m_solverBodyIdA],m_tmpSolverBodyPool[constraint.m_solverBodyIdB],constraint);
 		}
 
-			if (infoGlobal.m_splitImpulse)
+		if (iteration< infoGlobal.m_numIterations)
+		{
+			for (int j=0;j<numConstraints;j++)
 			{
-				if (infoGlobal.m_solverMode & SOLVER_SIMD)
-				{
-					for ( iteration = 0;iteration<infoGlobal.m_numIterations;iteration++)
-					{
-						{
-							int numPoolConstraints = m_tmpSolverContactConstraintPool.size();
-							int j;
-							for (j=0;j<numPoolConstraints;j++)
-							{
-								const btSolverConstraint& solveManifold = m_tmpSolverContactConstraintPool[m_orderTmpConstraintPool[j]];
+                if (constraints[j]->isEnabled())
+                {
+                    int bodyAid = getOrInitSolverBody(constraints[j]->getRigidBodyA());
+                    int bodyBid = getOrInitSolverBody(constraints[j]->getRigidBodyB());
+                    btSolverBody& bodyA = m_tmpSolverBodyPool[bodyAid];
+                    btSolverBody& bodyB = m_tmpSolverBodyPool[bodyBid];
+                    constraints[j]->solveConstraintObsolete(bodyA,bodyB,infoGlobal.m_timeStep);
+                }
+			}
+			///solve all contact constraints
+			int numPoolConstraints = m_tmpSolverContactConstraintPool.size();
+			for (int j=0;j<numPoolConstraints;j++)
+			{
+				const btSolverConstraint& solveManifold = m_tmpSolverContactConstraintPool[m_orderTmpConstraintPool[j]];
+				resolveSingleConstraintRowLowerLimit(m_tmpSolverBodyPool[solveManifold.m_solverBodyIdA],m_tmpSolverBodyPool[solveManifold.m_solverBodyIdB],solveManifold);
+			}
+			///solve all friction constraints
+			int numFrictionPoolConstraints = m_tmpSolverContactFrictionConstraintPool.size();
+			for (int j=0;j<numFrictionPoolConstraints;j++)
+			{
+				btSolverConstraint& solveManifold = m_tmpSolverContactFrictionConstraintPool[m_orderFrictionConstraintPool[j]];
+				btScalar totalImpulse = m_tmpSolverContactConstraintPool[solveManifold.m_frictionIndex].m_appliedImpulse;
 
-								resolveSplitPenetrationSIMD(m_tmpSolverBodyPool[solveManifold.m_solverBodyIdA],
-									m_tmpSolverBodyPool[solveManifold.m_solverBodyIdB],solveManifold);
-							}
-						}
-					}
+				if (totalImpulse>btScalar(0))
+				{
+					solveManifold.m_lowerLimit = -(solveManifold.m_friction*totalImpulse);
+					solveManifold.m_upperLimit = solveManifold.m_friction*totalImpulse;
+
+					resolveSingleConstraintRowGeneric(m_tmpSolverBodyPool[solveManifold.m_solverBodyIdA],m_tmpSolverBodyPool[solveManifold.m_solverBodyIdB],solveManifold);
 				}
-				else
-				{
-					for ( iteration = 0;iteration<infoGlobal.m_numIterations;iteration++)
-					{
-						{
-							int numPoolConstraints = m_tmpSolverContactConstraintPool.size();
-							int j;
-							for (j=0;j<numPoolConstraints;j++)
-							{
-								const btSolverConstraint& solveManifold = m_tmpSolverContactConstraintPool[m_orderTmpConstraintPool[j]];
+			}
 
-								resolveSplitPenetrationImpulseCacheFriendly(m_tmpSolverBodyPool[solveManifold.m_solverBodyIdA],
-									m_tmpSolverBodyPool[solveManifold.m_solverBodyIdB],solveManifold);
-							}
-						}
+			int numRollingFrictionPoolConstraints = m_tmpSolverContactRollingFrictionConstraintPool.size();
+			for (int j=0;j<numRollingFrictionPoolConstraints;j++)
+			{
+				btSolverConstraint& rollingFrictionConstraint = m_tmpSolverContactRollingFrictionConstraintPool[j];
+				btScalar totalImpulse = m_tmpSolverContactConstraintPool[rollingFrictionConstraint.m_frictionIndex].m_appliedImpulse;
+				if (totalImpulse>btScalar(0))
+				{
+					btScalar rollingFrictionMagnitude = rollingFrictionConstraint.m_friction*totalImpulse;
+					if (rollingFrictionMagnitude>rollingFrictionConstraint.m_friction)
+						rollingFrictionMagnitude = rollingFrictionConstraint.m_friction;
+
+					rollingFrictionConstraint.m_lowerLimit = -rollingFrictionMagnitude;
+					rollingFrictionConstraint.m_upperLimit = rollingFrictionMagnitude;
+
+					resolveSingleConstraintRowGeneric(m_tmpSolverBodyPool[rollingFrictionConstraint.m_solverBodyIdA],m_tmpSolverBodyPool[rollingFrictionConstraint.m_solverBodyIdB],rollingFrictionConstraint);
+				}
+			}
+		}
+	}
+	return 0.f;
+}
+
+
+///Split-impulse pass: resolves penetrations for the contact rows only.
+///
+///Returns immediately unless infoGlobal.m_splitImpulse is set. Otherwise the rows of
+///m_tmpSolverContactConstraintPool are visited in m_orderTmpConstraintPool order,
+///infoGlobal.m_numIterations times over, and each row goes to resolveSplitPenetrationSIMD
+///when SOLVER_SIMD is set in infoGlobal.m_solverMode, or to
+///resolveSplitPenetrationImpulseCacheFriendly when it is not.
+///
+///Only infoGlobal is read here; the remaining parameters are accepted but unused.
+void btSequentialImpulseConstraintSolver::solveGroupCacheFriendlySplitImpulseIterations(btCollisionObject** bodies,int numBodies,btPersistentManifold** manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal,btIDebugDraw* debugDrawer,btStackAlloc* stackAlloc)
+{
+	//nothing to do when split impulse is disabled
+	if (!infoGlobal.m_splitImpulse)
+		return;
+
+	//the mode flag comes from the const infoGlobal, so the routine is chosen once up front
+	const bool useSimd = (infoGlobal.m_solverMode & SOLVER_SIMD) != 0;
+
+	for (int pass = 0; pass < infoGlobal.m_numIterations; ++pass)
+	{
+		//the pool size is re-read on every pass
+		const int contactRowCount = m_tmpSolverContactConstraintPool.size();
+		for (int row = 0; row < contactRowCount; ++row)
+		{
+			const btSolverConstraint& contactRow = m_tmpSolverContactConstraintPool[m_orderTmpConstraintPool[row]];
+			btSolverBody& solverBodyA = m_tmpSolverBodyPool[contactRow.m_solverBodyIdA];
+			btSolverBody& solverBodyB = m_tmpSolverBodyPool[contactRow.m_solverBodyIdB];
+
+			if (useSimd)
+			{
+				resolveSplitPenetrationSIMD(solverBodyA,solverBodyB,contactRow);
+			}
+			else
+			{
+				resolveSplitPenetrationImpulseCacheFriendly(solverBodyA,solverBodyB,contactRow);
+			}
+		}
+	}
+}
+
+///Runs the iterative solve: first the split-impulse penetration pass, then solveSingleIteration once per iteration.
+///The iteration count is the larger of infoGlobal.m_numIterations and m_maxOverrideNumSolverIterations. Always returns 0.
+btScalar btSequentialImpulseConstraintSolver::solveGroupCacheFriendlyIterations(btCollisionObject** bodies ,int numBodies,btPersistentManifold** manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal,btIDebugDraw* debugDrawer,btStackAlloc* stackAlloc)
+{
+	BT_PROFILE("solveGroupCacheFriendlyIterations");
+	{
+		///this is a special step to resolve penetrations (just for contacts)
+		solveGroupCacheFriendlySplitImpulseIterations(bodies ,numBodies,manifoldPtr, numManifolds,constraints,numConstraints,infoGlobal,debugDrawer,stackAlloc);
+		int maxIterations = m_maxOverrideNumSolverIterations > infoGlobal.m_numIterations? m_maxOverrideNumSolverIterations : infoGlobal.m_numIterations;
+		//iterations past infoGlobal.m_numIterations only reach non-contact rows whose m_overrideNumSolverIterations is larger (see solveSingleIteration)
+		for ( int iteration = 0 ; iteration< maxIterations ; iteration++)
+		//for ( int iteration = maxIterations-1  ; iteration >= 0;iteration--)
+		{			
+			solveSingleIteration(iteration, bodies ,numBodies,manifoldPtr, numManifolds,constraints,numConstraints,infoGlobal,debugDrawer,stackAlloc);
+		}
 		
 	}
 	return 0.f;
 }
 
+///Final stage of solveGroup: copies the solver results back to the original objects and empties the temporary pools.
+/// - with SOLVER_USE_WARMSTARTING, stores every contact row's applied normal and friction impulses in its btManifoldPoint
+/// - accumulates joint feedback, records the applied impulse on each constraint and disables constraints that reached their breaking threshold
+/// - writes velocities (plus transforms when infoGlobal.m_splitImpulse is set) back to the rigid bodies and sets their companion id to -1
+///bodies and numBodies are not used by this function; the return value is always 0.
+btScalar btSequentialImpulseConstraintSolver::solveGroupCacheFriendlyFinish(btCollisionObject** bodies,int numBodies,const btContactSolverInfo& infoGlobal)
+{
+	if (infoGlobal.m_solverMode & SOLVER_USE_WARMSTARTING)
+	{
+		const int numContactRows = m_tmpSolverContactConstraintPool.size();
+		for (int j=0;j<numContactRows;j++)
+		{
+			const btSolverConstraint& solveManifold = m_tmpSolverContactConstraintPool[j];
+			btManifoldPoint* pt = (btManifoldPoint*) solveManifold.m_originalContactPoint;
+			btAssert(pt);
+			pt->m_appliedImpulse = solveManifold.m_appliedImpulse;
+			pt->m_appliedImpulseLateral1 = m_tmpSolverContactFrictionConstraintPool[solveManifold.m_frictionIndex].m_appliedImpulse;
+			if ((infoGlobal.m_solverMode & SOLVER_USE_2_FRICTION_DIRECTIONS))
+			{
+				pt->m_appliedImpulseLateral2 = m_tmpSolverContactFrictionConstraintPool[solveManifold.m_frictionIndex+1].m_appliedImpulse;
+			}
+			//do a callback here?
+		}
+	}
+
+	const int numJointRows = m_tmpSolverNonContactConstraintPool.size();
+	for (int j=0;j<numJointRows;j++)
+	{
+		const btSolverConstraint& solverConstr = m_tmpSolverNonContactConstraintPool[j];
+		//for non-contact rows m_originalContactPoint holds the owning btTypedConstraint (assigned in solveGroupCacheFriendlySetup)
+		btTypedConstraint* constr = (btTypedConstraint*)solverConstr.m_originalContactPoint;
+		btJointFeedback* fb = constr->getJointFeedback();
+		if (fb)
+		{
+			//the applied impulse divided by the time step is what gets accumulated into the feedback
+			fb->m_appliedForceBodyA += solverConstr.m_contactNormal*solverConstr.m_appliedImpulse*constr->getRigidBodyA().getLinearFactor()/infoGlobal.m_timeStep;
+			fb->m_appliedForceBodyB += -solverConstr.m_contactNormal*solverConstr.m_appliedImpulse*constr->getRigidBodyB().getLinearFactor()/infoGlobal.m_timeStep;
+			fb->m_appliedTorqueBodyA += solverConstr.m_relpos1CrossNormal* constr->getRigidBodyA().getAngularFactor()*solverConstr.m_appliedImpulse/infoGlobal.m_timeStep;
+			//body B's angular jacobian is m_relpos2CrossNormal (as used for m_jacDiagABInv in the setup), not body A's axis negated
+			fb->m_appliedTorqueBodyB += solverConstr.m_relpos2CrossNormal* constr->getRigidBodyB().getAngularFactor()*solverConstr.m_appliedImpulse/infoGlobal.m_timeStep;
+		}
+
+		constr->internalSetAppliedImpulse(solverConstr.m_appliedImpulse);
+		//a single row reaching the breaking threshold disables the whole constraint
+		if (btFabs(solverConstr.m_appliedImpulse)>=constr->getBreakingImpulseThreshold())
+		{
+			constr->setEnabled(false);
+		}
+	}
+
+	for (int i=0;i<m_tmpSolverBodyPool.size();i++)
+	{
+		btSolverBody& solverBody = m_tmpSolverBodyPool[i];
+		btRigidBody* body = solverBody.m_originalBody;
+		if (body)
+		{
+			if (infoGlobal.m_splitImpulse)
+				solverBody.writebackVelocityAndTransform(infoGlobal.m_timeStep, infoGlobal.m_splitImpulseTurnErp);
+			else
+				solverBody.writebackVelocity();
+			body->setLinearVelocity(solverBody.m_linearVelocity);
+			body->setAngularVelocity(solverBody.m_angularVelocity);
+			if (infoGlobal.m_splitImpulse)
+				body->setWorldTransform(solverBody.m_worldTransform);
+
+			body->setCompanionId(-1);
+		}
+	}
+
+	m_tmpSolverContactConstraintPool.resizeNoInitialize(0);
+	m_tmpSolverNonContactConstraintPool.resizeNoInitialize(0);
+	m_tmpSolverContactFrictionConstraintPool.resizeNoInitialize(0);
+	m_tmpSolverContactRollingFrictionConstraintPool.resizeNoInitialize(0);
+	m_tmpSolverBodyPool.resizeNoInitialize(0);
+	return 0.f;
+}
+
 
 
 /// btSequentialImpulseConstraintSolver Sequentially applies impulses
 btScalar btSequentialImpulseConstraintSolver::solveGroup(btCollisionObject** bodies,int numBodies,btPersistentManifold** manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal,btIDebugDraw* debugDrawer,btStackAlloc* stackAlloc,btDispatcher* /*dispatcher*/)
 {
 
-	
-
 	BT_PROFILE("solveGroup");
-	//we only implement SOLVER_CACHE_FRIENDLY now
 	//you need to provide at least some bodies
-	btAssert(bodies);
-	btAssert(numBodies);
-
-	int i;
-
+	
 	solveGroupCacheFriendlySetup( bodies, numBodies, manifoldPtr,  numManifolds,constraints, numConstraints,infoGlobal,debugDrawer, stackAlloc);
+
 	solveGroupCacheFriendlyIterations(bodies, numBodies, manifoldPtr,  numManifolds,constraints, numConstraints,infoGlobal,debugDrawer, stackAlloc);
 
-	int numPoolConstraints = m_tmpSolverContactConstraintPool.size();
-	int j;
-
-	for (j=0;j<numPoolConstraints;j++)
-	{
-
-		const btSolverConstraint& solveManifold = m_tmpSolverContactConstraintPool[j];
-		btManifoldPoint* pt = (btManifoldPoint*) solveManifold.m_originalContactPoint;
-		btAssert(pt);
-		pt->m_appliedImpulse = solveManifold.m_appliedImpulse;
-		if (infoGlobal.m_solverMode & SOLVER_USE_FRICTION_WARMSTARTING)
-		{
-			pt->m_appliedImpulseLateral1 = m_tmpSolverContactFrictionConstraintPool[solveManifold.m_frictionIndex].m_appliedImpulse;
-			pt->m_appliedImpulseLateral2 = m_tmpSolverContactFrictionConstraintPool[solveManifold.m_frictionIndex+1].m_appliedImpulse;
-		}
-
-		//do a callback here?
-	}
-
-	if (infoGlobal.m_splitImpulse)
-	{		
-		for ( i=0;i<m_tmpSolverBodyPool.size();i++)
-		{
-			m_tmpSolverBodyPool[i].writebackVelocity(infoGlobal.m_timeStep);
-		}
-	} else
-	{
-		for ( i=0;i<m_tmpSolverBodyPool.size();i++)
-		{
-			m_tmpSolverBodyPool[i].writebackVelocity();
-		}
-	}
-
-
-	m_tmpSolverBodyPool.resize(0);
-	m_tmpSolverContactConstraintPool.resize(0);
-	m_tmpSolverNonContactConstraintPool.resize(0);
-	m_tmpSolverContactFrictionConstraintPool.resize(0);
-
+	solveGroupCacheFriendlyFinish(bodies, numBodies, infoGlobal);
+	
 	return 0.f;
 }
 
-
-
-
-
-
-
-
-
 void	btSequentialImpulseConstraintSolver::reset()
 {
 	m_btSeed2 = 0;
diff --git a/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btSequentialImpulseConstraintSolver.h b/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btSequentialImpulseConstraintSolver.h
index 2ec39251e..2eea6be0d 100644
--- a/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btSequentialImpulseConstraintSolver.h
+++ b/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btSequentialImpulseConstraintSolver.h
@@ -13,73 +13,109 @@ subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-#ifndef SEQUENTIAL_IMPULSE_CONSTRAINT_SOLVER_H
-#define SEQUENTIAL_IMPULSE_CONSTRAINT_SOLVER_H
+#ifndef BT_SEQUENTIAL_IMPULSE_CONSTRAINT_SOLVER_H
+#define BT_SEQUENTIAL_IMPULSE_CONSTRAINT_SOLVER_H
 
-#include "btConstraintSolver.h"
 class btIDebugDraw;
-#include "btContactConstraint.h"
-#include "btSolverBody.h"
-#include "btSolverConstraint.h"
-#include "btTypedConstraint.h"
+class btPersistentManifold;
+class btStackAlloc;
+class btDispatcher;
+class btCollisionObject;
+#include "BulletDynamics/ConstraintSolver/btTypedConstraint.h"
+#include "BulletDynamics/ConstraintSolver/btContactSolverInfo.h"
+#include "BulletDynamics/ConstraintSolver/btSolverBody.h"
+#include "BulletDynamics/ConstraintSolver/btSolverConstraint.h"
 #include "BulletCollision/NarrowPhaseCollision/btManifoldPoint.h"
+#include "BulletDynamics/ConstraintSolver/btConstraintSolver.h"
 
 ///The btSequentialImpulseConstraintSolver is a fast SIMD implementation of the Projected Gauss Seidel (iterative LCP) method.
-class btSequentialImpulseConstraintSolver : public btConstraintSolver
+ATTRIBUTE_ALIGNED16(class) btSequentialImpulseConstraintSolver : public btConstraintSolver
 {
 protected:
-
-	btAlignedObjectArray<btSolverBody>	m_tmpSolverBodyPool;
+	btAlignedObjectArray<btSolverBody>      m_tmpSolverBodyPool;
 	btConstraintArray			m_tmpSolverContactConstraintPool;
 	btConstraintArray			m_tmpSolverNonContactConstraintPool;
 	btConstraintArray			m_tmpSolverContactFrictionConstraintPool;
+	btConstraintArray			m_tmpSolverContactRollingFrictionConstraintPool;
+
 	btAlignedObjectArray<int>	m_orderTmpConstraintPool;
+	btAlignedObjectArray<int>	m_orderNonContactConstraintPool;
 	btAlignedObjectArray<int>	m_orderFrictionConstraintPool;
 	btAlignedObjectArray<btTypedConstraint::btConstraintInfo1> m_tmpConstraintSizesPool;
+	int							m_maxOverrideNumSolverIterations;
+
+	void setupFrictionConstraint(	btSolverConstraint& solverConstraint, const btVector3& normalAxis,int solverBodyIdA,int  solverBodyIdB,
+									btManifoldPoint& cp,const btVector3& rel_pos1,const btVector3& rel_pos2,
+									btCollisionObject* colObj0,btCollisionObject* colObj1, btScalar relaxation, 
+									btScalar desiredVelocity=0., btScalar cfmSlip=0.);
+
+	void setupRollingFrictionConstraint(	btSolverConstraint& solverConstraint, const btVector3& normalAxis,int solverBodyIdA,int  solverBodyIdB,
+									btManifoldPoint& cp,const btVector3& rel_pos1,const btVector3& rel_pos2,
+									btCollisionObject* colObj0,btCollisionObject* colObj1, btScalar relaxation, 
+									btScalar desiredVelocity=0., btScalar cfmSlip=0.);
+
+	btSolverConstraint&	addFrictionConstraint(const btVector3& normalAxis,int solverBodyIdA,int solverBodyIdB,int frictionIndex,btManifoldPoint& cp,const btVector3& rel_pos1,const btVector3& rel_pos2,btCollisionObject* colObj0,btCollisionObject* colObj1, btScalar relaxation, btScalar desiredVelocity=0., btScalar cfmSlip=0.);
+	btSolverConstraint&	addRollingFrictionConstraint(const btVector3& normalAxis,int solverBodyIdA,int solverBodyIdB,int frictionIndex,btManifoldPoint& cp,const btVector3& rel_pos1,const btVector3& rel_pos2,btCollisionObject* colObj0,btCollisionObject* colObj1, btScalar relaxation, btScalar desiredVelocity=0, btScalar cfmSlip=0.f);
+
+
+	void setupContactConstraint(btSolverConstraint& solverConstraint, int solverBodyIdA, int solverBodyIdB, btManifoldPoint& cp, 
+								const btContactSolverInfo& infoGlobal, btVector3& vel, btScalar& rel_vel, btScalar& relaxation, 
+								btVector3& rel_pos1, btVector3& rel_pos2);
+
+	void setFrictionConstraintImpulse( btSolverConstraint& solverConstraint, int solverBodyIdA,int solverBodyIdB, 
+										 btManifoldPoint& cp, const btContactSolverInfo& infoGlobal);
 
-	btSolverConstraint&	addFrictionConstraint(const btVector3& normalAxis,int solverBodyIdA,int solverBodyIdB,int frictionIndex,btManifoldPoint& cp,const btVector3& rel_pos1,const btVector3& rel_pos2,btCollisionObject* colObj0,btCollisionObject* colObj1, btScalar relaxation);
-	
 	///m_btSeed2 is used for re-arranging the constraint rows. improves convergence/quality of friction
 	unsigned long	m_btSeed2;
 
-	void	initSolverBody(btSolverBody* solverBody, btCollisionObject* collisionObject);
+	
 	btScalar restitutionCurve(btScalar rel_vel, btScalar restitution);
 
 	void	convertContact(btPersistentManifold* manifold,const btContactSolverInfo& infoGlobal);
 
 
 	void	resolveSplitPenetrationSIMD(
-        btSolverBody& body1,
-        btSolverBody& body2,
+     btSolverBody& bodyA,btSolverBody& bodyB,
         const btSolverConstraint& contactConstraint);
 
 	void	resolveSplitPenetrationImpulseCacheFriendly(
-        btSolverBody& body1,
-        btSolverBody& body2,
+       btSolverBody& bodyA,btSolverBody& bodyB,
         const btSolverConstraint& contactConstraint);
 
 	//internal method
-	int	getOrInitSolverBody(btCollisionObject& body);
+	int		getOrInitSolverBody(btCollisionObject& body);
+	void	initSolverBody(btSolverBody* solverBody, btCollisionObject* collisionObject);
 
-	void	resolveSingleConstraintRowGeneric(btSolverBody& body1,btSolverBody& body2,const btSolverConstraint& contactConstraint);
+	void	resolveSingleConstraintRowGeneric(btSolverBody& bodyA,btSolverBody& bodyB,const btSolverConstraint& contactConstraint);
 
-	void	resolveSingleConstraintRowGenericSIMD(btSolverBody& body1,btSolverBody& body2,const btSolverConstraint& contactConstraint);
+	void	resolveSingleConstraintRowGenericSIMD(btSolverBody& bodyA,btSolverBody& bodyB,const btSolverConstraint& contactConstraint);
 	
-	void	resolveSingleConstraintRowLowerLimit(btSolverBody& body1,btSolverBody& body2,const btSolverConstraint& contactConstraint);
+	void	resolveSingleConstraintRowLowerLimit(btSolverBody& bodyA,btSolverBody& bodyB,const btSolverConstraint& contactConstraint);
 	
-	void	resolveSingleConstraintRowLowerLimitSIMD(btSolverBody& body1,btSolverBody& body2,const btSolverConstraint& contactConstraint);
+	void	resolveSingleConstraintRowLowerLimitSIMD(btSolverBody& bodyA,btSolverBody& bodyB,const btSolverConstraint& contactConstraint);
 		
+protected:
+	
+	
+	virtual void solveGroupCacheFriendlySplitImpulseIterations(btCollisionObject** bodies,int numBodies,btPersistentManifold** manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal,btIDebugDraw* debugDrawer,btStackAlloc* stackAlloc);
+	virtual btScalar solveGroupCacheFriendlyFinish(btCollisionObject** bodies,int numBodies,const btContactSolverInfo& infoGlobal);
+	btScalar solveSingleIteration(int iteration, btCollisionObject** bodies ,int numBodies,btPersistentManifold** manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal,btIDebugDraw* debugDrawer,btStackAlloc* stackAlloc);
+
+	virtual btScalar solveGroupCacheFriendlySetup(btCollisionObject** bodies,int numBodies,btPersistentManifold** manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal,btIDebugDraw* debugDrawer,btStackAlloc* stackAlloc);
+	virtual btScalar solveGroupCacheFriendlyIterations(btCollisionObject** bodies,int numBodies,btPersistentManifold** manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal,btIDebugDraw* debugDrawer,btStackAlloc* stackAlloc);
+
+
 public:
 
+	BT_DECLARE_ALIGNED_ALLOCATOR();
 	
 	btSequentialImpulseConstraintSolver();
 	virtual ~btSequentialImpulseConstraintSolver();
 
 	virtual btScalar solveGroup(btCollisionObject** bodies,int numBodies,btPersistentManifold** manifold,int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& info, btIDebugDraw* debugDrawer, btStackAlloc* stackAlloc,btDispatcher* dispatcher);
 	
-	btScalar solveGroupCacheFriendlySetup(btCollisionObject** bodies,int numBodies,btPersistentManifold** manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal,btIDebugDraw* debugDrawer,btStackAlloc* stackAlloc);
-	btScalar solveGroupCacheFriendlyIterations(btCollisionObject** bodies,int numBodies,btPersistentManifold** manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal,btIDebugDraw* debugDrawer,btStackAlloc* stackAlloc);
 
+	
 	///clear internal cached data and reset random seed
 	virtual	void	reset();
 	
@@ -98,10 +134,8 @@ public:
 
 };
 
-#ifndef BT_PREFER_SIMD
-typedef btSequentialImpulseConstraintSolver btSequentialImpulseConstraintSolverPrefered;
-#endif
 
 
-#endif //SEQUENTIAL_IMPULSE_CONSTRAINT_SOLVER_H
+
+#endif //BT_SEQUENTIAL_IMPULSE_CONSTRAINT_SOLVER_H
 
diff --git a/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btSliderConstraint.cpp b/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btSliderConstraint.cpp
index aa305d7c1..b69f46da1 100644
--- a/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btSliderConstraint.cpp
+++ b/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btSliderConstraint.cpp
@@ -25,7 +25,7 @@ April 04, 2008
 #include "LinearMath/btTransformUtil.h"
 #include <new>
 
-
+#define USE_OFFSET_FOR_CONSTANT_FRAME true
 
 void btSliderConstraint::initParams()
 {
@@ -36,21 +36,27 @@ void btSliderConstraint::initParams()
 	m_softnessDirLin = SLIDER_CONSTRAINT_DEF_SOFTNESS;
 	m_restitutionDirLin = SLIDER_CONSTRAINT_DEF_RESTITUTION;
 	m_dampingDirLin = btScalar(0.);
+	m_cfmDirLin = SLIDER_CONSTRAINT_DEF_CFM;
 	m_softnessDirAng = SLIDER_CONSTRAINT_DEF_SOFTNESS;
 	m_restitutionDirAng = SLIDER_CONSTRAINT_DEF_RESTITUTION;
 	m_dampingDirAng = btScalar(0.);
+	m_cfmDirAng = SLIDER_CONSTRAINT_DEF_CFM;
 	m_softnessOrthoLin = SLIDER_CONSTRAINT_DEF_SOFTNESS;
 	m_restitutionOrthoLin = SLIDER_CONSTRAINT_DEF_RESTITUTION;
 	m_dampingOrthoLin = SLIDER_CONSTRAINT_DEF_DAMPING;
+	m_cfmOrthoLin = SLIDER_CONSTRAINT_DEF_CFM;
 	m_softnessOrthoAng = SLIDER_CONSTRAINT_DEF_SOFTNESS;
 	m_restitutionOrthoAng = SLIDER_CONSTRAINT_DEF_RESTITUTION;
 	m_dampingOrthoAng = SLIDER_CONSTRAINT_DEF_DAMPING;
+	m_cfmOrthoAng = SLIDER_CONSTRAINT_DEF_CFM;
 	m_softnessLimLin = SLIDER_CONSTRAINT_DEF_SOFTNESS;
 	m_restitutionLimLin = SLIDER_CONSTRAINT_DEF_RESTITUTION;
 	m_dampingLimLin = SLIDER_CONSTRAINT_DEF_DAMPING;
+	m_cfmLimLin = SLIDER_CONSTRAINT_DEF_CFM;
 	m_softnessLimAng = SLIDER_CONSTRAINT_DEF_SOFTNESS;
 	m_restitutionLimAng = SLIDER_CONSTRAINT_DEF_RESTITUTION;
 	m_dampingLimAng = SLIDER_CONSTRAINT_DEF_DAMPING;
+	m_cfmLimAng = SLIDER_CONSTRAINT_DEF_CFM;
 
 	m_poweredLinMotor = false;
     m_targetLinMotorVelocity = btScalar(0.);
@@ -62,18 +68,16 @@ void btSliderConstraint::initParams()
     m_maxAngMotorForce = btScalar(0.);
 	m_accumulatedAngMotorImpulse = btScalar(0.0);
 
+	m_flags = 0;
+	m_flags = 0;
+
+	m_useOffsetForConstraintFrame = USE_OFFSET_FOR_CONSTANT_FRAME;
+
+	calculateTransforms(m_rbA.getCenterOfMassTransform(),m_rbB.getCenterOfMassTransform());
 }
 
 
 
-btSliderConstraint::btSliderConstraint()
-        :btTypedConstraint(SLIDER_CONSTRAINT_TYPE),
-		m_useSolveConstraintObsolete(false),
-		m_useLinearReferenceFrameA(true)
-{
-	initParams();
-}
-
 
 
 btSliderConstraint::btSliderConstraint(btRigidBody& rbA, btRigidBody& rbB, const btTransform& frameInA, const btTransform& frameInB, bool useLinearReferenceFrameA)
@@ -87,14 +91,15 @@ btSliderConstraint::btSliderConstraint(btRigidBody& rbA, btRigidBody& rbB, const
 }
 
 
-static btRigidBody s_fixed(0, 0, 0);
-btSliderConstraint::btSliderConstraint(btRigidBody& rbB, const btTransform& frameInB, bool useLinearReferenceFrameB)
-        : btTypedConstraint(SLIDER_CONSTRAINT_TYPE, s_fixed, rbB),
+
+btSliderConstraint::btSliderConstraint(btRigidBody& rbB, const btTransform& frameInB, bool useLinearReferenceFrameA)
+        : btTypedConstraint(SLIDER_CONSTRAINT_TYPE, getFixedBody(), rbB),
 		m_useSolveConstraintObsolete(false),
 		m_frameInB(frameInB),
-		m_useLinearReferenceFrameA(useLinearReferenceFrameB)
+		m_useLinearReferenceFrameA(useLinearReferenceFrameA)
 {
-	///not providing rigidbody B means implicitly using worldspace for body B
+	///not providing rigidbody A means implicitly using worldspace for body A
+	m_frameInA = rbB.getCenterOfMassTransform() * m_frameInB;
 //	m_frameInA.getOrigin() = m_rbA.getCenterOfMassTransform()(m_frameInA.getOrigin());
 
 	initParams();
@@ -102,80 +107,9 @@ btSliderConstraint::btSliderConstraint(btRigidBody& rbB, const btTransform& fram
 
 
 
-void btSliderConstraint::buildJacobian()
-{
-	if (!m_useSolveConstraintObsolete) 
-	{
-		return;
-	}
-	if(m_useLinearReferenceFrameA)
-	{
-		buildJacobianInt(m_rbA, m_rbB, m_frameInA, m_frameInB);
-	}
-	else
-	{
-		buildJacobianInt(m_rbB, m_rbA, m_frameInB, m_frameInA);
-	}
-}
 
 
 
-void btSliderConstraint::buildJacobianInt(btRigidBody& rbA, btRigidBody& rbB, const btTransform& frameInA, const btTransform& frameInB)
-{
-#ifndef __SPU__
-	//calculate transforms
-    m_calculatedTransformA = rbA.getCenterOfMassTransform() * frameInA;
-    m_calculatedTransformB = rbB.getCenterOfMassTransform() * frameInB;
-	m_realPivotAInW = m_calculatedTransformA.getOrigin();
-	m_realPivotBInW = m_calculatedTransformB.getOrigin();
-	m_sliderAxis = m_calculatedTransformA.getBasis().getColumn(0); // along X
-	m_delta = m_realPivotBInW - m_realPivotAInW;
-	m_projPivotInW = m_realPivotAInW + m_sliderAxis.dot(m_delta) * m_sliderAxis;
-	m_relPosA = m_projPivotInW - rbA.getCenterOfMassPosition();
-	m_relPosB = m_realPivotBInW - rbB.getCenterOfMassPosition();
-    btVector3 normalWorld;
-    int i;
-    //linear part
-    for(i = 0; i < 3; i++)
-    {
-		normalWorld = m_calculatedTransformA.getBasis().getColumn(i);
-		new (&m_jacLin[i]) btJacobianEntry(
-			rbA.getCenterOfMassTransform().getBasis().transpose(),
-			rbB.getCenterOfMassTransform().getBasis().transpose(),
-			m_relPosA,
-			m_relPosB,
-			normalWorld,
-			rbA.getInvInertiaDiagLocal(),
-			rbA.getInvMass(),
-			rbB.getInvInertiaDiagLocal(),
-			rbB.getInvMass()
-			);
-		m_jacLinDiagABInv[i] = btScalar(1.) / m_jacLin[i].getDiagonal();
-		m_depth[i] = m_delta.dot(normalWorld);
-    }
-	testLinLimits();
-    // angular part
-    for(i = 0; i < 3; i++)
-    {
-		normalWorld = m_calculatedTransformA.getBasis().getColumn(i);
-		new (&m_jacAng[i])	btJacobianEntry(
-			normalWorld,
-            rbA.getCenterOfMassTransform().getBasis().transpose(),
-            rbB.getCenterOfMassTransform().getBasis().transpose(),
-            rbA.getInvInertiaDiagLocal(),
-            rbB.getInvInertiaDiagLocal()
-			);
-	}
-	testAngLimits();
-	btVector3 axisA = m_calculatedTransformA.getBasis().getColumn(0);
-	m_kAngle = btScalar(1.0 )/ (rbA.computeAngularImpulseDenominator(axisA) + rbB.computeAngularImpulseDenominator(axisA));
-	// clear accumulator for motors
-	m_accumulatedLinMotorImpulse = btScalar(0.0);
-	m_accumulatedAngMotorImpulse = btScalar(0.0);
-#endif //__SPU__
-}
-
-
 void btSliderConstraint::getInfo1(btConstraintInfo1* info)
 {
 	if (m_useSolveConstraintObsolete)
@@ -189,13 +123,13 @@ void btSliderConstraint::getInfo1(btConstraintInfo1* info)
 		info->nub = 2; 
 		//prepare constraint
 		calculateTransforms(m_rbA.getCenterOfMassTransform(),m_rbB.getCenterOfMassTransform());
+		testAngLimits();
 		testLinLimits();
 		if(getSolveLinLimit() || getPoweredLinMotor())
 		{
 			info->m_numConstraintRows++; // limit 3rd linear as well
 			info->nub--; 
 		}
-		testAngLimits();
 		if(getSolveAngLimit() || getPoweredAngMotor())
 		{
 			info->m_numConstraintRows++; // limit 3rd angular as well
@@ -216,13 +150,123 @@ void btSliderConstraint::getInfo2(btConstraintInfo2* info)
 	getInfo2NonVirtual(info,m_rbA.getCenterOfMassTransform(),m_rbB.getCenterOfMassTransform(), m_rbA.getLinearVelocity(),m_rbB.getLinearVelocity(), m_rbA.getInvMass(),m_rbB.getInvMass());
 }
 
+
+
+
+
+
+
+void btSliderConstraint::calculateTransforms(const btTransform& transA,const btTransform& transB)
+{
+	if(m_useLinearReferenceFrameA || (!m_useSolveConstraintObsolete))
+	{
+		m_calculatedTransformA = transA * m_frameInA;
+		m_calculatedTransformB = transB * m_frameInB;
+	}
+	else
+	{
+		m_calculatedTransformA = transB * m_frameInB;
+		m_calculatedTransformB = transA * m_frameInA;
+	}
+	m_realPivotAInW = m_calculatedTransformA.getOrigin();
+	m_realPivotBInW = m_calculatedTransformB.getOrigin();
+	m_sliderAxis = m_calculatedTransformA.getBasis().getColumn(0); // along X
+	if(m_useLinearReferenceFrameA || m_useSolveConstraintObsolete)
+	{
+		m_delta = m_realPivotBInW - m_realPivotAInW;
+	}
+	else
+	{
+		m_delta = m_realPivotAInW - m_realPivotBInW;
+	}
+	m_projPivotInW = m_realPivotAInW + m_sliderAxis.dot(m_delta) * m_sliderAxis;
+    btVector3 normalWorld;
+    int i;
+    //linear part
+    for(i = 0; i < 3; i++)
+    {
+		normalWorld = m_calculatedTransformA.getBasis().getColumn(i);
+		m_depth[i] = m_delta.dot(normalWorld);
+    }
+}
+ 
+
+
+void btSliderConstraint::testLinLimits(void)
+{
+	m_solveLinLim = false;
+	m_linPos = m_depth[0];
+	if(m_lowerLinLimit <= m_upperLinLimit)
+	{
+		if(m_depth[0] > m_upperLinLimit)
+		{
+			m_depth[0] -= m_upperLinLimit;
+			m_solveLinLim = true;
+		}
+		else if(m_depth[0] < m_lowerLinLimit)
+		{
+			m_depth[0] -= m_lowerLinLimit;
+			m_solveLinLim = true;
+		}
+		else
+		{
+			m_depth[0] = btScalar(0.);
+		}
+	}
+	else
+	{
+		m_depth[0] = btScalar(0.);
+	}
+}
+
+
+
+void btSliderConstraint::testAngLimits(void)
+{
+	m_angDepth = btScalar(0.);
+	m_solveAngLim = false;
+	if(m_lowerAngLimit <= m_upperAngLimit)
+	{
+		const btVector3 axisA0 = m_calculatedTransformA.getBasis().getColumn(1);
+		const btVector3 axisA1 = m_calculatedTransformA.getBasis().getColumn(2);
+		const btVector3 axisB0 = m_calculatedTransformB.getBasis().getColumn(1);
+//		btScalar rot = btAtan2Fast(axisB0.dot(axisA1), axisB0.dot(axisA0));  
+		btScalar rot = btAtan2(axisB0.dot(axisA1), axisB0.dot(axisA0));  
+		rot = btAdjustAngleToLimits(rot, m_lowerAngLimit, m_upperAngLimit);
+		m_angPos = rot;
+		if(rot < m_lowerAngLimit)
+		{
+			m_angDepth = rot - m_lowerAngLimit;
+			m_solveAngLim = true;
+		} 
+		else if(rot > m_upperAngLimit)
+		{
+			m_angDepth = rot - m_upperAngLimit;
+			m_solveAngLim = true;
+		}
+	}
+}
+
+btVector3 btSliderConstraint::getAncorInA(void)
+{
+	btVector3 ancorInA;
+	ancorInA = m_realPivotAInW + (m_lowerLinLimit + m_upperLinLimit) * btScalar(0.5) * m_sliderAxis;
+	ancorInA = m_rbA.getCenterOfMassTransform().inverse() * ancorInA;
+	return ancorInA;
+}
+
+
+
+btVector3 btSliderConstraint::getAncorInB(void)
+{
+	btVector3 ancorInB;
+	ancorInB = m_frameInB.getOrigin();
+	return ancorInB;
+}
+
+
 void btSliderConstraint::getInfo2NonVirtual(btConstraintInfo2* info, const btTransform& transA,const btTransform& transB, const btVector3& linVelA,const btVector3& linVelB, btScalar rbAinvMass,btScalar rbBinvMass  )
 {
-	//prepare constraint
-	calculateTransforms(transA,transB);
-	testLinLimits();
-	testAngLimits();
-
 	const btTransform& trA = getCalculatedTransformA();
 	const btTransform& trB = getCalculatedTransformB();
 	
@@ -230,7 +274,44 @@ void btSliderConstraint::getInfo2NonVirtual(btConstraintInfo2* info, const btTra
 	int i, s = info->rowskip;
 	
 	btScalar signFact = m_useLinearReferenceFrameA ? btScalar(1.0f) : btScalar(-1.0f);
-	// make rotations around Y and Z equal
+	
+	// difference between frames in WCS
+	btVector3 ofs = trB.getOrigin() - trA.getOrigin();
+	// now get weight factors depending on masses
+	btScalar miA = rbAinvMass;
+	btScalar miB = rbBinvMass;
+	bool hasStaticBody = (miA < SIMD_EPSILON) || (miB < SIMD_EPSILON);
+	btScalar miS = miA + miB;
+	btScalar factA, factB;
+	if(miS > btScalar(0.f))
+	{
+		factA = miB / miS;
+	}
+	else 
+	{
+		factA = btScalar(0.5f);
+	}
+	factB = btScalar(1.0f) - factA;
+	btVector3 ax1, p, q;
+	btVector3 ax1A = trA.getBasis().getColumn(0);
+	btVector3 ax1B = trB.getBasis().getColumn(0);
+	if(m_useOffsetForConstraintFrame)
+	{
+		// get the desired direction of slider axis
+		// as weighted sum of X-orthos of frameA and frameB in WCS
+		ax1 = ax1A * factA + ax1B * factB;
+		ax1.normalize();
+		// construct two orthos to slider axis
+		btPlaneSpace1 (ax1, p, q);
+	}
+	else
+	{ // old way - use frameA
+		ax1 = trA.getBasis().getColumn(0);
+		// get 2 orthos to slider axis (Y, Z)
+		p = trA.getBasis().getColumn(1);
+		q = trA.getBasis().getColumn(2);
+	}
+	// make rotations around these orthos equal
 	// the slider axis should be the only unconstrained
 	// rotational axis, the angular velocity of the two bodies perpendicular to
 	// the slider axis should be equal. thus the constraint equations are
@@ -238,12 +319,6 @@ void btSliderConstraint::getInfo2NonVirtual(btConstraintInfo2* info, const btTra
 	//    q*w1 - q*w2 = 0
 	// where p and q are unit vectors normal to the slider axis, and w1 and w2
 	// are the angular velocity vectors of the two bodies.
-	// get slider axis (X)
-	btVector3 ax1 = trA.getBasis().getColumn(0);
-	// get 2 orthos to slider axis (Y, Z)
-	btVector3 p = trA.getBasis().getColumn(1);
-	btVector3 q = trA.getBasis().getColumn(2);
-	// set the two slider rows 
 	info->m_J1angularAxis[0] = p[0];
 	info->m_J1angularAxis[1] = p[1];
 	info->m_J1angularAxis[2] = p[2];
@@ -259,8 +334,8 @@ void btSliderConstraint::getInfo2NonVirtual(btConstraintInfo2* info, const btTra
 	info->m_J2angularAxis[s+2] = -q[2];
 	// compute the right hand side of the constraint equation. set relative
 	// body velocities along p and q to bring the slider back into alignment.
-	// if ax1,ax2 are the unit length slider axes as computed from body1 and
-	// body2, we need to rotate both bodies along the axis u = (ax1 x ax2).
+	// if ax1A,ax1B are the unit length slider axes as computed from bodyA and
+	// bodyB, we need to rotate both bodies along the axis u = (ax1A x ax1B).
 	// if "theta" is the angle between ax1 and ax2, we need an angular velocity
 	// along u to cover angle erp*theta in one step :
 	//   |angular_velocity| = angle/time = erp*theta / stepsize
@@ -272,64 +347,126 @@ void btSliderConstraint::getInfo2NonVirtual(btConstraintInfo2* info, const btTra
 	//    angular_velocity  = (erp*fps) * (ax1 x ax2)
 	// ax1 x ax2 is in the plane space of ax1, so we project the angular
 	// velocity to p and q to find the right hand side.
-	btScalar k = info->fps * info->erp * getSoftnessOrthoAng();
-    btVector3 ax2 = trB.getBasis().getColumn(0);
-	btVector3 u = ax1.cross(ax2);
+//	btScalar k = info->fps * info->erp * getSoftnessOrthoAng();
+	btScalar currERP = (m_flags & BT_SLIDER_FLAGS_ERP_ORTANG) ? m_softnessOrthoAng : m_softnessOrthoAng * info->erp;
+	btScalar k = info->fps * currERP;
+
+	btVector3 u = ax1A.cross(ax1B);
 	info->m_constraintError[0] = k * u.dot(p);
 	info->m_constraintError[s] = k * u.dot(q);
-	// pull out pos and R for both bodies. also get the connection
-	// vector c = pos2-pos1.
-	// next two rows. we want: vel2 = vel1 + w1 x c ... but this would
-	// result in three equations, so we project along the planespace vectors
-	// so that sliding along the slider axis is disregarded. for symmetry we
-	// also consider rotation around center of mass of two bodies (factA and factB).
+	if(m_flags & BT_SLIDER_FLAGS_CFM_ORTANG)
+	{
+		info->cfm[0] = m_cfmOrthoAng;
+		info->cfm[s] = m_cfmOrthoAng;
+	}
+
+	int nrow = 1; // last filled row
+	int srow;
+	btScalar limit_err;
+	int limit;
+	int powered;
+
+	// next two rows. 
+	// we want: velA + wA x relA == velB + wB x relB ... but this would
+	// result in three equations, so we project along two orthos to the slider axis
+
 	btTransform bodyA_trans = transA;
 	btTransform bodyB_trans = transB;
-	int s2 = 2 * s, s3 = 3 * s;
-	btVector3 c;
-	btScalar miA = rbAinvMass;
-	btScalar miB = rbBinvMass;
-	btScalar miS = miA + miB;
-	btScalar factA, factB;
-	if(miS > btScalar(0.f))
+	nrow++;
+	int s2 = nrow * s;
+	nrow++;
+	int s3 = nrow * s;
+	btVector3 tmpA(0,0,0), tmpB(0,0,0), relA(0,0,0), relB(0,0,0), c(0,0,0);
+	if(m_useOffsetForConstraintFrame)
 	{
-		factA = miB / miS;
+		// get vector from bodyB to frameB in WCS
+		relB = trB.getOrigin() - bodyB_trans.getOrigin();
+		// get its projection to slider axis
+		btVector3 projB = ax1 * relB.dot(ax1);
+		// get vector directed from bodyB to slider axis (and orthogonal to it)
+		btVector3 orthoB = relB - projB;
+		// same for bodyA
+		relA = trA.getOrigin() - bodyA_trans.getOrigin();
+		btVector3 projA = ax1 * relA.dot(ax1);
+		btVector3 orthoA = relA - projA;
+		// get desired offset between frames A and B along slider axis
+		btScalar sliderOffs = m_linPos - m_depth[0];
+		// desired vector from projection of center of bodyA to projection of center of bodyB to slider axis
+		btVector3 totalDist = projA + ax1 * sliderOffs - projB;
+		// get offset vectors relA and relB
+		relA = orthoA + totalDist * factA;
+		relB = orthoB - totalDist * factB;
+		// now choose average ortho to slider axis
+		p = orthoB * factA + orthoA * factB;
+		btScalar len2 = p.length2();
+		if(len2 > SIMD_EPSILON)
+		{
+			p /= btSqrt(len2);
+		}
+		else
+		{
+			p = trA.getBasis().getColumn(1);
+		}
+		// make one more ortho
+		q = ax1.cross(p);
+		// fill two rows
+		tmpA = relA.cross(p);
+		tmpB = relB.cross(p);
+		for (i=0; i<3; i++) info->m_J1angularAxis[s2+i] = tmpA[i];
+		for (i=0; i<3; i++) info->m_J2angularAxis[s2+i] = -tmpB[i];
+		tmpA = relA.cross(q);
+		tmpB = relB.cross(q);
+		if(hasStaticBody && getSolveAngLimit())
+		{ // to make constraint between static and dynamic objects more rigid
+			// remove wA (or wB) from equation if angular limit is hit
+			tmpB *= factB;
+			tmpA *= factA;
+		}
+		for (i=0; i<3; i++) info->m_J1angularAxis[s3+i] = tmpA[i];
+		for (i=0; i<3; i++) info->m_J2angularAxis[s3+i] = -tmpB[i];
+		for (i=0; i<3; i++) info->m_J1linearAxis[s2+i] = p[i];
+		for (i=0; i<3; i++) info->m_J1linearAxis[s3+i] = q[i];
 	}
-	else 
-	{
-		factA = btScalar(0.5f);
-	}
-	if(factA > 0.99f) factA = 0.99f;
-	if(factA < 0.01f) factA = 0.01f;
-	factB = btScalar(1.0f) - factA;
-	c = bodyB_trans.getOrigin() - bodyA_trans.getOrigin();
-	btVector3 tmp = c.cross(p);
-	for (i=0; i<3; i++) info->m_J1angularAxis[s2+i] = factA*tmp[i];
-	for (i=0; i<3; i++) info->m_J2angularAxis[s2+i] = factB*tmp[i];
-	tmp = c.cross(q);
-	for (i=0; i<3; i++) info->m_J1angularAxis[s3+i] = factA*tmp[i];
-	for (i=0; i<3; i++) info->m_J2angularAxis[s3+i] = factB*tmp[i];
+	else
+	{	// old way - may be incorrect if bodies are not on the slider axis
+		// see discussion "Bug in slider constraint" http://bulletphysics.org/Bullet/phpBB3/viewtopic.php?f=9&t=4024&start=0
+		c = bodyB_trans.getOrigin() - bodyA_trans.getOrigin();
+		btVector3 tmp = c.cross(p);
+		for (i=0; i<3; i++) info->m_J1angularAxis[s2+i] = factA*tmp[i];
+		for (i=0; i<3; i++) info->m_J2angularAxis[s2+i] = factB*tmp[i];
+		tmp = c.cross(q);
+		for (i=0; i<3; i++) info->m_J1angularAxis[s3+i] = factA*tmp[i];
+		for (i=0; i<3; i++) info->m_J2angularAxis[s3+i] = factB*tmp[i];
 
-	for (i=0; i<3; i++) info->m_J1linearAxis[s2+i] = p[i];
-	for (i=0; i<3; i++) info->m_J1linearAxis[s3+i] = q[i];
-	// compute two elements of right hand side. we want to align the offset
-	// point (in body 2's frame) with the center of body 1.
-	btVector3 ofs; // offset point in global coordinates
-	ofs = trB.getOrigin() - trA.getOrigin();
-	k = info->fps * info->erp * getSoftnessOrthoLin();
-	info->m_constraintError[s2] = k * p.dot(ofs);
-	info->m_constraintError[s3] = k * q.dot(ofs);
-	int nrow = 3; // last filled row
-	int srow;
-	// check linear limits linear
-	btScalar limit_err = btScalar(0.0);
-	int limit = 0;
+		for (i=0; i<3; i++) info->m_J1linearAxis[s2+i] = p[i];
+		for (i=0; i<3; i++) info->m_J1linearAxis[s3+i] = q[i];
+	}
+	// compute two elements of right hand side
+
+	//	k = info->fps * info->erp * getSoftnessOrthoLin();
+	currERP = (m_flags & BT_SLIDER_FLAGS_ERP_ORTLIN) ? m_softnessOrthoLin : m_softnessOrthoLin * info->erp;
+	k = info->fps * currERP;
+
+	btScalar rhs = k * p.dot(ofs);
+	info->m_constraintError[s2] = rhs;
+	rhs = k * q.dot(ofs);
+	info->m_constraintError[s3] = rhs;
+	if(m_flags & BT_SLIDER_FLAGS_CFM_ORTLIN)
+	{
+		info->cfm[s2] = m_cfmOrthoLin;
+		info->cfm[s3] = m_cfmOrthoLin;
+	}
+
+
+	// check linear limits
+	limit_err = btScalar(0.0);
+	limit = 0;
 	if(getSolveLinLimit())
 	{
 		limit_err = getLinDepth() *  signFact;
 		limit = (limit_err > btScalar(0.0)) ? 2 : 1;
 	}
-	int powered = 0;
+	powered = 0;
 	if(getPoweredLinMotor())
 	{
 		powered = 1;
@@ -349,16 +486,32 @@ void btSliderConstraint::getInfo2NonVirtual(btConstraintInfo2* info, const btTra
 		// constraint force is applied at must lie along the same ax1 axis.
 		// a torque couple will result in limited slider-jointed free
 		// bodies from gaining angular momentum.
-		// the solution used here is to apply the constraint forces at the center of mass of the two bodies
-		btVector3 ltd;	// Linear Torque Decoupling vector (a torque)
-//		c = btScalar(0.5) * c;
-		ltd = c.cross(ax1);
-		info->m_J1angularAxis[srow+0] = factA*ltd[0];
-		info->m_J1angularAxis[srow+1] = factA*ltd[1];
-		info->m_J1angularAxis[srow+2] = factA*ltd[2];
-		info->m_J2angularAxis[srow+0] = factB*ltd[0];
-		info->m_J2angularAxis[srow+1] = factB*ltd[1];
-		info->m_J2angularAxis[srow+2] = factB*ltd[2];
+		if(m_useOffsetForConstraintFrame)
+		{
+			// this is needed only when bodyA and bodyB are both dynamic.
+			if(!hasStaticBody)
+			{
+				tmpA = relA.cross(ax1);
+				tmpB = relB.cross(ax1);
+				info->m_J1angularAxis[srow+0] = tmpA[0];
+				info->m_J1angularAxis[srow+1] = tmpA[1];
+				info->m_J1angularAxis[srow+2] = tmpA[2];
+				info->m_J2angularAxis[srow+0] = -tmpB[0];
+				info->m_J2angularAxis[srow+1] = -tmpB[1];
+				info->m_J2angularAxis[srow+2] = -tmpB[2];
+			}
+		}
+		else
+		{ // The old way. May be incorrect if bodies are not on the slider axis
+			btVector3 ltd;	// Linear Torque Decoupling vector (a torque)
+			ltd = c.cross(ax1);
+			info->m_J1angularAxis[srow+0] = factA*ltd[0];
+			info->m_J1angularAxis[srow+1] = factA*ltd[1];
+			info->m_J1angularAxis[srow+2] = factA*ltd[2];
+			info->m_J2angularAxis[srow+0] = factB*ltd[0];
+			info->m_J2angularAxis[srow+1] = factB*ltd[1];
+			info->m_J2angularAxis[srow+2] = factB*ltd[2];
+		}
 		// right-hand part
 		btScalar lostop = getLowerLinLimit();
 		btScalar histop = getUpperLinLimit();
@@ -369,21 +522,27 @@ void btSliderConstraint::getInfo2NonVirtual(btConstraintInfo2* info, const btTra
 		info->m_constraintError[srow] = 0.;
 		info->m_lowerLimit[srow] = 0.;
 		info->m_upperLimit[srow] = 0.;
+		currERP = (m_flags & BT_SLIDER_FLAGS_ERP_LIMLIN) ? m_softnessLimLin : info->erp;
 		if(powered)
 		{
-            info->cfm[nrow] = btScalar(0.0); 
+			if(m_flags & BT_SLIDER_FLAGS_CFM_DIRLIN)
+			{
+				info->cfm[srow] = m_cfmDirLin;
+			}
 			btScalar tag_vel = getTargetLinMotorVelocity();
-			btScalar mot_fact = getMotorFactor(m_linPos, m_lowerLinLimit, m_upperLinLimit, tag_vel, info->fps * info->erp);
-//			info->m_constraintError[srow] += mot_fact * getTargetLinMotorVelocity();
+			btScalar mot_fact = getMotorFactor(m_linPos, m_lowerLinLimit, m_upperLinLimit, tag_vel, info->fps * currERP);
 			info->m_constraintError[srow] -= signFact * mot_fact * getTargetLinMotorVelocity();
 			info->m_lowerLimit[srow] += -getMaxLinMotorForce() * info->fps;
 			info->m_upperLimit[srow] += getMaxLinMotorForce() * info->fps;
 		}
 		if(limit)
 		{
-			k = info->fps * info->erp;
+			k = info->fps * currERP;
 			info->m_constraintError[srow] += k * limit_err;
-			info->cfm[srow] = btScalar(0.0); // stop_cfm;
+			if(m_flags & BT_SLIDER_FLAGS_CFM_LIMLIN)
+			{
+				info->cfm[srow] = m_cfmLimLin;
+			}
 			if(lostop == histop) 
 			{	// limited low and high simultaneously
 				info->m_lowerLimit[srow] = -SIMD_INFINITY;
@@ -466,19 +625,26 @@ void btSliderConstraint::getInfo2NonVirtual(btConstraintInfo2* info, const btTra
 		{  // the joint motor is ineffective
 			powered = 0;
 		}
+		currERP = (m_flags & BT_SLIDER_FLAGS_ERP_LIMANG) ? m_softnessLimAng : info->erp;
 		if(powered)
 		{
-            info->cfm[srow] = btScalar(0.0); 
-			btScalar mot_fact = getMotorFactor(m_angPos, m_lowerAngLimit, m_upperAngLimit, getTargetAngMotorVelocity(), info->fps * info->erp);
+			if(m_flags & BT_SLIDER_FLAGS_CFM_DIRANG)
+			{
+				info->cfm[srow] = m_cfmDirAng;
+			}
+			btScalar mot_fact = getMotorFactor(m_angPos, m_lowerAngLimit, m_upperAngLimit, getTargetAngMotorVelocity(), info->fps * currERP);
 			info->m_constraintError[srow] = mot_fact * getTargetAngMotorVelocity();
 			info->m_lowerLimit[srow] = -getMaxAngMotorForce() * info->fps;
 			info->m_upperLimit[srow] = getMaxAngMotorForce() * info->fps;
 		}
 		if(limit)
 		{
-			k = info->fps * info->erp;
+			k = info->fps * currERP;
 			info->m_constraintError[srow] += k * limit_err;
-			info->cfm[srow] = btScalar(0.0); // stop_cfm;
+			if(m_flags & BT_SLIDER_FLAGS_CFM_LIMANG)
+			{
+				info->cfm[srow] = m_cfmLimAng;
+			}
 			if(lostop == histop) 
 			{
 				// limited low and high simultaneously
@@ -532,320 +698,160 @@ void btSliderConstraint::getInfo2NonVirtual(btConstraintInfo2* info, const btTra
 }
 
 
-
-void btSliderConstraint::solveConstraintObsolete(btSolverBody& bodyA,btSolverBody& bodyB,btScalar timeStep)
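+///override the default global value of a parameter (BT_CONSTRAINT_STOP_ERP, BT_CONSTRAINT_CFM or BT_CONSTRAINT_STOP_CFM), optionally provide the axis (0..5): axis < 1 selects the LimLin/DirLin members, 1..2 the OrthoLin members, 3 the LimAng/DirAng members, 4..5 the OrthoAng members. 
+///If no axis is provided (axis = -1), the axis < 1 branch is used. BT_CONSTRAINT_CFM accepts only axis < 1 or axis == 3; any other combination ends in btAssertConstrParams(0).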
+void btSliderConstraint::setParam(int num, btScalar value, int axis)
 {
-	if (m_useSolveConstraintObsolete)
+	switch(num)
 	{
-		m_timeStep = timeStep;
-		if(m_useLinearReferenceFrameA)
+	case BT_CONSTRAINT_STOP_ERP :
+		if(axis < 1)
 		{
-			solveConstraintInt(m_rbA,bodyA, m_rbB,bodyB);
+			m_softnessLimLin = value;
+			m_flags |= BT_SLIDER_FLAGS_ERP_LIMLIN;
+		}
+		else if(axis < 3)
+		{
+			m_softnessOrthoLin = value;
+			m_flags |= BT_SLIDER_FLAGS_ERP_ORTLIN;
+		}
+		else if(axis == 3)
+		{
+			m_softnessLimAng = value;
+			m_flags |= BT_SLIDER_FLAGS_ERP_LIMANG;
+		}
+		else if(axis < 6)
+		{
+			m_softnessOrthoAng = value;
+			m_flags |= BT_SLIDER_FLAGS_ERP_ORTANG;
 		}
 		else
 		{
-			solveConstraintInt(m_rbB,bodyB, m_rbA,bodyA);
+			btAssertConstrParams(0);
 		}
-	}
-}
-
-
-
-void btSliderConstraint::solveConstraintInt(btRigidBody& rbA, btSolverBody& bodyA,btRigidBody& rbB, btSolverBody& bodyB)
-{
-#ifndef __SPU__
-    int i;
-    // linear
-    btVector3 velA;
-	bodyA.getVelocityInLocalPointObsolete(m_relPosA,velA);
-    btVector3 velB;
-	bodyB.getVelocityInLocalPointObsolete(m_relPosB,velB);
-    btVector3 vel = velA - velB;
-	for(i = 0; i < 3; i++)
-    {
-		const btVector3& normal = m_jacLin[i].m_linearJointAxis;
-		btScalar rel_vel = normal.dot(vel);
-		// calculate positional error
-		btScalar depth = m_depth[i];
-		// get parameters
-		btScalar softness = (i) ? m_softnessOrthoLin : (m_solveLinLim ? m_softnessLimLin : m_softnessDirLin);
-		btScalar restitution = (i) ? m_restitutionOrthoLin : (m_solveLinLim ? m_restitutionLimLin : m_restitutionDirLin);
-		btScalar damping = (i) ? m_dampingOrthoLin : (m_solveLinLim ? m_dampingLimLin : m_dampingDirLin);
-		// calcutate and apply impulse
-		btScalar normalImpulse = softness * (restitution * depth / m_timeStep - damping * rel_vel) * m_jacLinDiagABInv[i];
-		btVector3 impulse_vector = normal * normalImpulse;
-		
-		//rbA.applyImpulse( impulse_vector, m_relPosA);
-		//rbB.applyImpulse(-impulse_vector, m_relPosB);
+		break;
+	case BT_CONSTRAINT_CFM :
+		if(axis < 1)
 		{
-			btVector3 ftorqueAxis1 = m_relPosA.cross(normal);
-			btVector3 ftorqueAxis2 = m_relPosB.cross(normal);
-			bodyA.applyImpulse(normal*rbA.getInvMass(), rbA.getInvInertiaTensorWorld()*ftorqueAxis1,normalImpulse);
-			bodyB.applyImpulse(normal*rbB.getInvMass(), rbB.getInvInertiaTensorWorld()*ftorqueAxis2,-normalImpulse);
+			m_cfmDirLin = value;
+			m_flags |= BT_SLIDER_FLAGS_CFM_DIRLIN;
 		}
-
-
-
-		if(m_poweredLinMotor && (!i))
-		{ // apply linear motor
-			if(m_accumulatedLinMotorImpulse < m_maxLinMotorForce)
-			{
-				btScalar desiredMotorVel = m_targetLinMotorVelocity;
-				btScalar motor_relvel = desiredMotorVel + rel_vel;
-				normalImpulse = -motor_relvel * m_jacLinDiagABInv[i];
-				// clamp accumulated impulse
-				btScalar new_acc = m_accumulatedLinMotorImpulse + btFabs(normalImpulse);
-				if(new_acc  > m_maxLinMotorForce)
-				{
-					new_acc = m_maxLinMotorForce;
-				}
-				btScalar del = new_acc  - m_accumulatedLinMotorImpulse;
-				if(normalImpulse < btScalar(0.0))
-				{
-					normalImpulse = -del;
-				}
-				else
-				{
-					normalImpulse = del;
-				}
-				m_accumulatedLinMotorImpulse = new_acc;
-				// apply clamped impulse
-				impulse_vector = normal * normalImpulse;
-				//rbA.applyImpulse( impulse_vector, m_relPosA);
-				//rbB.applyImpulse(-impulse_vector, m_relPosB);
-
-				{
-					btVector3 ftorqueAxis1 = m_relPosA.cross(normal);
-					btVector3 ftorqueAxis2 = m_relPosB.cross(normal);
-					bodyA.applyImpulse(normal*rbA.getInvMass(), rbA.getInvInertiaTensorWorld()*ftorqueAxis1,normalImpulse);
-					bodyB.applyImpulse(normal*rbB.getInvMass(), rbB.getInvInertiaTensorWorld()*ftorqueAxis2,-normalImpulse);
-				}
-
-
-
-			}
-		}
-    }
-	// angular 
-	// get axes in world space
-	btVector3 axisA =  m_calculatedTransformA.getBasis().getColumn(0);
-	btVector3 axisB =  m_calculatedTransformB.getBasis().getColumn(0);
-
-	btVector3 angVelA;
-	bodyA.getAngularVelocity(angVelA);
-	btVector3 angVelB;
-	bodyB.getAngularVelocity(angVelB);
-
-	btVector3 angVelAroundAxisA = axisA * axisA.dot(angVelA);
-	btVector3 angVelAroundAxisB = axisB * axisB.dot(angVelB);
-
-	btVector3 angAorthog = angVelA - angVelAroundAxisA;
-	btVector3 angBorthog = angVelB - angVelAroundAxisB;
-	btVector3 velrelOrthog = angAorthog-angBorthog;
-	//solve orthogonal angular velocity correction
-	btScalar len = velrelOrthog.length();
-	btScalar orthorImpulseMag = 0.f;
-
-	if (len > btScalar(0.00001))
-	{
-		btVector3 normal = velrelOrthog.normalized();
-		btScalar denom = rbA.computeAngularImpulseDenominator(normal) + rbB.computeAngularImpulseDenominator(normal);
-		//velrelOrthog *= (btScalar(1.)/denom) * m_dampingOrthoAng * m_softnessOrthoAng;
-		orthorImpulseMag = (btScalar(1.)/denom) * m_dampingOrthoAng * m_softnessOrthoAng;
-	}
-	//solve angular positional correction
-	btVector3 angularError = axisA.cross(axisB) *(btScalar(1.)/m_timeStep);
-	btVector3 angularAxis = angularError;
-	btScalar angularImpulseMag = 0;
-
-	btScalar len2 = angularError.length();
-	if (len2>btScalar(0.00001))
-	{
-		btVector3 normal2 = angularError.normalized();
-		btScalar denom2 = rbA.computeAngularImpulseDenominator(normal2) + rbB.computeAngularImpulseDenominator(normal2);
-		angularImpulseMag = (btScalar(1.)/denom2) * m_restitutionOrthoAng * m_softnessOrthoAng;
-		angularError *= angularImpulseMag;
-	}
-	// apply impulse
-	//rbA.applyTorqueImpulse(-velrelOrthog+angularError);
-	//rbB.applyTorqueImpulse(velrelOrthog-angularError);
-
-	bodyA.applyImpulse(btVector3(0,0,0), rbA.getInvInertiaTensorWorld()*velrelOrthog,-orthorImpulseMag);
-	bodyB.applyImpulse(btVector3(0,0,0), rbB.getInvInertiaTensorWorld()*velrelOrthog,orthorImpulseMag);
-	bodyA.applyImpulse(btVector3(0,0,0), rbA.getInvInertiaTensorWorld()*angularAxis,angularImpulseMag);
-	bodyB.applyImpulse(btVector3(0,0,0), rbB.getInvInertiaTensorWorld()*angularAxis,-angularImpulseMag);
-
-
-	btScalar impulseMag;
-	//solve angular limits
-	if(m_solveAngLim)
-	{
-		impulseMag = (angVelB - angVelA).dot(axisA) * m_dampingLimAng + m_angDepth * m_restitutionLimAng / m_timeStep;
-		impulseMag *= m_kAngle * m_softnessLimAng;
-	}
-	else
-	{
-		impulseMag = (angVelB - angVelA).dot(axisA) * m_dampingDirAng + m_angDepth * m_restitutionDirAng / m_timeStep;
-		impulseMag *= m_kAngle * m_softnessDirAng;
-	}
-	btVector3 impulse = axisA * impulseMag;
-	//rbA.applyTorqueImpulse(impulse);
-	//rbB.applyTorqueImpulse(-impulse);
-
-	bodyA.applyImpulse(btVector3(0,0,0), rbA.getInvInertiaTensorWorld()*axisA,impulseMag);
-	bodyB.applyImpulse(btVector3(0,0,0), rbB.getInvInertiaTensorWorld()*axisA,-impulseMag);
-
-
-
-	//apply angular motor
-	if(m_poweredAngMotor) 
-	{
-		if(m_accumulatedAngMotorImpulse < m_maxAngMotorForce)
+		else if(axis == 3)
 		{
-			btVector3 velrel = angVelAroundAxisA - angVelAroundAxisB;
-			btScalar projRelVel = velrel.dot(axisA);
-
-			btScalar desiredMotorVel = m_targetAngMotorVelocity;
-			btScalar motor_relvel = desiredMotorVel - projRelVel;
-
-			btScalar angImpulse = m_kAngle * motor_relvel;
-			// clamp accumulated impulse
-			btScalar new_acc = m_accumulatedAngMotorImpulse + btFabs(angImpulse);
-			if(new_acc  > m_maxAngMotorForce)
-			{
-				new_acc = m_maxAngMotorForce;
-			}
-			btScalar del = new_acc  - m_accumulatedAngMotorImpulse;
-			if(angImpulse < btScalar(0.0))
-			{
-				angImpulse = -del;
-			}
-			else
-			{
-				angImpulse = del;
-			}
-			m_accumulatedAngMotorImpulse = new_acc;
-			// apply clamped impulse
-			btVector3 motorImp = angImpulse * axisA;
-			//rbA.applyTorqueImpulse(motorImp);
-			//rbB.applyTorqueImpulse(-motorImp);
-
-			bodyA.applyImpulse(btVector3(0,0,0), rbA.getInvInertiaTensorWorld()*axisA,angImpulse);
-			bodyB.applyImpulse(btVector3(0,0,0), rbB.getInvInertiaTensorWorld()*axisA,-angImpulse);
-		}
-	}
-#endif //__SPU__
-}
-
-
-
-
-
-void btSliderConstraint::calculateTransforms(const btTransform& transA,const btTransform& transB)
-{
-	if(m_useLinearReferenceFrameA || (!m_useSolveConstraintObsolete))
-	{
-		m_calculatedTransformA = transA * m_frameInA;
-		m_calculatedTransformB = transB * m_frameInB;
-	}
-	else
-	{
-		m_calculatedTransformA = transB * m_frameInB;
-		m_calculatedTransformB = transA * m_frameInA;
-	}
-	m_realPivotAInW = m_calculatedTransformA.getOrigin();
-	m_realPivotBInW = m_calculatedTransformB.getOrigin();
-	m_sliderAxis = m_calculatedTransformA.getBasis().getColumn(0); // along X
-	if(m_useLinearReferenceFrameA || m_useSolveConstraintObsolete)
-	{
-		m_delta = m_realPivotBInW - m_realPivotAInW;
-	}
-	else
-	{
-		m_delta = m_realPivotAInW - m_realPivotBInW;
-	}
-	m_projPivotInW = m_realPivotAInW + m_sliderAxis.dot(m_delta) * m_sliderAxis;
-    btVector3 normalWorld;
-    int i;
-    //linear part
-    for(i = 0; i < 3; i++)
-    {
-		normalWorld = m_calculatedTransformA.getBasis().getColumn(i);
-		m_depth[i] = m_delta.dot(normalWorld);
-    }
-}
- 
-
-
-void btSliderConstraint::testLinLimits(void)
-{
-	m_solveLinLim = false;
-	m_linPos = m_depth[0];
-	if(m_lowerLinLimit <= m_upperLinLimit)
-	{
-		if(m_depth[0] > m_upperLinLimit)
-		{
-			m_depth[0] -= m_upperLinLimit;
-			m_solveLinLim = true;
-		}
-		else if(m_depth[0] < m_lowerLinLimit)
-		{
-			m_depth[0] -= m_lowerLinLimit;
-			m_solveLinLim = true;
+			m_cfmDirAng = value;
+			m_flags |= BT_SLIDER_FLAGS_CFM_DIRANG;
 		}
 		else
 		{
-			m_depth[0] = btScalar(0.);
+			btAssertConstrParams(0);
 		}
-	}
-	else
-	{
-		m_depth[0] = btScalar(0.);
-	}
-}
-
-
-
-void btSliderConstraint::testAngLimits(void)
-{
-	m_angDepth = btScalar(0.);
-	m_solveAngLim = false;
-	if(m_lowerAngLimit <= m_upperAngLimit)
-	{
-		const btVector3 axisA0 = m_calculatedTransformA.getBasis().getColumn(1);
-		const btVector3 axisA1 = m_calculatedTransformA.getBasis().getColumn(2);
-		const btVector3 axisB0 = m_calculatedTransformB.getBasis().getColumn(1);
-		btScalar rot = btAtan2Fast(axisB0.dot(axisA1), axisB0.dot(axisA0));  
-		rot = btAdjustAngleToLimits(rot, m_lowerAngLimit, m_upperAngLimit);
-		m_angPos = rot;
-		if(rot < m_lowerAngLimit)
+		break;
+	case BT_CONSTRAINT_STOP_CFM :
+		if(axis < 1)
 		{
-			m_angDepth = rot - m_lowerAngLimit;
-			m_solveAngLim = true;
-		} 
-		else if(rot > m_upperAngLimit)
-		{
-			m_angDepth = rot - m_upperAngLimit;
-			m_solveAngLim = true;
+			m_cfmLimLin = value;
+			m_flags |= BT_SLIDER_FLAGS_CFM_LIMLIN;
 		}
+		else if(axis < 3)
+		{
+			m_cfmOrthoLin = value;
+			m_flags |= BT_SLIDER_FLAGS_CFM_ORTLIN;
+		}
+		else if(axis == 3)
+		{
+			m_cfmLimAng = value;
+			m_flags |= BT_SLIDER_FLAGS_CFM_LIMANG;
+		}
+		else if(axis < 6)
+		{
+			m_cfmOrthoAng = value;
+			m_flags |= BT_SLIDER_FLAGS_CFM_ORTANG;
+		}
+		else
+		{
+			btAssertConstrParams(0);
+		}
+		break;
 	}
 }
-	
 
-
-btVector3 btSliderConstraint::getAncorInA(void)
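+///return the constraint-local value of parameter num for the given axis; btAssertConstrParams is invoked if the matching m_flags bit was never set through setParam(), and SIMD_INFINITY is returned when num matches no case or the axis is rejected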
+btScalar btSliderConstraint::getParam(int num, int axis) const 
 {
-	btVector3 ancorInA;
-	ancorInA = m_realPivotAInW + (m_lowerLinLimit + m_upperLinLimit) * btScalar(0.5) * m_sliderAxis;
-	ancorInA = m_rbA.getCenterOfMassTransform().inverse() * ancorInA;
-	return ancorInA;
+	btScalar retVal(SIMD_INFINITY);
+	switch(num)
+	{
+	case BT_CONSTRAINT_STOP_ERP :
+		if(axis < 1)
+		{
+			btAssertConstrParams(m_flags & BT_SLIDER_FLAGS_ERP_LIMLIN);
+			retVal = m_softnessLimLin;
+		}
+		else if(axis < 3)
+		{
+			btAssertConstrParams(m_flags & BT_SLIDER_FLAGS_ERP_ORTLIN);
+			retVal = m_softnessOrthoLin;
+		}
+		else if(axis == 3)
+		{
+			btAssertConstrParams(m_flags & BT_SLIDER_FLAGS_ERP_LIMANG);
+			retVal = m_softnessLimAng;
+		}
+		else if(axis < 6)
+		{
+			btAssertConstrParams(m_flags & BT_SLIDER_FLAGS_ERP_ORTANG);
+			retVal = m_softnessOrthoAng;
+		}
+		else
+		{
+			btAssertConstrParams(0);
+		}
+		break;
+	case BT_CONSTRAINT_CFM :
+		if(axis < 1)
+		{
+			btAssertConstrParams(m_flags & BT_SLIDER_FLAGS_CFM_DIRLIN);
+			retVal = m_cfmDirLin;
+		}
+		else if(axis == 3)
+		{
+			btAssertConstrParams(m_flags & BT_SLIDER_FLAGS_CFM_DIRANG);
+			retVal = m_cfmDirAng;
+		}
+		else
+		{
+			btAssertConstrParams(0);
+		}
+		break;
+	case BT_CONSTRAINT_STOP_CFM :
+		if(axis < 1)
+		{
+			btAssertConstrParams(m_flags & BT_SLIDER_FLAGS_CFM_LIMLIN);
+			retVal = m_cfmLimLin;
+		}
+		else if(axis < 3)
+		{
+			btAssertConstrParams(m_flags & BT_SLIDER_FLAGS_CFM_ORTLIN);
+			retVal = m_cfmOrthoLin;
+		}
+		else if(axis == 3)
+		{
+			btAssertConstrParams(m_flags & BT_SLIDER_FLAGS_CFM_LIMANG);
+			retVal = m_cfmLimAng;
+		}
+		else if(axis < 6)
+		{
+			btAssertConstrParams(m_flags & BT_SLIDER_FLAGS_CFM_ORTANG);
+			retVal = m_cfmOrthoAng;
+		}
+		else
+		{
+			btAssertConstrParams(0);
+		}
+		break;
+	}
+	return retVal;
 }
 
 
 
-btVector3 btSliderConstraint::getAncorInB(void)
-{
-	btVector3 ancorInB;
-	ancorInB = m_frameInB.getOrigin();
-	return ancorInB;
-}
diff --git a/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btSliderConstraint.h b/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btSliderConstraint.h
index 57b0ed062..ca8e715bc 100644
--- a/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btSliderConstraint.h
+++ b/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btSliderConstraint.h
@@ -22,8 +22,8 @@ TODO:
  - add conversion for ODE constraint solver
 */
 
-#ifndef SLIDER_CONSTRAINT_H
-#define SLIDER_CONSTRAINT_H
+#ifndef BT_SLIDER_CONSTRAINT_H
+#define BT_SLIDER_CONSTRAINT_H
 
 
 
@@ -40,14 +40,32 @@ class btRigidBody;
 #define SLIDER_CONSTRAINT_DEF_SOFTNESS		(btScalar(1.0))
 #define SLIDER_CONSTRAINT_DEF_DAMPING		(btScalar(1.0))
 #define SLIDER_CONSTRAINT_DEF_RESTITUTION	(btScalar(0.7))
+#define SLIDER_CONSTRAINT_DEF_CFM			(btScalar(0.f))
 
 
+enum btSliderFlags	// bits of btSliderConstraint::m_flags; setParam() sets a bit when it stores a constraint-local value, getParam() asserts on it
+{
+	BT_SLIDER_FLAGS_CFM_DIRLIN = (1 << 0),	// m_cfmDirLin stored by setParam(BT_CONSTRAINT_CFM, axis < 1)
+	BT_SLIDER_FLAGS_ERP_DIRLIN = (1 << 1),	// not set by btSliderConstraint::setParam() as written
+	BT_SLIDER_FLAGS_CFM_DIRANG = (1 << 2),	// m_cfmDirAng stored by setParam(BT_CONSTRAINT_CFM, axis == 3)
+	BT_SLIDER_FLAGS_ERP_DIRANG = (1 << 3),	// not set by btSliderConstraint::setParam() as written
+	BT_SLIDER_FLAGS_CFM_ORTLIN = (1 << 4),	// m_cfmOrthoLin stored by setParam(BT_CONSTRAINT_STOP_CFM, axis 1..2)
+	BT_SLIDER_FLAGS_ERP_ORTLIN = (1 << 5),	// m_softnessOrthoLin stored by setParam(BT_CONSTRAINT_STOP_ERP, axis 1..2)
+	BT_SLIDER_FLAGS_CFM_ORTANG = (1 << 6),	// m_cfmOrthoAng stored by setParam(BT_CONSTRAINT_STOP_CFM, axis 4..5)
+	BT_SLIDER_FLAGS_ERP_ORTANG = (1 << 7),	// m_softnessOrthoAng stored by setParam(BT_CONSTRAINT_STOP_ERP, axis 4..5)
+	BT_SLIDER_FLAGS_CFM_LIMLIN = (1 << 8),	// m_cfmLimLin stored by setParam(BT_CONSTRAINT_STOP_CFM, axis < 1)
+	BT_SLIDER_FLAGS_ERP_LIMLIN = (1 << 9),	// m_softnessLimLin stored by setParam(BT_CONSTRAINT_STOP_ERP, axis < 1)
+	BT_SLIDER_FLAGS_CFM_LIMANG = (1 << 10),	// m_cfmLimAng stored by setParam(BT_CONSTRAINT_STOP_CFM, axis == 3)
+	BT_SLIDER_FLAGS_ERP_LIMANG = (1 << 11)	// m_softnessLimAng stored by setParam(BT_CONSTRAINT_STOP_ERP, axis == 3)
+};
 
-class btSliderConstraint : public btTypedConstraint
+
+ATTRIBUTE_ALIGNED16(class) btSliderConstraint : public btTypedConstraint
 {
 protected:
 	///for backwards compatibility during the transition to 'getInfo/getInfo2'
 	bool		m_useSolveConstraintObsolete;
+	bool		m_useOffsetForConstraintFrame;
 	btTransform	m_frameInA;
     btTransform	m_frameInB;
 	// use frameA fo define limits, if true
@@ -67,26 +85,39 @@ protected:
 	btScalar m_softnessDirLin;
 	btScalar m_restitutionDirLin;
 	btScalar m_dampingDirLin;
+	btScalar m_cfmDirLin;
+
 	btScalar m_softnessDirAng;
 	btScalar m_restitutionDirAng;
 	btScalar m_dampingDirAng;
+	btScalar m_cfmDirAng;
+
 	btScalar m_softnessLimLin;
 	btScalar m_restitutionLimLin;
 	btScalar m_dampingLimLin;
+	btScalar m_cfmLimLin;
+
 	btScalar m_softnessLimAng;
 	btScalar m_restitutionLimAng;
 	btScalar m_dampingLimAng;
+	btScalar m_cfmLimAng;
+
 	btScalar m_softnessOrthoLin;
 	btScalar m_restitutionOrthoLin;
 	btScalar m_dampingOrthoLin;
+	btScalar m_cfmOrthoLin;
+
 	btScalar m_softnessOrthoAng;
 	btScalar m_restitutionOrthoAng;
 	btScalar m_dampingOrthoAng;
+	btScalar m_cfmOrthoAng;
 	
 	// for interlal use
 	bool m_solveLinLim;
 	bool m_solveAngLim;
 
+	int m_flags;
+
 	btJacobianEntry	m_jacLin[3];
 	btScalar		m_jacLinDiagABInv[3];
 
@@ -124,12 +155,14 @@ protected:
 	//------------------------    
 	void initParams();
 public:
+	BT_DECLARE_ALIGNED_ALLOCATOR();
+	
 	// constructors
     btSliderConstraint(btRigidBody& rbA, btRigidBody& rbB, const btTransform& frameInA, const btTransform& frameInB ,bool useLinearReferenceFrameA);
-    btSliderConstraint(btRigidBody& rbB, const btTransform& frameInB, bool useLinearReferenceFrameB);
-    btSliderConstraint();
+    btSliderConstraint(btRigidBody& rbB, const btTransform& frameInB, bool useLinearReferenceFrameA);
+
 	// overrides
-    virtual void	buildJacobian();
+
     virtual void getInfo1 (btConstraintInfo1* info);
 
 	void getInfo1NonVirtual(btConstraintInfo1* info);
@@ -138,8 +171,6 @@ public:
 
 	void getInfo2NonVirtual(btConstraintInfo2* info, const btTransform& transA, const btTransform& transB,const btVector3& linVelA,const btVector3& linVelB, btScalar rbAinvMass,btScalar rbBinvMass);
 
-    virtual	void	solveConstraintObsolete(btSolverBody& bodyA,btSolverBody& bodyB,btScalar	timeStep);
-	
 
 	// access
     const btRigidBody& getRigidBodyA() const { return m_rbA; }
@@ -207,7 +238,10 @@ public:
 	btScalar getTargetAngMotorVelocity() { return m_targetAngMotorVelocity; }
 	void setMaxAngMotorForce(btScalar maxAngMotorForce) { m_maxAngMotorForce = maxAngMotorForce; }
 	btScalar getMaxAngMotorForce() { return m_maxAngMotorForce; }
-	btScalar getLinearPos() { return m_linPos; }
+
+	btScalar getLinearPos() const { return m_linPos; }
+	btScalar getAngularPos() const { return m_angPos; }
+	
 	
 
 	// access for ODE solver
@@ -215,20 +249,87 @@ public:
 	btScalar getLinDepth() { return m_depth[0]; }
 	bool getSolveAngLimit() { return m_solveAngLim; }
 	btScalar getAngDepth() { return m_angDepth; }
-	// internal
-    void	buildJacobianInt(btRigidBody& rbA, btRigidBody& rbB, const btTransform& frameInA, const btTransform& frameInB);
-    void	solveConstraintInt(btRigidBody& rbA, btSolverBody& bodyA,btRigidBody& rbB, btSolverBody& bodyB);
 	// shared code used by ODE solver
 	void	calculateTransforms(const btTransform& transA,const btTransform& transB);
 	void	testLinLimits();
-	void	testLinLimits2(btConstraintInfo2* info);
 	void	testAngLimits();
 	// access for PE Solver
 	btVector3 getAncorInA();
 	btVector3 getAncorInB();
+	// access for UseFrameOffset
+	bool getUseFrameOffset() { return m_useOffsetForConstraintFrame; }
+	void setUseFrameOffset(bool frameOffsetOnOff) { m_useOffsetForConstraintFrame = frameOffsetOnOff; }
+
+	void setFrames(const btTransform& frameA, const btTransform& frameB) 
+	{ // replaces both constraint frames, then recomputes the derived state from the bodies' current center-of-mass transforms
+		m_frameInA=frameA; 
+		m_frameInB=frameB;
+		calculateTransforms(m_rbA.getCenterOfMassTransform(),m_rbB.getCenterOfMassTransform());
+		buildJacobian(); // NOTE(review): this patch removes the buildJacobian() override declaration from this class, so this presumably resolves to an inherited version - confirm
+	} 
+
+
+	///override the default global value of a parameter (such as ERP or CFM), optionally provide the axis (0..5). 
+	///If no axis is provided, it uses the default axis for this constraint.
+	virtual	void	setParam(int num, btScalar value, int axis = -1);
+	///return the local value of parameter
+	virtual	btScalar getParam(int num, int axis = -1) const;
+
+	virtual	int	calculateSerializeBufferSize() const;
+
+	///fills the dataBuffer and returns the struct name (and 0 on failure)
+	virtual	const char*	serialize(void* dataBuffer, btSerializer* serializer) const;
+
+
+};
+
+///Flat serialization layout filled in by btSliderConstraint::serialize(). Do not change this structure: doing so requires an updated sBulletDNAstr/sBulletDNAstr64.
+struct btSliderConstraintData
+{
+	btTypedConstraintData	m_typeConstraintData; // filled by btTypedConstraint::serialize()
+	btTransformFloatData m_rbAFrame; // written from m_frameInA. NOTE(review): the slider code reads column 0 (X) of the frame as the slider axis, not Z as for a hinge - confirm
+	btTransformFloatData m_rbBFrame; // written from m_frameInB
+	
+	float	m_linearUpperLimit; // float(m_upperLinLimit)
+	float	m_linearLowerLimit; // float(m_lowerLinLimit)
+
+	float	m_angularUpperLimit; // float(m_upperAngLimit)
+	float	m_angularLowerLimit; // float(m_lowerAngLimit)
+
+	int	m_useLinearReferenceFrameA; // written from the constraint member of the same name
+	int m_useOffsetForConstraintFrame; // bool constraint member of the same name, stored as int
+
 };
 
 
+SIMD_FORCE_INLINE		int	btSliderConstraint::calculateSerializeBufferSize() const
+{	// size of the btSliderConstraintData struct that serialize() fills in
+	return sizeof(btSliderConstraintData);
+}
 
-#endif //SLIDER_CONSTRAINT_H
+	///fills dataBuffer (interpreted as a btSliderConstraintData) and returns the struct name; this implementation has no failure path and always returns the name
+SIMD_FORCE_INLINE	const char*	btSliderConstraint::serialize(void* dataBuffer, btSerializer* serializer) const
+{
+
+	btSliderConstraintData* sliderData = (btSliderConstraintData*) dataBuffer; // presumably the caller sized the buffer with calculateSerializeBufferSize() - confirm
+	btTypedConstraint::serialize(&sliderData->m_typeConstraintData,serializer); // base-class part first
+
+	m_frameInA.serializeFloat(sliderData->m_rbAFrame); // constraint frames are stored through serializeFloat()
+	m_frameInB.serializeFloat(sliderData->m_rbBFrame);
+
+	sliderData->m_linearUpperLimit = float(m_upperLinLimit); // limits are explicitly converted to float
+	sliderData->m_linearLowerLimit = float(m_lowerLinLimit);
+
+	sliderData->m_angularUpperLimit = float(m_upperAngLimit);
+	sliderData->m_angularLowerLimit = float(m_lowerAngLimit);
+
+	sliderData->m_useLinearReferenceFrameA = m_useLinearReferenceFrameA; // flags are stored in int fields
+	sliderData->m_useOffsetForConstraintFrame = m_useOffsetForConstraintFrame;
+
+	return "btSliderConstraintData"; // only frames, the four limits and the two flags above are written; m_flags and the softness/cfm members are not part of this struct
+}
+
+
+
+#endif //BT_SLIDER_CONSTRAINT_H
 
diff --git a/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btSolve2LinearConstraint.h b/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btSolve2LinearConstraint.h
index 057d3fac8..e8bfabf86 100644
--- a/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btSolve2LinearConstraint.h
+++ b/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btSolve2LinearConstraint.h
@@ -13,8 +13,8 @@ subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-#ifndef SOLVE_2LINEAR_CONSTRAINT_H
-#define SOLVE_2LINEAR_CONSTRAINT_H
+#ifndef BT_SOLVE_2LINEAR_CONSTRAINT_H
+#define BT_SOLVE_2LINEAR_CONSTRAINT_H
 
 #include "LinearMath/btMatrix3x3.h"
 #include "LinearMath/btVector3.h"
@@ -104,4 +104,4 @@ public:
 
 };
 
-#endif //SOLVE_2LINEAR_CONSTRAINT_H
+#endif //BT_SOLVE_2LINEAR_CONSTRAINT_H
diff --git a/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btSolverBody.h b/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btSolverBody.h
index 57a8fd31d..4e1a8df6a 100644
--- a/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btSolverBody.h
+++ b/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btSolverBody.h
@@ -19,7 +19,7 @@ subject to the following restrictions:
 class	btRigidBody;
 #include "LinearMath/btVector3.h"
 #include "LinearMath/btMatrix3x3.h"
-#include "BulletDynamics/Dynamics/btRigidBody.h"
+
 #include "LinearMath/btAlignedAllocator.h"
 #include "LinearMath/btTransformUtil.h"
 
@@ -105,23 +105,35 @@ operator+(const btSimdScalar& v1, const btSimdScalar& v2)
 #endif
 
 ///The btSolverBody is an internal datastructure for the constraint solver. Only necessary data is packed to increase cache coherence/performance.
-ATTRIBUTE_ALIGNED16 (struct)	btSolverBody
+ATTRIBUTE_ALIGNED64 (struct)	btSolverBody
 {
 	BT_DECLARE_ALIGNED_ALLOCATOR();
+	btTransform		m_worldTransform;
 	btVector3		m_deltaLinearVelocity;
 	btVector3		m_deltaAngularVelocity;
 	btVector3		m_angularFactor;
+	btVector3		m_linearFactor;
 	btVector3		m_invMass;
-	btScalar		m_friction;
-	btRigidBody*	m_originalBody;
 	btVector3		m_pushVelocity;
 	btVector3		m_turnVelocity;
+	btVector3		m_linearVelocity;
+	btVector3		m_angularVelocity;
 
+	btRigidBody*	m_originalBody;
+	void	setWorldTransform(const btTransform& worldTransform)
+	{
+		m_worldTransform = worldTransform;
+	}
+
+	const btTransform& getWorldTransform() const
+	{
+		return m_worldTransform;
+	}
 	
 	SIMD_FORCE_INLINE void	getVelocityInLocalPointObsolete(const btVector3& rel_pos, btVector3& velocity ) const
 	{
 		if (m_originalBody)
-			velocity = m_originalBody->getLinearVelocity()+m_deltaLinearVelocity + (m_originalBody->getAngularVelocity()+m_deltaAngularVelocity).cross(rel_pos);
+			velocity = m_linearVelocity+m_deltaLinearVelocity + (m_angularVelocity+m_deltaAngularVelocity).cross(rel_pos);
 		else
 			velocity.setValue(0,0,0);
 	}
@@ -129,7 +141,7 @@ ATTRIBUTE_ALIGNED16 (struct)	btSolverBody
 	SIMD_FORCE_INLINE void	getAngularVelocity(btVector3& angVel) const
 	{
 		if (m_originalBody)
-			angVel = m_originalBody->getAngularVelocity()+m_deltaAngularVelocity;
+			angVel =m_angularVelocity+m_deltaAngularVelocity;
 		else
 			angVel.setValue(0,0,0);
 	}
@@ -138,9 +150,9 @@ ATTRIBUTE_ALIGNED16 (struct)	btSolverBody
 	//Optimization for the iterative solver: avoid calculating constant terms involving inertia, normal, relative position
 	SIMD_FORCE_INLINE void applyImpulse(const btVector3& linearComponent, const btVector3& angularComponent,const btScalar impulseMagnitude)
 	{
-		//if (m_invMass)
+		if (m_originalBody)
 		{
-			m_deltaLinearVelocity += linearComponent*impulseMagnitude;
+			m_deltaLinearVelocity += linearComponent*impulseMagnitude*m_linearFactor;
 			m_deltaAngularVelocity += angularComponent*(impulseMagnitude*m_angularFactor);
 		}
 	}
@@ -149,35 +161,125 @@ ATTRIBUTE_ALIGNED16 (struct)	btSolverBody
 	{
 		if (m_originalBody)
 		{
-			m_pushVelocity += linearComponent*impulseMagnitude;
+			m_pushVelocity += linearComponent*impulseMagnitude*m_linearFactor;
 			m_turnVelocity += angularComponent*(impulseMagnitude*m_angularFactor);
 		}
 	}
+
+
+
+	const btVector3& getDeltaLinearVelocity() const
+	{
+		return m_deltaLinearVelocity;
+	}
+
+	const btVector3& getDeltaAngularVelocity() const
+	{
+		return m_deltaAngularVelocity;
+	}
+
+	const btVector3& getPushVelocity() const 
+	{
+		return m_pushVelocity;
+	}
+
+	const btVector3& getTurnVelocity() const 
+	{
+		return m_turnVelocity;
+	}
+
+
+	////////////////////////////////////////////////
+	///some internal methods, don't use them
+		
+	btVector3& internalGetDeltaLinearVelocity()
+	{
+		return m_deltaLinearVelocity;
+	}
+
+	btVector3& internalGetDeltaAngularVelocity()
+	{
+		return m_deltaAngularVelocity;
+	}
+
+	const btVector3& internalGetAngularFactor() const
+	{
+		return m_angularFactor;
+	}
+
+	const btVector3& internalGetInvMass() const
+	{
+		return m_invMass;
+	}
+
+	void internalSetInvMass(const btVector3& invMass)
+	{
+		m_invMass = invMass;
+	}
 	
+	btVector3& internalGetPushVelocity()
+	{
+		return m_pushVelocity;
+	}
+
+	btVector3& internalGetTurnVelocity()
+	{
+		return m_turnVelocity;
+	}
+
+	SIMD_FORCE_INLINE void	internalGetVelocityInLocalPointObsolete(const btVector3& rel_pos, btVector3& velocity ) const
+	{	// same formula as getVelocityInLocalPointObsolete(), but without the m_originalBody check (never falls back to zero)
+		velocity = m_linearVelocity+m_deltaLinearVelocity + (m_angularVelocity+m_deltaAngularVelocity).cross(rel_pos);
+	}
+
+	SIMD_FORCE_INLINE void	internalGetAngularVelocity(btVector3& angVel) const
+	{	// same sum as getAngularVelocity(), but without the m_originalBody check (never falls back to zero)
+		angVel = m_angularVelocity+m_deltaAngularVelocity;
+	}
+
+
+	//Optimization for the iterative solver: avoid calculating constant terms involving inertia, normal, relative position. The body is identical to applyImpulse() in this struct.
+	SIMD_FORCE_INLINE void internalApplyImpulse(const btVector3& linearComponent, const btVector3& angularComponent,const btScalar impulseMagnitude)
+	{
+		if (m_originalBody)	// nothing is accumulated when no original body is attached
+		{
+			m_deltaLinearVelocity += linearComponent*impulseMagnitude*m_linearFactor;	// linear part is scaled by m_linearFactor
+			m_deltaAngularVelocity += angularComponent*(impulseMagnitude*m_angularFactor);	// angular part is scaled by m_angularFactor
+		}
+	}
+		
+	
+	
+
 	void	writebackVelocity()
 	{
 		if (m_originalBody)
 		{
-			m_originalBody->setLinearVelocity(m_originalBody->getLinearVelocity()+ m_deltaLinearVelocity);
-			m_originalBody->setAngularVelocity(m_originalBody->getAngularVelocity()+m_deltaAngularVelocity);
+			m_linearVelocity +=m_deltaLinearVelocity;
+			m_angularVelocity += m_deltaAngularVelocity;
 			
 			//m_originalBody->setCompanionId(-1);
 		}
 	}
 
 
-	void	writebackVelocity(btScalar timeStep)
+	void	writebackVelocityAndTransform(btScalar timeStep, btScalar splitImpulseTurnErp)
 	{
+        (void) timeStep;
 		if (m_originalBody)
 		{
-			m_originalBody->setLinearVelocity(m_originalBody->getLinearVelocity()+ m_deltaLinearVelocity);
-			m_originalBody->setAngularVelocity(m_originalBody->getAngularVelocity()+m_deltaAngularVelocity);
+			m_linearVelocity += m_deltaLinearVelocity;
+			m_angularVelocity += m_deltaAngularVelocity;
 			
 			//correct the position/orientation based on push/turn recovery
 			btTransform newTransform;
-			btTransformUtil::integrateTransform(m_originalBody->getWorldTransform(),m_pushVelocity,m_turnVelocity,timeStep,newTransform);
-			m_originalBody->setWorldTransform(newTransform);
-			
+			if (m_pushVelocity[0]!=0.f || m_pushVelocity[1]!=0 || m_pushVelocity[2]!=0 || m_turnVelocity[0]!=0.f || m_turnVelocity[1]!=0 || m_turnVelocity[2]!=0)
+			{
+			//	btQuaternion orn = m_worldTransform.getRotation();
+				btTransformUtil::integrateTransform(m_worldTransform,m_pushVelocity,m_turnVelocity*splitImpulseTurnErp,timeStep,newTransform);
+				m_worldTransform = newTransform;
+			}
+			//m_worldTransform.setRotation(orn);
 			//m_originalBody->setCompanionId(-1);
 		}
 	}
diff --git a/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btSolverConstraint.h b/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btSolverConstraint.h
index eb1aae1ff..c3951f664 100644
--- a/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btSolverConstraint.h
+++ b/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btSolverConstraint.h
@@ -20,6 +20,7 @@ class	btRigidBody;
 #include "LinearMath/btVector3.h"
 #include "LinearMath/btMatrix3x3.h"
 #include "btJacobianEntry.h"
+#include "LinearMath/btAlignedObjectArray.h"
 
 //#define NO_FRICTION_TANGENTIALS 1
 #include "btSolverBody.h"
@@ -41,45 +42,27 @@ ATTRIBUTE_ALIGNED16 (struct)	btSolverConstraint
 	
 	mutable btSimdScalar	m_appliedPushImpulse;
 	mutable btSimdScalar	m_appliedImpulse;
-	
-	
+
 	btScalar	m_friction;
 	btScalar	m_jacDiagABInv;
-	union
-	{
-		int	m_numConsecutiveRowsPerKernel;
-		btScalar	m_unusedPadding0;
-	};
-
-	union
-	{
-		int			m_frictionIndex;
-		btScalar	m_unusedPadding1;
-	};
-	union
-	{
-		int			m_solverBodyIdA;
-		btScalar	m_unusedPadding2;
-	};
-	union
-	{
-		int			m_solverBodyIdB;
-		btScalar	m_unusedPadding3;
-	};
+	btScalar		m_rhs;
+	btScalar		m_cfm;
 	
-	union
+    btScalar		m_lowerLimit;
+	btScalar		m_upperLimit;
+	btScalar		m_rhsPenetration;
+    union
 	{
 		void*		m_originalContactPoint;
 		btScalar	m_unusedPadding4;
 	};
 
-	btScalar		m_rhs;
-	btScalar		m_cfm;
-	btScalar		m_lowerLimit;
-	btScalar		m_upperLimit;
-
-	btScalar		m_rhsPenetration;
+	int	m_overrideNumSolverIterations;
+    int			m_frictionIndex;
+	int m_solverBodyIdA;
+	int m_solverBodyIdB;
 
+    
 	enum		btSolverConstraintType
 	{
 		BT_SOLVER_CONTACT_1D = 0,
diff --git a/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btTypedConstraint.cpp b/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btTypedConstraint.cpp
index 235be5871..465c0746c 100644
--- a/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btTypedConstraint.cpp
+++ b/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btTypedConstraint.cpp
@@ -16,34 +16,25 @@ subject to the following restrictions:
 
 #include "btTypedConstraint.h"
 #include "BulletDynamics/Dynamics/btRigidBody.h"
+#include "LinearMath/btSerializer.h"
 
-static btRigidBody s_fixed(0, 0,0);
 
 #define DEFAULT_DEBUGDRAW_SIZE btScalar(0.3f)
 
-btTypedConstraint::btTypedConstraint(btTypedConstraintType type)
-:btTypedObject(type),
-m_userConstraintType(-1),
-m_userConstraintId(-1),
-m_needsFeedback(false),
-m_rbA(s_fixed),
-m_rbB(s_fixed),
-m_appliedImpulse(btScalar(0.)),
-m_dbgDrawSize(DEFAULT_DEBUGDRAW_SIZE)
-{
-	s_fixed.setMassProps(btScalar(0.),btVector3(btScalar(0.),btScalar(0.),btScalar(0.)));
-}
 btTypedConstraint::btTypedConstraint(btTypedConstraintType type, btRigidBody& rbA)
 :btTypedObject(type),
 m_userConstraintType(-1),
 m_userConstraintId(-1),
+m_breakingImpulseThreshold(SIMD_INFINITY),
+m_isEnabled(true),
 m_needsFeedback(false),
+m_overrideNumSolverIterations(-1),
 m_rbA(rbA),
-m_rbB(s_fixed),
+m_rbB(getFixedBody()),
 m_appliedImpulse(btScalar(0.)),
-m_dbgDrawSize(DEFAULT_DEBUGDRAW_SIZE)
+m_dbgDrawSize(DEFAULT_DEBUGDRAW_SIZE),
+m_jointFeedback(0)
 {
-	s_fixed.setMassProps(btScalar(0.),btVector3(btScalar(0.),btScalar(0.),btScalar(0.)));
 }
 
 
@@ -51,14 +42,16 @@ btTypedConstraint::btTypedConstraint(btTypedConstraintType type, btRigidBody& rb
 :btTypedObject(type),
 m_userConstraintType(-1),
 m_userConstraintId(-1),
+m_breakingImpulseThreshold(SIMD_INFINITY),
+m_isEnabled(true),
 m_needsFeedback(false),
+m_overrideNumSolverIterations(-1),
 m_rbA(rbA),
 m_rbB(rbB),
 m_appliedImpulse(btScalar(0.)),
-m_dbgDrawSize(DEFAULT_DEBUGDRAW_SIZE)
+m_dbgDrawSize(DEFAULT_DEBUGDRAW_SIZE),
+m_jointFeedback(0)
 {
-	s_fixed.setMassProps(btScalar(0.),btVector3(btScalar(0.),btScalar(0.),btScalar(0.)));
-
 }
 
 
@@ -113,4 +106,117 @@ btScalar btTypedConstraint::getMotorFactor(btScalar pos, btScalar lowLim, btScal
 	return lim_fact;
 }
 
+///fills the dataBuffer and returns the struct name (and 0 on failure)
+const char*	btTypedConstraint::serialize(void* dataBuffer, btSerializer* serializer) const
+{
+	btTypedConstraintData* tcd = (btTypedConstraintData*) dataBuffer;
 
+	tcd->m_rbA = (btRigidBodyData*)serializer->getUniquePointer(&m_rbA);
+	tcd->m_rbB = (btRigidBodyData*)serializer->getUniquePointer(&m_rbB);
+	char* name = (char*) serializer->findNameForPointer(this);
+	tcd->m_name = (char*)serializer->getUniquePointer(name);
+	if (tcd->m_name)
+	{
+		serializer->serializeName(name);
+	}
+
+	tcd->m_objectType = m_objectType;
+	tcd->m_needsFeedback = m_needsFeedback;
+	tcd->m_overrideNumSolverIterations = m_overrideNumSolverIterations;
+	tcd->m_breakingImpulseThreshold = float(m_breakingImpulseThreshold);
+	tcd->m_isEnabled = m_isEnabled? 1: 0;
+	
+	tcd->m_userConstraintId =m_userConstraintId;
+	tcd->m_userConstraintType =m_userConstraintType;
+
+	tcd->m_appliedImpulse = float(m_appliedImpulse);
+	tcd->m_dbgDrawSize = float(m_dbgDrawSize );
+
+	tcd->m_disableCollisionsBetweenLinkedBodies = false;
+
+	int i;
+	for (i=0;i<m_rbA.getNumConstraintRefs();i++)
+		if (m_rbA.getConstraintRef(i) == this)
+			tcd->m_disableCollisionsBetweenLinkedBodies = true;
+	for (i=0;i<m_rbB.getNumConstraintRefs();i++)
+		if (m_rbB.getConstraintRef(i) == this)
+			tcd->m_disableCollisionsBetweenLinkedBodies = true;
+
+	return "btTypedConstraintData";
+}
+
+btRigidBody& btTypedConstraint::getFixedBody()
+{
+	static btRigidBody s_fixed(0, 0,0);
+	s_fixed.setMassProps(btScalar(0.),btVector3(btScalar(0.),btScalar(0.),btScalar(0.)));
+	return s_fixed;
+}
+
+
+void btAngularLimit::set(btScalar low, btScalar high, btScalar _softness, btScalar _biasFactor, btScalar _relaxationFactor)
+{
+	m_halfRange = (high - low) / 2.0f;
+	m_center = btNormalizeAngle(low + m_halfRange);
+	m_softness =  _softness;
+	m_biasFactor = _biasFactor;
+	m_relaxationFactor = _relaxationFactor;
+}
+
+void btAngularLimit::test(const btScalar angle)
+{
+	m_correction = 0.0f;
+	m_sign = 0.0f;
+	m_solveLimit = false;
+
+	if (m_halfRange >= 0.0f)
+	{
+		btScalar deviation = btNormalizeAngle(angle - m_center);
+		if (deviation < -m_halfRange)
+		{
+			m_solveLimit = true;
+			m_correction = - (deviation + m_halfRange);
+			m_sign = +1.0f;
+		}
+		else if (deviation > m_halfRange)
+		{
+			m_solveLimit = true;
+			m_correction = m_halfRange - deviation;
+			m_sign = -1.0f;
+		}
+	}
+}
+
+
+btScalar btAngularLimit::getError() const
+{
+	return m_correction * m_sign;
+}
+
+void btAngularLimit::fit(btScalar& angle) const
+{
+	if (m_halfRange > 0.0f)
+	{
+		btScalar relativeAngle = btNormalizeAngle(angle - m_center);
+		if (!btEqual(relativeAngle, m_halfRange))
+		{
+			if (relativeAngle > 0.0f)
+			{
+				angle = getHigh();
+			}
+			else
+			{
+				angle = getLow();
+			}
+		}
+	}
+}
+
+btScalar btAngularLimit::getLow() const
+{
+	return btNormalizeAngle(m_center - m_halfRange);
+}
+
+btScalar btAngularLimit::getHigh() const
+{
+	return btNormalizeAngle(m_center + m_halfRange);
+}
diff --git a/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btTypedConstraint.h b/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btTypedConstraint.h
index 430e16285..441fa3750 100644
--- a/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btTypedConstraint.h
+++ b/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btTypedConstraint.h
@@ -1,6 +1,6 @@
 /*
 Bullet Continuous Collision Detection and Physics Library
-Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+Copyright (c) 2003-2010 Erwin Coumans  http://continuousphysics.com/Bullet/
 
 This software is provided 'as-is', without any express or implied warranty.
 In no event will the authors be held liable for any damages arising from the use of this software.
@@ -13,31 +13,71 @@ subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-#ifndef TYPED_CONSTRAINT_H
-#define TYPED_CONSTRAINT_H
+#ifndef BT_TYPED_CONSTRAINT_H
+#define BT_TYPED_CONSTRAINT_H
+
 
-class btRigidBody;
 #include "LinearMath/btScalar.h"
 #include "btSolverConstraint.h"
-#include "BulletCollision/NarrowPhaseCollision/btPersistentManifold.h"
-struct  btSolverBody;
+#include "BulletDynamics/Dynamics/btRigidBody.h"
 
+class btSerializer;
+
+//Don't change any of the existing enum values, so add enum types at the end for serialization compatibility
 enum btTypedConstraintType
 {
-	POINT2POINT_CONSTRAINT_TYPE=MAX_CONTACT_MANIFOLD_TYPE+1,
+	POINT2POINT_CONSTRAINT_TYPE=3,
 	HINGE_CONSTRAINT_TYPE,
 	CONETWIST_CONSTRAINT_TYPE,
 	D6_CONSTRAINT_TYPE,
 	SLIDER_CONSTRAINT_TYPE,
-	CONTACT_CONSTRAINT_TYPE
+	CONTACT_CONSTRAINT_TYPE,
+	D6_SPRING_CONSTRAINT_TYPE,
+	GEAR_CONSTRAINT_TYPE,
+	MAX_CONSTRAINT_TYPE
 };
 
+
+enum btConstraintParams
+{
+	BT_CONSTRAINT_ERP=1,
+	BT_CONSTRAINT_STOP_ERP,
+	BT_CONSTRAINT_CFM,
+	BT_CONSTRAINT_STOP_CFM
+};
+
+#if 1
+	#define btAssertConstrParams(_par) btAssert(_par) 
+#else
+	#define btAssertConstrParams(_par)
+#endif
+
+
+ATTRIBUTE_ALIGNED16(struct)	btJointFeedback
+{
+	btVector3	m_appliedForceBodyA;
+	btVector3	m_appliedTorqueBodyA;
+	btVector3	m_appliedForceBodyB;
+	btVector3	m_appliedTorqueBodyB;
+};
+
+
 ///TypedConstraint is the baseclass for Bullet constraints and vehicles
-class btTypedConstraint : public btTypedObject
+ATTRIBUTE_ALIGNED16(class) btTypedConstraint : public btTypedObject
 {
 	int	m_userConstraintType;
-	int	m_userConstraintId;
-	bool m_needsFeedback;
+
+	union
+	{
+		int	m_userConstraintId;
+		void* m_userConstraintPtr;
+	};
+
+	btScalar	m_breakingImpulseThreshold;
+	bool		m_isEnabled;
+	bool		m_needsFeedback;
+	int			m_overrideNumSolverIterations;
+
 
 	btTypedConstraint&	operator=(btTypedConstraint&	other)
 	{
@@ -51,14 +91,16 @@ protected:
 	btRigidBody&	m_rbB;
 	btScalar	m_appliedImpulse;
 	btScalar	m_dbgDrawSize;
+	btJointFeedback*	m_jointFeedback;
 
-	btVector3	m_appliedLinearImpulse;
-	btVector3	m_appliedAngularImpulseA;
-	btVector3	m_appliedAngularImpulseB;
+	///internal method used by the constraint solver, don't use them directly
+	btScalar getMotorFactor(btScalar pos, btScalar lowLim, btScalar uppLim, btScalar vel, btScalar timeFact);
+	
 
 public:
 
-	btTypedConstraint(btTypedConstraintType type);
+	BT_DECLARE_ALIGNED_ALLOCATOR();
+
 	virtual ~btTypedConstraint() {};
 	btTypedConstraint(btTypedConstraintType type, btRigidBody& rbA);
 	btTypedConstraint(btTypedConstraintType type, btRigidBody& rbA,btRigidBody& rbB);
@@ -67,6 +109,8 @@ public:
 		int m_numConstraintRows,nub;
 	};
 
+	static btRigidBody& getFixedBody();
+
 	struct btConstraintInfo2 {
 		// integrator parameters: frames per second (1/stepsize), default error
 		// reduction parameter (0..1).
@@ -96,14 +140,33 @@ public:
 		int *findex;
 		// number of solver iterations
 		int m_numIterations;
+
+		//damping of the velocity
+		btScalar	m_damping;
 	};
 
+	int	getOverrideNumSolverIterations() const
+	{
+		return m_overrideNumSolverIterations;
+	}
+
+	///override the number of constraint solver iterations used to solve this constraint
+	///-1 will use the default number of iterations, as specified in SolverInfo.m_numIterations
+	void setOverrideNumSolverIterations(int overideNumIterations)
+	{
+		m_overrideNumSolverIterations = overideNumIterations;
+	}
+
 	///internal method used by the constraint solver, don't use them directly
-	virtual void	buildJacobian() = 0;
+	virtual void	buildJacobian() {};
 
 	///internal method used by the constraint solver, don't use them directly
 	virtual	void	setupSolverConstraint(btConstraintArray& ca, int solverBodyA,int solverBodyB, btScalar timeStep)
 	{
+        (void)ca;
+        (void)solverBodyA;
+        (void)solverBodyB;
+        (void)timeStep;
 	}
 	
 	///internal method used by the constraint solver, don't use them directly
@@ -117,12 +180,37 @@ public:
 	{
 		m_appliedImpulse = appliedImpulse;
 	}
+	///internal method used by the constraint solver, don't use them directly
+	btScalar	internalGetAppliedImpulse()
+	{
+		return m_appliedImpulse;
+	}
+
+
+	btScalar	getBreakingImpulseThreshold() const
+	{
+		return 	m_breakingImpulseThreshold;
+	}
+
+	void	setBreakingImpulseThreshold(btScalar threshold)
+	{
+		m_breakingImpulseThreshold = threshold;
+	}
+
+	bool	isEnabled() const
+	{
+		return m_isEnabled;
+	}
+
+	void	setEnabled(bool enabled)
+	{
+		m_isEnabled=enabled;
+	}
+
 
 	///internal method used by the constraint solver, don't use them directly
-	virtual	void	solveConstraintObsolete(btSolverBody& bodyA,btSolverBody& bodyB,btScalar	timeStep) = 0;
+	virtual	void	solveConstraintObsolete(btSolverBody& /*bodyA*/,btSolverBody& /*bodyB*/,btScalar	/*timeStep*/) {};
 
-	///internal method used by the constraint solver, don't use them directly
-	btScalar getMotorFactor(btScalar pos, btScalar lowLim, btScalar uppLim, btScalar vel, btScalar timeFact);
 	
 	const btRigidBody& getRigidBodyA() const
 	{
@@ -162,6 +250,32 @@ public:
 		return m_userConstraintId;
 	}
 
+	void	setUserConstraintPtr(void* ptr)
+	{
+		m_userConstraintPtr = ptr;
+	}
+
+	void*	getUserConstraintPtr()
+	{
+		return m_userConstraintPtr;
+	}
+
+	void	setJointFeedback(btJointFeedback* jointFeedback)
+	{
+		m_jointFeedback = jointFeedback;
+	}
+
+	const btJointFeedback* getJointFeedback() const
+	{
+		return m_jointFeedback;
+	}
+
+	btJointFeedback* getJointFeedback()
+	{
+		return m_jointFeedback;
+	}
+
+
 	int getUid() const
 	{
 		return m_userConstraintId;   
@@ -187,44 +301,6 @@ public:
 		return m_appliedImpulse;
 	}
 
-	const btVector3& getAppliedLinearImpulse() const
-	{
-		btAssert(m_needsFeedback);
-		return m_appliedLinearImpulse;
-	}
-
-	btVector3& getAppliedLinearImpulse()
-	{
-		btAssert(m_needsFeedback);
-		return m_appliedLinearImpulse;
-	}
-
-	const btVector3& getAppliedAngularImpulseA() const
-	{
-		btAssert(m_needsFeedback);
-		return m_appliedAngularImpulseA;
-	}
-
-	btVector3& getAppliedAngularImpulseA()
-	{
-		btAssert(m_needsFeedback);
-		return m_appliedAngularImpulseA;
-	}
-
-	const btVector3& getAppliedAngularImpulseB() const
-	{
-		btAssert(m_needsFeedback);
-		return m_appliedAngularImpulseB;
-	}
-
-	btVector3& getAppliedAngularImpulseB()
-	{
-		btAssert(m_needsFeedback);
-		return m_appliedAngularImpulseB;
-	}
-
-	
-
 	btTypedConstraintType getConstraintType () const
 	{
 		return btTypedConstraintType(m_objectType);
@@ -238,7 +314,19 @@ public:
 	{
 		return m_dbgDrawSize;
 	}
+
+	///override the default global value of a parameter (such as ERP or CFM), optionally provide the axis (0..5). 
+	///If no axis is provided, it uses the default axis for this constraint.
+	virtual	void	setParam(int num, btScalar value, int axis = -1) = 0;
+
+	///return the local value of parameter
+	virtual	btScalar getParam(int num, int axis = -1) const = 0;
 	
+	virtual	int	calculateSerializeBufferSize() const;
+
+	///fills the dataBuffer and returns the struct name (and 0 on failure)
+	virtual	const char*	serialize(void* dataBuffer, btSerializer* serializer) const;
+
 };
 
 // returns angle in range [-SIMD_2_PI, SIMD_2_PI], closest to one of the limits 
@@ -251,13 +339,13 @@ SIMD_FORCE_INLINE btScalar btAdjustAngleToLimits(btScalar angleInRadians, btScal
 	}
 	else if(angleInRadians < angleLowerLimitInRadians)
 	{
-		btScalar diffLo = btNormalizeAngle(angleLowerLimitInRadians - angleInRadians); // this is positive
+		btScalar diffLo = btFabs(btNormalizeAngle(angleLowerLimitInRadians - angleInRadians));
 		btScalar diffHi = btFabs(btNormalizeAngle(angleUpperLimitInRadians - angleInRadians));
 		return (diffLo < diffHi) ? angleInRadians : (angleInRadians + SIMD_2_PI);
 	}
 	else if(angleInRadians > angleUpperLimitInRadians)
 	{
-		btScalar diffHi = btNormalizeAngle(angleInRadians - angleUpperLimitInRadians); // this is positive
+		btScalar diffHi = btFabs(btNormalizeAngle(angleInRadians - angleUpperLimitInRadians));
 		btScalar diffLo = btFabs(btNormalizeAngle(angleInRadians - angleLowerLimitInRadians));
 		return (diffLo < diffHi) ? (angleInRadians - SIMD_2_PI) : angleInRadians;
 	}
@@ -267,5 +355,128 @@ SIMD_FORCE_INLINE btScalar btAdjustAngleToLimits(btScalar angleInRadians, btScal
 	}
 }
 
+///Do not change these serialization structures: doing so requires an updated sBulletDNAstr/sBulletDNAstr64
+struct	btTypedConstraintData
+{
+	btRigidBodyData		*m_rbA;
+	btRigidBodyData		*m_rbB;
+	char	*m_name;
 
-#endif //TYPED_CONSTRAINT_H
+	int	m_objectType;
+	int	m_userConstraintType;
+	int	m_userConstraintId;
+	int	m_needsFeedback;
+
+	float	m_appliedImpulse;
+	float	m_dbgDrawSize;
+
+	int	m_disableCollisionsBetweenLinkedBodies;
+	int	m_overrideNumSolverIterations;
+
+	float	m_breakingImpulseThreshold;
+	int		m_isEnabled;
+	
+};
+
+SIMD_FORCE_INLINE	int	btTypedConstraint::calculateSerializeBufferSize() const
+{
+	return sizeof(btTypedConstraintData);
+}
+
+
+
+class btAngularLimit
+{
+private:
+	btScalar 
+		m_center,
+		m_halfRange,
+		m_softness,
+		m_biasFactor,
+		m_relaxationFactor,
+		m_correction,
+		m_sign;
+
+	bool
+		m_solveLimit;
+
+public:
+	/// Default constructor initializes limit as inactive, allowing free constraint movement
+	btAngularLimit()
+		:m_center(0.0f),
+		m_halfRange(-1.0f),
+		m_softness(0.9f),
+		m_biasFactor(0.3f),
+		m_relaxationFactor(1.0f),
+		m_correction(0.0f),
+		m_sign(0.0f),
+		m_solveLimit(false)
+	{}
+
+	/// Sets all of the limit's parameters.
+	/// When low > high, the limit becomes inactive.
+	/// When high - low > 2PI, the limit is ineffective too, because no angle can exceed it.
+	void set(btScalar low, btScalar high, btScalar _softness = 0.9f, btScalar _biasFactor = 0.3f, btScalar _relaxationFactor = 1.0f);
+
+	/// Checks the constraint angle against the limit. If the limit is active and the angle violates it,
+	/// a correction is calculated.
+	void test(const btScalar angle);
+
+	/// Returns limit's softness
+	inline btScalar getSoftness() const
+	{
+		return m_softness;
+	}
+
+	/// Returns limit's bias factor
+	inline btScalar getBiasFactor() const
+	{
+		return m_biasFactor;
+	}
+
+	/// Returns limit's relaxation factor
+	inline btScalar getRelaxationFactor() const
+	{
+		return m_relaxationFactor;
+	}
+
+	/// Returns correction value evaluated when test() was invoked 
+	inline btScalar getCorrection() const
+	{
+		return m_correction;
+	}
+
+	/// Returns sign value evaluated when test() was invoked 
+	inline btScalar getSign() const
+	{
+		return m_sign;
+	}
+
+	/// Gives half of the distance between min and max limit angle
+	inline btScalar getHalfRange() const
+	{
+		return m_halfRange;
+	}
+
+	/// Returns true when the last test() invocation recognized limit violation
+	inline bool isLimit() const
+	{
+		return m_solveLimit;
+	}
+
+	/// Checks the given angle against the limit. If the limit is active and the angle does not fit it,
+	/// the angle is modified so that it equals the limit closest to the given angle.
+	void fit(btScalar& angle) const;
+
+	/// Returns correction value multiplied by sign value
+	btScalar getError() const;
+
+	btScalar getLow() const;
+
+	btScalar getHigh() const;
+
+};
+
+
+
+#endif //BT_TYPED_CONSTRAINT_H
diff --git a/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btUniversalConstraint.cpp b/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btUniversalConstraint.cpp
index 3a4c2afa6..b009f41ae 100644
--- a/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btUniversalConstraint.cpp
+++ b/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btUniversalConstraint.cpp
@@ -27,7 +27,7 @@ subject to the following restrictions:
 // constructor
 // anchor, axis1 and axis2 are in world coordinate system
 // axis1 must be orthogonal to axis2
-btUniversalConstraint::btUniversalConstraint(btRigidBody& rbA, btRigidBody& rbB, btVector3& anchor, btVector3& axis1, btVector3& axis2)
+btUniversalConstraint::btUniversalConstraint(btRigidBody& rbA, btRigidBody& rbB, const btVector3& anchor, const btVector3& axis1, const btVector3& axis2)
 : btGeneric6DofConstraint(rbA, rbB, btTransform::getIdentity(), btTransform::getIdentity(), true),
  m_anchor(anchor),
  m_axis1(axis1),
@@ -42,8 +42,8 @@ btUniversalConstraint::btUniversalConstraint(btRigidBody& rbA, btRigidBody& rbB,
 	// new position of X, allowed limits are (-PI,PI);
 	// So to simulate ODE Universal joint we should use parent axis as Z, child axis as Y and limit all other DOFs
 	// Build the frame in world coordinate system first
-	btVector3 zAxis = axis1.normalize();
-	btVector3 yAxis = axis2.normalize();
+	btVector3 zAxis = m_axis1.normalize();
+	btVector3 yAxis = m_axis2.normalize();
 	btVector3 xAxis = yAxis.cross(zAxis); // we want right coordinate system
 	btTransform frameInW;
 	frameInW.setIdentity();
@@ -61,3 +61,27 @@ btUniversalConstraint::btUniversalConstraint(btRigidBody& rbA, btRigidBody& rbB,
 	setAngularUpperLimit(btVector3(0.f,  SIMD_HALF_PI - UNIV_EPS,  SIMD_PI - UNIV_EPS));
 }
 
+void btUniversalConstraint::setAxis(const btVector3& axis1,const btVector3& axis2)
+{
+  m_axis1 = axis1;
+  m_axis2 = axis2;
+
+	btVector3 zAxis = axis1.normalized();
+	btVector3 yAxis = axis2.normalized();
+	btVector3 xAxis = yAxis.cross(zAxis); // we want right coordinate system
+
+	btTransform frameInW;
+	frameInW.setIdentity();
+	frameInW.getBasis().setValue(	xAxis[0], yAxis[0], zAxis[0],	
+                                xAxis[1], yAxis[1], zAxis[1],
+                                xAxis[2], yAxis[2], zAxis[2]);
+	frameInW.setOrigin(m_anchor);
+
+	// now get constraint frame in local coordinate systems
+	m_frameInA = m_rbA.getCenterOfMassTransform().inverse() * frameInW;
+	m_frameInB = m_rbB.getCenterOfMassTransform().inverse() * frameInW;
+
+  calculateTransforms();
+}
+
+
diff --git a/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btUniversalConstraint.h b/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btUniversalConstraint.h
index 4e64a7d7e..9e7084104 100644
--- a/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btUniversalConstraint.h
+++ b/Engine/lib/bullet/src/BulletDynamics/ConstraintSolver/btUniversalConstraint.h
@@ -13,8 +13,8 @@ subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-#ifndef UNIVERSAL_CONSTRAINT_H
-#define UNIVERSAL_CONSTRAINT_H
+#ifndef BT_UNIVERSAL_CONSTRAINT_H
+#define BT_UNIVERSAL_CONSTRAINT_H
 
 
 
@@ -31,17 +31,20 @@ subject to the following restrictions:
 /// "Given axis 1 on body 1, and axis 2 on body 2 that is perpendicular to axis 1, it keeps them perpendicular. 
 /// In other words, rotation of the two bodies about the direction perpendicular to the two axes will be equal."
 
-class btUniversalConstraint : public btGeneric6DofConstraint
+ATTRIBUTE_ALIGNED16(class) btUniversalConstraint : public btGeneric6DofConstraint
 {
 protected:
 	btVector3	m_anchor;
 	btVector3	m_axis1;
 	btVector3	m_axis2;
 public:
+	
+	BT_DECLARE_ALIGNED_ALLOCATOR();
+	
 	// constructor
 	// anchor, axis1 and axis2 are in world coordinate system
 	// axis1 must be orthogonal to axis2
-    btUniversalConstraint(btRigidBody& rbA, btRigidBody& rbB, btVector3& anchor, btVector3& axis1, btVector3& axis2);
+    btUniversalConstraint(btRigidBody& rbA, btRigidBody& rbB, const btVector3& anchor, const btVector3& axis1, const btVector3& axis2);
 	// access
 	const btVector3& getAnchor() { return m_calculatedTransformA.getOrigin(); }
 	const btVector3& getAnchor2() { return m_calculatedTransformB.getOrigin(); }
@@ -52,9 +55,11 @@ public:
 	// limits
 	void setUpperLimit(btScalar ang1max, btScalar ang2max) { setAngularUpperLimit(btVector3(0.f, ang1max, ang2max)); }
 	void setLowerLimit(btScalar ang1min, btScalar ang2min) { setAngularLowerLimit(btVector3(0.f, ang1min, ang2min)); }
+
+	void setAxis( const btVector3& axis1, const btVector3& axis2);
 };
 
 
 
-#endif // UNIVERSAL_CONSTRAINT_H
+#endif // BT_UNIVERSAL_CONSTRAINT_H
 
diff --git a/Engine/lib/bullet/src/BulletDynamics/Dynamics/Bullet-C-API.cpp b/Engine/lib/bullet/src/BulletDynamics/Dynamics/Bullet-C-API.cpp
index 32b63f195..bd8e27483 100644
--- a/Engine/lib/bullet/src/BulletDynamics/Dynamics/Bullet-C-API.cpp
+++ b/Engine/lib/bullet/src/BulletDynamics/Dynamics/Bullet-C-API.cpp
@@ -43,7 +43,7 @@ subject to the following restrictions:
 #include "BulletCollision/NarrowPhaseCollision/btDiscreteCollisionDetectorInterface.h"
 #include "BulletCollision/NarrowPhaseCollision/btSimplexSolverInterface.h"
 #include "BulletCollision/NarrowPhaseCollision/btMinkowskiPenetrationDepthSolver.h"
-#include "LinearMath/btStackAlloc.h"
+
 
 /*
 	Create and Delete a Physics SDK	
@@ -373,10 +373,7 @@ double plNearestPoints(float p1[3], float p2[3], float p3[3], float q1[3], float
 	btPointCollector gjkOutput;
 	btGjkPairDetector::ClosestPointInput input;
 	
-	btStackAlloc gStackAlloc(1024*1024*2);
- 
-	input.m_stackAlloc = &gStackAlloc;
-	
+		
 	btTransform tr;
 	tr.setIdentity();
 	
diff --git a/Engine/lib/bullet/src/BulletDynamics/Dynamics/btActionInterface.h b/Engine/lib/bullet/src/BulletDynamics/Dynamics/btActionInterface.h
index 0832c9b24..e1fea3a49 100644
--- a/Engine/lib/bullet/src/BulletDynamics/Dynamics/btActionInterface.h
+++ b/Engine/lib/bullet/src/BulletDynamics/Dynamics/btActionInterface.h
@@ -20,11 +20,17 @@ class btIDebugDraw;
 class btCollisionWorld;
 
 #include "LinearMath/btScalar.h"
+#include "btRigidBody.h"
 
 ///Basic interface to allow actions such as vehicles and characters to be updated inside a btDynamicsWorld
 class btActionInterface
 {
-	public:
+protected:
+
+	static btRigidBody& getFixedBody();
+	
+	
+public:
 
 	virtual ~btActionInterface()
 	{
diff --git a/Engine/lib/bullet/src/BulletDynamics/Dynamics/btContinuousDynamicsWorld.cpp b/Engine/lib/bullet/src/BulletDynamics/Dynamics/btContinuousDynamicsWorld.cpp
deleted file mode 100644
index 23501c443..000000000
--- a/Engine/lib/bullet/src/BulletDynamics/Dynamics/btContinuousDynamicsWorld.cpp
+++ /dev/null
@@ -1,196 +0,0 @@
-/*
-Bullet Continuous Collision Detection and Physics Library
-Copyright (c) 2003-2007 Erwin Coumans  http://continuousphysics.com/Bullet/
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose, 
-including commercial applications, and to alter it and redistribute it freely, 
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-*/
-
-
-#include "btContinuousDynamicsWorld.h"
-#include "LinearMath/btQuickprof.h"
-
-//collision detection
-#include "BulletCollision/CollisionDispatch/btCollisionDispatcher.h"
-#include "BulletCollision/BroadphaseCollision/btSimpleBroadphase.h"
-#include "BulletCollision/CollisionShapes/btCollisionShape.h"
-#include "BulletCollision/CollisionDispatch/btSimulationIslandManager.h"
-
-//rigidbody & constraints
-#include "BulletDynamics/Dynamics/btRigidBody.h"
-#include "BulletDynamics/ConstraintSolver/btSequentialImpulseConstraintSolver.h"
-#include "BulletDynamics/ConstraintSolver/btContactSolverInfo.h"
-#include "BulletDynamics/ConstraintSolver/btTypedConstraint.h"
-
-
-
-#include <stdio.h>
-
-btContinuousDynamicsWorld::btContinuousDynamicsWorld(btDispatcher* dispatcher,btBroadphaseInterface* pairCache,btConstraintSolver* constraintSolver,btCollisionConfiguration* collisionConfiguration)
-:btDiscreteDynamicsWorld(dispatcher,pairCache,constraintSolver,collisionConfiguration)
-{
-}
-
-btContinuousDynamicsWorld::~btContinuousDynamicsWorld()
-{
-}
-
-	
-void	btContinuousDynamicsWorld::internalSingleStepSimulation( btScalar timeStep)
-{
-	
-	startProfiling(timeStep);
-	
-	if(0 != m_internalPreTickCallback) {
-		(*m_internalPreTickCallback)(this, timeStep);
-	}
-
-
-	///update aabbs information
-	updateAabbs();
-	//static int frame=0;
-//	printf("frame %d\n",frame++);
-
-	///apply gravity, predict motion
-	predictUnconstraintMotion(timeStep);
-
-	btDispatcherInfo& dispatchInfo = getDispatchInfo();
-
-	dispatchInfo.m_timeStep = timeStep;
-	dispatchInfo.m_stepCount = 0;
-	dispatchInfo.m_debugDraw = getDebugDrawer();
-
-	///perform collision detection
-	performDiscreteCollisionDetection();
-
-	calculateSimulationIslands();
-
-	
-	getSolverInfo().m_timeStep = timeStep;
-	
-
-
-	///solve contact and other joint constraints
-	solveConstraints(getSolverInfo());
-	
-	///CallbackTriggers();
-	calculateTimeOfImpacts(timeStep);
-
-	btScalar toi = dispatchInfo.m_timeOfImpact;
-//	if (toi < 1.f)
-//		printf("toi = %f\n",toi);
-	if (toi < 0.f)
-		printf("toi = %f\n",toi);
-
-
-	///integrate transforms
-	integrateTransforms(timeStep * toi);
-
-	///update vehicle simulation
-	updateActions(timeStep);
-
-	updateActivationState( timeStep );
-	
-	if(0 != m_internalTickCallback) {
-		(*m_internalTickCallback)(this, timeStep);
-	}
-}
-
-void	btContinuousDynamicsWorld::calculateTimeOfImpacts(btScalar timeStep)
-{
-		///these should be 'temporal' aabbs!
-		updateTemporalAabbs(timeStep);
-		
-		///'toi' is the global smallest time of impact. However, we just calculate the time of impact for each object individually.
-		///so we handle the case moving versus static properly, and we cheat for moving versus moving
-		btScalar toi = 1.f;
-		
-	
-		btDispatcherInfo& dispatchInfo = getDispatchInfo();
-		dispatchInfo.m_timeStep = timeStep;
-		dispatchInfo.m_timeOfImpact = 1.f;
-		dispatchInfo.m_stepCount = 0;
-		dispatchInfo.m_dispatchFunc = btDispatcherInfo::DISPATCH_CONTINUOUS;
-
-		///calculate time of impact for overlapping pairs
-
-
-		btDispatcher* dispatcher = getDispatcher();
-		if (dispatcher)
-			dispatcher->dispatchAllCollisionPairs(m_broadphasePairCache->getOverlappingPairCache(),dispatchInfo,m_dispatcher1);
-
-		toi = dispatchInfo.m_timeOfImpact;
-
-		dispatchInfo.m_dispatchFunc = btDispatcherInfo::DISPATCH_DISCRETE;
-
-}
-
-void	btContinuousDynamicsWorld::updateTemporalAabbs(btScalar timeStep)
-{
-
-	btVector3 temporalAabbMin,temporalAabbMax;
-
-	for ( int i=0;i<m_collisionObjects.size();i++)
-	{
-		btCollisionObject* colObj = m_collisionObjects[i];
-		
-		btRigidBody* body = btRigidBody::upcast(colObj);
-		if (body)
-		{
-			body->getCollisionShape()->getAabb(m_collisionObjects[i]->getWorldTransform(),temporalAabbMin,temporalAabbMax);
-			const btVector3& linvel = body->getLinearVelocity();
-
-			//make the AABB temporal
-			btScalar temporalAabbMaxx = temporalAabbMax.getX();
-			btScalar temporalAabbMaxy = temporalAabbMax.getY();
-			btScalar temporalAabbMaxz = temporalAabbMax.getZ();
-			btScalar temporalAabbMinx = temporalAabbMin.getX();
-			btScalar temporalAabbMiny = temporalAabbMin.getY();
-			btScalar temporalAabbMinz = temporalAabbMin.getZ();
-
-			// add linear motion
-			btVector3 linMotion = linvel*timeStep;
-		
-			if (linMotion.x() > 0.f)
-				temporalAabbMaxx += linMotion.x(); 
-			else
-				temporalAabbMinx += linMotion.x();
-			if (linMotion.y() > 0.f)
-				temporalAabbMaxy += linMotion.y(); 
-			else
-				temporalAabbMiny += linMotion.y();
-			if (linMotion.z() > 0.f)
-				temporalAabbMaxz += linMotion.z(); 
-			else
-				temporalAabbMinz += linMotion.z();
-
-			//add conservative angular motion
-			btScalar angularMotion(0);// = angvel.length() * GetAngularMotionDisc() * timeStep;
-			btVector3 angularMotion3d(angularMotion,angularMotion,angularMotion);
-			temporalAabbMin = btVector3(temporalAabbMinx,temporalAabbMiny,temporalAabbMinz);
-			temporalAabbMax = btVector3(temporalAabbMaxx,temporalAabbMaxy,temporalAabbMaxz);
-
-			temporalAabbMin -= angularMotion3d;
-			temporalAabbMax += angularMotion3d;
-
-			m_broadphasePairCache->setAabb(body->getBroadphaseHandle(),temporalAabbMin,temporalAabbMax,m_dispatcher1);
-		}
-	}
-
-	//update aabb (of all moved objects)
-
-	m_broadphasePairCache->calculateOverlappingPairs(m_dispatcher1);
-	
-
-
-}
-
-
-
diff --git a/Engine/lib/bullet/src/BulletDynamics/Dynamics/btContinuousDynamicsWorld.h b/Engine/lib/bullet/src/BulletDynamics/Dynamics/btContinuousDynamicsWorld.h
deleted file mode 100644
index 61c8dea03..000000000
--- a/Engine/lib/bullet/src/BulletDynamics/Dynamics/btContinuousDynamicsWorld.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
-Bullet Continuous Collision Detection and Physics Library
-Copyright (c) 2003-2007 Erwin Coumans  http://continuousphysics.com/Bullet/
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose, 
-including commercial applications, and to alter it and redistribute it freely, 
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-*/
-
-#ifndef BT_CONTINUOUS_DYNAMICS_WORLD_H
-#define BT_CONTINUOUS_DYNAMICS_WORLD_H
-
-#include "btDiscreteDynamicsWorld.h"
-
-///btContinuousDynamicsWorld adds optional (per object) continuous collision detection for fast moving objects to the btDiscreteDynamicsWorld.
-///This copes with fast moving objects that otherwise would tunnel/miss collisions.
-///Under construction, don't use yet! Please use btDiscreteDynamicsWorld instead.
-class btContinuousDynamicsWorld : public btDiscreteDynamicsWorld
-{
-
-	void	updateTemporalAabbs(btScalar timeStep);
-
-	public:
-
-		btContinuousDynamicsWorld(btDispatcher* dispatcher,btBroadphaseInterface* pairCache,btConstraintSolver* constraintSolver,btCollisionConfiguration* collisionConfiguration);
-		virtual ~btContinuousDynamicsWorld();
-		
-		///time stepping with calculation of time of impact for selected fast moving objects
-		virtual void	internalSingleStepSimulation( btScalar timeStep);
-
-		virtual void	calculateTimeOfImpacts(btScalar timeStep);
-
-		virtual btDynamicsWorldType	getWorldType() const
-		{
-			return BT_CONTINUOUS_DYNAMICS_WORLD;
-		}
-
-};
-
-#endif //BT_CONTINUOUS_DYNAMICS_WORLD_H
diff --git a/Engine/lib/bullet/src/BulletDynamics/Dynamics/btDiscreteDynamicsWorld.cpp b/Engine/lib/bullet/src/BulletDynamics/Dynamics/btDiscreteDynamicsWorld.cpp
index e97300c52..9ff2d9f11 100644
--- a/Engine/lib/bullet/src/BulletDynamics/Dynamics/btDiscreteDynamicsWorld.cpp
+++ b/Engine/lib/bullet/src/BulletDynamics/Dynamics/btDiscreteDynamicsWorld.cpp
@@ -19,6 +19,7 @@ subject to the following restrictions:
 //collision detection
 #include "BulletCollision/CollisionDispatch/btCollisionDispatcher.h"
 #include "BulletCollision/BroadphaseCollision/btSimpleBroadphase.h"
+#include "BulletCollision/BroadphaseCollision/btCollisionAlgorithm.h"
 #include "BulletCollision/CollisionShapes/btCollisionShape.h"
 #include "BulletCollision/CollisionDispatch/btSimulationIslandManager.h"
 #include "LinearMath/btTransformUtil.h"
@@ -34,38 +35,183 @@ subject to the following restrictions:
 #include "BulletDynamics/ConstraintSolver/btConeTwistConstraint.h"
 #include "BulletDynamics/ConstraintSolver/btGeneric6DofConstraint.h"
 #include "BulletDynamics/ConstraintSolver/btSliderConstraint.h"
+#include "BulletDynamics/ConstraintSolver/btContactConstraint.h"
+
 
-//for debug rendering
-#include "BulletCollision/CollisionShapes/btBoxShape.h"
-#include "BulletCollision/CollisionShapes/btCapsuleShape.h"
-#include "BulletCollision/CollisionShapes/btCompoundShape.h"
-#include "BulletCollision/CollisionShapes/btConeShape.h"
-#include "BulletCollision/CollisionShapes/btConvexTriangleMeshShape.h"
-#include "BulletCollision/CollisionShapes/btCylinderShape.h"
-#include "BulletCollision/CollisionShapes/btMultiSphereShape.h"
-#include "BulletCollision/CollisionShapes/btPolyhedralConvexShape.h"
-#include "BulletCollision/CollisionShapes/btSphereShape.h"
-#include "BulletCollision/CollisionShapes/btTriangleCallback.h"
-#include "BulletCollision/CollisionShapes/btTriangleMeshShape.h"
-#include "BulletCollision/CollisionShapes/btStaticPlaneShape.h"
 #include "LinearMath/btIDebugDraw.h"
+#include "BulletCollision/CollisionShapes/btSphereShape.h"
 
 
 #include "BulletDynamics/Dynamics/btActionInterface.h"
 #include "LinearMath/btQuickprof.h"
 #include "LinearMath/btMotionState.h"
 
+#include "LinearMath/btSerializer.h"
 
+#if 0
+btAlignedObjectArray<btVector3> debugContacts;
+btAlignedObjectArray<btVector3> debugNormals;
+int startHit=2;
+int firstHit=startHit;
+#endif
+
+///Island id of a constraint: rigid body A's island tag when it is non-negative, otherwise rigid body B's tag.
+SIMD_FORCE_INLINE	int	btGetConstraintIslandId(const btTypedConstraint* lhs)
+{
+	const btCollisionObject& bodyA = lhs->getRigidBodyA();
+	const int tagA = bodyA.getIslandTag();
+	if (tagA >= 0)
+		return tagA;
+	//body A has a negative island tag, so fall back to body B's tag
+	return lhs->getRigidBodyB().getIslandTag();
+}
+
+
+///Sort predicate: orders constraints by ascending island id as returned by btGetConstraintIslandId.
+class btSortConstraintOnIslandPredicate
+{
+	public:
+
+		bool operator() ( const btTypedConstraint* lhs, const btTypedConstraint* rhs ) const
+		{
+			const int rightIsland = btGetConstraintIslandId(rhs);
+			const int leftIsland = btGetConstraintIslandId(lhs);
+			return leftIsland < rightIsland;
+		}
+};
+
+///Island callback that hands each simulation island to the constraint solver, optionally batching small islands first.
+struct InplaceSolverIslandCallback : public btSimulationIslandManager::IslandCallback
+{
+	btContactSolverInfo*	m_solverInfo;
+	btConstraintSolver*		m_solver;
+	btTypedConstraint**		m_sortedConstraints;
+	int						m_numConstraints;
+	btIDebugDraw*			m_debugDrawer;
+	btStackAlloc*			m_stackAlloc;
+	btDispatcher*			m_dispatcher;
+	//bodies, manifolds and constraints of deferred islands, kept until processConstraints() solves them
+	btAlignedObjectArray<btCollisionObject*> m_bodies;
+	btAlignedObjectArray<btPersistentManifold*> m_manifolds;
+	btAlignedObjectArray<btTypedConstraint*> m_constraints;
+
+	///stores the solver, stack allocator and dispatcher; the remaining members are filled in by setup()
+	InplaceSolverIslandCallback(
+		btConstraintSolver*	solver,
+		btStackAlloc* stackAlloc,
+		btDispatcher* dispatcher)
+		:m_solverInfo(NULL),
+		m_solver(solver),
+		m_sortedConstraints(NULL),
+		m_numConstraints(0),
+		m_debugDrawer(NULL),
+		m_stackAlloc(stackAlloc),
+		m_dispatcher(dispatcher)
+	{
+	}
+	///assignment is not supported: triggers btAssert(0) and returns *this unchanged
+	InplaceSolverIslandCallback& operator=(InplaceSolverIslandCallback& other)
+	{
+		btAssert(0);
+		(void)other;
+		return *this;
+	}
+	///per-solve configuration: stores the solver info, sorted constraint array and debug drawer, and empties the batching arrays
+	SIMD_FORCE_INLINE void setup ( btContactSolverInfo* solverInfo, btTypedConstraint** sortedConstraints,	int	numConstraints,	btIDebugDraw* debugDrawer)
+	{
+		btAssert(solverInfo);
+		m_solverInfo = solverInfo;
+		m_sortedConstraints = sortedConstraints;
+		m_numConstraints = numConstraints;
+		m_debugDrawer = debugDrawer;
+		m_bodies.resize (0);
+		m_manifolds.resize (0);
+		m_constraints.resize (0);
+	}
+
+	///solves one island, either immediately or (when m_minimumSolverBatchSize>1) after accumulating it into the pending batch
+	virtual	void	processIsland(btCollisionObject** bodies,int numBodies,btPersistentManifold**	manifolds,int numManifolds, int islandId)
+	{
+		if (islandId<0)
+		{
+			///we don't split islands, so all constraints/contact manifolds/bodies are passed into the solver regardless of the island id
+			m_solver->solveGroup( bodies,numBodies,manifolds, numManifolds,&m_sortedConstraints[0],m_numConstraints,*m_solverInfo,m_debugDrawer,m_stackAlloc,m_dispatcher);
+		} else
+		{
+				//also add all non-contact constraints/joints for this island
+			btTypedConstraint** startConstraint = 0;
+			int numCurConstraints = 0;
+			int i;
+			
+			//find the first constraint for this island
+			for (i=0;i<m_numConstraints;i++)
+			{
+				if (btGetConstraintIslandId(m_sortedConstraints[i]) == islandId)
+				{
+					startConstraint = &m_sortedConstraints[i];
+					break;
+				}
+			}
+			//count the number of constraints in this island
+			for (;i<m_numConstraints;i++)
+			{
+				if (btGetConstraintIslandId(m_sortedConstraints[i]) == islandId)
+				{
+					numCurConstraints++;
+				}
+			}
+			//a minimum batch size of 1 or less means no batching: solve this island right away
+			if (m_solverInfo->m_minimumSolverBatchSize<=1)
+			{
+				m_solver->solveGroup( bodies,numBodies,manifolds, numManifolds,startConstraint,numCurConstraints,*m_solverInfo,m_debugDrawer,m_stackAlloc,m_dispatcher);
+			} else
+			{
+				//append this island to the pending batch; solve once constraints+manifolds exceed the minimum batch size
+				for (i=0;i<numBodies;i++)
+					m_bodies.push_back(bodies[i]);
+				for (i=0;i<numManifolds;i++)
+					m_manifolds.push_back(manifolds[i]);
+				for (i=0;i<numCurConstraints;i++)
+					m_constraints.push_back(startConstraint[i]);
+				if ((m_constraints.size()+m_manifolds.size())>m_solverInfo->m_minimumSolverBatchSize)
+				{
+					processConstraints();
+				} else
+				{
+					//printf("deferred\n");
+				}
+			}
+		}
+	}
+	void	processConstraints()
+	{
+		//solve the accumulated batch in one solveGroup call (null pointers for empty arrays), then empty the batch
+		btCollisionObject** bodies = m_bodies.size()? &m_bodies[0]:0;
+		btPersistentManifold** manifold = m_manifolds.size()?&m_manifolds[0]:0;
+		btTypedConstraint** constraints = m_constraints.size()?&m_constraints[0]:0;
+			
+		m_solver->solveGroup( bodies,m_bodies.size(),manifold, m_manifolds.size(),constraints, m_constraints.size() ,*m_solverInfo,m_debugDrawer,m_stackAlloc,m_dispatcher);
+		m_bodies.resize(0);
+		m_manifolds.resize(0);
+		m_constraints.resize(0);
+
+	}
+
+};
 
 
 
 btDiscreteDynamicsWorld::btDiscreteDynamicsWorld(btDispatcher* dispatcher,btBroadphaseInterface* pairCache,btConstraintSolver* constraintSolver, btCollisionConfiguration* collisionConfiguration)
 :btDynamicsWorld(dispatcher,pairCache,collisionConfiguration),
+m_sortedConstraints	(),
+m_solverIslandCallback ( NULL ),
 m_constraintSolver(constraintSolver),
 m_gravity(0,-10,0),
-m_localTime(btScalar(1.)/btScalar(60.)),
+m_localTime(0),
 m_synchronizeAllMotionStates(false),
+m_applySpeculativeContactRestitution(false),
 m_profileTimings(0)
+
 {
 	if (!m_constraintSolver)
 	{
@@ -83,6 +229,11 @@ m_profileTimings(0)
 	}
 
 	m_ownsIslandManager = true;
+
+	{
+		void* mem = btAlignedAlloc(sizeof(InplaceSolverIslandCallback),16);
+		m_solverIslandCallback = new (mem) InplaceSolverIslandCallback (m_constraintSolver, m_stackAlloc, dispatcher);
+	}
 }
 
 
@@ -94,6 +245,11 @@ btDiscreteDynamicsWorld::~btDiscreteDynamicsWorld()
 		m_islandManager->~btSimulationIslandManager();
 		btAlignedFree( m_islandManager);
 	}
+	if (m_solverIslandCallback)
+	{
+		m_solverIslandCallback->~InplaceSolverIslandCallback();
+		btAlignedFree(m_solverIslandCallback);
+	}
 	if (m_ownsConstraintSolver)
 	{
 
@@ -127,24 +283,8 @@ void	btDiscreteDynamicsWorld::debugDrawWorld()
 {
 	BT_PROFILE("debugDrawWorld");
 
-	if (getDebugDrawer() && getDebugDrawer()->getDebugMode() & btIDebugDraw::DBG_DrawContactPoints)
-	{
-		int numManifolds = getDispatcher()->getNumManifolds();
-		btVector3 color(0,0,0);
-		for (int i=0;i<numManifolds;i++)
-		{
-			btPersistentManifold* contactManifold = getDispatcher()->getManifoldByIndexInternal(i);
-			//btCollisionObject* obA = static_cast<btCollisionObject*>(contactManifold->getBody0());
-			//btCollisionObject* obB = static_cast<btCollisionObject*>(contactManifold->getBody1());
+	btCollisionWorld::debugDrawWorld();
 
-			int numContacts = contactManifold->getNumContacts();
-			for (int j=0;j<numContacts;j++)
-			{
-				btManifoldPoint& cp = contactManifold->getContactPoint(j);
-				getDebugDrawer()->drawContactPoint(cp.m_positionWorldOnB,cp.m_normalWorldOnB,cp.getDistance(),cp.getLifeTime(),color);
-			}
-		}
-	}
 	bool drawConstraints = false;
 	if (getDebugDrawer())
 	{
@@ -165,46 +305,10 @@ void	btDiscreteDynamicsWorld::debugDrawWorld()
 
 
 
-	if (getDebugDrawer() && getDebugDrawer()->getDebugMode() & (btIDebugDraw::DBG_DrawWireframe | btIDebugDraw::DBG_DrawAabb))
+    if (getDebugDrawer() && (getDebugDrawer()->getDebugMode() & (btIDebugDraw::DBG_DrawWireframe | btIDebugDraw::DBG_DrawAabb | btIDebugDraw::DBG_DrawNormals)))
 	{
 		int i;
 
-		for (  i=0;i<m_collisionObjects.size();i++)
-		{
-			btCollisionObject* colObj = m_collisionObjects[i];
-			if (getDebugDrawer() && getDebugDrawer()->getDebugMode() & btIDebugDraw::DBG_DrawWireframe)
-			{
-				btVector3 color(btScalar(255.),btScalar(255.),btScalar(255.));
-				switch(colObj->getActivationState())
-				{
-				case  ACTIVE_TAG:
-					color = btVector3(btScalar(255.),btScalar(255.),btScalar(255.)); break;
-				case ISLAND_SLEEPING:
-					color =  btVector3(btScalar(0.),btScalar(255.),btScalar(0.));break;
-				case WANTS_DEACTIVATION:
-					color = btVector3(btScalar(0.),btScalar(255.),btScalar(255.));break;
-				case DISABLE_DEACTIVATION:
-					color = btVector3(btScalar(255.),btScalar(0.),btScalar(0.));break;
-				case DISABLE_SIMULATION:
-					color = btVector3(btScalar(255.),btScalar(255.),btScalar(0.));break;
-				default:
-					{
-						color = btVector3(btScalar(255.),btScalar(0.),btScalar(0.));
-					}
-				};
-
-				debugDrawObject(colObj->getWorldTransform(),colObj->getCollisionShape(),color);
-			}
-			if (m_debugDrawer && (m_debugDrawer->getDebugMode() & btIDebugDraw::DBG_DrawAabb))
-			{
-				btVector3 minAabb,maxAabb;
-				btVector3 colorvec(1,0,0);
-				colObj->getCollisionShape()->getAabb(colObj->getWorldTransform(), minAabb,maxAabb);
-				m_debugDrawer->drawAabb(minAabb,maxAabb,colorvec);
-			}
-
-		}
-	
 		if (getDebugDrawer() && getDebugDrawer()->getDebugMode())
 		{
 			for (i=0;i<m_actions.size();i++)
@@ -330,12 +434,14 @@ int	btDiscreteDynamicsWorld::stepSimulation( btScalar timeStep,int maxSubSteps,
 	if (numSimulationSubSteps)
 	{
 
-		saveKinematicState(fixedTimeStep);
+		//clamp the number of substeps, to prevent simulation grinding spiralling down to a halt
+		int clampedSimulationSteps = (numSimulationSubSteps > maxSubSteps)? maxSubSteps : numSimulationSubSteps;
+
+		saveKinematicState(fixedTimeStep*clampedSimulationSteps);
 
 		applyGravity();
 
-		//clamp the number of substeps, to prevent simulation grinding spiralling down to a halt
-		int clampedSimulationSteps = (numSimulationSubSteps > maxSubSteps)? maxSubSteps : numSimulationSubSteps;
+		
 
 		for (int i=0;i<clampedSimulationSteps;i++)
 		{
@@ -375,6 +481,9 @@ void	btDiscreteDynamicsWorld::internalSingleStepSimulation(btScalar timeStep)
 	dispatchInfo.m_stepCount = 0;
 	dispatchInfo.m_debugDraw = getDebugDrawer();
 
+
+    createPredictiveContacts(timeStep);
+    
 	///perform collision detection
 	performDiscreteCollisionDetection();
 
@@ -391,6 +500,7 @@ void	btDiscreteDynamicsWorld::internalSingleStepSimulation(btScalar timeStep)
 	///CallbackTriggers();
 
 	///integrate transforms
+
 	integrateTransforms(timeStep);
 
 	///update vehicle simulation
@@ -409,7 +519,7 @@ void	btDiscreteDynamicsWorld::setGravity(const btVector3& gravity)
 	for ( int i=0;i<m_nonStaticRigidBodies.size();i++)
 	{
 		btRigidBody* body = m_nonStaticRigidBodies[i];
-		if (body->isActive())
+		if (body->isActive() && !(body->getFlags() &BT_DISABLE_WORLD_GRAVITY))
 		{
 			body->setGravity(gravity);
 		}
@@ -444,7 +554,7 @@ void	btDiscreteDynamicsWorld::removeRigidBody(btRigidBody* body)
 
 void	btDiscreteDynamicsWorld::addRigidBody(btRigidBody* body)
 {
-	if (!body->isStaticOrKinematicObject())
+	if (!body->isStaticOrKinematicObject() && !(body->getFlags() &BT_DISABLE_WORLD_GRAVITY))
 	{
 		body->setGravity(m_gravity);
 	}
@@ -469,7 +579,7 @@ void	btDiscreteDynamicsWorld::addRigidBody(btRigidBody* body)
 
 void	btDiscreteDynamicsWorld::addRigidBody(btRigidBody* body, short group, short mask)
 {
-	if (!body->isStaticOrKinematicObject())
+	if (!body->isStaticOrKinematicObject() && !(body->getFlags() &BT_DISABLE_WORLD_GRAVITY))
 	{
 		body->setGravity(m_gravity);
 	}
@@ -585,173 +695,87 @@ void	btDiscreteDynamicsWorld::removeCharacter(btActionInterface* character)
 }
 
 
-SIMD_FORCE_INLINE	int	btGetConstraintIslandId(const btTypedConstraint* lhs)
-{
-	int islandId;
-	
-	const btCollisionObject& rcolObj0 = lhs->getRigidBodyA();
-	const btCollisionObject& rcolObj1 = lhs->getRigidBodyB();
-	islandId= rcolObj0.getIslandTag()>=0?rcolObj0.getIslandTag():rcolObj1.getIslandTag();
-	return islandId;
-
-}
-
-
-class btSortConstraintOnIslandPredicate
-{
-	public:
-
-		bool operator() ( const btTypedConstraint* lhs, const btTypedConstraint* rhs )
-		{
-			int rIslandId0,lIslandId0;
-			rIslandId0 = btGetConstraintIslandId(rhs);
-			lIslandId0 = btGetConstraintIslandId(lhs);
-			return lIslandId0 < rIslandId0;
-		}
-};
-
-
 
 
 void	btDiscreteDynamicsWorld::solveConstraints(btContactSolverInfo& solverInfo)
 {
 	BT_PROFILE("solveConstraints");
 	
-	struct InplaceSolverIslandCallback : public btSimulationIslandManager::IslandCallback
-	{
-
-		btContactSolverInfo&	m_solverInfo;
-		btConstraintSolver*		m_solver;
-		btTypedConstraint**		m_sortedConstraints;
-		int						m_numConstraints;
-		btIDebugDraw*			m_debugDrawer;
-		btStackAlloc*			m_stackAlloc;
-		btDispatcher*			m_dispatcher;
-
-		InplaceSolverIslandCallback(
-			btContactSolverInfo& solverInfo,
-			btConstraintSolver*	solver,
-			btTypedConstraint** sortedConstraints,
-			int	numConstraints,
-			btIDebugDraw*	debugDrawer,
-			btStackAlloc*			stackAlloc,
-			btDispatcher* dispatcher)
-			:m_solverInfo(solverInfo),
-			m_solver(solver),
-			m_sortedConstraints(sortedConstraints),
-			m_numConstraints(numConstraints),
-			m_debugDrawer(debugDrawer),
-			m_stackAlloc(stackAlloc),
-			m_dispatcher(dispatcher)
-		{
-
-		}
-
-		InplaceSolverIslandCallback& operator=(InplaceSolverIslandCallback& other)
-		{
-			btAssert(0);
-			(void)other;
-			return *this;
-		}
-		virtual	void	ProcessIsland(btCollisionObject** bodies,int numBodies,btPersistentManifold**	manifolds,int numManifolds, int islandId)
-		{
-			if (islandId<0)
-			{
-				if (numManifolds + m_numConstraints)
-				{
-					///we don't split islands, so all constraints/contact manifolds/bodies are passed into the solver regardless the island id
-					m_solver->solveGroup( bodies,numBodies,manifolds, numManifolds,&m_sortedConstraints[0],m_numConstraints,m_solverInfo,m_debugDrawer,m_stackAlloc,m_dispatcher);
-				}
-			} else
-			{
-					//also add all non-contact constraints/joints for this island
-				btTypedConstraint** startConstraint = 0;
-				int numCurConstraints = 0;
-				int i;
-				
-				//find the first constraint for this island
-				for (i=0;i<m_numConstraints;i++)
-				{
-					if (btGetConstraintIslandId(m_sortedConstraints[i]) == islandId)
-					{
-						startConstraint = &m_sortedConstraints[i];
-						break;
-					}
-				}
-				//count the number of constraints in this island
-				for (;i<m_numConstraints;i++)
-				{
-					if (btGetConstraintIslandId(m_sortedConstraints[i]) == islandId)
-					{
-						numCurConstraints++;
-					}
-				}
-
-				///only call solveGroup if there is some work: avoid virtual function call, its overhead can be excessive
-				if (numManifolds + numCurConstraints)
-				{
-					m_solver->solveGroup( bodies,numBodies,manifolds, numManifolds,startConstraint,numCurConstraints,m_solverInfo,m_debugDrawer,m_stackAlloc,m_dispatcher);
-				}
-		
-			}
-		}
-
-	};
-
-	//sorted version of all btTypedConstraint, based on islandId
-	btAlignedObjectArray<btTypedConstraint*>	sortedConstraints;
-	sortedConstraints.resize( m_constraints.size());
+	m_sortedConstraints.resize( m_constraints.size());
 	int i; 
 	for (i=0;i<getNumConstraints();i++)
 	{
-		sortedConstraints[i] = m_constraints[i];
+		m_sortedConstraints[i] = m_constraints[i];
 	}
 
 //	btAssert(0);
 		
 	
 
-	sortedConstraints.quickSort(btSortConstraintOnIslandPredicate());
+	m_sortedConstraints.quickSort(btSortConstraintOnIslandPredicate());
 	
-	btTypedConstraint** constraintsPtr = getNumConstraints() ? &sortedConstraints[0] : 0;
-	
-	InplaceSolverIslandCallback	solverCallback(	solverInfo,	m_constraintSolver, constraintsPtr,sortedConstraints.size(),	m_debugDrawer,m_stackAlloc,m_dispatcher1);
+	btTypedConstraint** constraintsPtr = getNumConstraints() ? &m_sortedConstraints[0] : 0;
 	
+	m_solverIslandCallback->setup(&solverInfo,constraintsPtr,m_sortedConstraints.size(),getDebugDrawer());
 	m_constraintSolver->prepareSolve(getCollisionWorld()->getNumCollisionObjects(), getCollisionWorld()->getDispatcher()->getNumManifolds());
 	
 	/// solve all the constraints for this island
-	m_islandManager->buildAndProcessIslands(getCollisionWorld()->getDispatcher(),getCollisionWorld(),&solverCallback);
+	m_islandManager->buildAndProcessIslands(getCollisionWorld()->getDispatcher(),getCollisionWorld(),m_solverIslandCallback);
+
+	m_solverIslandCallback->processConstraints();
 
 	m_constraintSolver->allSolved(solverInfo, m_debugDrawer, m_stackAlloc);
 }
 
 
-
-
 void	btDiscreteDynamicsWorld::calculateSimulationIslands()
 {
 	BT_PROFILE("calculateSimulationIslands");
 
 	getSimulationIslandManager()->updateActivationState(getCollisionWorld(),getCollisionWorld()->getDispatcher());
 
+    {
+        //merge islands based on speculative contact manifolds too
+        for (int i=0;i<this->m_predictiveManifolds.size();i++)
+        {
+            btPersistentManifold* manifold = m_predictiveManifolds[i];
+            
+            const btCollisionObject* colObj0 = manifold->getBody0();
+            const btCollisionObject* colObj1 = manifold->getBody1();
+            
+            if (((colObj0) && (!(colObj0)->isStaticOrKinematicObject())) &&
+                ((colObj1) && (!(colObj1)->isStaticOrKinematicObject())))
+            {
+                if (colObj0->isActive() || colObj1->isActive())
+                {
+                    
+                    getSimulationIslandManager()->getUnionFind().unite((colObj0)->getIslandTag(),
+                                                                       (colObj1)->getIslandTag());
+                }
+            }
+        }
+    }
+    
 	{
 		int i;
 		int numConstraints = int(m_constraints.size());
 		for (i=0;i< numConstraints ; i++ )
 		{
 			btTypedConstraint* constraint = m_constraints[i];
-
-			const btRigidBody* colObj0 = &constraint->getRigidBodyA();
-			const btRigidBody* colObj1 = &constraint->getRigidBodyB();
-
-			if (((colObj0) && (!(colObj0)->isStaticOrKinematicObject())) &&
-				((colObj1) && (!(colObj1)->isStaticOrKinematicObject())))
+			if (constraint->isEnabled())
 			{
-				if (colObj0->isActive() || colObj1->isActive())
-				{
+				const btRigidBody* colObj0 = &constraint->getRigidBodyA();
+				const btRigidBody* colObj1 = &constraint->getRigidBodyB();
 
-					getSimulationIslandManager()->getUnionFind().unite((colObj0)->getIslandTag(),
-						(colObj1)->getIslandTag());
+				if (((colObj0) && (!(colObj0)->isStaticOrKinematicObject())) &&
+					((colObj1) && (!(colObj1)->isStaticOrKinematicObject())))
+				{
+					if (colObj0->isActive() || colObj1->isActive())
+					{
+
+						getSimulationIslandManager()->getUnionFind().unite((colObj0)->getIslandTag(),
+							(colObj1)->getIslandTag());
+					}
 				}
 			}
 		}
@@ -764,21 +788,22 @@ void	btDiscreteDynamicsWorld::calculateSimulationIslands()
 }
 
 
-#include "BulletCollision/BroadphaseCollision/btCollisionAlgorithm.h"
+
 
 class btClosestNotMeConvexResultCallback : public btCollisionWorld::ClosestConvexResultCallback
 {
+public:
+
 	btCollisionObject* m_me;
 	btScalar m_allowedPenetration;
 	btOverlappingPairCache* m_pairCache;
 	btDispatcher* m_dispatcher;
 
-
 public:
 	btClosestNotMeConvexResultCallback (btCollisionObject* me,const btVector3& fromA,const btVector3& toA,btOverlappingPairCache* pairCache,btDispatcher* dispatcher) : 
 	  btCollisionWorld::ClosestConvexResultCallback(fromA,toA),
-		m_allowedPenetration(0.0f),
 		m_me(me),
+		m_allowedPenetration(0.0f),
 		m_pairCache(pairCache),
 		m_dispatcher(dispatcher)
 	{
@@ -820,6 +845,7 @@ public:
 		//call needsResponse, see http://code.google.com/p/bullet/issues/detail?id=179
 		if (m_dispatcher->needsResponse(m_me,otherObj))
 		{
+#if 0
 			///don't do CCD when there are already contact points (touching contact/penetration)
 			btAlignedObjectArray<btPersistentManifold*> manifoldArray;
 			btBroadphasePair* collisionPair = m_pairCache->findPair(m_me->getBroadphaseHandle(),proxy0);
@@ -837,8 +863,11 @@ public:
 					}
 				}
 			}
+#endif
+			return true;
 		}
-		return true;
+
+		return false;
 	}
 
 
@@ -847,7 +876,103 @@ public:
 ///internal debugging variable. this value shouldn't be too high
 int gNumClampedCcdMotions=0;
 
-//#include "stdio.h"
+
+///Sweeps each fast-moving convex body along its predicted motion and, on a hit, adds a one-point predictive contact manifold to m_predictiveManifolds.
+void	btDiscreteDynamicsWorld::createPredictiveContacts(btScalar timeStep)
+{
+	BT_PROFILE("createPredictiveContacts");
+	
+	{
+		BT_PROFILE("release predictive contact manifolds");
+
+		for (int i=0;i<m_predictiveManifolds.size();i++)
+		{
+			btPersistentManifold* manifold = m_predictiveManifolds[i];
+			this->m_dispatcher1->releaseManifold(manifold);
+		}
+		m_predictiveManifolds.clear();
+	}
+
+	btTransform predictedTrans;
+	for ( int i=0;i<m_nonStaticRigidBodies.size();i++)
+	{
+		btRigidBody* body = m_nonStaticRigidBodies[i];
+		body->setHitFraction(1.f);
+
+		if (body->isActive() && (!body->isStaticOrKinematicObject()))
+		{
+
+			body->predictIntegratedTransform(timeStep, predictedTrans);
+			
+			btScalar squareMotion = (predictedTrans.getOrigin()-body->getWorldTransform().getOrigin()).length2();
+			//sweep only when continuous detection is enabled and the squared motion exceeds the body's non-zero CCD threshold
+			if (getDispatchInfo().m_useContinuous && body->getCcdSquareMotionThreshold() && body->getCcdSquareMotionThreshold() < squareMotion)
+			{
+				BT_PROFILE("predictive convexSweepTest");
+				if (body->getCollisionShape()->isConvex())
+				{
+					gNumClampedCcdMotions++;
+#ifdef PREDICTIVE_CONTACT_USE_STATIC_ONLY
+					class StaticOnlyCallback : public btClosestNotMeConvexResultCallback
+					{
+					public:
+
+						StaticOnlyCallback (btCollisionObject* me,const btVector3& fromA,const btVector3& toA,btOverlappingPairCache* pairCache,btDispatcher* dispatcher) : 
+						  btClosestNotMeConvexResultCallback(me,fromA,toA,pairCache,dispatcher)
+						{
+						}
+
+					  	virtual bool needsCollision(btBroadphaseProxy* proxy0) const
+						{
+							btCollisionObject* otherObj = (btCollisionObject*) proxy0->m_clientObject;
+							if (!otherObj->isStaticOrKinematicObject())
+								return false;
+							return btClosestNotMeConvexResultCallback::needsCollision(proxy0);
+						}
+					};
+
+					StaticOnlyCallback sweepResults(body,body->getWorldTransform().getOrigin(),predictedTrans.getOrigin(),getBroadphase()->getOverlappingPairCache(),getDispatcher());
+#else
+					btClosestNotMeConvexResultCallback sweepResults(body,body->getWorldTransform().getOrigin(),predictedTrans.getOrigin(),getBroadphase()->getOverlappingPairCache(),getDispatcher());
+#endif
+					//btConvexShape* convexShape = static_cast<btConvexShape*>(body->getCollisionShape());
+					btSphereShape tmpSphere(body->getCcdSweptSphereRadius());//the sweep uses a sphere of the CCD swept-sphere radius, not the body's own shape
+					sweepResults.m_allowedPenetration=getDispatchInfo().m_allowedCcdPenetration;
+
+					sweepResults.m_collisionFilterGroup = body->getBroadphaseProxy()->m_collisionFilterGroup;
+					sweepResults.m_collisionFilterMask  = body->getBroadphaseProxy()->m_collisionFilterMask;
+					btTransform modifiedPredictedTrans = predictedTrans;
+					modifiedPredictedTrans.setBasis(body->getWorldTransform().getBasis());
+					//the sweep target keeps the current orientation, so only the predicted translation is swept
+					convexSweepTest(&tmpSphere,body->getWorldTransform(),modifiedPredictedTrans,sweepResults);
+					if (sweepResults.hasHit() && (sweepResults.m_closestHitFraction < 1.f))
+					{
+						//motion up to the hit, and its length along the reversed hit normal
+						btVector3 distVec = (predictedTrans.getOrigin()-body->getWorldTransform().getOrigin())*sweepResults.m_closestHitFraction;
+						btScalar distance = distVec.dot(-sweepResults.m_hitNormalWorld);
+
+						btPersistentManifold* manifold = m_dispatcher1->getNewManifold(body,sweepResults.m_hitCollisionObject);
+						m_predictiveManifolds.push_back(manifold);
+						
+						btVector3 worldPointB = body->getWorldTransform().getOrigin()+distVec;
+						btVector3 localPointB = sweepResults.m_hitCollisionObject->getWorldTransform().inverse()*worldPointB;
+
+						btManifoldPoint newPoint(btVector3(0,0,0), localPointB,sweepResults.m_hitNormalWorld,distance);
+
+						bool isPredictive = true;
+						int index = manifold->addManifoldPoint(newPoint, isPredictive);
+						btManifoldPoint& pt = manifold->getContactPoint(index);
+						pt.m_combinedRestitution = 0;
+						pt.m_combinedFriction = btManifoldResult::calculateCombinedFriction(body,sweepResults.m_hitCollisionObject);
+						pt.m_positionWorldOnA = body->getWorldTransform().getOrigin();
+						pt.m_positionWorldOnB = worldPointB;
+
+					}
+				}
+			}
+		}
+	}
+}
 void	btDiscreteDynamicsWorld::integrateTransforms(btScalar timeStep)
 {
 	BT_PROFILE("integrateTransforms");
@@ -859,43 +984,143 @@ void	btDiscreteDynamicsWorld::integrateTransforms(btScalar timeStep)
 
 		if (body->isActive() && (!body->isStaticOrKinematicObject()))
 		{
+
 			body->predictIntegratedTransform(timeStep, predictedTrans);
+			
 			btScalar squareMotion = (predictedTrans.getOrigin()-body->getWorldTransform().getOrigin()).length2();
 
-			if (body->getCcdSquareMotionThreshold() && body->getCcdSquareMotionThreshold() < squareMotion)
+			
+
+			if (getDispatchInfo().m_useContinuous && body->getCcdSquareMotionThreshold() && body->getCcdSquareMotionThreshold() < squareMotion)
 			{
 				BT_PROFILE("CCD motion clamping");
 				if (body->getCollisionShape()->isConvex())
 				{
 					gNumClampedCcdMotions++;
-					
+#ifdef USE_STATIC_ONLY
+					class StaticOnlyCallback : public btClosestNotMeConvexResultCallback
+					{
+					public:
+
+						StaticOnlyCallback (btCollisionObject* me,const btVector3& fromA,const btVector3& toA,btOverlappingPairCache* pairCache,btDispatcher* dispatcher) : 
+						  btClosestNotMeConvexResultCallback(me,fromA,toA,pairCache,dispatcher)
+						{
+						}
+
+					  	virtual bool needsCollision(btBroadphaseProxy* proxy0) const
+						{
+							btCollisionObject* otherObj = (btCollisionObject*) proxy0->m_clientObject;
+							if (!otherObj->isStaticOrKinematicObject())
+								return false;
+							return btClosestNotMeConvexResultCallback::needsCollision(proxy0);
+						}
+					};
+
+					StaticOnlyCallback sweepResults(body,body->getWorldTransform().getOrigin(),predictedTrans.getOrigin(),getBroadphase()->getOverlappingPairCache(),getDispatcher());
+#else
 					btClosestNotMeConvexResultCallback sweepResults(body,body->getWorldTransform().getOrigin(),predictedTrans.getOrigin(),getBroadphase()->getOverlappingPairCache(),getDispatcher());
+#endif
 					//btConvexShape* convexShape = static_cast<btConvexShape*>(body->getCollisionShape());
 					btSphereShape tmpSphere(body->getCcdSweptSphereRadius());//btConvexShape* convexShape = static_cast<btConvexShape*>(body->getCollisionShape());
+					sweepResults.m_allowedPenetration=getDispatchInfo().m_allowedCcdPenetration;
 
 					sweepResults.m_collisionFilterGroup = body->getBroadphaseProxy()->m_collisionFilterGroup;
 					sweepResults.m_collisionFilterMask  = body->getBroadphaseProxy()->m_collisionFilterMask;
+					btTransform modifiedPredictedTrans = predictedTrans;
+					modifiedPredictedTrans.setBasis(body->getWorldTransform().getBasis());
 
-					convexSweepTest(&tmpSphere,body->getWorldTransform(),predictedTrans,sweepResults);
+					convexSweepTest(&tmpSphere,body->getWorldTransform(),modifiedPredictedTrans,sweepResults);
 					if (sweepResults.hasHit() && (sweepResults.m_closestHitFraction < 1.f))
 					{
+						
+						//printf("clamped integration to hit fraction = %f\n",fraction);
 						body->setHitFraction(sweepResults.m_closestHitFraction);
 						body->predictIntegratedTransform(timeStep*body->getHitFraction(), predictedTrans);
 						body->setHitFraction(0.f);
-//							printf("clamped integration to hit fraction = %f\n",fraction);
+						body->proceedToTransform( predictedTrans);
+
+#if 0
+						btVector3 linVel = body->getLinearVelocity();
+
+						btScalar maxSpeed = body->getCcdMotionThreshold()/getSolverInfo().m_timeStep;
+						btScalar maxSpeedSqr = maxSpeed*maxSpeed;
+						if (linVel.length2()>maxSpeedSqr)
+						{
+							linVel.normalize();
+							linVel*= maxSpeed;
+							body->setLinearVelocity(linVel);
+							btScalar ms2 = body->getLinearVelocity().length2();
+							body->predictIntegratedTransform(timeStep, predictedTrans);
+
+							btScalar sm2 = (predictedTrans.getOrigin()-body->getWorldTransform().getOrigin()).length2();
+							btScalar smt = body->getCcdSquareMotionThreshold();
+							printf("sm2=%f\n",sm2);
+						}
+#else
+						
+						//don't apply the collision response right now, it will happen next frame
+						//if you really need to, you can uncomment next 3 lines. Note that it uses zero restitution.
+						//btScalar appliedImpulse = 0.f;
+						//btScalar depth = 0.f;
+						//appliedImpulse = resolveSingleCollision(body,(btCollisionObject*)sweepResults.m_hitCollisionObject,sweepResults.m_hitPointWorld,sweepResults.m_hitNormalWorld,getSolverInfo(), depth);
+						
+
+#endif
+
+        				continue;
 					}
 				}
 			}
 			
+
 			body->proceedToTransform( predictedTrans);
+		
+		}
+
+	}
+
+	///this should probably be switched on by default, but it is not well tested yet
+	if (m_applySpeculativeContactRestitution)
+	{
+		BT_PROFILE("apply speculative contact restitution");
+		for (int i=0;i<m_predictiveManifolds.size();i++)
+		{
+			btPersistentManifold* manifold = m_predictiveManifolds[i];
+			btRigidBody* body0 = btRigidBody::upcast((btCollisionObject*)manifold->getBody0());
+			btRigidBody* body1 = btRigidBody::upcast((btCollisionObject*)manifold->getBody1());
+			
+			for (int p=0;p<manifold->getNumContacts();p++)
+			{
+				const btManifoldPoint& pt = manifold->getContactPoint(p);
+				//use the manifold's collision objects here: body0/body1 may be null (they are null-checked below)
+				btScalar combinedRestitution = btManifoldResult::calculateCombinedRestitution(manifold->getBody0(), manifold->getBody1());
+
+				if (combinedRestitution>0 && pt.m_appliedImpulse != 0.f)
+				//if (pt.getDistance()>0 && combinedRestitution>0 && pt.m_appliedImpulse != 0.f)
+				{
+					btVector3 imp = -pt.m_normalWorldOnB * pt.m_appliedImpulse* combinedRestitution;
+				
+					const btVector3& pos1 = pt.getPositionWorldOnA();
+					const btVector3& pos2 = pt.getPositionWorldOnB();
+
+					//compute each relative position only after the null check, so a null body is never dereferenced
+					if (body0)
+						body0->applyImpulse(imp,pos1 - body0->getWorldTransform().getOrigin());
+					if (body1)
+						body1->applyImpulse(-imp,pos2 - body1->getWorldTransform().getOrigin());
+
+				}
+			}
 		}
 	}
+	
 }
 
 
 
 
 
+
 void	btDiscreteDynamicsWorld::predictUnconstraintMotion(btScalar timeStep)
 {
 	BT_PROFILE("predictUnconstraintMotion");
@@ -904,7 +1129,8 @@ void	btDiscreteDynamicsWorld::predictUnconstraintMotion(btScalar timeStep)
 		btRigidBody* body = m_nonStaticRigidBodies[i];
 		if (!body->isStaticOrKinematicObject())
 		{
-			body->integrateVelocities( timeStep);
+			//don't integrate/update velocities here, it happens in the constraint solver
+
 			//damping
 			body->applyDamping(timeStep);
 
@@ -929,283 +1155,6 @@ void	btDiscreteDynamicsWorld::startProfiling(btScalar timeStep)
 
 	
 
-class DebugDrawcallback : public btTriangleCallback, public btInternalTriangleIndexCallback
-{
-	btIDebugDraw*	m_debugDrawer;
-	btVector3	m_color;
-	btTransform	m_worldTrans;
-
-public:
-
-	DebugDrawcallback(btIDebugDraw*	debugDrawer,const btTransform& worldTrans,const btVector3& color) :
-                m_debugDrawer(debugDrawer),
-		m_color(color),
-		m_worldTrans(worldTrans)
-	{
-	}
-
-	virtual void internalProcessTriangleIndex(btVector3* triangle,int partId,int  triangleIndex)
-	{
-		processTriangle(triangle,partId,triangleIndex);
-	}
-
-	virtual void processTriangle(btVector3* triangle,int partId, int triangleIndex)
-	{
-		(void)partId;
-		(void)triangleIndex;
-
-		btVector3 wv0,wv1,wv2;
-		wv0 = m_worldTrans*triangle[0];
-		wv1 = m_worldTrans*triangle[1];
-		wv2 = m_worldTrans*triangle[2];
-		m_debugDrawer->drawLine(wv0,wv1,m_color);
-		m_debugDrawer->drawLine(wv1,wv2,m_color);
-		m_debugDrawer->drawLine(wv2,wv0,m_color);
-	}
-};
-
-void btDiscreteDynamicsWorld::debugDrawSphere(btScalar radius, const btTransform& transform, const btVector3& color)
-{
-	btVector3 start = transform.getOrigin();
-
-	const btVector3 xoffs = transform.getBasis() * btVector3(radius,0,0);
-	const btVector3 yoffs = transform.getBasis() * btVector3(0,radius,0);
-	const btVector3 zoffs = transform.getBasis() * btVector3(0,0,radius);
-
-	// XY 
-	getDebugDrawer()->drawLine(start-xoffs, start+yoffs, color);
-	getDebugDrawer()->drawLine(start+yoffs, start+xoffs, color);
-	getDebugDrawer()->drawLine(start+xoffs, start-yoffs, color);
-	getDebugDrawer()->drawLine(start-yoffs, start-xoffs, color);
-
-	// XZ
-	getDebugDrawer()->drawLine(start-xoffs, start+zoffs, color);
-	getDebugDrawer()->drawLine(start+zoffs, start+xoffs, color);
-	getDebugDrawer()->drawLine(start+xoffs, start-zoffs, color);
-	getDebugDrawer()->drawLine(start-zoffs, start-xoffs, color);
-
-	// YZ
-	getDebugDrawer()->drawLine(start-yoffs, start+zoffs, color);
-	getDebugDrawer()->drawLine(start+zoffs, start+yoffs, color);
-	getDebugDrawer()->drawLine(start+yoffs, start-zoffs, color);
-	getDebugDrawer()->drawLine(start-zoffs, start-yoffs, color);
-}
-
-void btDiscreteDynamicsWorld::debugDrawObject(const btTransform& worldTransform, const btCollisionShape* shape, const btVector3& color)
-{
-	// Draw a small simplex at the center of the object
-	{
-		btVector3 start = worldTransform.getOrigin();
-		getDebugDrawer()->drawLine(start, start+worldTransform.getBasis() * btVector3(1,0,0), btVector3(1,0,0));
-		getDebugDrawer()->drawLine(start, start+worldTransform.getBasis() * btVector3(0,1,0), btVector3(0,1,0));
-		getDebugDrawer()->drawLine(start, start+worldTransform.getBasis() * btVector3(0,0,1), btVector3(0,0,1));
-	}
-
-	if (shape->getShapeType() == COMPOUND_SHAPE_PROXYTYPE)
-	{
-		const btCompoundShape* compoundShape = static_cast<const btCompoundShape*>(shape);
-		for (int i=compoundShape->getNumChildShapes()-1;i>=0;i--)
-		{
-			btTransform childTrans = compoundShape->getChildTransform(i);
-			const btCollisionShape* colShape = compoundShape->getChildShape(i);
-			debugDrawObject(worldTransform*childTrans,colShape,color);
-		}
-
-	} else
-	{
-		switch (shape->getShapeType())
-		{
-
-		case SPHERE_SHAPE_PROXYTYPE:
-			{
-				const btSphereShape* sphereShape = static_cast<const btSphereShape*>(shape);
-				btScalar radius = sphereShape->getMargin();//radius doesn't include the margin, so draw with margin
-				
-				debugDrawSphere(radius, worldTransform, color);
-				break;
-			}
-		case MULTI_SPHERE_SHAPE_PROXYTYPE:
-			{
-				const btMultiSphereShape* multiSphereShape = static_cast<const btMultiSphereShape*>(shape);
-
-				btTransform childTransform;
-				childTransform.setIdentity();
-
-				for (int i = multiSphereShape->getSphereCount()-1; i>=0;i--)
-				{
-					childTransform.setOrigin(multiSphereShape->getSpherePosition(i));
-					debugDrawSphere(multiSphereShape->getSphereRadius(i), worldTransform*childTransform, color);
-				}
-
-				break;
-			}
-		case CAPSULE_SHAPE_PROXYTYPE:
-			{
-				const btCapsuleShape* capsuleShape = static_cast<const btCapsuleShape*>(shape);
-
-				btScalar radius = capsuleShape->getRadius();
-				btScalar halfHeight = capsuleShape->getHalfHeight();
-				
-				int upAxis = capsuleShape->getUpAxis();
-
-				
-				btVector3 capStart(0.f,0.f,0.f);
-				capStart[upAxis] = -halfHeight;
-
-				btVector3 capEnd(0.f,0.f,0.f);
-				capEnd[upAxis] = halfHeight;
-
-				// Draw the ends
-				{
-					
-					btTransform childTransform = worldTransform;
-					childTransform.getOrigin() = worldTransform * capStart;
-					debugDrawSphere(radius, childTransform, color);
-				}
-
-				{
-					btTransform childTransform = worldTransform;
-					childTransform.getOrigin() = worldTransform * capEnd;
-					debugDrawSphere(radius, childTransform, color);
-				}
-
-				// Draw some additional lines
-				btVector3 start = worldTransform.getOrigin();
-
-				
-				capStart[(upAxis+1)%3] = radius;
-				capEnd[(upAxis+1)%3] = radius;
-				getDebugDrawer()->drawLine(start+worldTransform.getBasis() * capStart,start+worldTransform.getBasis() * capEnd, color);
-				capStart[(upAxis+1)%3] = -radius;
-				capEnd[(upAxis+1)%3] = -radius;
-				getDebugDrawer()->drawLine(start+worldTransform.getBasis() * capStart,start+worldTransform.getBasis() * capEnd, color);
-
-				capStart[(upAxis+1)%3] = 0.f;
-				capEnd[(upAxis+1)%3] = 0.f;
-
-				capStart[(upAxis+2)%3] = radius;
-				capEnd[(upAxis+2)%3] = radius;
-				getDebugDrawer()->drawLine(start+worldTransform.getBasis() * capStart,start+worldTransform.getBasis() * capEnd, color);
-				capStart[(upAxis+2)%3] = -radius;
-				capEnd[(upAxis+2)%3] = -radius;
-				getDebugDrawer()->drawLine(start+worldTransform.getBasis() * capStart,start+worldTransform.getBasis() * capEnd, color);
-
-				
-				break;
-			}
-		case CONE_SHAPE_PROXYTYPE:
-			{
-				const btConeShape* coneShape = static_cast<const btConeShape*>(shape);
-				btScalar radius = coneShape->getRadius();//+coneShape->getMargin();
-				btScalar height = coneShape->getHeight();//+coneShape->getMargin();
-				btVector3 start = worldTransform.getOrigin();
-
-				int upAxis= coneShape->getConeUpIndex();
-				
-
-				btVector3	offsetHeight(0,0,0);
-				offsetHeight[upAxis] = height * btScalar(0.5);
-				btVector3	offsetRadius(0,0,0);
-				offsetRadius[(upAxis+1)%3] = radius;
-				btVector3	offset2Radius(0,0,0);
-				offset2Radius[(upAxis+2)%3] = radius;
-
-				getDebugDrawer()->drawLine(start+worldTransform.getBasis() * (offsetHeight),start+worldTransform.getBasis() * (-offsetHeight+offsetRadius),color);
-				getDebugDrawer()->drawLine(start+worldTransform.getBasis() * (offsetHeight),start+worldTransform.getBasis() * (-offsetHeight-offsetRadius),color);
-				getDebugDrawer()->drawLine(start+worldTransform.getBasis() * (offsetHeight),start+worldTransform.getBasis() * (-offsetHeight+offset2Radius),color);
-				getDebugDrawer()->drawLine(start+worldTransform.getBasis() * (offsetHeight),start+worldTransform.getBasis() * (-offsetHeight-offset2Radius),color);
-
-
-
-				break;
-
-			}
-		case CYLINDER_SHAPE_PROXYTYPE:
-			{
-				const btCylinderShape* cylinder = static_cast<const btCylinderShape*>(shape);
-				int upAxis = cylinder->getUpAxis();
-				btScalar radius = cylinder->getRadius();
-				btScalar halfHeight = cylinder->getHalfExtentsWithMargin()[upAxis];
-				btVector3 start = worldTransform.getOrigin();
-				btVector3	offsetHeight(0,0,0);
-				offsetHeight[upAxis] = halfHeight;
-				btVector3	offsetRadius(0,0,0);
-				offsetRadius[(upAxis+1)%3] = radius;
-				getDebugDrawer()->drawLine(start+worldTransform.getBasis() * (offsetHeight+offsetRadius),start+worldTransform.getBasis() * (-offsetHeight+offsetRadius),color);
-				getDebugDrawer()->drawLine(start+worldTransform.getBasis() * (offsetHeight-offsetRadius),start+worldTransform.getBasis() * (-offsetHeight-offsetRadius),color);
-				break;
-			}
-
-			case STATIC_PLANE_PROXYTYPE:
-				{
-					const btStaticPlaneShape* staticPlaneShape = static_cast<const btStaticPlaneShape*>(shape);
-					btScalar planeConst = staticPlaneShape->getPlaneConstant();
-					const btVector3& planeNormal = staticPlaneShape->getPlaneNormal();
-					btVector3 planeOrigin = planeNormal * planeConst;
-					btVector3 vec0,vec1;
-					btPlaneSpace1(planeNormal,vec0,vec1);
-					btScalar vecLen = 100.f;
-					btVector3 pt0 = planeOrigin + vec0*vecLen;
-					btVector3 pt1 = planeOrigin - vec0*vecLen;
-					btVector3 pt2 = planeOrigin + vec1*vecLen;
-					btVector3 pt3 = planeOrigin - vec1*vecLen;
-					getDebugDrawer()->drawLine(worldTransform*pt0,worldTransform*pt1,color);
-					getDebugDrawer()->drawLine(worldTransform*pt2,worldTransform*pt3,color);
-					break;
-
-				}
-		default:
-			{
-
-				if (shape->isConcave())
-				{
-					btConcaveShape* concaveMesh = (btConcaveShape*) shape;
-					
-					///@todo pass camera, for some culling? no -> we are not a graphics lib
-					btVector3 aabbMax(btScalar(BT_LARGE_FLOAT),btScalar(BT_LARGE_FLOAT),btScalar(BT_LARGE_FLOAT));
-					btVector3 aabbMin(btScalar(-BT_LARGE_FLOAT),btScalar(-BT_LARGE_FLOAT),btScalar(-BT_LARGE_FLOAT));
-
-					DebugDrawcallback drawCallback(getDebugDrawer(),worldTransform,color);
-					concaveMesh->processAllTriangles(&drawCallback,aabbMin,aabbMax);
-
-				}
-
-				if (shape->getShapeType() == CONVEX_TRIANGLEMESH_SHAPE_PROXYTYPE)
-				{
-					btConvexTriangleMeshShape* convexMesh = (btConvexTriangleMeshShape*) shape;
-					//todo: pass camera for some culling			
-					btVector3 aabbMax(btScalar(BT_LARGE_FLOAT),btScalar(BT_LARGE_FLOAT),btScalar(BT_LARGE_FLOAT));
-					btVector3 aabbMin(btScalar(-BT_LARGE_FLOAT),btScalar(-BT_LARGE_FLOAT),btScalar(-BT_LARGE_FLOAT));
-					//DebugDrawcallback drawCallback;
-					DebugDrawcallback drawCallback(getDebugDrawer(),worldTransform,color);
-					convexMesh->getMeshInterface()->InternalProcessAllTriangles(&drawCallback,aabbMin,aabbMax);
-				}
-
-
-				/// for polyhedral shapes
-				if (shape->isPolyhedral())
-				{
-					btPolyhedralConvexShape* polyshape = (btPolyhedralConvexShape*) shape;
-
-					int i;
-					for (i=0;i<polyshape->getNumEdges();i++)
-					{
-						btVector3 a,b;
-						polyshape->getEdge(i,a,b);
-						btVector3 wa = worldTransform * a;
-						btVector3 wb = worldTransform * b;
-						getDebugDrawer()->drawLine(wa,wb,color);
-
-					}
-
-					
-				}
-			}
-		}
-	}
-}
-
-
 void btDiscreteDynamicsWorld::debugDrawConstraint(btTypedConstraint* constraint)
 {
 	bool drawFrames = (getDebugDrawer()->getDebugMode() & btIDebugDraw::DBG_DrawConstraints) != 0;
@@ -1309,6 +1258,7 @@ void btDiscreteDynamicsWorld::debugDrawConstraint(btTypedConstraint* constraint)
 				}
 			}
 			break;
+		case D6_SPRING_CONSTRAINT_TYPE:
 		case D6_CONSTRAINT_TYPE:
 			{
 				btGeneric6DofConstraint* p6DOF = (btGeneric6DofConstraint*)constraint;
@@ -1366,7 +1316,7 @@ void btDiscreteDynamicsWorld::debugDrawConstraint(btTypedConstraint* constraint)
 				if(drawFrames) getDebugDrawer()->drawTransform(tr, dbgDrawSize);
 				if(drawLimits)
 				{
-					btTransform tr = pSlider->getCalculatedTransformA();
+					btTransform tr = pSlider->getUseLinearReferenceFrameA() ? pSlider->getCalculatedTransformA() : pSlider->getCalculatedTransformB();
 					btVector3 li_min = tr * btVector3(pSlider->getLowerLinLimit(), 0.f, 0.f);
 					btVector3 li_max = tr * btVector3(pSlider->getUpperLinLimit(), 0.f, 0.f);
 					getDebugDrawer()->drawLine(li_min, li_max, btVector3(0, 0, 0));
@@ -1397,6 +1347,7 @@ void	btDiscreteDynamicsWorld::setConstraintSolver(btConstraintSolver* solver)
 	}
 	m_ownsConstraintSolver = false;
 	m_constraintSolver = solver;
+	m_solverIslandCallback->m_solver = solver;
 }
 
 btConstraintSolver* btDiscreteDynamicsWorld::getConstraintSolver()
@@ -1419,3 +1370,97 @@ const btTypedConstraint* btDiscreteDynamicsWorld::getConstraint(int index) const
 }
 
 
+
+void	btDiscreteDynamicsWorld::serializeRigidBodies(btSerializer* serializer)
+{
+	int i;
+	//serialize only the collision objects whose internal type has the CO_RIGID_BODY bit set, one BT_RIGIDBODY_CODE chunk each
+	for (i=0;i<m_collisionObjects.size();i++)
+	{
+		btCollisionObject* colObj = m_collisionObjects[i];
+		if (colObj->getInternalType() & btCollisionObject::CO_RIGID_BODY)
+		{
+			int len = colObj->calculateSerializeBufferSize();
+			btChunk* chunk = serializer->allocate(len,1);
+			const char* structType = colObj->serialize(chunk->m_oldPtr, serializer);
+			serializer->finalizeChunk(chunk,structType,BT_RIGIDBODY_CODE,colObj);
+		}
+	}
+	//then serialize every entry of m_constraints, one BT_CONSTRAINT_CODE chunk each
+	for (i=0;i<m_constraints.size();i++)
+	{
+		btTypedConstraint* constraint = m_constraints[i];
+		int size = constraint->calculateSerializeBufferSize();
+		btChunk* chunk = serializer->allocate(size,1);
+		const char* structType = constraint->serialize(chunk->m_oldPtr,serializer);
+		serializer->finalizeChunk(chunk,structType,BT_CONSTRAINT_CODE,constraint);
+	}
+}
+
+
+
+
+void	btDiscreteDynamicsWorld::serializeDynamicsWorldInfo(btSerializer* serializer)
+{
+#ifdef BT_USE_DOUBLE_PRECISION
+		int len = sizeof(btDynamicsWorldDoubleData);
+		btChunk* chunk = serializer->allocate(len,1);
+		btDynamicsWorldDoubleData* worldInfo = (btDynamicsWorldDoubleData*)chunk->m_oldPtr;
+#else//BT_USE_DOUBLE_PRECISION
+		int len = sizeof(btDynamicsWorldFloatData);
+		btChunk* chunk = serializer->allocate(len,1);
+		btDynamicsWorldFloatData* worldInfo = (btDynamicsWorldFloatData*)chunk->m_oldPtr;
+#endif//BT_USE_DOUBLE_PRECISION
+		//writes the world gravity and solver settings as one BT_DYNAMICSWORLD_CODE chunk; zero the chunk first so unassigned bytes are 0
+		memset(worldInfo ,0x00,len);
+		//copy gravity, then the getSolverInfo() fields one by one
+		m_gravity.serialize(worldInfo->m_gravity);
+		worldInfo->m_solverInfo.m_tau = getSolverInfo().m_tau;
+		worldInfo->m_solverInfo.m_damping = getSolverInfo().m_damping;
+		worldInfo->m_solverInfo.m_friction = getSolverInfo().m_friction;
+		worldInfo->m_solverInfo.m_timeStep = getSolverInfo().m_timeStep;
+
+		worldInfo->m_solverInfo.m_restitution = getSolverInfo().m_restitution;
+		worldInfo->m_solverInfo.m_maxErrorReduction = getSolverInfo().m_maxErrorReduction;
+		worldInfo->m_solverInfo.m_sor = getSolverInfo().m_sor;
+		worldInfo->m_solverInfo.m_erp = getSolverInfo().m_erp;
+
+		worldInfo->m_solverInfo.m_erp2 = getSolverInfo().m_erp2;
+		worldInfo->m_solverInfo.m_globalCfm = getSolverInfo().m_globalCfm;
+		worldInfo->m_solverInfo.m_splitImpulsePenetrationThreshold = getSolverInfo().m_splitImpulsePenetrationThreshold;
+		worldInfo->m_solverInfo.m_splitImpulseTurnErp = getSolverInfo().m_splitImpulseTurnErp;
+		
+		worldInfo->m_solverInfo.m_linearSlop = getSolverInfo().m_linearSlop;
+		worldInfo->m_solverInfo.m_warmstartingFactor = getSolverInfo().m_warmstartingFactor;
+		worldInfo->m_solverInfo.m_maxGyroscopicForce = getSolverInfo().m_maxGyroscopicForce;
+		worldInfo->m_solverInfo.m_singleAxisRollingFrictionThreshold = getSolverInfo().m_singleAxisRollingFrictionThreshold;
+		
+		worldInfo->m_solverInfo.m_numIterations = getSolverInfo().m_numIterations;
+		worldInfo->m_solverInfo.m_solverMode = getSolverInfo().m_solverMode;
+		worldInfo->m_solverInfo.m_restingContactRestitutionThreshold = getSolverInfo().m_restingContactRestitutionThreshold;
+		worldInfo->m_solverInfo.m_minimumSolverBatchSize = getSolverInfo().m_minimumSolverBatchSize;
+		
+		worldInfo->m_solverInfo.m_splitImpulse = getSolverInfo().m_splitImpulse;
+	//the struct name passed to finalizeChunk is selected by the same BT_USE_DOUBLE_PRECISION switch as the struct type above
+#ifdef BT_USE_DOUBLE_PRECISION
+		const char* structType = "btDynamicsWorldDoubleData";
+#else//BT_USE_DOUBLE_PRECISION
+		const char* structType = "btDynamicsWorldFloatData";
+#endif//BT_USE_DOUBLE_PRECISION
+		serializer->finalizeChunk(chunk,structType,BT_DYNAMICSWORLD_CODE,worldInfo);
+}
+
+void	btDiscreteDynamicsWorld::serialize(btSerializer* serializer)
+{
+	//everything is written between startSerialization() and finishSerialization(): world info, rigid bodies plus constraints, then serializeCollisionObjects()
+	serializer->startSerialization();
+
+	serializeDynamicsWorldInfo(serializer);
+
+	serializeRigidBodies(serializer);
+
+	serializeCollisionObjects(serializer);
+
+	serializer->finishSerialization();
+}
+
diff --git a/Engine/lib/bullet/src/BulletDynamics/Dynamics/btDiscreteDynamicsWorld.h b/Engine/lib/bullet/src/BulletDynamics/Dynamics/btDiscreteDynamicsWorld.h
index 23284a9b7..fa934c49d 100644
--- a/Engine/lib/bullet/src/BulletDynamics/Dynamics/btDiscreteDynamicsWorld.h
+++ b/Engine/lib/bullet/src/BulletDynamics/Dynamics/btDiscreteDynamicsWorld.h
@@ -25,16 +25,21 @@ class btConstraintSolver;
 class btSimulationIslandManager;
 class btTypedConstraint;
 class btActionInterface;
-
+class btPersistentManifold;
 class btIDebugDraw;
+struct InplaceSolverIslandCallback;
+
 #include "LinearMath/btAlignedObjectArray.h"
 
 
 ///btDiscreteDynamicsWorld provides discrete rigid body simulation
 ///those classes replace the obsolete CcdPhysicsEnvironment/CcdPhysicsController
-class btDiscreteDynamicsWorld : public btDynamicsWorld
+ATTRIBUTE_ALIGNED16(class) btDiscreteDynamicsWorld : public btDynamicsWorld
 {
 protected:
+	
+    btAlignedObjectArray<btTypedConstraint*>	m_sortedConstraints;
+	InplaceSolverIslandCallback* 	m_solverIslandCallback;
 
 	btConstraintSolver*	m_constraintSolver;
 
@@ -53,11 +58,14 @@ protected:
 	bool	m_ownsIslandManager;
 	bool	m_ownsConstraintSolver;
 	bool	m_synchronizeAllMotionStates;
+	bool	m_applySpeculativeContactRestitution;
 
 	btAlignedObjectArray<btActionInterface*>	m_actions;
 	
 	int	m_profileTimings;
 
+	btAlignedObjectArray<btPersistentManifold*>	m_predictiveManifolds;
+
 	virtual void	predictUnconstraintMotion(btScalar timeStep);
 	
 	virtual void	integrateTransforms(btScalar timeStep);
@@ -74,15 +82,19 @@ protected:
 
 	virtual void	internalSingleStepSimulation( btScalar timeStep);
 
+	void	createPredictiveContacts(btScalar timeStep);
 
 	virtual void	saveKinematicState(btScalar timeStep);
 
-	void	debugDrawSphere(btScalar radius, const btTransform& transform, const btVector3& color);
+	void	serializeRigidBodies(btSerializer* serializer);
 
+	void	serializeDynamicsWorldInfo(btSerializer* serializer);
 
 public:
 
 
+	BT_DECLARE_ALIGNED_ALLOCATOR();
+
 	///this btDiscreteDynamicsWorld constructor gets created objects from the user, and will not delete those
 	btDiscreteDynamicsWorld(btDispatcher* dispatcher,btBroadphaseInterface* pairCache,btConstraintSolver* constraintSolver,btCollisionConfiguration* collisionConfiguration);
 
@@ -135,7 +147,6 @@ public:
 	///removeCollisionObject will first check if it is a rigid body, if so call removeRigidBody otherwise call btCollisionWorld::removeCollisionObject
 	virtual void	removeCollisionObject(btCollisionObject* collisionObject);
 
-	void	debugDrawObject(const btTransform& worldTransform, const btCollisionShape* shape, const btVector3& color);
 
 	void	debugDrawConstraint(btTypedConstraint* constraint);
 
@@ -192,6 +203,19 @@ public:
 		return m_synchronizeAllMotionStates;
 	}
 
+	void setApplySpeculativeContactRestitution(bool enable)
+	{
+		m_applySpeculativeContactRestitution = enable;
+	}
+	
+	bool getApplySpeculativeContactRestitution() const
+	{
+		return m_applySpeculativeContactRestitution;
+	}
+
+	///Preliminary serialization test for Bullet 2.76. Loading those files requires a separate parser (see Bullet/Demos/SerializeDemo)
+	virtual	void	serialize(btSerializer* serializer);
+
 };
 
 #endif //BT_DISCRETE_DYNAMICS_WORLD_H
diff --git a/Engine/lib/bullet/src/BulletDynamics/Dynamics/btDynamicsWorld.h b/Engine/lib/bullet/src/BulletDynamics/Dynamics/btDynamicsWorld.h
index a7b85afbe..7d5c621f8 100644
--- a/Engine/lib/bullet/src/BulletDynamics/Dynamics/btDynamicsWorld.h
+++ b/Engine/lib/bullet/src/BulletDynamics/Dynamics/btDynamicsWorld.h
@@ -32,7 +32,8 @@ enum btDynamicsWorldType
 {
 	BT_SIMPLE_DYNAMICS_WORLD=1,
 	BT_DISCRETE_DYNAMICS_WORLD=2,
-	BT_CONTINUOUS_DYNAMICS_WORLD=3
+	BT_CONTINUOUS_DYNAMICS_WORLD=3,
+	BT_SOFT_RIGID_DYNAMICS_WORLD=4
 };
 
 ///The btDynamicsWorld is the interface class for several dynamics implementation, basic, discrete, parallel, and continuous etc.
@@ -86,6 +87,8 @@ public:
 
 		virtual void	addRigidBody(btRigidBody* body) = 0;
 
+		virtual void	addRigidBody(btRigidBody* body, short group, short mask) = 0;
+
 		virtual void	removeRigidBody(btRigidBody* body) = 0;
 
 		virtual void	setConstraintSolver(btConstraintSolver* solver) = 0;
@@ -143,6 +146,21 @@ public:
 
 };
 
+///do not change those serialization structures, it requires an updated sBulletDNAstr/sBulletDNAstr64
+struct btDynamicsWorldDoubleData
+{
+	btContactSolverInfoDoubleData	m_solverInfo;
+	btVector3DoubleData	m_gravity;
+};
+
+///do not change those serialization structures, it requires an updated sBulletDNAstr/sBulletDNAstr64
+struct btDynamicsWorldFloatData
+{
+	btContactSolverInfoFloatData	m_solverInfo;
+	btVector3FloatData	m_gravity;
+};
+
+
 #endif //BT_DYNAMICS_WORLD_H
 
 
diff --git a/Engine/lib/bullet/src/BulletDynamics/Dynamics/btRigidBody.cpp b/Engine/lib/bullet/src/BulletDynamics/Dynamics/btRigidBody.cpp
index a4d8e1d77..222f90066 100644
--- a/Engine/lib/bullet/src/BulletDynamics/Dynamics/btRigidBody.cpp
+++ b/Engine/lib/bullet/src/BulletDynamics/Dynamics/btRigidBody.cpp
@@ -19,6 +19,7 @@ subject to the following restrictions:
 #include "LinearMath/btTransformUtil.h"
 #include "LinearMath/btMotionState.h"
 #include "BulletDynamics/ConstraintSolver/btTypedConstraint.h"
+#include "LinearMath/btSerializer.h"
 
 //'temporarily' global variables
 btScalar	gDeactivationTime = btScalar(2.);
@@ -50,8 +51,8 @@ void	btRigidBody::setupRigidBody(const btRigidBody::btRigidBodyConstructionInfo&
 	m_gravity_acceleration.setValue(btScalar(0.0), btScalar(0.0), btScalar(0.0));
 	m_totalForce.setValue(btScalar(0.0), btScalar(0.0), btScalar(0.0));
 	m_totalTorque.setValue(btScalar(0.0), btScalar(0.0), btScalar(0.0)),
-	m_linearDamping = btScalar(0.);
-	m_angularDamping = btScalar(0.5);
+    setDamping(constructionInfo.m_linearDamping, constructionInfo.m_angularDamping);
+
 	m_linearSleepingThreshold = constructionInfo.m_linearSleepingThreshold;
 	m_angularSleepingThreshold = constructionInfo.m_angularSleepingThreshold;
 	m_optionalMotionState = constructionInfo.m_motionState;
@@ -77,15 +78,26 @@ void	btRigidBody::setupRigidBody(const btRigidBody::btRigidBodyConstructionInfo&
 	
 	//moved to btCollisionObject
 	m_friction = constructionInfo.m_friction;
+	m_rollingFriction = constructionInfo.m_rollingFriction;
 	m_restitution = constructionInfo.m_restitution;
 
 	setCollisionShape( constructionInfo.m_collisionShape );
 	m_debugBodyId = uniqueId++;
 	
 	setMassProps(constructionInfo.m_mass, constructionInfo.m_localInertia);
-    setDamping(constructionInfo.m_linearDamping, constructionInfo.m_angularDamping);
 	updateInertiaTensor();
 
+	m_rigidbodyFlags = 0;
+
+
+	m_deltaLinearVelocity.setZero();
+	m_deltaAngularVelocity.setZero();
+	m_invMass = m_inverseMass*m_linearFactor;
+	m_pushVelocity.setZero();
+	m_turnVelocity.setZero();
+
+	
+
 }
 
 
@@ -136,8 +148,8 @@ void btRigidBody::setGravity(const btVector3& acceleration)
 
 void btRigidBody::setDamping(btScalar lin_damping, btScalar ang_damping)
 {
-	m_linearDamping = GEN_clamped(lin_damping, (btScalar)btScalar(0.0), (btScalar)btScalar(1.0));
-	m_angularDamping = GEN_clamped(ang_damping, (btScalar)btScalar(0.0), (btScalar)btScalar(1.0));
+	m_linearDamping = btClamped(lin_damping, (btScalar)btScalar(0.0), (btScalar)btScalar(1.0));
+	m_angularDamping = btClamped(ang_damping, (btScalar)btScalar(0.0), (btScalar)btScalar(1.0));
 }
 
 
@@ -227,21 +239,41 @@ void btRigidBody::setMassProps(btScalar mass, const btVector3& inertia)
 		m_collisionFlags &= (~btCollisionObject::CF_STATIC_OBJECT);
 		m_inverseMass = btScalar(1.0) / mass;
 	}
+
+	//Fg = m * a
+	m_gravity = mass * m_gravity_acceleration;
 	
 	m_invInertiaLocal.setValue(inertia.x() != btScalar(0.0) ? btScalar(1.0) / inertia.x(): btScalar(0.0),
 				   inertia.y() != btScalar(0.0) ? btScalar(1.0) / inertia.y(): btScalar(0.0),
 				   inertia.z() != btScalar(0.0) ? btScalar(1.0) / inertia.z(): btScalar(0.0));
 
+	m_invMass = m_linearFactor*m_inverseMass;
 }
 
 	
-
 void btRigidBody::updateInertiaTensor() 
 {
 	m_invInertiaTensorWorld = m_worldTransform.getBasis().scaled(m_invInertiaLocal) * m_worldTransform.getBasis().transpose();
 }
 
 
+btVector3 btRigidBody::computeGyroscopicForce(btScalar maxGyroscopicForce) const
+{
+	//rebuild the local inertia diagonal from its stored inverse; a zero inverse entry maps to 0 so no division by zero (inf/NaN) occurs
+	const btVector3& invInertiaDiag = getInvInertiaDiagLocal();
+	btVector3 inertiaLocal;
+	for (int axis=0;axis<3;axis++)
+		inertiaLocal[axis] = (invInertiaDiag[axis] != btScalar(0.)) ? btScalar(1.)/invInertiaDiag[axis] : btScalar(0.);
+	btMatrix3x3 inertiaTensorWorld = getWorldTransform().getBasis().scaled(inertiaLocal) * getWorldTransform().getBasis().transpose();
+	//gyroscopic term: angular velocity crossed with (world inertia tensor * angular velocity)
+	btVector3 gf = getAngularVelocity().cross(inertiaTensorWorld*getAngularVelocity());
+	btScalar l2 = gf.length2();
+	//rescale so the returned magnitude never exceeds maxGyroscopicForce
+	if (l2>maxGyroscopicForce*maxGyroscopicForce)
+		gf *= btScalar(1.)/btSqrt(l2)*maxGyroscopicForce;
+	return gf;
+}
+
 void btRigidBody::integrateVelocities(btScalar step) 
 {
 	if (isStaticOrKinematicObject())
@@ -271,7 +303,7 @@ btQuaternion btRigidBody::getOrientation() const
 void btRigidBody::setCenterOfMassTransform(const btTransform& xform)
 {
 
-	if (isStaticOrKinematicObject())
+	if (isKinematicObject())
 	{
 		m_interpolationWorldTransform = m_worldTransform;
 	} else
@@ -285,22 +317,25 @@ void btRigidBody::setCenterOfMassTransform(const btTransform& xform)
 }
 
 
-bool btRigidBody::checkCollideWithOverride(btCollisionObject* co)
+bool btRigidBody::checkCollideWithOverride(const  btCollisionObject* co) const
 {
-	btRigidBody* otherRb = btRigidBody::upcast(co);
+	const btRigidBody* otherRb = btRigidBody::upcast(co);
 	if (!otherRb)
 		return true;
 
 	for (int i = 0; i < m_constraintRefs.size(); ++i)
 	{
-		btTypedConstraint* c = m_constraintRefs[i];
-		if (&c->getRigidBodyA() == otherRb || &c->getRigidBodyB() == otherRb)
-			return false;
+		const btTypedConstraint* c = m_constraintRefs[i];
+		if (c->isEnabled())
+			if (&c->getRigidBodyA() == otherRb || &c->getRigidBodyB() == otherRb)
+				return false;
 	}
 
 	return true;
 }
 
+
+
 void btRigidBody::addConstraintRef(btTypedConstraint* c)
 {
 	int index = m_constraintRefs.findLinearSearch(c);
@@ -315,3 +350,51 @@ void btRigidBody::removeConstraintRef(btTypedConstraint* c)
 	m_constraintRefs.remove(c);
 	m_checkCollideWith = m_constraintRefs.size() > 0;
 }
+
+int	btRigidBody::calculateSerializeBufferSize()	const
+{
+	//one btRigidBodyData is written per body; that name resolves to the float or double struct depending on BT_USE_DOUBLE_PRECISION
+	return sizeof(btRigidBodyData);
+}
+
+	///fills the dataBuffer (treated as a btRigidBodyData) and returns the struct name; this implementation always returns btRigidBodyDataName
+const char*	btRigidBody::serialize(void* dataBuffer, class btSerializer* serializer) const
+{
+	btRigidBodyData* rbd = (btRigidBodyData*) dataBuffer;
+	//the btCollisionObject part is written first, into the embedded m_collisionObjectData member
+	btCollisionObject::serialize(&rbd->m_collisionObjectData, serializer);
+	//then the rigid-body state is copied field by field
+	m_invInertiaTensorWorld.serialize(rbd->m_invInertiaTensorWorld);
+	m_linearVelocity.serialize(rbd->m_linearVelocity);
+	m_angularVelocity.serialize(rbd->m_angularVelocity);
+	rbd->m_inverseMass = m_inverseMass;
+	m_angularFactor.serialize(rbd->m_angularFactor);
+	m_linearFactor.serialize(rbd->m_linearFactor);
+	m_gravity.serialize(rbd->m_gravity);
+	m_gravity_acceleration.serialize(rbd->m_gravity_acceleration);
+	m_invInertiaLocal.serialize(rbd->m_invInertiaLocal);
+	m_totalForce.serialize(rbd->m_totalForce);
+	m_totalTorque.serialize(rbd->m_totalTorque);
+	rbd->m_linearDamping = m_linearDamping;
+	rbd->m_angularDamping = m_angularDamping;
+	rbd->m_additionalDamping = m_additionalDamping;
+	rbd->m_additionalDampingFactor = m_additionalDampingFactor;
+	rbd->m_additionalLinearDampingThresholdSqr = m_additionalLinearDampingThresholdSqr;
+	rbd->m_additionalAngularDampingThresholdSqr = m_additionalAngularDampingThresholdSqr;
+	rbd->m_additionalAngularDampingFactor = m_additionalAngularDampingFactor;
+	rbd->m_linearSleepingThreshold=m_linearSleepingThreshold;
+	rbd->m_angularSleepingThreshold = m_angularSleepingThreshold;
+
+	return btRigidBodyDataName;
+}
+
+
+
+void btRigidBody::serializeSingleObject(class btSerializer* serializer) const
+{
+	//allocate one chunk sized for this body, fill it through serialize(), then finalize it under BT_RIGIDBODY_CODE
+	btChunk* bodyChunk = serializer->allocate(calculateSerializeBufferSize(),1);
+	serializer->finalizeChunk(bodyChunk,serialize(bodyChunk->m_oldPtr, serializer),BT_RIGIDBODY_CODE,(void*)this);
+}
+
+
diff --git a/Engine/lib/bullet/src/BulletDynamics/Dynamics/btRigidBody.h b/Engine/lib/bullet/src/BulletDynamics/Dynamics/btRigidBody.h
index da1fcb786..c5bf63b4c 100644
--- a/Engine/lib/bullet/src/BulletDynamics/Dynamics/btRigidBody.h
+++ b/Engine/lib/bullet/src/BulletDynamics/Dynamics/btRigidBody.h
@@ -13,8 +13,8 @@ subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-#ifndef RIGIDBODY_H
-#define RIGIDBODY_H
+#ifndef BT_RIGIDBODY_H
+#define BT_RIGIDBODY_H
 
 #include "LinearMath/btAlignedObjectArray.h"
 #include "LinearMath/btTransform.h"
@@ -29,6 +29,24 @@ class btTypedConstraint;
 extern btScalar gDeactivationTime;
 extern bool gDisableDeactivation;
 
+#ifdef BT_USE_DOUBLE_PRECISION
+#define btRigidBodyData	btRigidBodyDoubleData
+#define btRigidBodyDataName	"btRigidBodyDoubleData"
+#else
+#define btRigidBodyData	btRigidBodyFloatData
+#define btRigidBodyDataName	"btRigidBodyFloatData"
+#endif //BT_USE_DOUBLE_PRECISION
+
+
+enum	btRigidBodyFlags
+{
+	BT_DISABLE_WORLD_GRAVITY = 1,
+	///BT_ENABLE_GYROPSCOPIC_FORCE can easily introduce instability,
+	///so generally it is best not to enable it.
+	///If really needed, run at a high frequency like 1000 Hertz. See Demos/GyroscopicDemo for an example use.
+	BT_ENABLE_GYROPSCOPIC_FORCE = 2
+};
+
 
 ///The btRigidBody is the main class for rigid body objects. It is derived from btCollisionObject, so it keeps a pointer to a btCollisionShape.
 ///It is recommended for performance and memory use to share btCollisionShape objects whenever possible.
@@ -45,7 +63,6 @@ class btRigidBody  : public btCollisionObject
 	btVector3		m_linearVelocity;
 	btVector3		m_angularVelocity;
 	btScalar		m_inverseMass;
-	btVector3		m_angularFactor;
 	btVector3		m_linearFactor;
 
 	btVector3		m_gravity;	
@@ -73,6 +90,21 @@ class btRigidBody  : public btCollisionObject
 	//keep track of typed constraints referencing this rigid body
 	btAlignedObjectArray<btTypedConstraint*> m_constraintRefs;
 
+	int				m_rigidbodyFlags;
+	
+	int				m_debugBodyId;
+	
+
+protected:
+
+	ATTRIBUTE_ALIGNED64(btVector3		m_deltaLinearVelocity);
+	btVector3		m_deltaAngularVelocity;
+	btVector3		m_angularFactor;
+	btVector3		m_invMass;
+	btVector3		m_pushVelocity;
+	btVector3		m_turnVelocity;
+
+
 public:
 
 
@@ -97,6 +129,9 @@ public:
 
 		///best simulation results when friction is non-zero
 		btScalar			m_friction;
+		///the m_rollingFriction prevents rounded shapes, such as spheres, cylinders and capsules from rolling forever.
+		///See Bullet/Demos/RollingFrictionDemo for usage
+		btScalar			m_rollingFriction;
 		///best simulation results using zero restitution.
 		btScalar			m_restitution;
 
@@ -111,7 +146,6 @@ public:
 		btScalar			m_additionalAngularDampingThresholdSqr;
 		btScalar			m_additionalAngularDampingFactor;
 
-		
 		btRigidBodyConstructionInfo(	btScalar mass, btMotionState* motionState, btCollisionShape* collisionShape, const btVector3& localInertia=btVector3(0,0,0)):
 		m_mass(mass),
 			m_motionState(motionState),
@@ -120,6 +154,7 @@ public:
 			m_linearDamping(btScalar(0.)),
 			m_angularDamping(btScalar(0.)),
 			m_friction(btScalar(0.5)),
+			m_rollingFriction(btScalar(0)),
 			m_restitution(btScalar(0.)),
 			m_linearSleepingThreshold(btScalar(0.8)),
 			m_angularSleepingThreshold(btScalar(1.f)),
@@ -161,13 +196,13 @@ public:
 	///but a rigidbody is derived from btCollisionObject, so we can safely perform an upcast
 	static const btRigidBody*	upcast(const btCollisionObject* colObj)
 	{
-		if (colObj->getInternalType()==btCollisionObject::CO_RIGID_BODY)
+		if (colObj->getInternalType()&btCollisionObject::CO_RIGID_BODY)
 			return (const btRigidBody*)colObj;
 		return 0;
 	}
 	static btRigidBody*	upcast(btCollisionObject* colObj)
 	{
-		if (colObj->getInternalType()==btCollisionObject::CO_RIGID_BODY)
+		if (colObj->getInternalType()&btCollisionObject::CO_RIGID_BODY)
 			return (btRigidBody*)colObj;
 		return 0;
 	}
@@ -227,6 +262,7 @@ public:
 	void setLinearFactor(const btVector3& linearFactor)
 	{
 		m_linearFactor = linearFactor;
+		m_invMass = m_linearFactor*m_inverseMass;
 	}
 	btScalar		getInvMass() const { return m_inverseMass; }
 	const btMatrix3x3& getInvInertiaTensorWorld() const { 
@@ -242,12 +278,12 @@ public:
 		m_totalForce += force*m_linearFactor;
 	}
 
-	const btVector3& getTotalForce()
+	const btVector3& getTotalForce() const
 	{
 		return m_totalForce;
 	};
 
-	const btVector3& getTotalTorque()
+	const btVector3& getTotalTorque() const
 	{
 		return m_totalTorque;
 	};
@@ -301,19 +337,6 @@ public:
 		}
 	}
 
-	//Optimization for the iterative solver: avoid calculating constant terms involving inertia, normal, relative position
-	SIMD_FORCE_INLINE void internalApplyImpulse(const btVector3& linearComponent, const btVector3& angularComponent,btScalar impulseMagnitude)
-	{
-		if (m_inverseMass != btScalar(0.))
-		{
-			m_linearVelocity += linearComponent*m_linearFactor*impulseMagnitude;
-			if (m_angularFactor)
-			{
-				m_angularVelocity += angularComponent*m_angularFactor*impulseMagnitude;
-			}
-		}
-	}
-	
 	void clearForces() 
 	{
 		m_totalForce.setValue(btScalar(0.0), btScalar(0.0), btScalar(0.0));
@@ -479,7 +502,7 @@ public:
 		return (getBroadphaseProxy() != 0);
 	}
 
-	virtual bool checkCollideWithOverride(btCollisionObject* co);
+	virtual bool checkCollideWithOverride(const  btCollisionObject* co) const;
 
 	void addConstraintRef(btTypedConstraint* c);
 	void removeConstraintRef(btTypedConstraint* c);
@@ -489,15 +512,89 @@ public:
 		return m_constraintRefs[index];
 	}
 
-	int getNumConstraintRefs()
+	int getNumConstraintRefs() const
 	{
 		return m_constraintRefs.size();
 	}
 
-	int	m_debugBodyId;
+	void	setFlags(int flags)
+	{
+		m_rigidbodyFlags = flags;
+	}
+
+	int getFlags() const
+	{
+		return m_rigidbodyFlags;
+	}
+
+	btVector3 computeGyroscopicForce(btScalar maxGyroscopicForce) const;
+
+	///////////////////////////////////////////////
+
+	virtual	int	calculateSerializeBufferSize()	const;
+
+	///fills the dataBuffer and returns the struct name (and 0 on failure)
+	virtual	const char*	serialize(void* dataBuffer,  class btSerializer* serializer) const;
+
+	virtual void serializeSingleObject(class btSerializer* serializer) const;
+
+};
+
+//@todo add m_optionalMotionState and m_constraintRefs to btRigidBodyData
+///do not change these serialization structures: any change requires an updated sBulletDNAstr/sBulletDNAstr64
+struct	btRigidBodyFloatData
+{
+	btCollisionObjectFloatData	m_collisionObjectData;
+	btMatrix3x3FloatData		m_invInertiaTensorWorld;
+	btVector3FloatData		m_linearVelocity;
+	btVector3FloatData		m_angularVelocity;
+	btVector3FloatData		m_angularFactor;
+	btVector3FloatData		m_linearFactor;
+	btVector3FloatData		m_gravity;	
+	btVector3FloatData		m_gravity_acceleration;
+	btVector3FloatData		m_invInertiaLocal;
+	btVector3FloatData		m_totalForce;
+	btVector3FloatData		m_totalTorque;
+	float					m_inverseMass;
+	float					m_linearDamping;
+	float					m_angularDamping;
+	float					m_additionalDampingFactor;
+	float					m_additionalLinearDampingThresholdSqr;
+	float					m_additionalAngularDampingThresholdSqr;
+	float					m_additionalAngularDampingFactor;
+	float					m_linearSleepingThreshold;
+	float					m_angularSleepingThreshold;
+	int						m_additionalDamping;
+};
+
+///do not change these serialization structures: any change requires an updated sBulletDNAstr/sBulletDNAstr64
+struct	btRigidBodyDoubleData
+{
+	btCollisionObjectDoubleData	m_collisionObjectData;
+	btMatrix3x3DoubleData		m_invInertiaTensorWorld;
+	btVector3DoubleData		m_linearVelocity;
+	btVector3DoubleData		m_angularVelocity;
+	btVector3DoubleData		m_angularFactor;
+	btVector3DoubleData		m_linearFactor;
+	btVector3DoubleData		m_gravity;	
+	btVector3DoubleData		m_gravity_acceleration;
+	btVector3DoubleData		m_invInertiaLocal;
+	btVector3DoubleData		m_totalForce;
+	btVector3DoubleData		m_totalTorque;
+	double					m_inverseMass;
+	double					m_linearDamping;
+	double					m_angularDamping;
+	double					m_additionalDampingFactor;
+	double					m_additionalLinearDampingThresholdSqr;
+	double					m_additionalAngularDampingThresholdSqr;
+	double					m_additionalAngularDampingFactor;
+	double					m_linearSleepingThreshold;
+	double					m_angularSleepingThreshold;
+	int						m_additionalDamping;
+	char	m_padding[4];
 };
 
 
 
-#endif
+#endif //BT_RIGIDBODY_H
 
diff --git a/Engine/lib/bullet/src/BulletDynamics/Dynamics/btSimpleDynamicsWorld.cpp b/Engine/lib/bullet/src/BulletDynamics/Dynamics/btSimpleDynamicsWorld.cpp
index ae449f292..5fc2f3cf8 100644
--- a/Engine/lib/bullet/src/BulletDynamics/Dynamics/btSimpleDynamicsWorld.cpp
+++ b/Engine/lib/bullet/src/BulletDynamics/Dynamics/btSimpleDynamicsWorld.cpp
@@ -78,7 +78,7 @@ int		btSimpleDynamicsWorld::stepSimulation( btScalar timeStep,int maxSubSteps, b
 		btContactSolverInfo infoGlobal;
 		infoGlobal.m_timeStep = timeStep;
 		m_constraintSolver->prepareSolve(0,numManifolds);
-		m_constraintSolver->solveGroup(0,0,manifoldPtr, numManifolds,0,0,infoGlobal,m_debugDrawer, m_stackAlloc,m_dispatcher1);
+		m_constraintSolver->solveGroup(&getCollisionObjectArray()[0],getNumCollisionObjects(),manifoldPtr, numManifolds,0,0,infoGlobal,m_debugDrawer, m_stackAlloc,m_dispatcher1);
 		m_constraintSolver->allSolved(infoGlobal,m_debugDrawer, m_stackAlloc);
 	}
 
@@ -155,6 +155,33 @@ void	btSimpleDynamicsWorld::addRigidBody(btRigidBody* body)
 	}
 }
 
+void	btSimpleDynamicsWorld::addRigidBody(btRigidBody* body, short group, short mask)
+{
+	body->setGravity(m_gravity);
+
+	if (body->getCollisionShape())
+	{
+		addCollisionObject(body,group,mask);
+	}
+}
+
+
+void	btSimpleDynamicsWorld::debugDrawWorld()
+{
+
+}
+				
+void	btSimpleDynamicsWorld::addAction(btActionInterface* action)
+{
+
+}
+
+void	btSimpleDynamicsWorld::removeAction(btActionInterface* action)
+{
+
+}
+
+
 void	btSimpleDynamicsWorld::updateAabbs()
 {
 	btTransform predictedTrans;
diff --git a/Engine/lib/bullet/src/BulletDynamics/Dynamics/btSimpleDynamicsWorld.h b/Engine/lib/bullet/src/BulletDynamics/Dynamics/btSimpleDynamicsWorld.h
index ad1f54134..d48d2e39c 100644
--- a/Engine/lib/bullet/src/BulletDynamics/Dynamics/btSimpleDynamicsWorld.h
+++ b/Engine/lib/bullet/src/BulletDynamics/Dynamics/btSimpleDynamicsWorld.h
@@ -23,7 +23,7 @@ class btOverlappingPairCache;
 class btConstraintSolver;
 
 ///The btSimpleDynamicsWorld serves as unit-test and to verify more complicated and optimized dynamics worlds.
-///Please use btDiscreteDynamicsWorld instead (or btContinuousDynamicsWorld once it is finished).
+///Please use btDiscreteDynamicsWorld instead
 class btSimpleDynamicsWorld : public btDynamicsWorld
 {
 protected:
@@ -56,8 +56,16 @@ public:
 
 	virtual void	addRigidBody(btRigidBody* body);
 
+	virtual void	addRigidBody(btRigidBody* body, short group, short mask);
+
 	virtual void	removeRigidBody(btRigidBody* body);
 
+	virtual void	debugDrawWorld();
+				
+	virtual void	addAction(btActionInterface* action);
+
+	virtual void	removeAction(btActionInterface* action);
+
 	///removeCollisionObject will first check if it is a rigid body, if so call removeRigidBody otherwise call btCollisionWorld::removeCollisionObject
 	virtual void	removeCollisionObject(btCollisionObject* collisionObject);
 	
diff --git a/Engine/lib/bullet/src/BulletDynamics/Jamfile b/Engine/lib/bullet/src/BulletDynamics/Jamfile
deleted file mode 100644
index b4c52b230..000000000
--- a/Engine/lib/bullet/src/BulletDynamics/Jamfile
+++ /dev/null
@@ -1,13 +0,0 @@
-
-SubDir TOP src BulletDynamics ;
-
-Description bulletdynamics : "Bullet Rigidbody Dynamics" ;
-Library bulletdynamics : 
-  [ Wildcard ConstraintSolver : *.h *.cpp ] 
-  [ Wildcard Dynamics : *.h *.cpp ] 
-  [ Wildcard Vehicle : *.h *.cpp ] 
-  [ Wildcard Character : *.h *.cpp ]
-;
-
-LibDepends bulletdynamics : bulletcollision  ;
-
diff --git a/Engine/lib/bullet/src/BulletDynamics/Vehicle/btRaycastVehicle.cpp b/Engine/lib/bullet/src/BulletDynamics/Vehicle/btRaycastVehicle.cpp
index 031fcb5b4..77b475b96 100644
--- a/Engine/lib/bullet/src/BulletDynamics/Vehicle/btRaycastVehicle.cpp
+++ b/Engine/lib/bullet/src/BulletDynamics/Vehicle/btRaycastVehicle.cpp
@@ -22,7 +22,15 @@
 #include "LinearMath/btIDebugDraw.h"
 #include "BulletDynamics/ConstraintSolver/btContactConstraint.h"
 
-static btRigidBody s_fixedObject( 0,0,0);
+#define ROLLING_INFLUENCE_FIX
+
+
+btRigidBody& btActionInterface::getFixedBody()
+{
+	static btRigidBody s_fixed(0, 0,0);
+	s_fixed.setMassProps(btScalar(0.),btVector3(btScalar(0.),btScalar(0.),btScalar(0.)));
+	return s_fixed;
+}
 
 btRaycastVehicle::btRaycastVehicle(const btVehicleTuning& tuning,btRigidBody* chassis,	btVehicleRaycaster* raycaster )
 :m_vehicleRaycaster(raycaster),
@@ -70,6 +78,7 @@ btWheelInfo&	btRaycastVehicle::addWheel( const btVector3& connectionPointCS, con
 	ci.m_frictionSlip = tuning.m_frictionSlip;
 	ci.m_bIsFrontWheel = isFrontWheel;
 	ci.m_maxSuspensionTravelCm = tuning.m_maxSuspensionTravelCm;
+	ci.m_maxSuspensionForce = tuning.m_maxSuspensionForce;
 
 	m_wheelInfo.push_back( btWheelInfo(ci));
 	
@@ -186,7 +195,7 @@ btScalar btRaycastVehicle::rayCast(btWheelInfo& wheel)
 		wheel.m_raycastInfo.m_contactNormalWS  = rayResults.m_hitNormalInWorld;
 		wheel.m_raycastInfo.m_isInContact = true;
 		
-		wheel.m_raycastInfo.m_groundObject = &s_fixedObject;///@todo for driving on dynamic/movable objects!;
+		wheel.m_raycastInfo.m_groundObject = &getFixedBody();///@todo for driving on dynamic/movable objects!;
 		//wheel.m_raycastInfo.m_groundObject = object;
 
 
@@ -301,10 +310,9 @@ void btRaycastVehicle::updateVehicle( btScalar step )
 		
 		btScalar suspensionForce = wheel.m_wheelsSuspensionForce;
 		
-		btScalar gMaxSuspensionForce = btScalar(6000.);
-		if (suspensionForce > gMaxSuspensionForce)
+		if (suspensionForce > wheel.m_maxSuspensionForce)
 		{
-			suspensionForce = gMaxSuspensionForce;
+			suspensionForce = wheel.m_maxSuspensionForce;
 		}
 		btVector3 impulse = wheel.m_raycastInfo.m_contactNormalWS * suspensionForce * step;
 		btVector3 relpos = wheel.m_raycastInfo.m_contactPointWS - getRigidBody()->getCenterOfMassPosition();
@@ -689,7 +697,12 @@ void	btRaycastVehicle::updateFriction(btScalar	timeStep)
 					
 					btVector3 sideImp = m_axle[wheel] * m_sideImpulse[wheel];
 
+#if defined ROLLING_INFLUENCE_FIX // fix. It only worked if car's up was along Y - VT.
+					btVector3 vChassisWorldUp = getRigidBody()->getCenterOfMassTransform().getBasis().getColumn(m_indexUpAxis);
+					rel_pos -= vChassisWorldUp * (vChassisWorldUp.dot(rel_pos) * (1.f-wheelInfo.m_rollInfluence));
+#else
 					rel_pos[m_indexUpAxis] *= wheelInfo.m_rollInfluence;
+#endif
 					m_chassisBody->applyImpulse(sideImp,rel_pos);
 
 					//apply friction impulse on the ground
@@ -708,13 +721,13 @@ void	btRaycastVehicle::debugDraw(btIDebugDraw* debugDrawer)
 
 	for (int v=0;v<this->getNumWheels();v++)
 	{
-		btVector3 wheelColor(0,255,255);
+		btVector3 wheelColor(0,1,1);
 		if (getWheelInfo(v).m_raycastInfo.m_isInContact)
 		{
-			wheelColor.setValue(0,0,255);
+			wheelColor.setValue(0,0,1);
 		} else
 		{
-			wheelColor.setValue(255,0,255);
+			wheelColor.setValue(1,0,1);
 		}
 
 		btVector3 wheelPosWS = getWheelInfo(v).m_worldTransform.getOrigin();
@@ -743,14 +756,14 @@ void* btDefaultVehicleRaycaster::castRay(const btVector3& from,const btVector3&
 	if (rayCallback.hasHit())
 	{
 		
-		btRigidBody* body = btRigidBody::upcast(rayCallback.m_collisionObject);
+		const btRigidBody* body = btRigidBody::upcast(rayCallback.m_collisionObject);
         if (body && body->hasContactResponse())
 		{
 			result.m_hitPointInWorld = rayCallback.m_hitPointWorld;
 			result.m_hitNormalInWorld = rayCallback.m_hitNormalWorld;
 			result.m_hitNormalInWorld.normalize();
 			result.m_distFraction = rayCallback.m_closestHitFraction;
-			return body;
+			return (void*)body;
 		}
 	}
 	return 0;
diff --git a/Engine/lib/bullet/src/BulletDynamics/Vehicle/btRaycastVehicle.h b/Engine/lib/bullet/src/BulletDynamics/Vehicle/btRaycastVehicle.h
index 58eef98d2..f59555f94 100644
--- a/Engine/lib/bullet/src/BulletDynamics/Vehicle/btRaycastVehicle.h
+++ b/Engine/lib/bullet/src/BulletDynamics/Vehicle/btRaycastVehicle.h
@@ -8,8 +8,8 @@
  * of this software for any purpose.  
  * It is provided "as is" without express or implied warranty.
 */
-#ifndef RAYCASTVEHICLE_H
-#define RAYCASTVEHICLE_H
+#ifndef BT_RAYCASTVEHICLE_H
+#define BT_RAYCASTVEHICLE_H
 
 #include "BulletDynamics/Dynamics/btRigidBody.h"
 #include "BulletDynamics/ConstraintSolver/btTypedConstraint.h"
@@ -29,6 +29,10 @@ class btRaycastVehicle : public btActionInterface
 		btAlignedObjectArray<btVector3>	m_axle;
 		btAlignedObjectArray<btScalar>	m_forwardImpulse;
 		btAlignedObjectArray<btScalar>	m_sideImpulse;
+	
+		///backwards compatibility
+		int	m_userConstraintType;
+		int	m_userConstraintId;
 
 public:
 	class btVehicleTuning
@@ -40,7 +44,8 @@ public:
 				m_suspensionCompression(btScalar(0.83)),
 				m_suspensionDamping(btScalar(0.88)),
 				m_maxSuspensionTravelCm(btScalar(500.)),
-				m_frictionSlip(btScalar(10.5))
+				m_frictionSlip(btScalar(10.5)),
+				m_maxSuspensionForce(btScalar(6000.))
 			{
 			}
 			btScalar	m_suspensionStiffness;
@@ -48,6 +53,7 @@ public:
 			btScalar	m_suspensionDamping;
 			btScalar	m_maxSuspensionTravelCm;
 			btScalar	m_frictionSlip;
+			btScalar	m_maxSuspensionForce;
 
 		};
 private:
@@ -78,6 +84,7 @@ public:
 	///btActionInterface interface
 	virtual void updateAction( btCollisionWorld* collisionWorld, btScalar step)
 	{
+        (void) collisionWorld;
 		updateVehicle(step);
 	}
 	
@@ -105,7 +112,7 @@ public:
 
 	void	updateWheelTransform( int wheelIndex, bool interpolatedTransform = true );
 	
-	void	setRaycastWheelInfo( int wheelIndex , bool isInContact, const btVector3& hitPoint, const btVector3& hitNormal,btScalar depth);
+//	void	setRaycastWheelInfo( int wheelIndex , bool isInContact, const btVector3& hitPoint, const btVector3& hitNormal,btScalar depth);
 
 	btWheelInfo&	addWheel( const btVector3& connectionPointCS0, const btVector3& wheelDirectionCS0,const btVector3& wheelAxleCS,btScalar suspensionRestLength,btScalar wheelRadius,const btVehicleTuning& tuning, bool isFrontWheel);
 
@@ -188,6 +195,26 @@ public:
 	}
 
 
+	///backwards compatibility
+	int getUserConstraintType() const
+	{
+		return m_userConstraintType ;
+	}
+
+	void	setUserConstraintType(int userConstraintType)
+	{
+		m_userConstraintType = userConstraintType;
+	};
+
+	void	setUserConstraintId(int uid)
+	{
+		m_userConstraintId = uid;
+	}
+
+	int getUserConstraintId() const
+	{
+		return m_userConstraintId;
+	}
 
 };
 
@@ -205,5 +232,5 @@ public:
 };
 
 
-#endif //RAYCASTVEHICLE_H
+#endif //BT_RAYCASTVEHICLE_H
 
diff --git a/Engine/lib/bullet/src/BulletDynamics/Vehicle/btVehicleRaycaster.h b/Engine/lib/bullet/src/BulletDynamics/Vehicle/btVehicleRaycaster.h
index 5112ce6d4..3cc909c65 100644
--- a/Engine/lib/bullet/src/BulletDynamics/Vehicle/btVehicleRaycaster.h
+++ b/Engine/lib/bullet/src/BulletDynamics/Vehicle/btVehicleRaycaster.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2005 Erwin Coumans http://continuousphysics.com/Bullet/
+ * Copyright (c) 2005 Erwin Coumans http://bulletphysics.org
  *
  * Permission to use, copy, modify, distribute and sell this software
  * and its documentation for any purpose is hereby granted without fee,
@@ -8,8 +8,8 @@
  * of this software for any purpose.  
  * It is provided "as is" without express or implied warranty.
 */
-#ifndef VEHICLE_RAYCASTER_H
-#define VEHICLE_RAYCASTER_H
+#ifndef BT_VEHICLE_RAYCASTER_H
+#define BT_VEHICLE_RAYCASTER_H
 
 #include "LinearMath/btVector3.h"
 
@@ -31,5 +31,5 @@ virtual ~btVehicleRaycaster()
 
 };
 
-#endif //VEHICLE_RAYCASTER_H
+#endif //BT_VEHICLE_RAYCASTER_H
 
diff --git a/Engine/lib/bullet/src/BulletDynamics/Vehicle/btWheelInfo.h b/Engine/lib/bullet/src/BulletDynamics/Vehicle/btWheelInfo.h
index ac2729f4f..f916053ec 100644
--- a/Engine/lib/bullet/src/BulletDynamics/Vehicle/btWheelInfo.h
+++ b/Engine/lib/bullet/src/BulletDynamics/Vehicle/btWheelInfo.h
@@ -8,8 +8,8 @@
  * of this software for any purpose.  
  * It is provided "as is" without express or implied warranty.
 */
-#ifndef WHEEL_INFO_H
-#define WHEEL_INFO_H
+#ifndef BT_WHEEL_INFO_H
+#define BT_WHEEL_INFO_H
 
 #include "LinearMath/btVector3.h"
 #include "LinearMath/btTransform.h"
@@ -29,6 +29,7 @@ struct btWheelInfoConstructionInfo
 	btScalar		m_wheelsDampingCompression;
 	btScalar		m_wheelsDampingRelaxation;
 	btScalar		m_frictionSlip;
+	btScalar		m_maxSuspensionForce;
 	bool m_bIsFrontWheel;
 	
 };
@@ -68,6 +69,7 @@ struct btWheelInfo
 	btScalar	m_rotation;
 	btScalar	m_deltaRotation;
 	btScalar	m_rollInfluence;
+	btScalar	m_maxSuspensionForce;
 
 	btScalar	m_engineForce;
 
@@ -99,6 +101,7 @@ struct btWheelInfo
 		m_brake = btScalar(0.);
 		m_rollInfluence = btScalar(0.1);
 		m_bIsFrontWheel = ci.m_bIsFrontWheel;
+		m_maxSuspensionForce = ci.m_maxSuspensionForce;
 
 	}
 
@@ -112,5 +115,5 @@ struct btWheelInfo
 
 };
 
-#endif //WHEEL_INFO_H
+#endif //BT_WHEEL_INFO_H
 
diff --git a/Engine/lib/bullet/src/BulletDynamics/ibmsdk/Makefile b/Engine/lib/bullet/src/BulletDynamics/ibmsdk/Makefile
deleted file mode 100644
index 57505d835..000000000
--- a/Engine/lib/bullet/src/BulletDynamics/ibmsdk/Makefile
+++ /dev/null
@@ -1,53 +0,0 @@
-#### Source code Dirs
-VPATH =				\
-../ConstraintSolver		\
-../Dynamics			\
-../Vehicle
-
-ROOT = ../../..
-
-#### Library
-LIBRARY_ppu = bulletdynamics.a
-
-#### Compiler flags
-CPPFLAGS        = 		\
--DUSE_LIBSPE2			\
--I../ConstraintSolver		\
--I../Dynamics			\
--I../Vehicle			\
--I$(ROOT)/src			\
--I$(SDKINC)
-
-#### Optimization level flags
-#CC_OPT_LEVEL =  $(CC_OPT_LEVEL_DEBUG)
-CC_OPT_LEVEL =  -O3
-
-##### Objects to be archived in lib
-
-OBJS = 					\
-btContactConstraint.o			\
-btGeneric6DofConstraint.o		\
-btHingeConstraint.o			\
-btPoint2PointConstraint.o		\
-btSequentialImpulseConstraintSolver.o	\
-btSolve2LinearConstraint.o		\
-btTypedConstraint.o			\
-btDiscreteDynamicsWorld.o		\
-btRigidBody.o				\
-btSimpleDynamicsWorld.o			\
-btRaycastVehicle.o			\
-btWheelInfo.o
-#### Install directories 
-INSTALL_DIR	=  $(ROOT)/lib/ibmsdk
-INSTALL_FILES	= $(LIBRARY_ppu)
-
-IBM_CELLSDK_VERSION := $(shell if [ -d /opt/cell ]; then echo "3.0"; fi)
-
-ifeq ("$(IBM_CELLSDK_VERSION)","3.0")
-        CELL_TOP ?= /opt/cell/sdk
-        include $(CELL_TOP)/buildutils/make.footer
-else
-        CELL_TOP ?= /opt/ibm/cell-sdk/prototype
-        include $(CELL_TOP)/make.footer
-endif
-
diff --git a/Engine/lib/bullet/src/BulletDynamics/premake4.lua b/Engine/lib/bullet/src/BulletDynamics/premake4.lua
new file mode 100644
index 000000000..919edaa76
--- /dev/null
+++ b/Engine/lib/bullet/src/BulletDynamics/premake4.lua
@@ -0,0 +1,11 @@
+	project "BulletDynamics"
+		
+	kind "StaticLib"
+	targetdir "../../lib"
+	includedirs {
+		"..",
+	}
+	files {
+		"**.cpp",
+		"**.h"
+	}
\ No newline at end of file
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/CMakeLists.txt b/Engine/lib/bullet/src/BulletMultiThreaded/CMakeLists.txt
index 31fbd0411..6eeeb6d20 100644
--- a/Engine/lib/bullet/src/BulletMultiThreaded/CMakeLists.txt
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/CMakeLists.txt
@@ -1,71 +1,123 @@
 INCLUDE_DIRECTORIES(
 	${BULLET_PHYSICS_SOURCE_DIR}/src
-	${BULLET_PHYSICS_SOURCE_DIR}/src/BulletMultiThreaded/vectormath/scalar/cpp
+	${VECTOR_MATH_INCLUDE}
 )
 
-ADD_LIBRARY(BulletMultiThreaded
-		PlatformDefinitions.h
-		SpuFakeDma.cpp
-		SpuFakeDma.h
-		SpuSync.h
-		SpuDoubleBuffer.h
-		SpuLibspe2Support.cpp
-		SpuLibspe2Support.h
-		btThreadSupportInterface.cpp
-		btThreadSupportInterface.h
-		
-		Win32ThreadSupport.cpp
-		Win32ThreadSupport.h
-		PosixThreadSupport.cpp
-		PosixThreadSupport.h
-		SequentialThreadSupport.cpp
-		SequentialThreadSupport.h
-		SpuSampleTaskProcess.h
-		SpuSampleTaskProcess.cpp
+SET(BulletMultiThreaded_SRCS
+	SpuFakeDma.cpp
+	SpuLibspe2Support.cpp
+	btThreadSupportInterface.cpp
+	Win32ThreadSupport.cpp
+	PosixThreadSupport.cpp
+	SequentialThreadSupport.cpp
+	SpuSampleTaskProcess.cpp
+	SpuCollisionObjectWrapper.cpp 
+	SpuCollisionTaskProcess.cpp
+	SpuGatheringCollisionDispatcher.cpp
+	SpuContactManifoldCollisionAlgorithm.cpp
+	btParallelConstraintSolver.cpp
+	
+	#SPURS_PEGatherScatterTask/SpuPEGatherScatterTask.cpp
+	#SpuPEGatherScatterTaskProcess.cpp
 
-		SpuCollisionObjectWrapper.cpp 
-		SpuCollisionObjectWrapper.h 
-		SpuCollisionTaskProcess.h
-		SpuCollisionTaskProcess.cpp
-		SpuGatheringCollisionDispatcher.h
-		SpuGatheringCollisionDispatcher.cpp
-		SpuContactManifoldCollisionAlgorithm.cpp
-		SpuContactManifoldCollisionAlgorithm.h
-		SpuNarrowPhaseCollisionTask/Box.h
-		SpuNarrowPhaseCollisionTask/boxBoxDistance.cpp
-		SpuNarrowPhaseCollisionTask/boxBoxDistance.h
-		SpuNarrowPhaseCollisionTask/SpuContactResult.cpp
-		SpuNarrowPhaseCollisionTask/SpuContactResult.h
-		SpuNarrowPhaseCollisionTask/SpuMinkowskiPenetrationDepthSolver.cpp
-		SpuNarrowPhaseCollisionTask/SpuMinkowskiPenetrationDepthSolver.h
-		SpuNarrowPhaseCollisionTask/SpuConvexPenetrationDepthSolver.h
-		SpuNarrowPhaseCollisionTask/SpuPreferredPenetrationDirections.h
-		SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.cpp
-		SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.h
-		SpuNarrowPhaseCollisionTask/SpuCollisionShapes.cpp
-		SpuNarrowPhaseCollisionTask/SpuCollisionShapes.h
+	SpuNarrowPhaseCollisionTask/boxBoxDistance.cpp
+	SpuNarrowPhaseCollisionTask/SpuContactResult.cpp
+	SpuNarrowPhaseCollisionTask/SpuMinkowskiPenetrationDepthSolver.cpp
+	SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.cpp
+	SpuNarrowPhaseCollisionTask/SpuCollisionShapes.cpp
 
-
-#Some GPU related stuff, mainly CUDA and perhaps OpenCL
-		btGpu3DGridBroadphase.cpp
-		btGpu3DGridBroadphase.h
-		btGpu3DGridBroadphaseSharedCode.h
-		btGpu3DGridBroadphaseSharedDefs.h
-		btGpu3DGridBroadphaseSharedTypes.h
-		btGpuDefines.h
-		btGpuUtilsSharedCode.h
-		btGpuUtilsSharedDefs.h
-
-#MiniCL provides a small subset of OpenCL
-		MiniCLTaskScheduler.cpp
-		MiniCLTaskScheduler.h
-		MiniCLTask/MiniCLTask.cpp
-		MiniCLTask/MiniCLTask.h
-		../MiniCL/cl.h
-		../MiniCL/cl_gl.h
-		../MiniCL/cl_platform.h
+	#Some GPU related stuff, mainly CUDA and perhaps OpenCL
+	btGpu3DGridBroadphase.cpp
 )
 
+SET(Root_HDRS
+	PlatformDefinitions.h
+	PpuAddressSpace.h
+	SpuFakeDma.h
+	SpuDoubleBuffer.h
+	SpuLibspe2Support.h
+	btThreadSupportInterface.h
+	Win32ThreadSupport.h
+	PosixThreadSupport.h
+	SequentialThreadSupport.h
+	SpuSampleTaskProcess.h
+	#SpuCollisionObjectWrapper.cpp is a source file: it stays in BulletMultiThreaded_SRCS only, so it is not published via PUBLIC_HEADER
+	SpuCollisionObjectWrapper.h 
+	SpuCollisionTaskProcess.h
+	SpuGatheringCollisionDispatcher.h
+	SpuContactManifoldCollisionAlgorithm.h
+	btParallelConstraintSolver.h
+
+	#SPURS_PEGatherScatterTask/SpuPEGatherScatterTask.h
+	#SpuPEGatherScatterTaskProcess.h
+
+	#Some GPU related stuff, mainly CUDA and perhaps OpenCL
+	btGpu3DGridBroadphase.h
+	btGpu3DGridBroadphaseSharedCode.h
+	btGpu3DGridBroadphaseSharedDefs.h
+	btGpu3DGridBroadphaseSharedTypes.h
+	btGpuDefines.h
+	btGpuUtilsSharedCode.h
+	btGpuUtilsSharedDefs.h
+)
+
+SET(SpuNarrowPhaseCollisionTask_HDRS
+	SpuNarrowPhaseCollisionTask/Box.h
+	SpuNarrowPhaseCollisionTask/boxBoxDistance.h
+	SpuNarrowPhaseCollisionTask/SpuContactResult.h
+	SpuNarrowPhaseCollisionTask/SpuMinkowskiPenetrationDepthSolver.h
+	SpuNarrowPhaseCollisionTask/SpuConvexPenetrationDepthSolver.h
+	SpuNarrowPhaseCollisionTask/SpuPreferredPenetrationDirections.h
+	SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.h
+	SpuNarrowPhaseCollisionTask/SpuCollisionShapes.h
+)
+
+SET(BulletMultiThreaded_HDRS
+	${Root_HDRS}
+	${SpuNarrowPhaseCollisionTask_HDRS}
+)
+
+ADD_LIBRARY(BulletMultiThreaded ${BulletMultiThreaded_SRCS} ${BulletMultiThreaded_HDRS})
+SET_TARGET_PROPERTIES(BulletMultiThreaded PROPERTIES VERSION ${BULLET_VERSION})
+SET_TARGET_PROPERTIES(BulletMultiThreaded PROPERTIES SOVERSION ${BULLET_VERSION})
+
+
+SUBDIRS(GpuSoftBodySolvers)
+
+
 IF (BUILD_SHARED_LIBS)
-	TARGET_LINK_LIBRARIES(BulletMultiThreaded BulletCollision)
+	IF (UNIX)
+		TARGET_LINK_LIBRARIES(BulletMultiThreaded BulletDynamics BulletCollision pthread)
+	ELSE()
+		TARGET_LINK_LIBRARIES(BulletMultiThreaded BulletDynamics BulletCollision)
+	ENDIF()
 ENDIF (BUILD_SHARED_LIBS)
+
+
+IF (INSTALL_LIBS)
+	IF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES)
+		#INSTALL of other files requires CMake 2.6
+		IF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5)
+#			IF(INSTALL_EXTRA_LIBS)
+				IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
+					INSTALL(TARGETS BulletMultiThreaded DESTINATION .)
+				ELSE (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
+					INSTALL(TARGETS BulletMultiThreaded DESTINATION lib${LIB_SUFFIX})
+					INSTALL(DIRECTORY
+${CMAKE_CURRENT_SOURCE_DIR} DESTINATION ${INCLUDE_INSTALL_DIR} FILES_MATCHING
+PATTERN "*.h"  PATTERN ".svn" EXCLUDE PATTERN "CMakeFiles" EXCLUDE)
+				ENDIF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
+#			ENDIF (INSTALL_EXTRA_LIBS)
+		ENDIF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5)
+
+		IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
+			SET_TARGET_PROPERTIES(BulletMultiThreaded PROPERTIES FRAMEWORK true)
+		
+			SET_TARGET_PROPERTIES(BulletMultiThreaded PROPERTIES PUBLIC_HEADER "${Root_HDRS}")
+			# Have to list out sub-directories manually:
+			SET_PROPERTY(SOURCE ${SpuNarrowPhaseCollisionTask_HDRS} PROPERTY MACOSX_PACKAGE_LOCATION Headers/SpuNarrowPhaseCollisionTask)
+		
+		ENDIF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
+	ENDIF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES)
+ENDIF (INSTALL_LIBS)
+
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/CMakeLists.txt b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/CMakeLists.txt
new file mode 100644
index 000000000..224a3e0a8
--- /dev/null
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/CMakeLists.txt
@@ -0,0 +1,13 @@
+
+INCLUDE_DIRECTORIES(
+${BULLET_PHYSICS_SOURCE_DIR}/src
+)
+
+
+SUBDIRS ( 
+	OpenCL
+)
+
+IF( USE_DX11 )
+	SUBDIRS( DX11 )
+ENDIF( USE_DX11 )
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/CMakeLists.txt b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/CMakeLists.txt
new file mode 100644
index 000000000..e7492b542
--- /dev/null
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/CMakeLists.txt
@@ -0,0 +1,83 @@
+
+INCLUDE_DIRECTORIES(
+${BULLET_PHYSICS_SOURCE_DIR}/src
+)
+
+SET(DXSDK_DIR $ENV{DXSDK_DIR}) # copy of the DXSDK_DIR environment variable, read at configure time
+SET(DX11_INCLUDE_PATH  "${DIRECTX_SDK_BASE_DIR}/Include" CACHE PATH "Microsoft directX SDK include path") # PATH is a valid cache type (DOCSTRING is not); DIRECTX_SDK_BASE_DIR is not set in this file - presumably provided by a parent CMakeLists, TODO confirm
+
+
+INCLUDE_DIRECTORIES(
+${DX11_INCLUDE_PATH} "../Shared/"
+${VECTOR_MATH_INCLUDE}
+)
+
+SET(BulletSoftBodyDX11Solvers_SRCS
+	btSoftBodySolver_DX11.cpp
+	btSoftBodySolver_DX11SIMDAware.cpp
+)
+
+SET(BulletSoftBodyDX11Solvers_HDRS
+	btSoftBodySolver_DX11.h
+	btSoftBodySolver_DX11SIMDAware.h
+	../Shared/btSoftBodySolverData.h
+	btSoftBodySolverVertexData_DX11.h
+	btSoftBodySolverTriangleData_DX11.h
+	btSoftBodySolverLinkData_DX11.h
+	btSoftBodySolverLinkData_DX11SIMDAware.h
+	btSoftBodySolverBuffer_DX11.h
+	btSoftBodySolverVertexBuffer_DX11.h
+
+)
+
+# HLSL shader names (without directory or extension).
+# Each entry is expanded to HLSL/<name>.hlsl by the foreach below,
+# and the resulting list is passed to ADD_LIBRARY with the sources.
+SET(BulletSoftBodyDX11Solvers_Shaders
+	OutputToVertexArray
+	UpdateNormals
+	Integrate
+	UpdatePositions
+	UpdateNodes
+	ComputeBounds
+	SolvePositions
+	SolvePositionsSIMDBatched
+	SolveCollisionsAndUpdateVelocities
+	SolveCollisionsAndUpdateVelocitiesSIMDBatched
+	UpdatePositionsFromVelocities
+	ApplyForces
+	PrepareLinks
+	VSolveLinks
+)
+
+foreach(f ${BulletSoftBodyDX11Solvers_Shaders})
+    LIST(APPEND BulletSoftBodyDX11Solvers_HLSL "HLSL/${f}.hlsl")
+endforeach(f) 
+
+
+
+ADD_LIBRARY(BulletSoftBodySolvers_DX11  ${BulletSoftBodyDX11Solvers_SRCS} ${BulletSoftBodyDX11Solvers_HDRS} ${BulletSoftBodyDX11Solvers_HLSL})
+SET_TARGET_PROPERTIES(BulletSoftBodySolvers_DX11 PROPERTIES VERSION ${BULLET_VERSION})
+SET_TARGET_PROPERTIES(BulletSoftBodySolvers_DX11 PROPERTIES SOVERSION ${BULLET_VERSION})
+IF (BUILD_SHARED_LIBS)
+	TARGET_LINK_LIBRARIES(BulletSoftBodySolvers_DX11 BulletSoftBody BulletDynamics)
+ENDIF (BUILD_SHARED_LIBS)
+
+
+IF (INSTALL_LIBS)
+	IF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES)
+		IF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5)
+			IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
+				INSTALL(TARGETS BulletSoftBodySolvers_DX11 DESTINATION .)
+			ELSE (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
+			INSTALL(TARGETS BulletSoftBodySolvers_DX11 DESTINATION lib${LIB_SUFFIX})
+#headers are already installed by BulletMultiThreaded library
+			ENDIF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
+		ENDIF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5)
+
+		IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
+			SET_TARGET_PROPERTIES(BulletSoftBodySolvers_DX11 PROPERTIES FRAMEWORK true)
+			SET_TARGET_PROPERTIES(BulletSoftBodySolvers_DX11 PROPERTIES PUBLIC_HEADER "${BulletSoftBodyDX11Solvers_HDRS}")
+		ENDIF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
+	ENDIF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES)
+ENDIF (INSTALL_LIBS)
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/ApplyForces.hlsl b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/ApplyForces.hlsl
new file mode 100644
index 000000000..37e22695b
--- /dev/null
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/ApplyForces.hlsl
@@ -0,0 +1,95 @@
+MSTRINGIFY(
+
+cbuffer ApplyForcesCB : register( b0 )
+{
+	unsigned int numNodes;
+	float solverdt;
+	float epsilon;
+	int padding3;
+};
+
+
+StructuredBuffer<int> g_vertexClothIdentifier : register( t0 );
+StructuredBuffer<float4> g_vertexNormal : register( t1 );
+StructuredBuffer<float> g_vertexArea : register( t2 );
+StructuredBuffer<float> g_vertexInverseMass : register( t3 );
+// TODO: These could be combined into a lift/drag factor array along with medium density
+StructuredBuffer<float> g_clothLiftFactor : register( t4 );
+StructuredBuffer<float> g_clothDragFactor : register( t5 );
+StructuredBuffer<float4> g_clothWindVelocity : register( t6 );
+StructuredBuffer<float4> g_clothAcceleration : register( t7 );
+StructuredBuffer<float> g_clothMediumDensity : register( t8 );
+
+RWStructuredBuffer<float4> g_vertexForceAccumulator : register( u0 );
+RWStructuredBuffer<float4> g_vertexVelocity : register( u1 );
+
+float3 projectOnAxis( float3 v, float3 a )
+{
+	return (a*dot(v, a));
+}
+
+[numthreads(128, 1, 1)]
+void 
+ApplyForcesKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
+{
+	unsigned int nodeID = DTid.x;
+	if( nodeID < numNodes )
+	{		
+		int clothId = g_vertexClothIdentifier[nodeID];
+		float nodeIM = g_vertexInverseMass[nodeID];
+		
+		if( nodeIM > 0.0f )
+		{
+			float3 nodeV = g_vertexVelocity[nodeID].xyz;
+			float3 normal = g_vertexNormal[nodeID].xyz;
+			float area = g_vertexArea[nodeID];
+			float3 nodeF = g_vertexForceAccumulator[nodeID].xyz;
+			
+			// Read per-cloth values
+			float3 clothAcceleration = g_clothAcceleration[clothId].xyz;
+			float3 clothWindVelocity = g_clothWindVelocity[clothId].xyz;
+			float liftFactor = g_clothLiftFactor[clothId];
+			float dragFactor = g_clothDragFactor[clothId];
+			float mediumDensity = g_clothMediumDensity[clothId];
+		
+			// Apply the acceleration to the cloth rather than do this via a force
+			nodeV += (clothAcceleration*solverdt);
+
+			g_vertexVelocity[nodeID] = float4(nodeV, 0.f);
+
+			float3 relativeWindVelocity = nodeV - clothWindVelocity;
+			float relativeSpeedSquared = dot(relativeWindVelocity, relativeWindVelocity);
+			
+			if( relativeSpeedSquared > epsilon )
+			{
+				// Correct direction of normal relative to wind direction and get dot product
+				normal = normal * (dot(normal, relativeWindVelocity) < 0 ? -1.f : 1.f);
+				float dvNormal = dot(normal, relativeWindVelocity);
+				if( dvNormal > 0 )
+				{
+					float3 force = float3(0.f, 0.f, 0.f);
+					float c0 = area * dvNormal * relativeSpeedSquared / 2.f;
+					float c1 = c0 * mediumDensity;
+					force += normal * (-c1 * liftFactor);
+					force += normalize(relativeWindVelocity)*(-c1 * dragFactor);
+					
+					float dtim = solverdt * nodeIM;
+					float3 forceDTIM = force * dtim;
+					
+					float3 nodeFPlusForce = nodeF + force;
+					
+					// m_nodesf[i] -= ProjectOnAxis(m_nodesv[i], force.normalized())/dtim;	
+					float3 nodeFMinus = nodeF - (projectOnAxis(nodeV, normalize(force))/dtim);
+					
+					nodeF = nodeFPlusForce;
+					if( dot(forceDTIM, forceDTIM) > dot(nodeV, nodeV) )
+						nodeF = nodeFMinus;
+									
+					g_vertexForceAccumulator[nodeID] = float4(nodeF, 0.0f);	
+				}
+			}
+		}
+	}
+}
+
+);
\ No newline at end of file
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/ComputeBounds.hlsl b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/ComputeBounds.hlsl
new file mode 100644
index 000000000..65ae515ca
--- /dev/null
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/ComputeBounds.hlsl
@@ -0,0 +1,83 @@
+MSTRINGIFY(
+
+cbuffer ComputeBoundsCB : register( b0 )
+{
+	int numNodes;		// node count; threads whose DTid.x is not below it skip the node work
+	int numSoftBodies;	// cloth count bounding which per-cloth slots are initialised and written back
+	int padding1;		// not read by ComputeBoundsKernel
+	int padding2;		// not read by ComputeBoundsKernel
+};
+
+// Per-vertex cloth identifiers and positions (read-only inputs)
+StructuredBuffer<int> g_vertexClothIdentifier : register( t0 );
+StructuredBuffer<float4> g_vertexPositions : register( t1 );
+
+RWStructuredBuffer<uint4> g_clothMinBounds : register( u0 );
+RWStructuredBuffer<uint4> g_clothMaxBounds : register( u1 );
+
+groupshared uint4 clothMinBounds[256];
+groupshared uint4 clothMaxBounds[256];
+
+// Computes per-cloth bounds: each group reduces its nodes into groupshared min/max slots, then
+// merges those slots into the global buffers with atomics. Floats are compared via an ordered uint encoding.
+[numthreads(128, 1, 1)]
+void 
+ComputeBoundsKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
+{
+	const unsigned int UINT_MAX = 0xffffffff;
+	// Slots usable by this group, capped by the 256-entry groupshared arrays declared above
+	const int clothSlots = min(numSoftBodies, 256);
+
+	// Init min and max bounds arrays; stride by the group size (128) so slots 128..255 are covered too
+	for( int initSlot = int(GTid.x); initSlot < clothSlots; initSlot += 128 )
+	{
+		clothMinBounds[initSlot] = uint4(UINT_MAX, UINT_MAX, UINT_MAX, UINT_MAX);
+		clothMaxBounds[initSlot] = uint4(0,0,0,0);
+	}
+
+	AllMemoryBarrierWithGroupSync();
+
+	int nodeID = DTid.x;
+	if( nodeID < numNodes )
+	{	
+		int clothIdentifier = g_vertexClothIdentifier[nodeID];
+		if( clothIdentifier >= 0 && clothIdentifier < clothSlots )
+		{
+			float3 position = g_vertexPositions[nodeID].xyz;
+
+			// Reinterpret position as uint
+			uint3 positionUInt = uint3(asuint(position.x), asuint(position.y), asuint(position.z));
+		
+			// Invert sign bit of positives and whole of negatives to allow comparison as unsigned ints
+			positionUInt.x ^= (1+~(positionUInt.x >> 31) | 0x80000000);
+			positionUInt.y ^= (1+~(positionUInt.y >> 31) | 0x80000000);		
+			positionUInt.z ^= (1+~(positionUInt.z >> 31) | 0x80000000);
+		
+			// Min/max with the LDS values
+			InterlockedMin(clothMinBounds[clothIdentifier].x, positionUInt.x);
+			InterlockedMin(clothMinBounds[clothIdentifier].y, positionUInt.y);
+			InterlockedMin(clothMinBounds[clothIdentifier].z, positionUInt.z);
+
+			InterlockedMax(clothMaxBounds[clothIdentifier].x, positionUInt.x);
+			InterlockedMax(clothMaxBounds[clothIdentifier].y, positionUInt.y);
+			InterlockedMax(clothMaxBounds[clothIdentifier].z, positionUInt.z);
+		}
+	}
+	
+	AllMemoryBarrierWithGroupSync();
+
+	// Use global atomics to update the global versions of the data
+	for( int flushSlot = int(GTid.x); flushSlot < clothSlots; flushSlot += 128 )
+	{
+		InterlockedMin(g_clothMinBounds[flushSlot].x, clothMinBounds[flushSlot].x);
+		InterlockedMin(g_clothMinBounds[flushSlot].y, clothMinBounds[flushSlot].y);
+		InterlockedMin(g_clothMinBounds[flushSlot].z, clothMinBounds[flushSlot].z);
+
+		InterlockedMax(g_clothMaxBounds[flushSlot].x, clothMaxBounds[flushSlot].x);		
+		InterlockedMax(g_clothMaxBounds[flushSlot].y, clothMaxBounds[flushSlot].y);
+		InterlockedMax(g_clothMaxBounds[flushSlot].z, clothMaxBounds[flushSlot].z);
+	}
+}
+
+
+);
\ No newline at end of file
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/Integrate.hlsl b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/Integrate.hlsl
new file mode 100644
index 000000000..f85fd115c
--- /dev/null
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/Integrate.hlsl
@@ -0,0 +1,41 @@
+MSTRINGIFY(
+
+cbuffer IntegrateCB : register( b0 )
+{
+	int numNodes;		// node count; IntegrateKernel only touches indices below it
+	float solverdt;		// time step multiplying the force and velocity terms
+	int padding1;		// not read by IntegrateKernel
+	int padding2;		// not read by IntegrateKernel
+};
+
+// Per-vertex inverse masses (read-only input)
+StructuredBuffer<float> g_vertexInverseMasses : register( t0 );
+
+RWStructuredBuffer<float4> g_vertexPositions : register( u0 );
+RWStructuredBuffer<float4> g_vertexVelocity : register( u1 );
+RWStructuredBuffer<float4> g_vertexPreviousPositions : register( u2 );
+RWStructuredBuffer<float4> g_vertexForceAccumulator : register( u3 );
+
+// One thread per node: records the current position as the previous one, clears the force accumulator,
+// then advances velocity by force*inverseMass*solverdt and position by the new velocity*solverdt.
+[numthreads(128, 1, 1)]
+void 
+IntegrateKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
+{
+	const int vertexIndex = DTid.x;
+	if( vertexIndex >= numNodes )
+		return;
+
+	const float3 startPosition = g_vertexPositions[vertexIndex].xyz;
+	const float3 accumulatedForce = g_vertexForceAccumulator[vertexIndex].xyz;
+	const float invMass = g_vertexInverseMasses[vertexIndex];
+	const float3 steppedVelocity = g_vertexVelocity[vertexIndex].xyz + accumulatedForce * invMass * solverdt;
+	const float3 steppedPosition = startPosition + steppedVelocity * solverdt;
+
+	g_vertexPreviousPositions[vertexIndex] = float4(startPosition, 0.f);
+	g_vertexForceAccumulator[vertexIndex] = float4(0.f, 0.f, 0.f, 0.0f);
+	g_vertexPositions[vertexIndex] = float4(steppedPosition, 0.f);
+	g_vertexVelocity[vertexIndex] = float4(steppedVelocity, 0.f);
+}
+
+);
\ No newline at end of file
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/OutputToVertexArray.hlsl b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/OutputToVertexArray.hlsl
new file mode 100644
index 000000000..a6fa7b950
--- /dev/null
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/OutputToVertexArray.hlsl
@@ -0,0 +1,63 @@
+MSTRINGIFY(
+
+cbuffer OutputToVertexArrayCB : register( b0 )
+{
+	int startNode;		// added to the thread index when reading the source position/normal buffers
+	int numNodes;		// number of nodes copied; thread indices at or above it do nothing
+	int positionOffset;	// element index in g_vertexBuffer of the first position
+	int positionStride;	// elements of g_vertexBuffer between consecutive positions
+	
+	int normalOffset;	// element index in g_vertexBuffer of the first normal
+	int normalStride;	// elements of g_vertexBuffer between consecutive normals
+	int padding1;		// not read by either kernel in this file
+	int padding2;		// not read by either kernel in this file
+};
+
+
+StructuredBuffer<float4> g_vertexPositions : register( t0 );
+StructuredBuffer<float4> g_vertexNormals : register( t1 );
+
+RWBuffer<float> g_vertexBuffer : register( u0 );
+
+
+// Copies the position and normal of each node (offset by startNode) into the float
+// vertex buffer, three components each, at their strided destinations.
+[numthreads(128, 1, 1)]
+void 
+OutputToVertexArrayWithNormalsKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
+{
+	const int localNode = DTid.x;
+	if( localNode >= numNodes )
+		return;
+	const float3 srcPosition = g_vertexPositions[localNode + startNode].xyz;
+	const float3 srcNormal = g_vertexNormals[localNode + startNode].xyz;
+	// Offsets and strides are counted in float elements of g_vertexBuffer
+	const int positionBase = localNode * positionStride + positionOffset;
+	const int normalBase = localNode * normalStride + normalOffset;
+
+	g_vertexBuffer[positionBase] = srcPosition.x;
+	g_vertexBuffer[positionBase+1] = srcPosition.y;
+	g_vertexBuffer[positionBase+2] = srcPosition.z;
+	g_vertexBuffer[normalBase] = srcNormal.x;
+	g_vertexBuffer[normalBase+1] = srcNormal.y;
+	g_vertexBuffer[normalBase+2] = srcNormal.z;
+}
+
+// Copies only the position of each node (offset by startNode) into the float vertex buffer.
+[numthreads(128, 1, 1)]
+void 
+OutputToVertexArrayWithoutNormalsKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
+{
+	int nodeID = DTid.x;
+	if( nodeID < numNodes )
+	{			
+		// Normals are not emitted by this kernel, so g_vertexNormals is deliberately not read
+		float4 position = g_vertexPositions[nodeID + startNode];
+		// Stride should account for the float->float4 conversion
+		int positionDestination = nodeID * positionStride + positionOffset;		
+		g_vertexBuffer[positionDestination] = position.x;
+		g_vertexBuffer[positionDestination+1] = position.y;
+		g_vertexBuffer[positionDestination+2] = position.z;		
+	}
+}
+);
\ No newline at end of file
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/PrepareLinks.hlsl b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/PrepareLinks.hlsl
new file mode 100644
index 000000000..75db8d149
--- /dev/null
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/PrepareLinks.hlsl
@@ -0,0 +1,44 @@
+MSTRINGIFY(
+
+cbuffer PrepareLinksCB : register( b0 )
+{
+	int numLinks;		// link count; PrepareLinksKernel only touches link indices below it
+	int padding0;		// not read by PrepareLinksKernel
+	int padding1;		// not read by PrepareLinksKernel
+	int padding2;		// not read by PrepareLinksKernel
+};
+
+// Node indices for each link
+StructuredBuffer<int2> g_linksVertexIndices : register( t0 );
+StructuredBuffer<float> g_linksMassLSC : register( t1 );
+StructuredBuffer<float4> g_nodesPreviousPosition : register( t2 );
+
+RWStructuredBuffer<float> g_linksLengthRatio : register( u0 );
+RWStructuredBuffer<float4> g_linksCurrentLength : register( u1 );
+
+// Per link: stores the vector between the previous positions of its two end nodes
+// (second node minus first) and the reciprocal of that vector's squared length
+// multiplied by the link's massLSC value.
+[numthreads(128, 1, 1)]
+void 
+PrepareLinksKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
+{
+	const int linkIndex = DTid.x;
+	if( linkIndex >= numLinks )
+		return;
+
+	const int2 endNodes = g_linksVertexIndices[linkIndex];
+	const float4 previousStart = g_nodesPreviousPosition[endNodes.x];
+	const float4 previousEnd = g_nodesPreviousPosition[endNodes.y];
+	const float linkMassLSC = g_linksMassLSC[linkIndex];
+
+	// The subtraction and dot product use all four components, w included
+	const float4 linkVector = previousEnd - previousStart;
+	const float scaledLengthSquared = dot(linkVector, linkVector)*linkMassLSC;
+
+	// There is no zero check here: a zero-length link or a zero massLSC
+	// produces a non-finite ratio
+	g_linksCurrentLength[linkIndex] = linkVector;
+	g_linksLengthRatio[linkIndex] = 1./scaledLengthSquared;
+}
+);
\ No newline at end of file
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/SolvePositions.hlsl b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/SolvePositions.hlsl
new file mode 100644
index 000000000..de979d7f9
--- /dev/null
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/SolvePositions.hlsl
@@ -0,0 +1,55 @@
+MSTRINGIFY(
+
+cbuffer SolvePositionsFromLinksKernelCB : register( b0 )
+{
+	int startLink;		// offset added to DTid.x to obtain the link index
+	int numLinks;		// links handled by this dispatch; DTid.x values at or above it do nothing
+	float kst;		// scale applied to the per-link correction factor
+	float ti;		// not read by SolvePositionsFromLinksKernel
+};
+
+// Node indices for each link
+StructuredBuffer<int2> g_linksVertexIndices : register( t0 );
+
+StructuredBuffer<float> g_linksMassLSC : register( t1 );
+StructuredBuffer<float> g_linksRestLengthSquared : register( t2 );
+StructuredBuffer<float> g_verticesInverseMass : register( t3 );
+
+RWStructuredBuffer<float4> g_vertexPositions : register( u0 );
+
+// Per link: moves both end nodes along the vector joining them so that the squared
+// length approaches the rest value, weighting each node by its inverse mass.
+[numthreads(128, 1, 1)]
+void 
+SolvePositionsFromLinksKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
+{
+	if( DTid.x >= numLinks )
+		return;
+
+	const int linkIndex = DTid.x + startLink;
+	const float linkMassLSC = g_linksMassLSC[linkIndex];
+	const float restSq = g_linksRestLengthSquared[linkIndex];
+
+	// Links whose massLSC is not strictly positive are left untouched
+	if( !(linkMassLSC > 0.0f) )
+		return;
+
+	const int2 endNodes = g_linksVertexIndices[linkIndex];
+	const int nodeA = endNodes.x;
+	const int nodeB = endNodes.y;
+
+	const float3 posA = g_vertexPositions[nodeA].xyz;
+	const float3 posB = g_vertexPositions[nodeB].xyz;
+	const float invMassA = g_verticesInverseMass[nodeA];
+	const float invMassB = g_verticesInverseMass[nodeB];
+
+	const float3 delta = posB - posA;
+	const float lengthSq = dot(delta, delta);
+	const float correction = ((restSq - lengthSq)/(linkMassLSC*(restSq+lengthSq)))*kst;
+
+	// The two ends are displaced in opposite directions along delta
+	g_vertexPositions[nodeA] = float4(posA - delta*(correction*invMassA), 0.f);
+	g_vertexPositions[nodeB] = float4(posB + delta*(correction*invMassB), 0.f);
+}
+
+);
\ No newline at end of file
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/SolvePositionsSIMDBatched.hlsl b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/SolvePositionsSIMDBatched.hlsl
new file mode 100644
index 000000000..3cbb352e8
--- /dev/null
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/SolvePositionsSIMDBatched.hlsl
@@ -0,0 +1,147 @@
+MSTRINGIFY(
+
+
+
+cbuffer SolvePositionsFromLinksKernelCB : register( b0 )
+{
+	int startWaveInBatch;	// index of the first logical wavefront handled by this dispatch
+	int numWaves;		// number of logical wavefronts to process; later ones are masked out
+	float kst;		// scale applied to the per-link correction factor
+	float ti;		// not read by SolvePositionsFromLinksKernel
+};
+
+
+// Number of batches per wavefront stored one element per logical wavefront
+StructuredBuffer<int2> g_wavefrontBatchCountsVertexCounts : register( t0 );
+// Set of up to maxNumVertices vertex addresses per wavefront
+StructuredBuffer<int> g_vertexAddressesPerWavefront : register( t1 );
+
+StructuredBuffer<float> g_verticesInverseMass : register( t2 );
+
+// Per-link data laid out in terms of sub-batches within wavefronts
+StructuredBuffer<int2> g_linksVertexIndices : register( t3 );
+StructuredBuffer<float> g_linksMassLSC : register( t4 );
+StructuredBuffer<float> g_linksRestLengthSquared : register( t5 );
+
+RWStructuredBuffer<float4> g_vertexPositions : register( u0 );
+
+// Data loaded on a per-wave basis
+groupshared int2 wavefrontBatchCountsVertexCounts[WAVEFRONT_BLOCK_MULTIPLIER];
+groupshared float4 vertexPositionSharedData[MAX_NUM_VERTICES_PER_WAVE*WAVEFRONT_BLOCK_MULTIPLIER];
+groupshared float vertexInverseMassSharedData[MAX_NUM_VERTICES_PER_WAVE*WAVEFRONT_BLOCK_MULTIPLIER];
+
+// Storing the vertex addresses actually slowed things down a little
+//groupshared int vertexAddressSharedData[MAX_NUM_VERTICES_PER_WAVE*WAVEFRONT_BLOCK_MULTIPLIER];
+
+
+// Solves link constraints one logical wavefront at a time: each wavefront stages its vertices in
+// groupshared memory, applies its link batches there, then copies the vertices back to g_vertexPositions.
+// WAVEFRONT_SIZE, WAVEFRONT_BLOCK_MULTIPLIER, MAX_NUM_VERTICES_PER_WAVE, MAX_BATCHES_PER_WAVE and
+// BLOCK_SIZE are not defined in this file; presumably the host supplies them at compile time - TODO confirm.
+[numthreads(BLOCK_SIZE, 1, 1)]
+void 
+SolvePositionsFromLinksKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
+{
+	const int laneInWavefront = (DTid.x & (WAVEFRONT_SIZE-1));	// assumes WAVEFRONT_SIZE is a power of two - TODO confirm
+	const int wavefront = startWaveInBatch + (DTid.x / WAVEFRONT_SIZE);
+	const int firstWavefrontInBlock = startWaveInBatch + Gid.x * WAVEFRONT_BLOCK_MULTIPLIER;
+	const int localWavefront = wavefront - firstWavefrontInBlock;	// wavefront index relative to this thread group
+
+	int batchesWithinWavefront = 0;
+	int verticesUsedByWave = 0;
+	int cond = wavefront < (startWaveInBatch + numWaves);	// nonzero when this wavefront lies inside the dispatched range
+
+	// Mask out in case there's a stray "wavefront" at the end that's been forced in through the multiplier	
+	if( cond)
+	{
+		// Load the batch counts for the wavefronts
+
+		int2 batchesAndVerticesWithinWavefront = g_wavefrontBatchCountsVertexCounts[wavefront];
+
+		batchesWithinWavefront = batchesAndVerticesWithinWavefront.x;
+		verticesUsedByWave = batchesAndVerticesWithinWavefront.y;
+
+		// Load the vertices for the wavefronts
+		for( int vertex = laneInWavefront; vertex < verticesUsedByWave; vertex+=WAVEFRONT_SIZE )
+		{
+			int vertexAddress = g_vertexAddressesPerWavefront[wavefront*MAX_NUM_VERTICES_PER_WAVE + vertex];
+
+			//vertexAddressSharedData[localWavefront*MAX_NUM_VERTICES_PER_WAVE + vertex] = vertexAddress;
+			vertexPositionSharedData[localWavefront*MAX_NUM_VERTICES_PER_WAVE + vertex] = g_vertexPositions[vertexAddress];
+			vertexInverseMassSharedData[localWavefront*MAX_NUM_VERTICES_PER_WAVE + vertex] = g_verticesInverseMass[vertexAddress];
+		}
+		
+	}
+		// Ensure compiler does not re-order memory operations
+		//AllMemoryBarrier();
+	AllMemoryBarrierWithGroupSync ();	// placed outside the cond branches, so every thread of the group reaches it
+		
+	if( cond)
+	{
+		// Loop through the batches performing the solve on each in LDS
+		int baseDataLocationForWave = WAVEFRONT_SIZE * wavefront * MAX_BATCHES_PER_WAVE;	
+
+		//for( int batch = 0; batch < batchesWithinWavefront; ++batch )
+		// NOTE: unlike the for loop above, the do/while body runs once even when batchesWithinWavefront is 0
+		int batch = 0;
+		do
+		{
+			int baseDataLocation = baseDataLocationForWave + WAVEFRONT_SIZE * batch;
+			int locationOfValue = baseDataLocation + laneInWavefront;
+			
+			// These loads should all be perfectly linear across the WF
+			int2 localVertexIndices = g_linksVertexIndices[locationOfValue];
+			float massLSC = g_linksMassLSC[locationOfValue];
+			float restLengthSquared = g_linksRestLengthSquared[locationOfValue];
+			
+
+			// LDS vertex addresses based on logical wavefront number in block and loaded index
+			int vertexAddress0 = MAX_NUM_VERTICES_PER_WAVE * localWavefront + localVertexIndices.x;
+			int vertexAddress1 = MAX_NUM_VERTICES_PER_WAVE * localWavefront + localVertexIndices.y;
+			
+			float3 position0 = vertexPositionSharedData[vertexAddress0].xyz;
+			float3 position1 = vertexPositionSharedData[vertexAddress1].xyz;
+
+			float inverseMass0 = vertexInverseMassSharedData[vertexAddress0];
+			float inverseMass1 = vertexInverseMassSharedData[vertexAddress1]; 
+
+			float3 del = position1 - position0;
+			float len = dot(del, del);
+			
+			float k = 0;	// stays 0 for links without a positive massLSC, leaving both positions unchanged
+			if( massLSC > 0.0f )
+			{		
+				k = ((restLengthSquared - len)/(massLSC*(restLengthSquared+len)))*kst;
+			}
+			
+			position0 = position0 - del*(k*inverseMass0);
+			position1 = position1 + del*(k*inverseMass1);
+			
+			// Ensure compiler does not re-order memory operations
+			AllMemoryBarrier();				
+
+			vertexPositionSharedData[vertexAddress0] = float4(position0, 0.f);
+			vertexPositionSharedData[vertexAddress1] = float4(position1, 0.f);
+			
+			// Ensure compiler does not re-order memory operations
+			AllMemoryBarrier();
+				
+			++batch;
+		} while( batch < batchesWithinWavefront );
+		
+		// Update the global memory vertices for the wavefronts
+		for( int vertex = laneInWavefront; vertex < verticesUsedByWave; vertex+=WAVEFRONT_SIZE )
+		{
+			int vertexAddress = g_vertexAddressesPerWavefront[wavefront*MAX_NUM_VERTICES_PER_WAVE + vertex];
+
+			g_vertexPositions[vertexAddress] = vertexPositionSharedData[localWavefront*MAX_NUM_VERTICES_PER_WAVE + vertex];
+		}
+	}
+		
+}
+
+
+
+
+);
+
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/UpdateConstants.hlsl b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/UpdateConstants.hlsl
new file mode 100644
index 000000000..fafd236f9
--- /dev/null
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/UpdateConstants.hlsl
@@ -0,0 +1,48 @@
+MSTRINGIFY(
+
+cbuffer UpdateConstantsCB : register( b0 )
+{
+	int numLinks;		// link count; UpdateConstantsKernel only touches link indices below it
+	int padding0;		// not read by UpdateConstantsKernel
+	int padding1;		// not read by UpdateConstantsKernel
+	int padding2;		// not read by UpdateConstantsKernel
+};
+
+// Node indices for each link
+StructuredBuffer<int2> g_linksVertexIndices : register( t0 );
+StructuredBuffer<float4> g_vertexPositions : register( t1 );
+StructuredBuffer<float> g_vertexInverseMasses : register( t2 );
+StructuredBuffer<float> g_linksMaterialLSC : register( t3 );
+
+RWStructuredBuffer<float> g_linksMassLSC : register( u0 );
+RWStructuredBuffer<float> g_linksRestLengthSquared : register( u1 );
+RWStructuredBuffer<float> g_linksRestLengths : register( u2 );
+
+// Per link: derives the rest length from the current distance between its two end
+// nodes, and the massLSC term as the summed inverse masses of those nodes divided
+// by the link's material stiffness coefficient.
+[numthreads(128, 1, 1)]
+void 
+UpdateConstantsKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
+{
+	const int linkIndex = DTid.x;
+	if( linkIndex >= numLinks )
+		return;
+
+	const int2 endNodes = g_linksVertexIndices[linkIndex];
+	const float stiffness = g_linksMaterialLSC[ linkIndex ];
+	const float summedInverseMass = g_vertexInverseMasses[endNodes.x] + g_vertexInverseMasses[endNodes.y];
+
+	const float3 positionA = g_vertexPositions[endNodes.x].xyz;
+	const float3 positionB = g_vertexPositions[endNodes.y].xyz;
+	const float3 span = positionA - positionB;
+	const float restLength = sqrt(dot(span, span));
+
+	// The squared rest length is rebuilt from the square root rather than
+	// taken from the dot product directly
+	g_linksRestLengths[linkIndex] = restLength;
+	g_linksMassLSC[linkIndex] = summedInverseMass/stiffness;
+	g_linksRestLengthSquared[linkIndex] = restLength*restLength;
+}
+
+);
\ No newline at end of file
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/UpdateNodes.hlsl b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/UpdateNodes.hlsl
new file mode 100644
index 000000000..a16d89439
--- /dev/null
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/UpdateNodes.hlsl
@@ -0,0 +1,49 @@
+MSTRINGIFY(
+
+cbuffer UpdateVelocitiesFromPositionsWithVelocitiesCB : register( b0 )
+{
+	int numNodes;		// node count; the kernel only touches node indices below it
+	float isolverdt;	// multiplies the position difference; presumably the inverse solver time step - TODO confirm
+	int padding1;		// not read by the kernel in this file
+	int padding2;		// not read by the kernel in this file
+};
+
+
+StructuredBuffer<float4> g_vertexPositions : register( t0 );
+StructuredBuffer<float4> g_vertexPreviousPositions : register( t1 );
+StructuredBuffer<int> g_vertexClothIndices : register( t2 );
+StructuredBuffer<float> g_clothVelocityCorrectionCoefficients : register( t3 );
+StructuredBuffer<float> g_clothDampingFactor : register( t4 );
+
+RWStructuredBuffer<float4> g_vertexVelocities : register( u0 );
+RWStructuredBuffer<float4> g_vertexForces : register( u1 );
+
+
+// Per node: adds the position change over the step (scaled by the cloth's velocity
+// correction coefficient and isolverdt) to the stored velocity, damps the result by
+// (1 - dampingFactor), and clears the node's force.
+[numthreads(128, 1, 1)]
+void 
+updateVelocitiesFromPositionsWithVelocitiesKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
+{
+	const int nodeIndex = DTid.x;
+	if( nodeIndex >= numNodes )
+		return;
+
+	// Per-cloth parameters are looked up through the node's cloth index
+	const int cloth = g_vertexClothIndices[nodeIndex];
+	const float correction = g_clothVelocityCorrectionCoefficients[cloth];
+	const float keepFraction = (1.f - g_clothDampingFactor[cloth]);
+
+	const float3 positionDelta = g_vertexPositions[nodeIndex].xyz - g_vertexPreviousPositions[nodeIndex].xyz;
+
+	// Correct the stored velocity first, then damp it
+	float3 newVelocity = g_vertexVelocities[nodeIndex].xyz + positionDelta*correction*isolverdt;
+	newVelocity *= keepFraction;
+
+	// Store the result and reset the force for this node
+	g_vertexVelocities[nodeIndex] = float4(newVelocity, 0.f);
+	g_vertexForces[nodeIndex] = float4(0.f, 0.f, 0.f, 0.f);
+}
+
+);
\ No newline at end of file
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/UpdateNormals.hlsl b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/UpdateNormals.hlsl
new file mode 100644
index 000000000..54ab3ed2f
--- /dev/null
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/UpdateNormals.hlsl
@@ -0,0 +1,98 @@
+MSTRINGIFY(
+
+cbuffer UpdateSoftBodiesCB : register( b0 )
+{
+	unsigned int numNodes;		// node count used by the reset and normalise kernels
+	unsigned int startFace;		// offset added to DTid.x to obtain the face index in UpdateSoftBodiesKernel
+	unsigned int numFaces;		// faces handled by one UpdateSoftBodiesKernel dispatch
+	float epsilon;			// not read by any kernel in this file
+};
+
+
+// Triangle vertex indices, vertex positions and per-vertex triangle counts (read-only inputs)
+StructuredBuffer<int4> g_triangleVertexIndexSet : register( t0 );
+StructuredBuffer<float4> g_vertexPositions : register( t1 );
+StructuredBuffer<int> g_vertexTriangleCount : register( t2 );
+
+RWStructuredBuffer<float4> g_vertexNormals : register( u0 );
+RWStructuredBuffer<float> g_vertexArea : register( u1 );
+RWStructuredBuffer<float4> g_triangleNormals : register( u2 );
+RWStructuredBuffer<float> g_triangleArea : register( u3 );
+
+
+// Zeroes the per-vertex normal and area of every node index below numNodes.
+[numthreads(128, 1, 1)]
+void 
+ResetNormalsAndAreasKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
+{
+	if( DTid.x >= numNodes )
+		return;
+	g_vertexArea[DTid.x] = 0.0f;
+	g_vertexNormals[DTid.x] = float4(0.0f, 0.0f, 0.0f, 0.0f);
+}
+
+
+// Per face: adds the edge cross product and its length to the three corner vertices, and stores the face normal and area.
+[numthreads(128, 1, 1)]
+void 
+UpdateSoftBodiesKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
+{
+	int faceID = DTid.x + startFace;	// index of this face within the triangle buffers
+	if( DTid.x < numFaces )
+	{		
+		int4 triangleIndexSet = g_triangleVertexIndexSet[ faceID ];
+		int nodeIndex0 = triangleIndexSet.x;
+		int nodeIndex1 = triangleIndexSet.y;
+		int nodeIndex2 = triangleIndexSet.z;
+		// NOTE(review): vertex accumulators are updated without atomics; presumably faces dispatched together share no vertex - confirm
+		float3 node0 = g_vertexPositions[nodeIndex0].xyz;
+		float3 node1 = g_vertexPositions[nodeIndex1].xyz;
+		float3 node2 = g_vertexPositions[nodeIndex2].xyz;
+		float3 nodeNormal0 = g_vertexNormals[nodeIndex0].xyz;
+		float3 nodeNormal1 = g_vertexNormals[nodeIndex1].xyz;
+		float3 nodeNormal2 = g_vertexNormals[nodeIndex2].xyz;
+		float vertexArea0 = g_vertexArea[nodeIndex0];
+		float vertexArea1 = g_vertexArea[nodeIndex1];
+		float vertexArea2 = g_vertexArea[nodeIndex2];
+		
+		float3 vector0 = node1 - node0;
+		float3 vector1 = node2 - node0;
+		// The cross product length is twice the geometric area of the triangle
+		float3 faceNormal = cross(vector0.xyz, vector1.xyz);
+		float triangleArea = length(faceNormal);
+		nodeNormal0 = nodeNormal0 + faceNormal;
+		nodeNormal1 = nodeNormal1 + faceNormal;
+		nodeNormal2 = nodeNormal2 + faceNormal;
+		vertexArea0 = vertexArea0 + triangleArea;
+		vertexArea1 = vertexArea1 + triangleArea;
+		vertexArea2 = vertexArea2 + triangleArea;
+		
+		g_triangleNormals[faceID] = float4(normalize(faceNormal), 0.f);
+		g_vertexNormals[nodeIndex0] = float4(nodeNormal0, 0.f);
+		g_vertexNormals[nodeIndex1] = float4(nodeNormal1, 0.f);
+		g_vertexNormals[nodeIndex2] = float4(nodeNormal2, 0.f);
+		g_triangleArea[faceID] = triangleArea;
+		g_vertexArea[nodeIndex0] = vertexArea0;
+		g_vertexArea[nodeIndex1] = vertexArea1;
+		g_vertexArea[nodeIndex2] = vertexArea2;
+	}
+}
+
+// Normalises each accumulated vertex normal and averages the accumulated area over the vertex
+// triangle count; vertices without faces keep a zero normal and a zero area instead of NaN.
+[numthreads(128, 1, 1)]
+void 
+NormalizeNormalsAndAreasKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
+{
+	if( DTid.x < numNodes )
+	{
+		float4 normal = g_vertexNormals[DTid.x];
+		float area = g_vertexArea[DTid.x];
+		int numTriangles = g_vertexTriangleCount[DTid.x];
+		float vectorLength = length(normal);
+		g_vertexNormals[DTid.x] = (vectorLength > 0.0f) ? normalize(normal) : normal;
+		g_vertexArea[DTid.x] = (numTriangles > 0) ? (area/float(numTriangles)) : 0.0f;
+	}
+}
+
+);
\ No newline at end of file
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/UpdatePositions.hlsl b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/UpdatePositions.hlsl
new file mode 100644
index 000000000..9685fa8fb
--- /dev/null
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/UpdatePositions.hlsl
@@ -0,0 +1,44 @@
+MSTRINGIFY(
+
+cbuffer UpdateVelocitiesFromPositionsWithoutVelocitiesCB : register( b0 )
+{
+	int numNodes;		// node count; the kernel only touches node indices below it
+	float isolverdt;	// multiplies the position difference; presumably the inverse solver time step - TODO confirm
+	int padding1;		// not read by the kernel in this file
+	int padding2;		// not read by the kernel in this file
+};
+
+
+StructuredBuffer<float4> g_vertexPositions : register( t0 );
+StructuredBuffer<float4> g_vertexPreviousPositions : register( t1 );
+StructuredBuffer<int> g_vertexClothIndices : register( t2 );
+StructuredBuffer<float> g_clothDampingFactor : register( t3 );
+
+RWStructuredBuffer<float4> g_vertexVelocities : register( u0 );
+RWStructuredBuffer<float4> g_vertexForces : register( u1 );
+
+
+// Derives each node velocity purely from its position change over the step, scaled by
+// (1 - dampingFactor) and isolverdt, and clears the node's force.
+[numthreads(128, 1, 1)]
+void 
+updateVelocitiesFromPositionsWithoutVelocitiesKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
+{
+	int nodeID = DTid.x;
+	if( nodeID < numNodes )
+	{	
+		float3 position = g_vertexPositions[nodeID].xyz;
+		float3 previousPosition = g_vertexPreviousPositions[nodeID].xyz;
+		int clothIndex = g_vertexClothIndices[nodeID];
+		float dampingFactor = g_clothDampingFactor[clothIndex];
+		float velocityCoefficient = (1.f - dampingFactor);
+		
+		float3 difference = position - previousPosition;
+		// The previous velocity is overwritten rather than accumulated, so it is not loaded
+		float3 velocity = difference*velocityCoefficient*isolverdt;		
+		g_vertexVelocities[nodeID] = float4(velocity, 0.f);
+		g_vertexForces[nodeID] = float4(0.f, 0.f, 0.f, 0.f);								
+	}
+}
+
+);
\ No newline at end of file
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/UpdatePositionsFromVelocities.hlsl b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/UpdatePositionsFromVelocities.hlsl
new file mode 100644
index 000000000..e816b1e14
--- /dev/null
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/UpdatePositionsFromVelocities.hlsl
@@ -0,0 +1,35 @@
+MSTRINGIFY(
+
+cbuffer UpdatePositionsFromVelocitiesCB : register( b0 )
+{
+	int numNodes;		// node count; the kernel only touches node indices below it
+	float solverSDT;	// factor multiplying the velocity when advancing the position
+	int padding1;		// not read by UpdatePositionsFromVelocitiesKernel
+	int padding2;		// not read by UpdatePositionsFromVelocitiesKernel
+};
+
+
+StructuredBuffer<float4> g_vertexVelocities : register( t0 );
+
+RWStructuredBuffer<float4> g_vertexPreviousPositions : register( u0 );
+RWStructuredBuffer<float4> g_vertexCurrentPosition : register( u1 );
+
+
+// Advances every node from its previous position by velocity*solverSDT and stores the
+// result as both the current and the previous position.
+[numthreads(128, 1, 1)]
+void 
+UpdatePositionsFromVelocitiesKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
+{
+	const int nodeIndex = DTid.x;
+	if( nodeIndex >= numNodes )
+		return;
+
+	const float3 basePosition = g_vertexPreviousPositions[nodeIndex].xyz;
+	const float3 nodeVelocity = g_vertexVelocities[nodeIndex].xyz;
+	const float4 advanced = float4(basePosition + nodeVelocity*solverSDT, 0.f);
+	g_vertexCurrentPosition[nodeIndex] = advanced;
+	g_vertexPreviousPositions[nodeIndex] = advanced;
+}
+
+);
\ No newline at end of file
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/VSolveLinks.hlsl b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/VSolveLinks.hlsl
new file mode 100644
index 000000000..14afca674
--- /dev/null
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/VSolveLinks.hlsl
@@ -0,0 +1,55 @@
+MSTRINGIFY(
+
+cbuffer VSolveLinksCB : register( b0 )
+{
+	int startLink;		// offset added to DTid.x to obtain the link index
+	int numLinks;		// links handled by this dispatch; DTid.x values at or above it do nothing
+	float kst;		// scale applied to the per-link velocity impulse
+	int padding;		// not read by VSolveLinksKernel
+};
+
+// Node indices for each link
+StructuredBuffer<int2> g_linksVertexIndices : register( t0 );
+
+StructuredBuffer<float> g_linksLengthRatio : register( t1 );
+StructuredBuffer<float4> g_linksCurrentLength : register( t2 );
+StructuredBuffer<float> g_vertexInverseMass : register( t3 );
+
+RWStructuredBuffer<float4> g_vertexVelocity : register( u0 );
+
+// Per link: applies to both end nodes a velocity change along the stored link vector,
+// proportional to their relative velocity projected on that vector, the stored
+// length ratio and kst, and weighted by each node's inverse mass.
+[numthreads(128, 1, 1)]
+void 
+VSolveLinksKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
+{
+	// Threads past the end of this range of links have nothing to do
+	if( DTid.x >= numLinks )
+		return;
+
+	const int linkIndex = DTid.x + startLink;
+	const int2 endNodes = g_linksVertexIndices[linkIndex];
+	const int nodeA = endNodes.x;
+	const int nodeB = endNodes.y;
+
+	const float ratio = g_linksLengthRatio[linkIndex];
+	const float3 linkVector = g_linksCurrentLength[linkIndex].xyz;
+
+	const float3 velocityA = g_vertexVelocity[nodeA].xyz;
+	const float3 velocityB = g_vertexVelocity[nodeB].xyz;
+	const float invMassA = g_vertexInverseMass[nodeA];
+	const float invMassB = g_vertexInverseMass[nodeB];
+
+	// Scalar impulse: negated projection of the relative velocity on the link
+	// vector, scaled by the link's length ratio and kst
+	const float impulse = -dot(linkVector, velocityA - velocityB)*ratio*kst;
+	const float3 changeA = linkVector*(impulse*invMassA);
+	const float3 changeB = linkVector*(impulse*invMassB);
+
+	// The change is added to the first node and subtracted from the second
+	g_vertexVelocity[nodeA] = float4(velocityA + changeA, 0.f);
+	g_vertexVelocity[nodeB] = float4(velocityB - changeB, 0.f);
+}
+
+);
\ No newline at end of file
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/solveCollisionsAndUpdateVelocities.hlsl b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/solveCollisionsAndUpdateVelocities.hlsl
new file mode 100644
index 000000000..9d46a5969
--- /dev/null
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/solveCollisionsAndUpdateVelocities.hlsl
@@ -0,0 +1,170 @@
+MSTRINGIFY(
+
+// Per-dispatch constants read by SolveCollisionsAndUpdateVelocitiesKernel
+cbuffer SolvePositionsFromLinksKernelCB : register( b0 )
+{
+	// Number of vertices to process; threads whose DTid.x is not below this do nothing
+	unsigned int numNodes;
+	// Factor that turns a position difference into a velocity;
+	// presumably the inverse of the solver timestep - TODO confirm on the host side
+	float isolverdt;
+	// Not read by the kernel
+	int padding0;
+	int padding1;
+};
+
+// Range of entries in g_collisionObjectDetails that belong to one cloth.
+// The kernel visits every index from firstObject up to but not including endObject.
+struct CollisionObjectIndices
+{
+	int firstObject;
+	int endObject;
+};
+
+// Description of one collider as consumed by the collision kernel.
+// NOTE(review): the layout presumably has to match the structure filled in by the
+// host code - confirm before reordering or resizing any field.
+struct CollisionShapeDescription
+{
+	// Maps shape-local points to world space; its translation is read as _m03_m13_m23
+	float4x4 shapeTransform;
+	// Collider velocities; only the xyz components are read
+	float4 linearVelocity;
+	float4 angularVelocity;
+
+	// Not read by the kernel
+	int softBodyIdentifier;
+	// Compared against CAPSULE_SHAPE_PROXYTYPE; other shape types are skipped
+	int collisionShapeType;
+	
+
+	// Shape information
+	// Compressed from the union
+	float radius;
+	// The capsule end points are placed at -halfHeight and +halfHeight on the local y axis
+	float halfHeight;
+		
+	// Added to radius to give the minimum allowed distance from the capsule axis
+	float margin;
+	// Multiplied with the per-cloth friction to scale the friction force
+	float friction;
+
+	// Not read by the kernel
+	int padding0;
+	int padding1;
+	
+};
+
+// From btBroadphaseProxy.h
+static const int CAPSULE_SHAPE_PROXYTYPE = 10;
+
+// Index of the cloth that each vertex belongs to
+StructuredBuffer<int> g_vertexClothIdentifier : register( t0 );
+// Vertex positions used as the start point of the velocity computation (xyz used)
+StructuredBuffer<float4> g_vertexPreviousPositions : register( t1 );
+// Per-cloth friction and damping values, indexed by cloth identifier
+StructuredBuffer<float> g_perClothFriction : register( t2 );
+StructuredBuffer<float> g_clothDampingFactor : register( t3 );
+// Per-cloth range of colliders inside g_collisionObjectDetails
+StructuredBuffer<CollisionObjectIndices> g_perClothCollisionObjectIndices : register( t4 );
+StructuredBuffer<CollisionShapeDescription> g_collisionObjectDetails : register( t5 );
+
+// Per-vertex outputs; g_vertexPositions is also read as the current position
+RWStructuredBuffer<float4> g_vertexForces : register( u0 );
+RWStructuredBuffer<float4> g_vertexVelocities : register( u1 );
+RWStructuredBuffer<float4> g_vertexPositions : register( u2 );
+
+// Derives the velocity of every vertex from its position change and resolves
+// collisions against the capsule colliders attached to the cloth of the vertex.
+// One thread processes one vertex and writes its velocity, its friction force
+// and its (possibly corrected) position. Colliders that are not capsules are ignored.
+[numthreads(128, 1, 1)]
+void 
+SolveCollisionsAndUpdateVelocitiesKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
+{
+	int nodeID = DTid.x;
+	float3 forceOnVertex = float3(0.f, 0.f, 0.f);
+	if( DTid.x < numNodes )
+	{	
+		int clothIdentifier = g_vertexClothIdentifier[nodeID];
+		float4 position = float4(g_vertexPositions[nodeID].xyz, 1.f);
+		float4 previousPosition = float4(g_vertexPreviousPositions[nodeID].xyz, 1.f);
+		float clothFriction = g_perClothFriction[clothIdentifier];
+		float dampingFactor = g_clothDampingFactor[clothIdentifier];
+		float velocityCoefficient = (1.f - dampingFactor);		
+		CollisionObjectIndices collisionObjectIndices = g_perClothCollisionObjectIndices[clothIdentifier];
+
+		// Velocity in the absence of any collision. Computing it up front guarantees a
+		// meaningful value even when the cloth has colliders but none of them is a capsule
+		// (previously a debug placeholder value was written out in that case).
+		// It is recomputed below whenever a collision moves the position.
+		float3 velocity = (position - previousPosition).xyz * velocityCoefficient * isolverdt;
+
+		// Visit every collider assigned to this cloth; the loop is empty when there are none
+		for( int collision = collisionObjectIndices.firstObject; collision < collisionObjectIndices.endObject; ++collision )
+		{
+			CollisionShapeDescription shapeDescription = g_collisionObjectDetails[collision];
+			float colliderFriction = shapeDescription.friction;
+		
+			if( shapeDescription.collisionShapeType == CAPSULE_SHAPE_PROXYTYPE )
+			{
+				// Colliding with a capsule
+
+				float capsuleHalfHeight = shapeDescription.halfHeight;
+				float capsuleRadius = shapeDescription.radius;
+				float capsuleMargin = shapeDescription.margin;
+				float4x4 worldTransform = shapeDescription.shapeTransform;
+
+				// End points of the capsule axis in local and then world space
+				float4 c1 = float4(0.f, -capsuleHalfHeight, 0.f, 1.f); 
+				float4 c2 = float4(0.f, +capsuleHalfHeight, 0.f, 1.f);
+				float4 worldC1 = mul(worldTransform, c1);
+				float4 worldC2 = mul(worldTransform, c2);
+				float3 segment = (worldC2 - worldC1).xyz;
+
+				// compute distance of tangent to vertex along line segment in capsule
+				// (0 at worldC1 and 1 at worldC2)
+				float distanceAlongSegment = -( dot( (worldC1 - position).xyz, segment ) / dot(segment, segment) );
+
+				float4 closestPoint = (worldC1 + float4(segment * distanceAlongSegment, 0.f));
+				float distanceFromLine = length(position - closestPoint);
+				float distanceFromC1 = length(worldC1 - position);
+				float distanceFromC2 = length(worldC2 - position);
+					
+				// Final distance from collision, point to push from, direction to push in
+				// for impulse force
+				float dist;
+				float3 normalVector;
+				if( distanceAlongSegment < 0 )
+				{
+					// Beyond the first end point: measure against the end cap at worldC1
+					dist = distanceFromC1;
+					normalVector = normalize(position - worldC1).xyz;
+				} else if( distanceAlongSegment > 1.f ) {
+					// Beyond the second end point: measure against the end cap at worldC2
+					dist = distanceFromC2;
+					normalVector = normalize(position - worldC2).xyz;	
+				} else {
+					// Alongside the cylinder part of the capsule
+					dist = distanceFromLine;
+					normalVector = normalize(position - closestPoint).xyz;
+				}
+						
+				float3 colliderLinearVelocity = shapeDescription.linearVelocity.xyz;
+				float3 colliderAngularVelocity = shapeDescription.angularVelocity.xyz;
+				float3 velocityOfSurfacePoint = colliderLinearVelocity + cross(colliderAngularVelocity, position.xyz - worldTransform._m03_m13_m23);
+
+				float minDistance = capsuleRadius + capsuleMargin;
+					
+				// Check for a collision
+				if( dist < minDistance )
+				{
+					// Project back to surface along normal
+					position = position + float4((minDistance - dist)*normalVector*0.9, 0.f);
+					// The position moved so the velocity has to follow it
+					velocity = (position - previousPosition).xyz * velocityCoefficient * isolverdt;
+					float3 relativeVelocity = velocity - velocityOfSurfacePoint;
+
+					// Two directions spanning the plane perpendicular to the normal.
+					// NOTE(review): p1 is the normalised cross product of the normal and the
+					// capsule axis, which is not well defined when the two are parallel - confirm
+					// whether that case needs handling.
+					float3 p1 = normalize(cross(normalVector, segment));
+					float3 p2 = normalize(cross(p1, normalVector));
+					// Full friction is sum of velocities in each direction of plane
+					float3 frictionVector = p1*dot(relativeVelocity, p1) + p2*dot(relativeVelocity, p2);
+
+					// Real friction is peak friction corrected by friction coefficients
+					frictionVector = frictionVector * (colliderFriction*clothFriction);
+
+					float approachSpeed = dot(relativeVelocity, normalVector);
+
+					// Friction only acts while the vertex is not moving away from the surface
+					if( approachSpeed <= 0.0 )
+						forceOnVertex -= frictionVector;
+				}
+					
+			}
+		}
+
+		g_vertexVelocities[nodeID] = float4(velocity, 0.f);	
+
+		// Update external force
+		g_vertexForces[nodeID] = float4(forceOnVertex, 0.f);
+
+		g_vertexPositions[nodeID] = float4(position.xyz, 0.f);
+	}
+}
+
+);
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/solveCollisionsAndUpdateVelocitiesSIMDBatched.hlsl b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/solveCollisionsAndUpdateVelocitiesSIMDBatched.hlsl
new file mode 100644
index 000000000..0b2a0271a
--- /dev/null
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/HLSL/solveCollisionsAndUpdateVelocitiesSIMDBatched.hlsl
@@ -0,0 +1,191 @@
+MSTRINGIFY(
+
+// Per-dispatch constants for the SIMD-batched SolveCollisionsAndUpdateVelocitiesKernel
+cbuffer SolvePositionsFromLinksKernelCB : register( b0 )
+{
+	// Declared for the host side; the kernel in this file does not read it
+	unsigned int numNodes;
+	// Factor that turns a position difference into a velocity;
+	// presumably the inverse of the solver timestep - TODO confirm on the host side
+	float isolverdt;
+	// Not read by the kernel
+	int padding0;
+	int padding1;
+};
+
+// Range of entries in g_collisionObjectDetails that belong to one cloth.
+// The kernel treats endObject - firstObject as the number of colliders and loads
+// them starting at index firstObject.
+struct CollisionObjectIndices
+{
+	int firstObject;
+	int endObject;
+};
+
+// Description of one collider as consumed by the collision kernel.
+// NOTE(review): the layout presumably has to match the structure filled in by the
+// host code - confirm before reordering or resizing any field.
+struct CollisionShapeDescription
+{
+	// Maps shape-local points to world space; its translation is read as _m03_m13_m23
+	float4x4 shapeTransform;
+	// Collider velocities; only the xyz components are read
+	float4 linearVelocity;
+	float4 angularVelocity;
+
+	// Not read by the kernel
+	int softBodyIdentifier;
+	// Compared against CAPSULE_SHAPE_PROXYTYPE; other shape types are skipped
+	int collisionShapeType;
+	
+
+	// Shape information
+	// Compressed from the union
+	float radius;
+	// The capsule end points are placed at -halfHeight and +halfHeight on the local y axis
+	float halfHeight;
+		
+	// Added to radius to give the minimum allowed distance from the capsule axis
+	float margin;
+	// Multiplied with the per-cloth friction to scale the friction force
+	float friction;
+
+	// Not read by the kernel
+	int padding0;
+	int padding1;
+	
+};
+
+// From btBroadphaseProxy.h
+static const int CAPSULE_SHAPE_PROXYTYPE = 10;
+
+// Index of the cloth that each vertex belongs to
+StructuredBuffer<int> g_vertexClothIdentifier : register( t0 );
+// Vertex positions used as the start point of the velocity computation (xyz used)
+StructuredBuffer<float4> g_vertexPreviousPositions : register( t1 );
+// Per-cloth friction and damping values, indexed by cloth identifier
+StructuredBuffer<float> g_perClothFriction : register( t2 );
+StructuredBuffer<float> g_clothDampingFactor : register( t3 );
+// Per-cloth range of colliders inside g_collisionObjectDetails
+StructuredBuffer<CollisionObjectIndices> g_perClothCollisionObjectIndices : register( t4 );
+StructuredBuffer<CollisionShapeDescription> g_collisionObjectDetails : register( t5 );
+
+// Per-vertex outputs; g_vertexPositions is also read as the current position
+RWStructuredBuffer<float4> g_vertexForces : register( u0 );
+RWStructuredBuffer<float4> g_vertexVelocities : register( u1 );
+RWStructuredBuffer<float4> g_vertexPositions : register( u2 );
+
+// A buffer of local collision shapes
+// Filled once per thread group from g_collisionObjectDetails and then read by every thread.
+// TODO: Iterate to support more than 16
+groupshared CollisionShapeDescription localCollisionShapes[16];
+
+// SIMD-batched variant: derives the velocity of every vertex from its position
+// change and resolves collisions against the capsule colliders of its cloth.
+// The colliders of the cloth are first staged in group shared memory by the
+// leading threads of the group, then every thread tests its own vertex against them.
+// There is no bounds check on the vertex index; the original comment states that
+// the vertex data is padded so that a group never spans more than one soft body.
+[numthreads(128, 1, 1)]
+void 
+SolveCollisionsAndUpdateVelocitiesKernel( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )
+{
+	int nodeID = DTid.x;
+	float3 forceOnVertex = float3(0.f, 0.f, 0.f);
+
+	int clothIdentifier = g_vertexClothIdentifier[nodeID];
+	float4 position = float4(g_vertexPositions[nodeID].xyz, 1.f);
+	float4 previousPosition = float4(g_vertexPreviousPositions[nodeID].xyz, 1.f);
+	float clothFriction = g_perClothFriction[clothIdentifier];
+	float dampingFactor = g_clothDampingFactor[clothIdentifier];
+	float velocityCoefficient = (1.f - dampingFactor);		
+	CollisionObjectIndices collisionObjectIndices = g_perClothCollisionObjectIndices[clothIdentifier];
+
+	// Capacity of the groupshared staging array; keep in sync with the size of localCollisionShapes.
+	// Colliders beyond the capacity are ignored instead of indexing past the end of the array.
+	const int maxLocalCollisionShapes = 16;
+	int numObjects = min( collisionObjectIndices.endObject - collisionObjectIndices.firstObject, maxLocalCollisionShapes );
+
+	// First load all of the collision objects into LDS, one collider per thread.
+	// Nothing is loaded when the cloth has no colliders.
+	int localThreadID = GTid.x;
+	if( localThreadID < numObjects )
+	{
+		localCollisionShapes[localThreadID] = g_collisionObjectDetails[ collisionObjectIndices.firstObject + localThreadID ];
+	}
+
+	// Safe as the vertices are padded so that not more than one soft body is in a group
+	// The barrier is executed unconditionally so that every thread of the group reaches it
+	AllMemoryBarrierWithGroupSync();
+
+	// Velocity in the absence of any collision. Computing it up front guarantees a
+	// meaningful value even when the cloth has colliders but none of them is a capsule
+	// (previously the velocity was forced to zero in that case).
+	// It is recomputed below whenever a collision moves the position.
+	float3 velocity = (position - previousPosition).xyz * velocityCoefficient * isolverdt;
+
+	// Test the vertex against every staged collider; the loop is empty when there are none
+	for( int collision = 0; collision < numObjects; ++collision )
+	{
+		CollisionShapeDescription shapeDescription = localCollisionShapes[collision];
+		float colliderFriction = shapeDescription.friction;
+	
+		if( shapeDescription.collisionShapeType == CAPSULE_SHAPE_PROXYTYPE )
+		{
+			// Colliding with a capsule
+
+			float capsuleHalfHeight = shapeDescription.halfHeight;
+			float capsuleRadius = shapeDescription.radius;
+			float capsuleMargin = shapeDescription.margin;
+
+			float4x4 worldTransform = shapeDescription.shapeTransform;
+
+			// End points of the capsule axis in local and then world space
+			float4 c1 = float4(0.f, -capsuleHalfHeight, 0.f, 1.f); 
+			float4 c2 = float4(0.f, +capsuleHalfHeight, 0.f, 1.f);
+			float4 worldC1 = mul(worldTransform, c1);
+			float4 worldC2 = mul(worldTransform, c2);
+			float3 segment = (worldC2 - worldC1).xyz;
+
+			// compute distance of tangent to vertex along line segment in capsule
+			// (0 at worldC1 and 1 at worldC2)
+			float distanceAlongSegment = -( dot( (worldC1 - position).xyz, segment ) / dot(segment, segment) );
+
+			float4 closestPoint = (worldC1 + float4(segment * distanceAlongSegment, 0.f));
+			float distanceFromLine = length(position - closestPoint);
+			float distanceFromC1 = length(worldC1 - position);
+			float distanceFromC2 = length(worldC2 - position);
+				
+			// Final distance from collision, point to push from, direction to push in
+			// for impulse force
+			float dist;
+			float3 normalVector;
+			if( distanceAlongSegment < 0 )
+			{
+				// Beyond the first end point: measure against the end cap at worldC1
+				dist = distanceFromC1;
+				normalVector = normalize(position - worldC1).xyz;
+			} else if( distanceAlongSegment > 1.f ) {
+				// Beyond the second end point: measure against the end cap at worldC2
+				dist = distanceFromC2;
+				normalVector = normalize(position - worldC2).xyz;	
+			} else {
+				// Alongside the cylinder part of the capsule
+				dist = distanceFromLine;
+				normalVector = normalize(position - closestPoint).xyz;
+			}
+					
+			float3 colliderLinearVelocity = shapeDescription.linearVelocity.xyz;
+			float3 colliderAngularVelocity = shapeDescription.angularVelocity.xyz;
+			float3 velocityOfSurfacePoint = colliderLinearVelocity + cross(colliderAngularVelocity, position.xyz - worldTransform._m03_m13_m23);
+
+			float minDistance = capsuleRadius + capsuleMargin;
+				
+			// Check for a collision
+			if( dist < minDistance )
+			{
+				// Project back to surface along normal
+				position = position + float4((minDistance - dist)*normalVector*0.9, 0.f);
+				// The position moved so the velocity has to follow it
+				velocity = (position - previousPosition).xyz * velocityCoefficient * isolverdt;
+				float3 relativeVelocity = velocity - velocityOfSurfacePoint;
+
+				// Two directions spanning the plane perpendicular to the normal.
+				// NOTE(review): p1 is the normalised cross product of the normal and the
+				// capsule axis, which is not well defined when the two are parallel - confirm
+				// whether that case needs handling.
+				float3 p1 = normalize(cross(normalVector, segment));
+				float3 p2 = normalize(cross(p1, normalVector));
+				// Full friction is sum of velocities in each direction of plane
+				float3 frictionVector = p1*dot(relativeVelocity, p1) + p2*dot(relativeVelocity, p2);
+
+				// Real friction is peak friction corrected by friction coefficients
+				frictionVector = frictionVector * (colliderFriction*clothFriction);
+
+				float approachSpeed = dot(relativeVelocity, normalVector);
+
+				// Friction only acts while the vertex is not moving away from the surface
+				if( approachSpeed <= 0.0 )
+					forceOnVertex -= frictionVector;
+			}
+				
+		}
+	}
+
+	g_vertexVelocities[nodeID] = float4(velocity, 0.f);	
+
+	// Update external force
+	g_vertexForces[nodeID] = float4(forceOnVertex, 0.f);
+
+	g_vertexPositions[nodeID] = float4(position.xyz, 0.f);
+}
+
+);
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolverBuffer_DX11.h b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolverBuffer_DX11.h
new file mode 100644
index 000000000..b6a99cc1d
--- /dev/null
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolverBuffer_DX11.h
@@ -0,0 +1,323 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+#ifndef BT_SOFT_BODY_SOLVER_BUFFER_DX11_H
+#define BT_SOFT_BODY_SOLVER_BUFFER_DX11_H
+
+// DX11 support
+#include <windows.h>
+#include <crtdbg.h>
+#include <d3d11.h>
+#include <d3dx11.h>
+#include <d3dcompiler.h>
+
+#ifndef SAFE_RELEASE
+#define SAFE_RELEASE(p)      { if(p) { (p)->Release(); (p)=NULL; } }
+#endif
+
+/**
+ * DX11 Buffer that tracks a host buffer on use to ensure size-correctness.
+ *
+ * Wraps a structured ID3D11Buffer together with its shader resource view and,
+ * when the buffer is writable on the GPU, an unordered access view plus a
+ * staging buffer used to read results back. The contents mirror the
+ * btAlignedObjectArray passed to the constructor; only a pointer to that array
+ * is stored, so the array has to outlive this object.
+ */
+template <typename ElementType> class btDX11Buffer
+{
+protected:
+	ID3D11Device*				m_d3dDevice;
+	ID3D11DeviceContext*		m_d3dDeviceContext;
+
+	ID3D11Buffer*               m_Buffer;
+	ID3D11ShaderResourceView*   m_SRV;
+	ID3D11UnorderedAccessView*  m_UAV;
+	btAlignedObjectArray< ElementType >*	m_CPUBuffer;
+
+	// TODO: Separate this from the main class
+	// as read back buffers can be shared between buffers
+	ID3D11Buffer*               m_readBackBuffer;
+
+	// Number of elements the GPU resources were created for (0 before the first creation)
+	int m_gpuSize;
+	// True while the GPU copy is considered current
+	bool m_onGPU;
+
+	// When true only an SRV is created and the data is never read back
+	bool m_readOnlyOnGPU;
+
+	/** Release the buffer, its views and the staging buffer and reset the pointers to NULL. */
+	void releaseResources()
+	{
+		SAFE_RELEASE(m_Buffer);
+		SAFE_RELEASE(m_SRV);
+		SAFE_RELEASE(m_UAV);
+		SAFE_RELEASE(m_readBackBuffer);
+	}
+	
+	/**
+	 * Create the GPU buffer (unless one is supplied) and the views that go with it.
+	 * Every resource is sized for the current CPU array, with a minimum of one element.
+	 *
+	 * @param preexistingBuffer Optional buffer to adopt instead of creating a new one.
+	 * @return true on success, false as soon as any D3D11 creation call fails.
+	 */
+	bool createBuffer( ID3D11Buffer *preexistingBuffer = 0)
+	{
+		HRESULT hr = S_OK;
+
+		// At a minimum the buffer must exist, so never size anything for zero elements
+		UINT numElements = m_CPUBuffer->size();
+		if( numElements == 0 )
+			numElements = 1;
+
+		// Create all CS buffers
+		if( preexistingBuffer )
+		{
+			m_Buffer = preexistingBuffer;
+		} else {
+			D3D11_BUFFER_DESC buffer_desc;
+			ZeroMemory(&buffer_desc, sizeof(buffer_desc));		
+			buffer_desc.Usage = D3D11_USAGE_DEFAULT;
+			if( m_readOnlyOnGPU )
+				buffer_desc.BindFlags = D3D11_BIND_SHADER_RESOURCE;
+			else
+				buffer_desc.BindFlags = D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_UNORDERED_ACCESS;
+			buffer_desc.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_STRUCTURED;
+			
+			buffer_desc.ByteWidth = (UINT)(numElements * sizeof(ElementType));
+			buffer_desc.StructureByteStride = sizeof(ElementType);
+			hr = m_d3dDevice->CreateBuffer(&buffer_desc, NULL, &m_Buffer);
+			if( FAILED( hr ) )
+				return false;
+		} 
+
+		// The shader resource view is needed for read-only and read-write buffers alike
+		D3D11_SHADER_RESOURCE_VIEW_DESC srvbuffer_desc;
+		ZeroMemory(&srvbuffer_desc, sizeof(srvbuffer_desc));
+		srvbuffer_desc.Format = DXGI_FORMAT_UNKNOWN;
+		srvbuffer_desc.ViewDimension = D3D11_SRV_DIMENSION_BUFFER;
+		srvbuffer_desc.Buffer.ElementWidth = numElements;
+		hr = m_d3dDevice->CreateShaderResourceView(m_Buffer, &srvbuffer_desc, &m_SRV);
+		if( FAILED( hr ) )
+			return false;
+
+		if( !m_readOnlyOnGPU )
+		{
+			// Create UAV
+			D3D11_UNORDERED_ACCESS_VIEW_DESC uavbuffer_desc;
+			ZeroMemory(&uavbuffer_desc, sizeof(uavbuffer_desc));
+			uavbuffer_desc.Format = DXGI_FORMAT_UNKNOWN;
+			uavbuffer_desc.ViewDimension = D3D11_UAV_DIMENSION_BUFFER;
+			uavbuffer_desc.Buffer.NumElements = numElements;
+			hr = m_d3dDevice->CreateUnorderedAccessView(m_Buffer, &uavbuffer_desc, &m_UAV);
+			if( FAILED( hr ) )
+				return false;
+
+			// Create read back buffer, sized like the main buffer so that it is never empty
+			D3D11_BUFFER_DESC readback_buffer_desc;
+			ZeroMemory(&readback_buffer_desc, sizeof(readback_buffer_desc));
+
+			readback_buffer_desc.ByteWidth = (UINT)(numElements * sizeof(ElementType));
+			readback_buffer_desc.Usage = D3D11_USAGE_STAGING;
+			readback_buffer_desc.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
+			readback_buffer_desc.StructureByteStride = sizeof(ElementType);
+			hr = m_d3dDevice->CreateBuffer(&readback_buffer_desc, NULL, &m_readBackBuffer);
+			if( FAILED( hr ) )
+				return false;
+		}
+
+		m_gpuSize = m_CPUBuffer->size();
+		return true;
+	}
+
+	/** True when there is GPU-side data that may be copied back into the CPU array. */
+	bool hasReadableGPUData() const
+	{
+		return m_CPUBuffer->size() > 0 && m_onGPU && !m_readOnlyOnGPU;
+	}
+
+	/**
+	 * Copy the GPU buffer into the CPU array through the staging buffer.
+	 * Does not touch m_onGPU; the callers decide what the copy means for the state.
+	 *
+	 * @return false if the resources are missing or the staging buffer cannot be mapped.
+	 */
+	bool readBackToCPU()
+	{
+		if( !m_Buffer || !m_readBackBuffer )
+			return false;
+
+		// Never copy more elements than the GPU resources were created for,
+		// even if the CPU array has grown since the last upload
+		const int cpuSize = m_CPUBuffer->size();
+		const int numElements = ( cpuSize < m_gpuSize ) ? cpuSize : m_gpuSize;
+		if( numElements <= 0 )
+			return true;
+
+		const UINT numBytes = (UINT)(numElements * sizeof(ElementType));
+
+		D3D11_BOX sourceRegion;	
+		sourceRegion.left = 0;
+		sourceRegion.front = 0;
+		sourceRegion.top = 0;
+		sourceRegion.bottom = 1;
+		sourceRegion.back = 1;
+		sourceRegion.right = numBytes;
+
+		m_d3dDeviceContext->CopySubresourceRegion(
+			m_readBackBuffer,
+			0,
+			0,
+			0,
+			0 ,
+			m_Buffer,
+			0,
+			&sourceRegion
+			);
+
+		// A failed Map leaves pData unusable, so bail out instead of copying from it
+		D3D11_MAPPED_SUBRESOURCE MappedResource = {0}; 
+		HRESULT hr = m_d3dDeviceContext->Map(m_readBackBuffer, 0, D3D11_MAP_READ, 0, &MappedResource);   
+		if( FAILED( hr ) || !MappedResource.pData )
+			return false;
+
+		memcpy(&((*m_CPUBuffer)[0]), MappedResource.pData, numBytes);		
+		m_d3dDeviceContext->Unmap(m_readBackBuffer, 0);
+
+		return true;
+	}
+
+
+
+public:
+	/**
+	 * @param d3dDevice        Device used to create the buffer and its views.
+	 * @param d3dDeviceContext Context used for uploads and read backs.
+	 * @param CPUBuffer        Host array mirrored by this buffer; only the pointer is kept.
+	 * @param readOnly         True if shaders only read the buffer (no UAV, no read back).
+	 */
+	btDX11Buffer( ID3D11Device *d3dDevice, ID3D11DeviceContext *d3dDeviceContext, btAlignedObjectArray< ElementType > *CPUBuffer, bool readOnly )
+	{
+		m_d3dDevice = d3dDevice;
+		m_d3dDeviceContext = d3dDeviceContext;
+		m_Buffer = 0;
+		m_SRV = 0;
+		m_UAV = 0;
+		m_readBackBuffer = 0;
+
+		m_CPUBuffer = CPUBuffer;
+
+		m_gpuSize = 0;
+		m_onGPU = false;
+
+		m_readOnlyOnGPU = readOnly;
+	}
+
+	virtual ~btDX11Buffer()
+	{
+		releaseResources();
+	}
+
+	/** Reference to the SRV pointer; NULL until the buffer has been created. */
+	ID3D11ShaderResourceView* &getSRV()
+	{
+		return m_SRV;
+	}
+
+	/** Reference to the UAV pointer; stays NULL for read-only buffers. */
+	ID3D11UnorderedAccessView* &getUAV()
+	{
+		return m_UAV;
+	}
+
+	/** Reference to the buffer pointer; NULL until the buffer has been created. */
+	ID3D11Buffer* &getBuffer()
+	{
+		return m_Buffer;
+	}
+
+	/**
+	 * Move the data to the GPU if it is not there already.
+	 * The GPU resources are recreated whenever the CPU array size differs from the
+	 * size they were created for. An empty CPU array is left alone.
+	 *
+	 * @return false only if the GPU resources could not be created.
+	 */
+	bool moveToGPU()
+	{
+		const int cpuSize = m_CPUBuffer->size();
+
+		// Reallocate if GPU size is too small
+		if( cpuSize > m_gpuSize )
+			m_onGPU = false;
+
+		if( m_onGPU || cpuSize <= 0 )
+			return true;
+
+		// If the buffer doesn't exist or the CPU-side buffer has changed size,
+		// drop the old resources and create new ones of the right size
+		if( !m_Buffer || (cpuSize != m_gpuSize) )
+		{
+			releaseResources();
+			if( !createBuffer() )
+			{
+				// A bare string literal is always true and would never trigger the assert
+				btAssert( 0 && "Buffer creation failed." );
+				return false;
+			}
+		}
+
+		if( m_gpuSize > 0 )
+		{
+			D3D11_BOX destRegion;
+			destRegion.left = 0;
+			destRegion.front = 0;
+			destRegion.top = 0;
+			destRegion.bottom = 1;
+			destRegion.back = 1;
+			destRegion.right = (UINT)(cpuSize*sizeof(ElementType));
+			m_d3dDeviceContext->UpdateSubresource(m_Buffer, 0, &destRegion, &((*m_CPUBuffer)[0]), 0, 0);
+
+			m_onGPU = true;
+		}
+
+		return true;
+	}
+
+	/**
+	 * Move the data back from the GPU if it is on there and isn't read only.
+	 * After a successful copy the CPU array is considered the current version.
+	 *
+	 * @return false if the data could not be read back; the state is then left unchanged.
+	 */
+	bool moveFromGPU()
+	{
+		if( !hasReadableGPUData() )
+			return true;
+
+		if( !readBackToCPU() )
+			return false;
+
+		m_onGPU = false;
+		return true;
+	}
+
+
+	/**
+	 * Copy the data back from the GPU without changing its state to be CPU-side.
+	 * Useful if we just want to view it on the host for visualization.
+	 *
+	 * @return false if the data could not be read back.
+	 */
+	bool copyFromGPU()
+	{
+		if( !hasReadableGPUData() )
+			return true;
+
+		return readBackToCPU();
+	}
+
+	/**
+	 * Call if data has changed on the CPU.
+	 * Can then trigger a move to the GPU as necessary.
+	 */
+	virtual void changedOnCPU()
+	{
+		m_onGPU = false;
+	}
+}; // class btDX11Buffer
+
+
+
+#endif // #ifndef BT_SOFT_BODY_SOLVER_BUFFER_DX11_H
\ No newline at end of file
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolverLinkData_DX11.h b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolverLinkData_DX11.h
new file mode 100644
index 000000000..454c3c8cc
--- /dev/null
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolverLinkData_DX11.h
@@ -0,0 +1,103 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+#include "BulletMultiThreaded/GpuSoftBodySolvers/Shared/btSoftBodySolverData.h"
+#include "btSoftBodySolverBuffer_DX11.h"
+
+
+#ifndef BT_SOFT_BODY_SOLVER_LINK_DATA_DX11_H
+#define BT_SOFT_BODY_SOLVER_LINK_DATA_DX11_H
+
+struct ID3D11Device;
+struct ID3D11DeviceContext;
+
+
+/**
+ * DX11 flavour of the soft body link data.
+ * Extends btSoftBodyLinkData with one btDX11Buffer per link attribute and with
+ * the batching tables used to split the link set into computation batches.
+ * NOTE(review): which host array each buffer mirrors is decided in the constructor,
+ * which is not part of this header - confirm there.
+ */
+class btSoftBodyLinkDataDX11 : public btSoftBodyLinkData
+{
+public:
+	// Presumably tracks whether the link data currently lives on the GPU - TODO confirm in the implementation
+	bool				m_onGPU;
+	// Device and context handed to the constructor
+	ID3D11Device		*m_d3dDevice;
+	ID3D11DeviceContext *m_d3dDeviceContext;
+
+
+	// GPU buffers for the individual per-link attributes
+	btDX11Buffer<LinkNodePair>				m_dx11Links;
+	btDX11Buffer<float>											m_dx11LinkStrength;
+	btDX11Buffer<float>											m_dx11LinksMassLSC;
+	btDX11Buffer<float>											m_dx11LinksRestLengthSquared;
+	btDX11Buffer<Vectormath::Aos::Vector3>						m_dx11LinksCLength;
+	btDX11Buffer<float>											m_dx11LinksLengthRatio;
+	btDX11Buffer<float>											m_dx11LinksRestLength;
+	btDX11Buffer<float>											m_dx11LinksMaterialLinearStiffnessCoefficient;
+
+	/** Start index and length of one batch; both default to zero. */
+	struct BatchPair
+	{
+		int start;
+		int length;
+
+		BatchPair() :
+			start(0),
+			length(0)
+		{
+		}
+
+		BatchPair( int s, int l ) : 
+			start( s ),
+			length( l )
+		{
+		}
+	};
+
+	/**
+	 * Link addressing information for each cloth.
+	 * Allows link locations to be computed independently of data batching.
+	 */
+	btAlignedObjectArray< int >							m_linkAddresses;
+
+	/**
+	 * Start and length values for computation batches over link data.
+	 */
+	btAlignedObjectArray< BatchPair >		m_batchStartLengths;
+
+
+	//ID3D11Buffer*               readBackBuffer;
+	
+	/** Construct the link data for the given device and device context. */
+	btSoftBodyLinkDataDX11( ID3D11Device *d3dDevice, ID3D11DeviceContext *d3dDeviceContext );
+
+	virtual ~btSoftBodyLinkDataDX11();
+
+	/** Allocate enough space in all link-related arrays to fit numLinks links */
+	virtual void createLinks( int numLinks );
+	
+	/** Insert the link described into the correct data structures assuming space has already been allocated by a call to createLinks */
+	virtual void setLinkAt( const LinkDescription &link, int linkIndex );
+
+	/** Presumably reports whether the data is currently on the GPU - TODO confirm in the implementation */
+	virtual bool onAccelerator();
+
+	/** Presumably uploads the link data to the GPU - TODO confirm in the implementation */
+	virtual bool moveToAccelerator();
+
+	/** Presumably reads the link data back from the GPU - TODO confirm in the implementation */
+	virtual bool moveFromAccelerator();
+
+	/**
+	 * Generate (and later update) the batching for the entire link set.
+	 * This redoes a lot of work because it batches the entire set when each cloth is inserted.
+	 * In theory we could delay it until just before we need the cloth.
+	 * It's a one-off overhead, though, so that is a later optimisation.
+	 */
+	void generateBatches();
+};
+
+
+#endif // #ifndef BT_SOFT_BODY_SOLVER_LINK_DATA_DX11_H
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolverLinkData_DX11SIMDAware.h b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolverLinkData_DX11SIMDAware.h
new file mode 100644
index 000000000..6eb26c68e
--- /dev/null
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolverLinkData_DX11SIMDAware.h
@@ -0,0 +1,173 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "BulletMultiThreaded/GpuSoftBodySolvers/Shared/btSoftBodySolverData.h"
+#include "btSoftBodySolverBuffer_DX11.h"
+
+#ifndef BT_ACCELERATED_SOFT_BODY_LINK_DATA_DX11_SIMDAWARE_H
+#define BT_ACCELERATED_SOFT_BODY_LINK_DATA_DX11_SIMDAWARE_H
+
+struct ID3D11Device;
+struct ID3D11DeviceContext;
+
+
+/**
+ * DX11 link data laid out for wavefront-aware (SIMD batched) solving.
+ * Extends btSoftBodyLinkData with per-wavefront bookkeeping, one btDX11Buffer
+ * per link attribute and read-only accessors for the batching results.
+ * NOTE(review): the wavefront constants and the buffer-to-array bindings are set
+ * in the constructor, which is not part of this header - confirm there.
+ */
+class btSoftBodyLinkDataDX11SIMDAware : public btSoftBodyLinkData
+{
+public:
+	// Presumably tracks whether the link data currently lives on the GPU - TODO confirm in the implementation
+	bool				m_onGPU;
+	// Device and context handed to the constructor
+	ID3D11Device		*m_d3dDevice;
+	ID3D11DeviceContext *m_d3dDeviceContext;
+
+	// Fixed at construction time
+	const int m_wavefrontSize;
+	const int m_linksPerWorkItem;
+	const int m_maxLinksPerWavefront;
+	// Batching results exposed through the getters below
+	int m_maxBatchesWithinWave;
+	int m_maxVerticesWithinWave;
+	int m_numWavefronts;
+
+	int m_maxVertex;
+
+	/** Number of batches and number of vertices recorded for one wavefront. */
+	struct NumBatchesVerticesPair
+	{
+		int numBatches;
+		int numVertices;
+	};
+
+	// Array storing number of links in each wavefront
+	btAlignedObjectArray<int>									m_linksPerWavefront;
+	btAlignedObjectArray<NumBatchesVerticesPair>				m_numBatchesAndVerticesWithinWaves;
+	btDX11Buffer< NumBatchesVerticesPair >						m_dx11NumBatchesAndVerticesWithinWaves;
+
+	// All arrays here will contain batches of m_maxLinksPerWavefront links
+	// ordered by wavefront.
+	// with either global vertex pairs or local vertex pairs
+	btAlignedObjectArray< int >									m_wavefrontVerticesGlobalAddresses; // List of global vertices per wavefront
+	btDX11Buffer<int>											m_dx11WavefrontVerticesGlobalAddresses;
+	btAlignedObjectArray< LinkNodePair >						m_linkVerticesLocalAddresses; // Vertex pair for the link
+	btDX11Buffer<LinkNodePair>									m_dx11LinkVerticesLocalAddresses;
+	btDX11Buffer<float>											m_dx11LinkStrength;
+	btDX11Buffer<float>											m_dx11LinksMassLSC;
+	btDX11Buffer<float>											m_dx11LinksRestLengthSquared;
+	btDX11Buffer<float>											m_dx11LinksRestLength;
+	btDX11Buffer<float>											m_dx11LinksMaterialLinearStiffnessCoefficient;
+
+	/** Start index and length of one batch; both default to zero. */
+	struct BatchPair
+	{
+		int start;
+		int length;
+
+		BatchPair() :
+			start(0),
+			length(0)
+		{
+		}
+
+		BatchPair( int s, int l ) : 
+			start( s ),
+			length( l )
+		{
+		}
+	};
+
+	/**
+	 * Link addressing information for each cloth.
+	 * Allows link locations to be computed independently of data batching.
+	 */
+	btAlignedObjectArray< int >							m_linkAddresses;
+
+	/**
+	 * Start and length values for computation batches over link data.
+	 */
+	btAlignedObjectArray< BatchPair >		m_wavefrontBatchStartLengths;
+
+
+	//ID3D11Buffer*               readBackBuffer;
+	
+	/** Construct the link data for the given device and device context. */
+	btSoftBodyLinkDataDX11SIMDAware( ID3D11Device *d3dDevice, ID3D11DeviceContext *d3dDeviceContext );
+
+	virtual ~btSoftBodyLinkDataDX11SIMDAware();
+
+	/** Allocate enough space in all link-related arrays to fit numLinks links */
+	virtual void createLinks( int numLinks );
+	
+	/** Insert the link described into the correct data structures assuming space has already been allocated by a call to createLinks */
+	virtual void setLinkAt( const LinkDescription &link, int linkIndex );
+
+	/** Presumably reports whether the data is currently on the GPU - TODO confirm in the implementation */
+	virtual bool onAccelerator();
+
+	/** Presumably uploads the link data to the GPU - TODO confirm in the implementation */
+	virtual bool moveToAccelerator();
+
+	/** Presumably reads the link data back from the GPU - TODO confirm in the implementation */
+	virtual bool moveFromAccelerator();
+
+	/**
+	 * Generate (and later update) the batching for the entire link set.
+	 * This redoes a lot of work because it batches the entire set when each cloth is inserted.
+	 * In theory we could delay it until just before we need the cloth.
+	 * It's a one-off overhead, though, so that is a later optimisation.
+	 */
+	void generateBatches();
+
+	// The accessors below only read state, so they are const-qualified and can be
+	// used through const references as well.
+
+	/** Value of m_maxVerticesWithinWave. */
+	int getMaxVerticesPerWavefront() const
+	{
+		return m_maxVerticesWithinWave;
+	}
+
+	/** Value of m_wavefrontSize. */
+	int getWavefrontSize() const
+	{
+		return m_wavefrontSize;
+	}
+
+	/** Value of m_linksPerWorkItem. */
+	int getLinksPerWorkItem() const
+	{
+		return m_linksPerWorkItem;
+	}
+
+	/** Value of m_maxLinksPerWavefront. */
+	int getMaxLinksPerWavefront() const
+	{
+		return m_maxLinksPerWavefront;
+	}
+
+	/** Value of m_maxBatchesWithinWave. */
+	int getMaxBatchesPerWavefront() const
+	{
+		return m_maxBatchesWithinWave;
+	}
+
+	/** Value of m_numWavefronts. */
+	int getNumWavefronts() const
+	{
+		return m_numWavefronts;
+	}
+
+	/** Copy of the batch and vertex counts stored for the given wavefront index. */
+	NumBatchesVerticesPair getNumBatchesAndVerticesWithinWavefront( int wavefront ) const
+	{
+		return m_numBatchesAndVerticesWithinWaves[wavefront];
+	}
+
+	/** Entry of m_wavefrontVerticesGlobalAddresses at the given index. */
+	int getVertexGlobalAddresses( int vertexIndex ) const
+	{
+		return m_wavefrontVerticesGlobalAddresses[vertexIndex];
+	}
+
+	/**
+	 * Get post-batching local addresses of the vertex pair for a link assuming all vertices used by a wavefront are loaded locally.
+	 */
+	LinkNodePair getVertexPairLocalAddresses( int linkIndex ) const
+	{
+		return m_linkVerticesLocalAddresses[linkIndex];
+	}
+
+};
+
+
+#endif // #ifndef BT_ACCELERATED_SOFT_BODY_LINK_DATA_DX11_SIMDAWARE_H
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolverTriangleData_DX11.h b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolverTriangleData_DX11.h
new file mode 100644
index 000000000..7012fabd4
--- /dev/null
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolverTriangleData_DX11.h
@@ -0,0 +1,96 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "BulletMultiThreaded/GpuSoftBodySolvers/Shared/btSoftBodySolverData.h"
+#include "btSoftBodySolverBuffer_DX11.h"
+
+
+#ifndef BT_SOFT_BODY_SOLVER_TRIANGLE_DATA_DX11_H
+#define BT_SOFT_BODY_SOLVER_TRIANGLE_DATA_DX11_H
+
+struct ID3D11Device;
+struct ID3D11DeviceContext;
+
+/** DX11 variant of btSoftBodyTriangleData: adds DX11 buffer wrappers for the triangle arrays plus the bookkeeping used for batching. */
+class btSoftBodyTriangleDataDX11 : public btSoftBodyTriangleData
+{
+public:
+	bool				m_onGPU;
+	ID3D11Device		*m_d3dDevice;
+	ID3D11DeviceContext *m_d3dDeviceContext;
+
+	btDX11Buffer<btSoftBodyTriangleData::TriangleNodeSet>							m_dx11VertexIndices;
+	btDX11Buffer<float>									m_dx11Area;
+	btDX11Buffer<Vectormath::Aos::Vector3>				m_dx11Normal;
+
+	/** Start offset and element count of one batch within the batch-sorted triangle arrays. */
+	struct BatchPair
+	{
+		int start;
+		int length;
+
+		BatchPair() :
+			start(0),
+			length(0)
+		{
+		}
+
+		BatchPair( int s, int l ) : 
+			start( s ),
+			length( l )
+		{
+		}
+	};
+
+	/**
+	 * Triangle addressing information.
+	 * Entry i is the current storage location of the triangle inserted at index i, so triangles can still be found after generateBatches() reorders the data.
+	 */
+	btAlignedObjectArray< int >							m_triangleAddresses;
+
+	/**
+	 * Start and length values for computation batches over triangle data.
+	 */
+	btAlignedObjectArray< BatchPair >		m_batchStartLengths;
+
+public:
+	btSoftBodyTriangleDataDX11( ID3D11Device *d3dDevice, ID3D11DeviceContext *d3dDeviceContext );
+
+	virtual ~btSoftBodyTriangleDataDX11();
+
+	/** Create numTriangles additional triangles and grow m_triangleAddresses by the same amount. */
+	virtual void createTriangles( int numTriangles );
+
+	/** Store the described triangle at triangleIndex, assuming space has already been allocated by a call to createTriangles. */
+	virtual void setTriangleAt( const btSoftBodyTriangleData::TriangleDescription &triangle, int triangleIndex );
+
+	/** Return the m_onGPU flag. */
+	virtual bool onAccelerator();
+	/** Call moveToGPU() on each of the three DX11 buffers; returns false as soon as one of the calls fails. */
+	virtual bool moveToAccelerator();
+	/** Call moveFromGPU() on each of the three DX11 buffers; returns false as soon as one of the calls fails. */
+	virtual bool moveFromAccelerator();
+	/**
+	 * Generate (and later update) the batching for the entire triangle set.
+	 * This redoes a lot of work because it batches the entire set when each cloth is inserted.
+	 * In theory we could delay it until just before we need the cloth.
+	 * It's a one-off overhead, though, so that is a later optimisation.
+	 */
+	void generateBatches();
+};
+
+
+
+#endif // #ifndef BT_SOFT_BODY_SOLVER_TRIANGLE_DATA_DX11_H
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolverVertexBuffer_DX11.h b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolverVertexBuffer_DX11.h
new file mode 100644
index 000000000..66bd90fa7
--- /dev/null
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolverVertexBuffer_DX11.h
@@ -0,0 +1,107 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_SOFT_BODY_SOLVER_VERTEX_BUFFER_DX11_H
+#define BT_SOFT_BODY_SOLVER_VERTEX_BUFFER_DX11_H 
+
+
+#include "BulletSoftBody/btSoftBodySolverVertexBuffer.h"
+
+#include <windows.h>
+#include <crtdbg.h>
+#include <d3d11.h>
+#include <d3dx11.h>
+#include <d3dcompiler.h>
+
+/** Describes a DX11 buffer that receives vertex data: stores the device context, the buffer and its UAV next to the layout fields of btVertexBufferDescriptor. */
+class btDX11VertexBufferDescriptor : public btVertexBufferDescriptor
+{
+protected:
+	/** DX11 device context that goes with the vertex buffer. */
+	ID3D11DeviceContext* m_context;
+	/** The DX11 vertex buffer. */
+	ID3D11Buffer* m_vertexBuffer;
+	/** UAV of the DX11 buffer. */
+	ID3D11UnorderedAccessView*  m_vertexBufferUAV;
+
+public:
+	/**
+	 * Positions-only descriptor.
+	 * buffer is a pointer to the DX11 buffer to place the vertex data in.
+	 * UAV is a pointer to the UAV representation of the buffer laid out in floats.
+	 * vertexOffset is the offset in floats to the first vertex.
+	 * vertexStride is the stride in floats between vertices.
+	 */
+	btDX11VertexBufferDescriptor( ID3D11DeviceContext* context, ID3D11Buffer* buffer, ID3D11UnorderedAccessView *UAV, int vertexOffset, int vertexStride ) :
+		m_context( context ),
+		m_vertexBuffer( buffer ),
+		m_vertexBufferUAV( UAV )
+	{
+		m_vertexOffset = vertexOffset;
+		m_vertexStride = vertexStride;
+		m_hasVertexPositions = true;
+	}
+
+	/**
+	 * Positions-and-normals descriptor.
+	 * buffer, UAV, vertexOffset and vertexStride have the same meaning as in the positions-only constructor.
+	 * normalOffset is the offset in floats to the first normal.
+	 * normalStride is the stride in floats between normals.
+	 */
+	btDX11VertexBufferDescriptor( ID3D11DeviceContext* context, ID3D11Buffer* buffer, ID3D11UnorderedAccessView *UAV, int vertexOffset, int vertexStride, int normalOffset, int normalStride ) :
+		m_context( context ),
+		m_vertexBuffer( buffer ),
+		m_vertexBufferUAV( UAV )
+	{
+		m_vertexOffset = vertexOffset;
+		m_vertexStride = vertexStride;
+		m_hasVertexPositions = true;
+
+		m_normalOffset = normalOffset;
+		m_normalStride = normalStride;
+		m_hasNormals = true;
+	}
+
+	/** Empty destructor: the DX11 pointers held here are not released by this class. */
+	virtual ~btDX11VertexBufferDescriptor()
+	{
+	}
+
+	/** Report the descriptor type, DX11_BUFFER. */
+	virtual BufferTypes getBufferType() const
+	{
+		return DX11_BUFFER;
+	}
+
+	/** Device context supplied at construction. */
+	virtual ID3D11DeviceContext* getContext() const
+	{
+		return m_context;
+	}
+
+	/** DX11 buffer supplied at construction. */
+	virtual ID3D11Buffer* getbtDX11Buffer() const
+	{
+		return m_vertexBuffer;
+	}
+
+	/** UAV supplied at construction. */
+	virtual ID3D11UnorderedAccessView* getDX11UAV() const
+	{
+		return m_vertexBufferUAV;
+	}
+};
+
+#endif // #ifndef BT_SOFT_BODY_SOLVER_VERTEX_BUFFER_DX11_H
\ No newline at end of file
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolverVertexData_DX11.h b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolverVertexData_DX11.h
new file mode 100644
index 000000000..dd7cc84ce
--- /dev/null
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolverVertexData_DX11.h
@@ -0,0 +1,63 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+#include "BulletMultiThreaded/GpuSoftBodySolvers/Shared/btSoftBodySolverData.h"
+#include "btSoftBodySolverBuffer_DX11.h"
+
+
+#ifndef BT_SOFT_BHODY_SOLVER_VERTEX_DATA_DX11_H
+#define BT_SOFT_BHODY_SOLVER_VERTEX_DATA_DX11_H
+
+// Forward declarations. The former "class btSoftBodyLinkData::LinkDescription;" was dropped: a nested class cannot be
+// declared through a qualified name on its own (ill-formed in standard C++), and nothing in this header refers to it.
+class btSoftBodyLinkData;
+struct ID3D11Device;
+struct ID3D11DeviceContext;
+
+/** DX11 variant of btSoftBodyVertexData: adds one DX11 buffer wrapper per vertex array and an m_onGPU flag. */
+class btSoftBodyVertexDataDX11 : public btSoftBodyVertexData
+{
+protected:
+	bool				m_onGPU;
+	ID3D11Device		*m_d3dDevice;
+	ID3D11DeviceContext *m_d3dDeviceContext;
+
+public:
+	btDX11Buffer<int>										m_dx11ClothIdentifier;
+	btDX11Buffer<Vectormath::Aos::Point3>					m_dx11VertexPosition;
+	btDX11Buffer<Vectormath::Aos::Point3>					m_dx11VertexPreviousPosition;
+	btDX11Buffer<Vectormath::Aos::Vector3>				m_dx11VertexVelocity;
+	btDX11Buffer<Vectormath::Aos::Vector3>				m_dx11VertexForceAccumulator;
+	btDX11Buffer<Vectormath::Aos::Vector3>				m_dx11VertexNormal;
+	btDX11Buffer<float>									m_dx11VertexInverseMass;
+	btDX11Buffer<float>									m_dx11VertexArea;
+	btDX11Buffer<int>										m_dx11VertexTriangleCount;
+
+public:
+	btSoftBodyVertexDataDX11( ID3D11Device *d3dDevice, ID3D11DeviceContext *d3dDeviceContext );
+	virtual ~btSoftBodyVertexDataDX11();
+
+	virtual bool onAccelerator();
+	virtual bool moveToAccelerator();
+
+	/** bCopy == false: moveFromGPU() is called on every buffer. bCopy == true: copyFromGPU() is called instead, on the position
+	 *  and normal buffers only when bCopyMinimum is true, otherwise on every buffer. Returns false as soon as one call fails. */
+	virtual bool moveFromAccelerator(bool bCopy = false, bool bCopyMinimum = true);
+};
+
+
+#endif // #ifndef BT_SOFT_BHODY_SOLVER_VERTEX_DATA_DX11_H
+
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolver_DX11.cpp b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolver_DX11.cpp
new file mode 100644
index 000000000..357c4089e
--- /dev/null
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolver_DX11.cpp
@@ -0,0 +1,2236 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "BulletCollision/CollisionShapes/btTriangleIndexVertexArray.h"
+#include "vectormath/vmInclude.h"
+
+#include "btSoftBodySolver_DX11.h"
+#include "btSoftBodySolverVertexBuffer_DX11.h"
+#include "BulletSoftBody/btSoftBody.h"
+#include "BulletCollision/CollisionShapes/btCapsuleShape.h"
+#include <stdio.h> //printf
+#define MSTRINGIFY(A) #A
+static char* PrepareLinksHLSLString = 	// no initialiser or ';' on this line: the file included next has to supply them (same for every declaration below)
+#include "HLSL/PrepareLinks.hlsl"
+static char* UpdatePositionsFromVelocitiesHLSLString = 	// presumably each .hlsl file wraps its source in MSTRINGIFY(...) - TODO confirm
+#include "HLSL/UpdatePositionsFromVelocities.hlsl"
+static char* SolvePositionsHLSLString = 
+#include "HLSL/SolvePositions.hlsl"
+static char* UpdateNodesHLSLString = 
+#include "HLSL/UpdateNodes.hlsl"
+static char* UpdatePositionsHLSLString = 
+#include "HLSL/UpdatePositions.hlsl"
+static char* UpdateConstantsHLSLString = 
+#include "HLSL/UpdateConstants.hlsl"
+static char* IntegrateHLSLString = 
+#include "HLSL/Integrate.hlsl"
+static char* ApplyForcesHLSLString = 
+#include "HLSL/ApplyForces.hlsl"
+static char* UpdateNormalsHLSLString = 
+#include "HLSL/UpdateNormals.hlsl"
+static char* OutputToVertexArrayHLSLString = 
+#include "HLSL/OutputToVertexArray.hlsl"
+static char* VSolveLinksHLSLString = 
+#include "HLSL/VSolveLinks.hlsl"
+static char* ComputeBoundsHLSLString = 
+#include "HLSL/ComputeBounds.hlsl"
+static char* SolveCollisionsAndUpdateVelocitiesHLSLString =
+#include "HLSL/SolveCollisionsAndUpdateVelocities.hlsl"
+#include "BulletCollision/CollisionDispatch/btCollisionObjectWrapper.h"
+
+// Wraps each link array in a DX11 buffer and records the device/context. m_onGPU now starts out false, so onAccelerator() never returns an unset flag.
+btSoftBodyLinkDataDX11::btSoftBodyLinkDataDX11( ID3D11Device *d3dDevice, ID3D11DeviceContext *d3dDeviceContext ) : 
+		m_dx11Links( d3dDevice, d3dDeviceContext, &m_links, false ), m_dx11LinkStrength( d3dDevice, d3dDeviceContext, &m_linkStrength, false ),
+		m_dx11LinksMassLSC( d3dDevice, d3dDeviceContext, &m_linksMassLSC, false ),
+		m_dx11LinksRestLengthSquared( d3dDevice, d3dDeviceContext, &m_linksRestLengthSquared, false ),
+		m_dx11LinksCLength( d3dDevice, d3dDeviceContext, &m_linksCLength, false ), m_dx11LinksLengthRatio( d3dDevice, d3dDeviceContext, &m_linksLengthRatio, false ),
+		m_dx11LinksRestLength( d3dDevice, d3dDeviceContext, &m_linksRestLength, false ),
+		m_dx11LinksMaterialLinearStiffnessCoefficient( d3dDevice, d3dDeviceContext, &m_linksMaterialLinearStiffnessCoefficient, false )
+{
+	m_onGPU = false;	// nothing has been moved to the accelerator yet
+	m_d3dDevice = d3dDevice;
+	m_d3dDeviceContext = d3dDeviceContext;
+}
+
+btSoftBodyLinkDataDX11::~btSoftBodyLinkDataDX11()
+{	// empty: this destructor performs no explicit cleanup
+}
+
+// Convert a btVector3 to a Vectormath Vector3 by copying its x, y and z components.
+static Vectormath::Aos::Vector3 toVector3( const btVector3 &vec )
+{
+	return Vectormath::Aos::Vector3( vec.getX(), vec.getY(), vec.getZ() );
+}
+
+// Asks the base class to create numLinks links, then sizes m_linkAddresses to the previous link count plus numLinks.
+void btSoftBodyLinkDataDX11::createLinks( int numLinks )
+{
+	// The target size is taken from m_links before the base class is called
+	const int grownSize = m_links.size() + numLinks;
+
+	btSoftBodyLinkData::createLinks( numLinks );
+
+	m_linkAddresses.resize( grownSize );
+}
+
+// Stores the link through the base class, then records that link linkIndex currently lives at location linkIndex.
+void btSoftBodyLinkDataDX11::setLinkAt( const btSoftBodyLinkData::LinkDescription &link, int linkIndex )
+{
+	btSoftBodyLinkData::setLinkAt( link, linkIndex );
+	// Identity mapping for initialisation; generateBatches() rewrites these entries when it reorders the links
+	m_linkAddresses[linkIndex] = linkIndex;
+}
+
+bool btSoftBodyLinkDataDX11::onAccelerator()
+{
+	return m_onGPU;	// flag maintained by moveToAccelerator() / moveFromAccelerator()
+}
+
+// Moves every link buffer to the GPU and sets m_onGPU when all of them succeed.
+bool btSoftBodyLinkDataDX11::moveToAccelerator()
+{
+	// && short-circuits: once one moveToGPU() fails, the remaining buffers are not touched
+	const bool allMoved =
+		m_dx11Links.moveToGPU() &&
+		m_dx11LinkStrength.moveToGPU() &&
+		m_dx11LinksMassLSC.moveToGPU() &&
+		m_dx11LinksRestLengthSquared.moveToGPU() &&
+		m_dx11LinksCLength.moveToGPU() &&
+		m_dx11LinksLengthRatio.moveToGPU() &&
+		m_dx11LinksRestLength.moveToGPU() &&
+		m_dx11LinksMaterialLinearStiffnessCoefficient.moveToGPU();
+	if( allMoved )
+		m_onGPU = true;
+	return allMoved;
+}
+
+// Moves every link buffer back from the GPU and clears m_onGPU when all of them succeed.
+bool btSoftBodyLinkDataDX11::moveFromAccelerator()
+{
+	// && short-circuits: once one moveFromGPU() fails, the remaining buffers are not touched
+	const bool allMoved =
+		m_dx11Links.moveFromGPU() &&
+		m_dx11LinkStrength.moveFromGPU() &&
+		m_dx11LinksMassLSC.moveFromGPU() &&
+		m_dx11LinksRestLengthSquared.moveFromGPU() &&
+		m_dx11LinksCLength.moveFromGPU() &&
+		m_dx11LinksLengthRatio.moveFromGPU() &&
+		m_dx11LinksRestLength.moveFromGPU() &&
+		m_dx11LinksMaterialLinearStiffnessCoefficient.moveFromGPU();
+	if( allMoved )
+		m_onGPU = false;
+	return allMoved;
+}
+
+/**
+ * Colour the links so that no two links sharing a vertex get the same colour, then reorder the per-link
+ * arrays so that each colour forms one contiguous batch described by m_batchStartLengths.
+ * m_linkAddresses[i] is updated to the new location of the link originally inserted at index i.
+ */
+void btSoftBodyLinkDataDX11::generateBatches()
+{
+	int numLinks = getNumLinks();
+
+	// batchValues[i] will hold the colour (batch number) chosen for link i
+	btAlignedObjectArray< int > batchValues;
+	batchValues.resize( numLinks, 0 );
+
+	// Find the maximum vertex value internally for now
+	int maxVertex = 0;
+	for( int linkIndex = 0; linkIndex < numLinks; ++linkIndex )
+	{
+		int vertex0 = getVertexPair(linkIndex).vertex0;
+		int vertex1 = getVertexPair(linkIndex).vertex1;
+		if( vertex0 > maxVertex )
+			maxVertex = vertex0;
+		if( vertex1 > maxVertex )
+			maxVertex = vertex1;
+	}
+	int numVertices = maxVertex + 1;
+
+	// Set of lists, one for each node, specifying which colours are connected
+	// to that node.
+	// No two edges into a node can share a colour.
+	btAlignedObjectArray< btAlignedObjectArray< int > > vertexConnectedColourLists;
+	vertexConnectedColourLists.resize(numVertices);
+
+	// Simple algorithm that chooses the lowest batch number
+	// that none of the links attached to either of the connected
+	// nodes is in
+	for( int linkIndex = 0; linkIndex < numLinks; ++linkIndex )
+	{
+		int linkLocation = m_linkAddresses[linkIndex];
+
+		int vertex0 = getVertexPair(linkLocation).vertex0;
+		int vertex1 = getVertexPair(linkLocation).vertex1;
+
+		// Get the two node colour lists
+		btAlignedObjectArray< int > &colourListVertex0( vertexConnectedColourLists[vertex0] );
+		btAlignedObjectArray< int > &colourListVertex1( vertexConnectedColourLists[vertex1] );
+
+		// Choose the minimum colour that is in neither list
+		int colour = 0;
+		while( colourListVertex0.findLinearSearch(colour) != colourListVertex0.size() || colourListVertex1.findLinearSearch(colour) != colourListVertex1.size()  )
+			++colour;
+		// colour is now the minimum colour in neither list.
+		// Add it to both lists so that future edges don't share it,
+		// and store the colour against this edge
+		colourListVertex0.push_back(colour);
+		colourListVertex1.push_back(colour);
+		batchValues[linkIndex] = colour;
+	}
+
+	// Count the links per colour. A colour is only chosen once every lower colour is already in use, so a new colour always lands at index == colour.
+	btAlignedObjectArray< int > batchCounts;
+	for( int i = 0; i < numLinks; ++i )
+	{
+		int batch = batchValues[i];
+		if( batch >= batchCounts.size() )
+			batchCounts.push_back(1);
+		else
+			++(batchCounts[batch]);
+	}
+
+	m_batchStartLengths.resize(batchCounts.size());
+	if( m_batchStartLengths.size() > 0 )
+	{
+		m_batchStartLengths[0] = BatchPair( 0, 0 );
+
+		int sum = 0;
+		for( int batchIndex = 0; batchIndex < batchCounts.size(); ++batchIndex )
+		{
+			m_batchStartLengths[batchIndex].start = sum;
+			m_batchStartLengths[batchIndex].length = batchCounts[batchIndex];
+			sum += batchCounts[batchIndex];
+		}
+	}
+
+	/////////////////////////////
+	// Sort data based on batches
+
+	// Create source arrays by copying originals.
+	// m_linksCLength is not reordered below, so no backup copy of it is made (the previous copy was never read).
+	// NOTE(review): presumably m_linksCLength is recomputed before it is next used - confirm.
+	btAlignedObjectArray<btSoftBodyLinkData::LinkNodePair>				m_links_Backup(m_links);
+	btAlignedObjectArray<float>											m_linkStrength_Backup(m_linkStrength);
+	btAlignedObjectArray<float>											m_linksMassLSC_Backup(m_linksMassLSC);
+	btAlignedObjectArray<float>											m_linksRestLengthSquared_Backup(m_linksRestLengthSquared);
+	btAlignedObjectArray<float>											m_linksLengthRatio_Backup(m_linksLengthRatio);
+	btAlignedObjectArray<float>											m_linksRestLength_Backup(m_linksRestLength);
+	btAlignedObjectArray<float>											m_linksMaterialLinearStiffnessCoefficient_Backup(m_linksMaterialLinearStiffnessCoefficient);
+
+	for( int batch = 0; batch < batchCounts.size(); ++batch )
+		batchCounts[batch] = 0;
+
+	// Do sort as single pass into destination arrays
+	for( int linkIndex = 0; linkIndex < numLinks; ++linkIndex )
+	{
+		// To maintain locations run off the original link locations rather than the current position.
+		// It's not cache efficient, but as we run this rarely that should not matter.
+		// It's faster than searching the link location array for the current location and then updating it.
+		// The other alternative would be to unsort before resorting, but this is equivalent to doing that.
+		int linkLocation = m_linkAddresses[linkIndex];
+
+		// Obtain batch and calculate target location for the
+		// next element in that batch, incrementing the batch counter
+		// afterwards
+		int batch = batchValues[linkIndex];
+		int newLocation = m_batchStartLengths[batch].start + batchCounts[batch];
+
+		batchCounts[batch] = batchCounts[batch] + 1;
+		m_links[newLocation] = m_links_Backup[linkLocation];
+		m_linkStrength[newLocation] = m_linkStrength_Backup[linkLocation];
+		m_linksMassLSC[newLocation] = m_linksMassLSC_Backup[linkLocation];
+		m_linksRestLengthSquared[newLocation] = m_linksRestLengthSquared_Backup[linkLocation];
+		m_linksLengthRatio[newLocation] = m_linksLengthRatio_Backup[linkLocation];
+		m_linksRestLength[newLocation] = m_linksRestLength_Backup[linkLocation];
+		m_linksMaterialLinearStiffnessCoefficient[newLocation] = m_linksMaterialLinearStiffnessCoefficient_Backup[linkLocation];
+		// Update the locations array to account for the moved entry
+		m_linkAddresses[linkIndex] = newLocation;
+	}
+} // void btSoftBodyLinkDataDX11::generateBatches()
+
+
+
+// Wraps each vertex array in a DX11 buffer and records the device/context; m_onGPU now starts out false instead of being left uninitialised.
+btSoftBodyVertexDataDX11::btSoftBodyVertexDataDX11( ID3D11Device *d3dDevice, ID3D11DeviceContext *d3dDeviceContext ) : 
+	m_onGPU( false ), m_d3dDevice( d3dDevice ), m_d3dDeviceContext( d3dDeviceContext ),
+	m_dx11ClothIdentifier( d3dDevice, d3dDeviceContext, &m_clothIdentifier, false ),
+	m_dx11VertexPosition( d3dDevice, d3dDeviceContext, &m_vertexPosition, false ),
+	m_dx11VertexPreviousPosition( d3dDevice, d3dDeviceContext, &m_vertexPreviousPosition, false ),
+	m_dx11VertexVelocity( d3dDevice, d3dDeviceContext, &m_vertexVelocity, false ),
+	m_dx11VertexForceAccumulator( d3dDevice, d3dDeviceContext, &m_vertexForceAccumulator, false ),
+	m_dx11VertexNormal( d3dDevice, d3dDeviceContext, &m_vertexNormal, false ),
+	m_dx11VertexInverseMass( d3dDevice, d3dDeviceContext, &m_vertexInverseMass, false ),
+	m_dx11VertexArea( d3dDevice, d3dDeviceContext, &m_vertexArea, false ),
+	m_dx11VertexTriangleCount( d3dDevice, d3dDeviceContext, &m_vertexTriangleCount, false )
+{
+}
+
+btSoftBodyVertexDataDX11::~btSoftBodyVertexDataDX11()
+{
+	// empty: this destructor performs no explicit cleanup
+}
+
+bool btSoftBodyVertexDataDX11::onAccelerator()
+{
+	return m_onGPU;	// flag maintained by moveToAccelerator() / moveFromAccelerator()
+}
+
+// Moves every vertex buffer to the GPU and sets m_onGPU when all of them succeed.
+bool btSoftBodyVertexDataDX11::moveToAccelerator()
+{
+	const bool allMoved =
+		m_dx11ClothIdentifier.moveToGPU() &&
+		m_dx11VertexPosition.moveToGPU() &&
+		m_dx11VertexPreviousPosition.moveToGPU() &&
+		m_dx11VertexVelocity.moveToGPU() &&
+		m_dx11VertexForceAccumulator.moveToGPU() &&
+		m_dx11VertexNormal.moveToGPU() &&
+		m_dx11VertexInverseMass.moveToGPU() &&
+		m_dx11VertexArea.moveToGPU() &&
+		m_dx11VertexTriangleCount.moveToGPU();	// && short-circuits: buffers after a failing one are not touched
+
+	if( allMoved )
+		m_onGPU = true;
+	return allMoved;
+}
+
+/**
+ * Bring the vertex data back from the GPU.
+ * bCopy == false moves every buffer with moveFromGPU(); bCopy == true uses copyFromGPU(), limited to the
+ * position and normal buffers when bCopyMinimum is true. Returns false as soon as one buffer call fails.
+ */
+bool btSoftBodyVertexDataDX11::moveFromAccelerator(bool bCopy, bool bCopyMinimum)
+{
+	bool success = true;
+	if (!bCopy)
+	{
+		success = success && m_dx11ClothIdentifier.moveFromGPU();
+		success = success && m_dx11VertexPosition.moveFromGPU();
+		success = success && m_dx11VertexPreviousPosition.moveFromGPU();
+		success = success && m_dx11VertexVelocity.moveFromGPU();
+		success = success && m_dx11VertexForceAccumulator.moveFromGPU();
+		success = success && m_dx11VertexNormal.moveFromGPU();
+		success = success && m_dx11VertexInverseMass.moveFromGPU();
+		success = success && m_dx11VertexArea.moveFromGPU();
+		success = success && m_dx11VertexTriangleCount.moveFromGPU();
+	}
+	else if (bCopyMinimum)
+	{
+		success = success && m_dx11VertexPosition.copyFromGPU();
+		success = success && m_dx11VertexNormal.copyFromGPU();
+	}
+	else
+	{
+		success = success && m_dx11ClothIdentifier.copyFromGPU();
+		success = success && m_dx11VertexPosition.copyFromGPU();
+		success = success && m_dx11VertexPreviousPosition.copyFromGPU();
+		success = success && m_dx11VertexVelocity.copyFromGPU();
+		success = success && m_dx11VertexForceAccumulator.copyFromGPU();
+		success = success && m_dx11VertexNormal.copyFromGPU();
+		success = success && m_dx11VertexInverseMass.copyFromGPU();
+		success = success && m_dx11VertexArea.copyFromGPU();
+		success = success && m_dx11VertexTriangleCount.copyFromGPU();
+	}
+	// A successful move clears the flag (it used to be set to true here as well); a successful copy still sets it, as before.
+	if( success )
+		m_onGPU = bCopy;
+	return success;
+}
+
+
+// Wraps each triangle array in a DX11 buffer and records the device/context; m_onGPU now starts out false instead of being left uninitialised.
+btSoftBodyTriangleDataDX11::btSoftBodyTriangleDataDX11( ID3D11Device *d3dDevice, ID3D11DeviceContext *d3dDeviceContext ) : 
+	m_onGPU( false ), m_d3dDevice( d3dDevice ), m_d3dDeviceContext( d3dDeviceContext ),
+	m_dx11VertexIndices( d3dDevice, d3dDeviceContext, &m_vertexIndices, false ),
+	m_dx11Area( d3dDevice, d3dDeviceContext, &m_area, false ),
+	m_dx11Normal( d3dDevice, d3dDeviceContext, &m_normal, false )
+{
+}
+
+btSoftBodyTriangleDataDX11::~btSoftBodyTriangleDataDX11()
+{
+	// empty: this destructor performs no explicit cleanup
+}
+
+
+/** Ask the base class to create numTriangles triangles and size m_triangleAddresses to the previous triangle count plus numTriangles. */
+void btSoftBodyTriangleDataDX11::createTriangles( int numTriangles )
+{
+	// The target size is taken before the base class is called
+	const int grownSize = getNumTriangles() + numTriangles;
+
+	btSoftBodyTriangleData::createTriangles( numTriangles );
+
+	// One address slot per triangle
+	m_triangleAddresses.resize( grownSize );
+}
+
+/** Store the described triangle at triangleIndex, assuming space has already been allocated by a call to createTriangles */
+void btSoftBodyTriangleDataDX11::setTriangleAt( const btSoftBodyTriangleData::TriangleDescription &triangle, int triangleIndex )
+{
+	btSoftBodyTriangleData::setTriangleAt( triangle, triangleIndex );
+	// Identity mapping for initialisation; generateBatches() rewrites these entries when it reorders the triangles
+	m_triangleAddresses[triangleIndex] = triangleIndex;
+}
+
+bool btSoftBodyTriangleDataDX11::onAccelerator()
+{
+	return m_onGPU;	// flag maintained by moveToAccelerator() / moveFromAccelerator()
+}
+
+// Moves the three triangle buffers to the GPU and sets m_onGPU when all of them succeed.
+bool btSoftBodyTriangleDataDX11::moveToAccelerator()
+{
+	const bool allMoved =
+		m_dx11VertexIndices.moveToGPU() &&
+		m_dx11Area.moveToGPU() &&
+		m_dx11Normal.moveToGPU();	// && short-circuits: buffers after a failing one are not touched
+
+	if( allMoved )
+		m_onGPU = true;
+	return allMoved;
+}
+
+// Moves the three triangle buffers back from the GPU; returns false as soon as one moveFromGPU() call fails.
+bool btSoftBodyTriangleDataDX11::moveFromAccelerator()
+{
+	bool success = true;
+	success = success && m_dx11VertexIndices.moveFromGPU();
+	success = success && m_dx11Area.moveFromGPU();
+	success = success && m_dx11Normal.moveFromGPU();
+	// The data has left the accelerator, so clear the flag (it used to be set to true here)
+	if( success )
+		m_onGPU = false;
+	return success;
+}
+
+/**
+ * Generate (and later update) the batching for the entire triangle set.
+ * This redoes a lot of work because it batches the entire set when each cloth is inserted.
+ * In theory we could delay it until just before we need the cloth.
+ * It's a one-off overhead, though, so that is a later optimisation.
+ */
+void btSoftBodyTriangleDataDX11::generateBatches()
+{
+	int numTriangles = getNumTriangles();
+	if( numTriangles == 0 )
+		return;	// nothing to batch; m_batchStartLengths and m_triangleAddresses are left as they were
+
+	// batchValues[i] will hold the colour (batch number) chosen for triangle i
+	btAlignedObjectArray< int > batchValues;
+	batchValues.resize( numTriangles );
+
+	// Find the maximum vertex value internally for now
+	int maxVertex = 0;
+	for( int triangleIndex = 0; triangleIndex < numTriangles; ++triangleIndex )
+	{
+		int vertex0 = getVertexSet(triangleIndex).vertex0;
+		int vertex1 = getVertexSet(triangleIndex).vertex1;
+		int vertex2 = getVertexSet(triangleIndex).vertex2;
+
+		if( vertex0 > maxVertex )
+			maxVertex = vertex0;
+		if( vertex1 > maxVertex )
+			maxVertex = vertex1;
+		if( vertex2 > maxVertex )
+			maxVertex = vertex2;
+	}
+	int numVertices = maxVertex + 1;
+
+	// Set of lists, one for each node, specifying which colours are connected
+	// to that node.
+	// No two triangles sharing a node can share a colour.
+	btAlignedObjectArray< btAlignedObjectArray< int > > vertexConnectedColourLists;
+	vertexConnectedColourLists.resize(numVertices);
+
+
+	// Greedy colouring, visiting the triangles in insertion order:
+	// Simple algorithm that chooses the lowest batch number
+	// that none of the faces attached to any of the connected
+	// nodes is in
+	for( int triangleIndex = 0; triangleIndex < numTriangles; ++triangleIndex )
+	{
+		// To maintain locations run off the original triangle locations rather than the current position.
+		// It's not cache efficient, but as we run this rarely that should not matter.
+		// It's faster than searching the triangle location array for the current location and then updating it.
+		// The other alternative would be to unsort before resorting, but this is equivalent to doing that.
+		int triangleLocation = m_triangleAddresses[triangleIndex];
+
+		int vertex0 = getVertexSet(triangleLocation).vertex0;
+		int vertex1 = getVertexSet(triangleLocation).vertex1;
+		int vertex2 = getVertexSet(triangleLocation).vertex2;
+
+		// Get the three node colour lists
+		btAlignedObjectArray< int > &colourListVertex0( vertexConnectedColourLists[vertex0] );
+		btAlignedObjectArray< int > &colourListVertex1( vertexConnectedColourLists[vertex1] );
+		btAlignedObjectArray< int > &colourListVertex2( vertexConnectedColourLists[vertex2] );
+
+		// Choose the minimum colour that is in none of the lists
+		int colour = 0;
+		while( 
+			colourListVertex0.findLinearSearch(colour) != colourListVertex0.size() || 
+			colourListVertex1.findLinearSearch(colour) != colourListVertex1.size() ||
+			colourListVertex2.findLinearSearch(colour) != colourListVertex2.size() )
+		{
+			++colour;
+		}
+		// colour is now the minimum colour in none of the three lists.
+		// Add it to the three lists so that future faces don't share it,
+		// and store the colour against this face
+		colourListVertex0.push_back(colour);
+		colourListVertex1.push_back(colour);
+		colourListVertex2.push_back(colour);
+
+		batchValues[triangleIndex] = colour;
+	}
+
+
+	// Count the triangles in each colour; a colour seen for the first time appends a new counter
+	btAlignedObjectArray< int > batchCounts;
+	for( int i = 0; i < numTriangles; ++i )
+	{
+		int batch = batchValues[i];
+		if( batch >= batchCounts.size() )
+			batchCounts.push_back(1);
+		else
+			++(batchCounts[batch]);
+	}
+
+	// Lay the batches out back to back: each start is the running sum of the earlier batch lengths
+	m_batchStartLengths.resize(batchCounts.size());
+	m_batchStartLengths[0] = BatchPair( 0, 0 );
+
+
+	int sum = 0;
+	for( int batchIndex = 0; batchIndex < batchCounts.size(); ++batchIndex )
+	{
+		m_batchStartLengths[batchIndex].start = sum;
+		m_batchStartLengths[batchIndex].length = batchCounts[batchIndex];
+		sum += batchCounts[batchIndex];
+	}
+
+	/////////////////////////////
+	// Sort data based on batches
+
+	// Create source arrays by copying originals
+	btAlignedObjectArray<btSoftBodyTriangleData::TriangleNodeSet>							m_vertexIndices_Backup(m_vertexIndices);
+	btAlignedObjectArray<float>										m_area_Backup(m_area);
+	btAlignedObjectArray<Vectormath::Aos::Vector3>					m_normal_Backup(m_normal);
+
+	// Reset the counters: from here on batchCounts[b] is the number of triangles already placed in batch b
+	for( int batch = 0; batch < batchCounts.size(); ++batch )
+		batchCounts[batch] = 0;
+
+	// Do sort as single pass into destination arrays	
+	for( int triangleIndex = 0; triangleIndex < numTriangles; ++triangleIndex )
+	{
+		// To maintain locations run off the original triangle locations rather than the current position.
+		// It's not cache efficient, but as we run this rarely that should not matter.
+		// It's faster than searching the triangle location array for the current location and then updating it.
+		// The other alternative would be to unsort before resorting, but this is equivalent to doing that.
+		int triangleLocation = m_triangleAddresses[triangleIndex];
+
+		// Obtain batch and calculate target location for the
+		// next element in that batch, incrementing the batch counter
+		// afterwards
+		int batch = batchValues[triangleIndex];
+		int newLocation = m_batchStartLengths[batch].start + batchCounts[batch];
+
+		batchCounts[batch] = batchCounts[batch] + 1;
+		m_vertexIndices[newLocation] = m_vertexIndices_Backup[triangleLocation];
+		m_area[newLocation] = m_area_Backup[triangleLocation];
+		m_normal[newLocation] = m_normal_Backup[triangleLocation];
+
+		// Update the locations array to account for the moved entry
+		m_triangleAddresses[triangleIndex] = newLocation;
+	}
+} // btSoftBodyTriangleDataDX11::generateBatches
+
+
+
+
+
+
+
+
+
+
+
+
+// Stores the device/context and hands each m_dx11* buffer wrapper the address of its host-side array plus a bool flag (meaning not visible here - confirm against the buffer class).
+btDX11SoftBodySolver::btDX11SoftBodySolver(ID3D11Device * dx11Device, ID3D11DeviceContext* dx11Context, DXFunctions::CompileFromMemoryFunc dx11CompileFromMemory) :
+	m_dx11Device( dx11Device ),
+	m_dx11Context( dx11Context ),
+	dxFunctions( m_dx11Device, m_dx11Context, dx11CompileFromMemory ),
+	m_linkData(m_dx11Device, m_dx11Context),
+	m_vertexData(m_dx11Device, m_dx11Context),
+	m_triangleData(m_dx11Device, m_dx11Context),
+	m_dx11PerClothAcceleration( m_dx11Device, m_dx11Context, &m_perClothAcceleration, true ),
+	m_dx11PerClothWindVelocity( m_dx11Device, m_dx11Context, &m_perClothWindVelocity, true ),
+	m_dx11PerClothDampingFactor( m_dx11Device, m_dx11Context, &m_perClothDampingFactor, true ),
+	m_dx11PerClothVelocityCorrectionCoefficient( m_dx11Device, m_dx11Context, &m_perClothVelocityCorrectionCoefficient, true ),
+	m_dx11PerClothLiftFactor( m_dx11Device, m_dx11Context, &m_perClothLiftFactor, true ),
+	m_dx11PerClothDragFactor( m_dx11Device, m_dx11Context, &m_perClothDragFactor, true ),
+	m_dx11PerClothMediumDensity( m_dx11Device, m_dx11Context, &m_perClothMediumDensity, true ),
+	m_dx11PerClothCollisionObjects( m_dx11Device, m_dx11Context, &m_perClothCollisionObjects, true ),
+	m_dx11CollisionObjectDetails( m_dx11Device, m_dx11Context, &m_collisionObjectDetails, true ),
+	m_dx11PerClothMinBounds( m_dx11Device, m_dx11Context, &m_perClothMinBounds, false ),
+	m_dx11PerClothMaxBounds( m_dx11Device, m_dx11Context, &m_perClothMaxBounds, false ),
+	m_dx11PerClothFriction( m_dx11Device, m_dx11Context, &m_perClothFriction, false ),
+	m_enableUpdateBounds(false)
+{
+	// Initially the solver constants clearly need to be computed. For now this flag is global for the cloths linked with this
+	// solver - it should probably become body specific for performance once it is clearer when constants need to be updated.
+	m_updateSolverConstants = true;
+	// Shaders are not built here; checkInitialized() builds them on demand while this flag is false.
+	m_shadersInitialized = false;
+}
+
+btDX11SoftBodySolver::~btDX11SoftBodySolver()
+{	
+	releaseKernels(); // Only the kernels/constant buffers are released. NOTE(review): the objects pointed to by m_softBodySet are not deleted here - confirm who owns them.
+}
+
+// Releases every compute kernel and its constant buffer, then clears m_shadersInitialized so checkInitialized() builds them again.
+void btDX11SoftBodySolver::releaseKernels()
+{
+	// Each kernel / constant-buffer member is released exactly once.
+	SAFE_RELEASE( prepareLinksKernel.kernel );
+	SAFE_RELEASE( prepareLinksKernel.constBuffer );
+	SAFE_RELEASE( integrateKernel.kernel );
+	SAFE_RELEASE( integrateKernel.constBuffer );
+	SAFE_RELEASE( solvePositionsFromLinksKernel.constBuffer );
+	SAFE_RELEASE( solvePositionsFromLinksKernel.kernel );
+	SAFE_RELEASE( updatePositionsFromVelocitiesKernel.constBuffer );
+	SAFE_RELEASE( updatePositionsFromVelocitiesKernel.kernel );
+	SAFE_RELEASE( updateVelocitiesFromPositionsWithoutVelocitiesKernel.constBuffer );
+	SAFE_RELEASE( updateVelocitiesFromPositionsWithoutVelocitiesKernel.kernel );
+	SAFE_RELEASE( updateVelocitiesFromPositionsWithVelocitiesKernel.constBuffer );
+	SAFE_RELEASE( updateVelocitiesFromPositionsWithVelocitiesKernel.kernel );
+	SAFE_RELEASE( resetNormalsAndAreasKernel.constBuffer );
+	SAFE_RELEASE( resetNormalsAndAreasKernel.kernel );
+	SAFE_RELEASE( normalizeNormalsAndAreasKernel.constBuffer );
+	SAFE_RELEASE( normalizeNormalsAndAreasKernel.kernel );
+	SAFE_RELEASE( updateSoftBodiesKernel.constBuffer );
+	SAFE_RELEASE( updateSoftBodiesKernel.kernel );
+	SAFE_RELEASE( solveCollisionsAndUpdateVelocitiesKernel.kernel );
+	SAFE_RELEASE( solveCollisionsAndUpdateVelocitiesKernel.constBuffer );
+	SAFE_RELEASE( computeBoundsKernel.kernel );
+	SAFE_RELEASE( computeBoundsKernel.constBuffer );
+	SAFE_RELEASE( vSolveLinksKernel.kernel );
+	SAFE_RELEASE( vSolveLinksKernel.constBuffer );
+
+	SAFE_RELEASE( addVelocityKernel.constBuffer );
+	SAFE_RELEASE( addVelocityKernel.kernel );
+	SAFE_RELEASE( applyForcesKernel.constBuffer );
+	SAFE_RELEASE( applyForcesKernel.kernel );
+
+	m_shadersInitialized = false;
+}
+
+
+// Pulls the vertex data back to the host and writes each solver vertex position into the matching node of its btSoftBody.
+void btDX11SoftBodySolver::copyBackToSoftBodies(bool bMove)
+{
+	using Vectormath::Aos::Point3;
+
+	// Host copy first; moveFromAccelerator receives the negation of bMove.
+	m_vertexData.moveFromAccelerator(!bMove);
+
+	// Visit every cloth registered with the solver
+	for( int clothIndex = 0; clothIndex < m_softBodySet.size(); ++clothIndex )
+	{
+		btAcceleratedSoftBodyInterface *clothInterface = m_softBodySet[ clothIndex ];
+		btSoftBody *targetBody = clothInterface->getSoftBody();
+		int baseVertex = clothInterface->getFirstVertex();
+		int vertexCount = clothInterface->getNumVertices();
+
+		// Node i of the body is stored at solver slot baseVertex + i
+		for( int i = 0; i < vertexCount; ++i )
+		{
+			Point3 solverPosition( getVertexData().getVertexPositions()[baseVertex + i] );
+			targetBody->m_nodes[i].m_x.setX( solverPosition.getX() );
+			targetBody->m_nodes[i].m_x.setY( solverPosition.getY() );
+			targetBody->m_nodes[i].m_x.setZ( solverPosition.getZ() );
+			// NOTE(review): m_n (presumably the node normal) is filled with the position as well, not with a normal - confirm this is intended.
+			targetBody->m_nodes[i].m_n.setX( solverPosition.getX() );
+			targetBody->m_nodes[i].m_n.setY( solverPosition.getY() );
+			targetBody->m_nodes[i].m_n.setZ( solverPosition.getZ() );
+		}
+	}
+} // btDX11SoftBodySolver::copyBackToSoftBodies
+
+
+// Reloads all solver-side vertex, triangle and link data from softBodies when forceUpdate is set or the body count differs from the stored set.
+void btDX11SoftBodySolver::optimize( btAlignedObjectArray< btSoftBody * > &softBodies, bool forceUpdate )
+{
+	if( forceUpdate || m_softBodySet.size() != softBodies.size() )
+	{
+		// Have a change in the soft body set so update, reloading all the data
+		getVertexData().clear();
+		getTriangleData().clear();
+		getLinkData().clear();
+		// The interfaces are allocated with new further down and stored only in m_softBodySet, so free the old ones before dropping them.
+		for( int oldIndex = 0; oldIndex < m_softBodySet.size(); ++oldIndex )
+			delete m_softBodySet[ oldIndex ];
+		m_softBodySet.resize(0);
+		// The per-cloth arrays are filled with push_back per body; empty them so a reload does not append behind stale entries.
+		m_perClothAcceleration.resize(0);
+		m_perClothDampingFactor.resize(0);
+		m_perClothVelocityCorrectionCoefficient.resize(0);
+		m_perClothLiftFactor.resize(0);
+		m_perClothDragFactor.resize(0);
+		m_perClothMediumDensity.resize(0);
+		m_perClothMinBounds.resize(0);
+		m_perClothMaxBounds.resize(0);
+		m_perClothFriction.resize(0);
+		m_perClothCollisionObjects.resize(0);
+
+		for( int softBodyIndex = 0; softBodyIndex < softBodies.size(); ++softBodyIndex )
+		{
+			btSoftBody *softBody = softBodies[ softBodyIndex ];
+			using Vectormath::Aos::Matrix3;
+			using Vectormath::Aos::Point3;
+			// Create SoftBody that will store the information within the solver
+			btAcceleratedSoftBodyInterface *newSoftBody = new btAcceleratedSoftBodyInterface( softBody );
+			m_softBodySet.push_back( newSoftBody );
+
+			m_perClothAcceleration.push_back( toVector3(softBody->getWorldInfo()->m_gravity) );
+			m_perClothDampingFactor.push_back(softBody->m_cfg.kDP);
+			m_perClothVelocityCorrectionCoefficient.push_back( softBody->m_cfg.kVCF );
+			m_perClothLiftFactor.push_back( softBody->m_cfg.kLF );
+			m_perClothDragFactor.push_back( softBody->m_cfg.kDG );
+			m_perClothMediumDensity.push_back(softBody->getWorldInfo()->air_density);
+			// Simple init values. Actually we'll put 0 and -1 into them at the appropriate time
+			m_perClothMinBounds.push_back( UIntVector3( 0, 0, 0 ) );
+			m_perClothMaxBounds.push_back( UIntVector3( UINT_MAX, UINT_MAX, UINT_MAX ) );
+			m_perClothFriction.push_back( softBody->getFriction() );
+			m_perClothCollisionObjects.push_back( CollisionObjectIndices(-1, -1) );
+
+			// Add space for new vertices and triangles in the default solver for now
+			// TODO: Include space here for tearing too later
+			int firstVertex = getVertexData().getNumVertices();
+			int numVertices = softBody->m_nodes.size();
+			int maxVertices = numVertices;
+			// Allocate space for new vertices in all the vertex arrays
+			getVertexData().createVertices( maxVertices, softBodyIndex );
+			int firstTriangle = getTriangleData().getNumTriangles();
+			int numTriangles = softBody->m_faces.size();
+			int maxTriangles = numTriangles;
+			getTriangleData().createTriangles( maxTriangles );
+			// Copy vertices from softbody into the solver
+			for( int vertex = 0; vertex < numVertices; ++vertex )
+			{
+				Point3 multPoint(softBody->m_nodes[vertex].m_x.getX(), softBody->m_nodes[vertex].m_x.getY(), softBody->m_nodes[vertex].m_x.getZ());
+				btSoftBodyVertexData::VertexDescription desc;
+				// TODO: Position in the softbody might be pre-transformed
+				// or we may need to adapt for the pose.
+				//desc.setPosition( cloth.getMeshTransform()*multPoint );
+				desc.setPosition( multPoint );
+				float vertexInverseMass = softBody->m_nodes[vertex].m_im;
+				desc.setInverseMass(vertexInverseMass);
+				getVertexData().setVertexAt( desc, firstVertex + vertex );
+			}
+
+			// Copy triangles similarly
+			// We're assuming here that vertex indices are based on the firstVertex rather than the entire scene
+			for( int triangle = 0; triangle < numTriangles; ++triangle )
+			{
+				// Note that large array storage is relative to the array not to the cloth
+				// So we need to add firstVertex to each value
+				int vertexIndex0 = (softBody->m_faces[triangle].m_n[0] - &(softBody->m_nodes[0]));
+				int vertexIndex1 = (softBody->m_faces[triangle].m_n[1] - &(softBody->m_nodes[0]));
+				int vertexIndex2 = (softBody->m_faces[triangle].m_n[2] - &(softBody->m_nodes[0]));
+				btSoftBodyTriangleData::TriangleDescription newTriangle(vertexIndex0 + firstVertex, vertexIndex1 + firstVertex, vertexIndex2 + firstVertex);
+				getTriangleData().setTriangleAt( newTriangle, firstTriangle + triangle );
+				// Increase vertex triangle counts for this triangle
+				getVertexData().getTriangleCount(newTriangle.getVertexSet().vertex0)++;
+				getVertexData().getTriangleCount(newTriangle.getVertexSet().vertex1)++;
+				getVertexData().getTriangleCount(newTriangle.getVertexSet().vertex2)++;
+			}
+
+			int firstLink = getLinkData().getNumLinks();
+			int numLinks = softBody->m_links.size();
+			// Allocate space for the links
+			getLinkData().createLinks( numLinks );
+			// Add the links; node pointers are converted to indices relative to the body's first node
+			for( int link = 0; link < numLinks; ++link )
+			{
+				int vertexIndex0 = softBody->m_links[link].m_n[0] - &(softBody->m_nodes[0]);
+				int vertexIndex1 = softBody->m_links[link].m_n[1] - &(softBody->m_nodes[0]);
+				btSoftBodyLinkData::LinkDescription newLink(vertexIndex0 + firstVertex, vertexIndex1 + firstVertex, softBody->m_links[link].m_material->m_kLST);
+				newLink.setLinkStrength(1.f);
+				getLinkData().setLinkAt(newLink, firstLink + link);
+			}
+			// Record where this body's data lives inside the shared solver arrays
+			newSoftBody->setFirstVertex( firstVertex );
+			newSoftBody->setFirstTriangle( firstTriangle );
+			newSoftBody->setNumVertices( numVertices );
+			newSoftBody->setMaxVertices( maxVertices );
+			newSoftBody->setNumTriangles( numTriangles );
+			newSoftBody->setMaxTriangles( maxTriangles );
+			newSoftBody->setFirstLink( firstLink );
+			newSoftBody->setNumLinks( numLinks );
+		}
+
+		// The link data was rebuilt, so its derived constants must be recomputed; updateConstants() only works while the flag is set.
+		m_updateSolverConstants = true;
+		updateConstants(0.f);
+
+		m_linkData.generateBatches();
+		m_triangleData.generateBatches();
+	}
+}
+
+
+btSoftBodyLinkData &btDX11SoftBodySolver::getLinkData()
+{
+	// Hands out the solver's m_linkData by mutable reference. TODO: Consider setting link data to "changed" here
+	return m_linkData;
+}
+
+btSoftBodyVertexData &btDX11SoftBodySolver::getVertexData()
+{
+	// Hands out the solver's m_vertexData by mutable reference. TODO: Consider setting vertex data to "changed" here
+	return m_vertexData;
+}
+
+btSoftBodyTriangleData &btDX11SoftBodySolver::getTriangleData()
+{
+	// Hands out the solver's m_triangleData by mutable reference. TODO: Consider setting triangle data to "changed" here
+	return m_triangleData;
+}
+
+// Builds the shaders if that has not succeeded yet and reports whether they are available.
+bool btDX11SoftBodySolver::checkInitialized()
+{
+	// Short-circuit evaluation: buildShaders() is only attempted while the flag is still false.
+	if( !m_shadersInitialized && buildShaders() )
+		m_shadersInitialized = true;
+	return m_shadersInitialized;
+}
+
+// Dispatches resetNormalsAndAreasKernel for numVertices vertices with the vertex normal and vertex area buffers bound as UAVs 0 and 1.
+void btDX11SoftBodySolver::resetNormalsAndAreas( int numVertices )
+{
+	// No need to batch link solver, it is entirely parallel
+	// Copy kernel parameters to GPU
+	UpdateSoftBodiesCB constBuffer;
+	constBuffer.numNodes = numVertices;
+	constBuffer.epsilon = FLT_EPSILON;
+	// NOTE(review): the parameters go through integrateKernel.constBuffer, not resetNormalsAndAreasKernel.constBuffer - confirm the sharing is intended and the buffer holds sizeof(UpdateSoftBodiesCB).
+	// Todo: factor this out. Number of nodes is static and sdt might be, too, we can update this just once on setup
+	D3D11_MAPPED_SUBRESOURCE MappedResource = {0};
+	m_dx11Context->Map( integrateKernel.constBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &MappedResource );
+	memcpy( MappedResource.pData, &constBuffer, sizeof(UpdateSoftBodiesCB) );	
+	m_dx11Context->Unmap( integrateKernel.constBuffer, 0 );
+	m_dx11Context->CSSetConstantBuffers( 0, 1, &integrateKernel.constBuffer );
+
+	// Set resources and dispatch
+	m_dx11Context->CSSetUnorderedAccessViews( 0, 1, &(m_vertexData.m_dx11VertexNormal.getUAV()), NULL );
+	m_dx11Context->CSSetUnorderedAccessViews( 1, 1, &(m_vertexData.m_dx11VertexArea.getUAV()), NULL );
+
+	// Execute the kernel
+	m_dx11Context->CSSetShader( resetNormalsAndAreasKernel.kernel, NULL, 0 );
+
+	int	numBlocks = (constBuffer.numNodes + (128-1)) / 128; // ceil(numNodes / 128); 128 presumably matches the shader's thread-group size - confirm
+	m_dx11Context->Dispatch(numBlocks, 1, 1 );
+
+	{
+		// Tidy up: unbind UAV slots 0-1 and constant buffer slot 0
+		ID3D11UnorderedAccessView* pUAViewNULL = NULL;
+		m_dx11Context->CSSetUnorderedAccessViews( 0, 1, &pUAViewNULL, NULL );
+		m_dx11Context->CSSetUnorderedAccessViews( 1, 1, &pUAViewNULL, NULL );
+
+		ID3D11Buffer *pBufferNull = NULL;
+		m_dx11Context->CSSetConstantBuffers( 0, 1, &pBufferNull );
+	}	
+} // btDX11SoftBodySolver::resetNormalsAndAreas
+
+// Dispatches normalizeNormalsAndAreasKernel for numVertices vertices: triangle counts bound as SRV 2, vertex normals and areas as UAVs 0 and 1.
+void btDX11SoftBodySolver::normalizeNormalsAndAreas( int numVertices )
+{
+	// No need to batch link solver, it is entirely parallel
+	// Copy kernel parameters to GPU
+	UpdateSoftBodiesCB constBuffer;
+	constBuffer.numNodes = numVertices;
+	constBuffer.epsilon = FLT_EPSILON;
+	// NOTE(review): the parameters go through integrateKernel.constBuffer, not normalizeNormalsAndAreasKernel.constBuffer - confirm the sharing is intended and the buffer holds sizeof(UpdateSoftBodiesCB).
+	// Todo: factor this out. Number of nodes is static and sdt might be, too, we can update this just once on setup
+	D3D11_MAPPED_SUBRESOURCE MappedResource = {0};
+	m_dx11Context->Map( integrateKernel.constBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &MappedResource );
+	memcpy( MappedResource.pData, &constBuffer, sizeof(UpdateSoftBodiesCB) );	
+	m_dx11Context->Unmap( integrateKernel.constBuffer, 0 );
+	m_dx11Context->CSSetConstantBuffers( 0, 1, &integrateKernel.constBuffer );
+
+	// Set resources and dispatch	
+	m_dx11Context->CSSetShaderResources( 2, 1, &(m_vertexData.m_dx11VertexTriangleCount.getSRV()) );
+
+	m_dx11Context->CSSetUnorderedAccessViews( 0, 1, &(m_vertexData.m_dx11VertexNormal.getUAV()), NULL );
+	m_dx11Context->CSSetUnorderedAccessViews( 1, 1, &(m_vertexData.m_dx11VertexArea.getUAV()), NULL );
+
+	// Execute the kernel
+	m_dx11Context->CSSetShader( normalizeNormalsAndAreasKernel.kernel, NULL, 0 );
+
+	int	numBlocks = (constBuffer.numNodes + (128-1)) / 128; // ceil(numNodes / 128); 128 presumably matches the shader's thread-group size - confirm
+	m_dx11Context->Dispatch(numBlocks, 1, 1 );
+
+	{
+		// Tidy up: unbind SRV slot 2, UAV slots 0-1 and constant buffer slot 0
+		ID3D11ShaderResourceView* pViewNULL = NULL;
+		m_dx11Context->CSSetShaderResources( 2, 1, &pViewNULL );
+
+		ID3D11UnorderedAccessView* pUAViewNULL = NULL;
+		m_dx11Context->CSSetUnorderedAccessViews( 0, 1, &pUAViewNULL, NULL );
+		m_dx11Context->CSSetUnorderedAccessViews( 1, 1, &pUAViewNULL, NULL );
+
+		ID3D11Buffer *pBufferNull = NULL;
+		m_dx11Context->CSSetConstantBuffers( 0, 1, &pBufferNull );
+	}	
+} // btDX11SoftBodySolver::normalizeNormalsAndAreas
+
+// Dispatches updateSoftBodiesKernel for one range of triangles: numTriangles faces starting at firstTriangle.
+void btDX11SoftBodySolver::executeUpdateSoftBodies( int firstTriangle, int numTriangles )
+{
+	// Copy kernel parameters to GPU; only the face range is filled in for this kernel
+	UpdateSoftBodiesCB constBuffer;
+	constBuffer.startFace = firstTriangle;
+	constBuffer.numFaces = numTriangles;
+
+	// Todo: factor this out. Number of nodes is static and sdt might be, too, we can update this just once on setup
+	D3D11_MAPPED_SUBRESOURCE MappedResource = {0};
+	m_dx11Context->Map( updateSoftBodiesKernel.constBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &MappedResource );
+	memcpy( MappedResource.pData, &constBuffer, sizeof(UpdateSoftBodiesCB) );
+	m_dx11Context->Unmap( updateSoftBodiesKernel.constBuffer, 0 );
+	m_dx11Context->CSSetConstantBuffers( 0, 1, &updateSoftBodiesKernel.constBuffer );
+
+	// Set resources and dispatch: SRVs 0-1 are inputs, UAVs 0-3 are the per-vertex and per-triangle normal/area outputs
+	m_dx11Context->CSSetShaderResources( 0, 1, &(m_triangleData.m_dx11VertexIndices.getSRV()) );
+	m_dx11Context->CSSetShaderResources( 1, 1, &(m_vertexData.m_dx11VertexPosition.getSRV()) );
+	m_dx11Context->CSSetUnorderedAccessViews( 0, 1, &(m_vertexData.m_dx11VertexNormal.getUAV()), NULL );
+	m_dx11Context->CSSetUnorderedAccessViews( 1, 1, &(m_vertexData.m_dx11VertexArea.getUAV()), NULL );
+	m_dx11Context->CSSetUnorderedAccessViews( 2, 1, &(m_triangleData.m_dx11Normal.getUAV()), NULL );
+	m_dx11Context->CSSetUnorderedAccessViews( 3, 1, &(m_triangleData.m_dx11Area.getUAV()), NULL );
+
+	// Execute the kernel
+	m_dx11Context->CSSetShader( updateSoftBodiesKernel.kernel, NULL, 0 );
+	int	numBlocks = (numTriangles + (128-1)) / 128; // ceil(numTriangles / 128)
+	m_dx11Context->Dispatch(numBlocks, 1, 1 );
+
+	{
+		// Tidy up: unbind exactly the slots bound above (SRVs 0-1, UAVs 0-3, constant buffer 0) so no view stays attached
+		ID3D11ShaderResourceView* pViewNULL = NULL;
+		m_dx11Context->CSSetShaderResources( 0, 1, &pViewNULL );
+		m_dx11Context->CSSetShaderResources( 1, 1, &pViewNULL );
+
+		ID3D11UnorderedAccessView* pUAViewNULL = NULL;
+		m_dx11Context->CSSetUnorderedAccessViews( 0, 1, &pUAViewNULL, NULL );
+		m_dx11Context->CSSetUnorderedAccessViews( 1, 1, &pUAViewNULL, NULL );
+		m_dx11Context->CSSetUnorderedAccessViews( 2, 1, &pUAViewNULL, NULL );
+		m_dx11Context->CSSetUnorderedAccessViews( 3, 1, &pUAViewNULL, NULL );
+
+		ID3D11Buffer *pBufferNull = NULL;
+		m_dx11Context->CSSetConstantBuffers( 0, 1, &pBufferNull );
+	}
+} // btDX11SoftBodySolver::executeUpdateSoftBodies
+
+// Recomputes normals and areas on the GPU in three stages:
+//   1. resetNormalsAndAreas over all vertices,
+//   2. executeUpdateSoftBodies once per triangle batch,
+//   3. normalizeNormalsAndAreas over all vertices.
+void btDX11SoftBodySolver::updateSoftBodies()
+{
+	int numVertices = m_vertexData.getNumVertices();
+
+
+	// Ensure data is on accelerator
+	m_vertexData.moveToAccelerator();
+	m_triangleData.moveToAccelerator();
+
+	resetNormalsAndAreas( numVertices );
+
+
+	// Go through triangle batches so updates occur correctly
+	for( int batchIndex = 0; batchIndex < m_triangleData.m_batchStartLengths.size(); ++batchIndex )
+	{
+		// Each batch entry describes the range [start, start + length) of the triangle arrays
+		int startTriangle = m_triangleData.m_batchStartLengths[batchIndex].start;
+		int batchTriangleCount = m_triangleData.m_batchStartLengths[batchIndex].length;
+
+		executeUpdateSoftBodies( startTriangle, batchTriangleCount );
+	}
+
+
+	normalizeNormalsAndAreas( numVertices );
+
+} // btDX11SoftBodySolver::updateSoftBodies
+
+
+Vectormath::Aos::Vector3 btDX11SoftBodySolver::ProjectOnAxis( const Vectormath::Aos::Vector3 &v, const Vectormath::Aos::Vector3 &a )
+{
+	return a*Vectormath::Aos::dot(v, a); // a scaled by dot(v, a); this equals the projection of v onto a only when a has unit length
+}
+
+// Accumulates force into vertexForce, unless force*solverdt*inverseMass is longer than vertexVelocity; in that case it
+// instead subtracts the component of vertexVelocity along the force direction, divided by solverdt*inverseMass.
+void btDX11SoftBodySolver::ApplyClampedForce( float solverdt, const Vectormath::Aos::Vector3 &force, const Vectormath::Aos::Vector3 &vertexVelocity, float inverseMass, Vectormath::Aos::Vector3 &vertexForce )
+{
+	float stepOverMass = solverdt*inverseMass;
+	if( Vectormath::Aos::lengthSqr(force * stepOverMass) > Vectormath::Aos::lengthSqr(vertexVelocity) )
+		vertexForce -= ProjectOnAxis( vertexVelocity, normalize( force ) )/stepOverMass;
+	else
+		vertexForce += force;
+}
+
+// Dispatches applyForcesKernel over all vertices for step solverdt; writes the force accumulator and velocity buffers (UAVs 0 and 1).
+void btDX11SoftBodySolver::applyForces( float solverdt )
+{		
+	using namespace Vectormath::Aos;
+
+	// Ensure data is on accelerator
+	m_vertexData.moveToAccelerator();
+	m_dx11PerClothAcceleration.moveToGPU();
+	m_dx11PerClothLiftFactor.moveToGPU();
+	m_dx11PerClothDragFactor.moveToGPU();
+	m_dx11PerClothMediumDensity.moveToGPU();
+	m_dx11PerClothWindVelocity.moveToGPU();
+
+	// No need to batch link solver, it is entirely parallel
+	// Copy kernel parameters to GPU
+	ApplyForcesCB constBuffer;
+	
+	constBuffer.numNodes = m_vertexData.getNumVertices();
+	constBuffer.solverdt = solverdt;
+	constBuffer.epsilon = FLT_EPSILON;
+	// NOTE(review): the parameters go through integrateKernel.constBuffer, not applyForcesKernel.constBuffer - confirm the sharing is intended and the buffer holds sizeof(ApplyForcesCB).
+	// Todo: factor this out. Number of nodes is static and sdt might be, too, we can update this just once on setup
+	D3D11_MAPPED_SUBRESOURCE MappedResource = {0};
+	m_dx11Context->Map( integrateKernel.constBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &MappedResource );
+	memcpy( MappedResource.pData, &constBuffer, sizeof(ApplyForcesCB) );	
+	m_dx11Context->Unmap( integrateKernel.constBuffer, 0 );
+	m_dx11Context->CSSetConstantBuffers( 0, 1, &integrateKernel.constBuffer );
+
+	// Set resources and dispatch	
+	m_dx11Context->CSSetShaderResources( 0, 1, &(m_vertexData.m_dx11ClothIdentifier.getSRV()) );
+	m_dx11Context->CSSetShaderResources( 1, 1, &(m_vertexData.m_dx11VertexNormal.getSRV()) );
+	m_dx11Context->CSSetShaderResources( 2, 1, &(m_vertexData.m_dx11VertexArea.getSRV()) );
+	m_dx11Context->CSSetShaderResources( 3, 1, &(m_vertexData.m_dx11VertexInverseMass.getSRV()) );
+	m_dx11Context->CSSetShaderResources( 4, 1, &(m_dx11PerClothLiftFactor.getSRV()) );
+	m_dx11Context->CSSetShaderResources( 5, 1, &(m_dx11PerClothDragFactor.getSRV()) );
+	m_dx11Context->CSSetShaderResources( 6, 1, &(m_dx11PerClothWindVelocity.getSRV()) );
+	m_dx11Context->CSSetShaderResources( 7, 1, &(m_dx11PerClothAcceleration.getSRV()) );
+	m_dx11Context->CSSetShaderResources( 8, 1, &(m_dx11PerClothMediumDensity.getSRV()) );
+
+	m_dx11Context->CSSetUnorderedAccessViews( 0, 1, &(m_vertexData.m_dx11VertexForceAccumulator.getUAV()), NULL );
+	m_dx11Context->CSSetUnorderedAccessViews( 1, 1, &(m_vertexData.m_dx11VertexVelocity.getUAV()), NULL );
+
+	// Execute the kernel
+	m_dx11Context->CSSetShader( applyForcesKernel.kernel, NULL, 0 );
+
+	int	numBlocks = (constBuffer.numNodes + (128-1)) / 128; // ceil(numNodes / 128); 128 presumably matches the shader's thread-group size - confirm
+	m_dx11Context->Dispatch(numBlocks, 1, 1 );
+
+	{
+		// Tidy up: unbind SRV slots 0-8, UAV slots 0-1 and constant buffer slot 0
+		ID3D11ShaderResourceView* pViewNULL = NULL;
+		m_dx11Context->CSSetShaderResources( 0, 1, &pViewNULL );
+		m_dx11Context->CSSetShaderResources( 1, 1, &pViewNULL );
+		m_dx11Context->CSSetShaderResources( 2, 1, &pViewNULL );
+		m_dx11Context->CSSetShaderResources( 3, 1, &pViewNULL );
+		m_dx11Context->CSSetShaderResources( 4, 1, &pViewNULL );
+		m_dx11Context->CSSetShaderResources( 5, 1, &pViewNULL );
+		m_dx11Context->CSSetShaderResources( 6, 1, &pViewNULL );
+		m_dx11Context->CSSetShaderResources( 7, 1, &pViewNULL );
+		m_dx11Context->CSSetShaderResources( 8, 1, &pViewNULL );
+
+		ID3D11UnorderedAccessView* pUAViewNULL = NULL;
+		m_dx11Context->CSSetUnorderedAccessViews( 0, 1, &pUAViewNULL, NULL );
+		m_dx11Context->CSSetUnorderedAccessViews( 1, 1, &pUAViewNULL, NULL );
+
+		ID3D11Buffer *pBufferNull = NULL;
+		m_dx11Context->CSSetConstantBuffers( 0, 1, &pBufferNull );
+	}	
+} // btDX11SoftBodySolver::applyForces
+
+/**
+ * Integrate motion on the solver: dispatches integrateKernel over all vertices with time step solverdt.
+ */
+void btDX11SoftBodySolver::integrate( float solverdt )
+{
+	// TEMPORARY COPIES
+	m_vertexData.moveToAccelerator();
+
+	// No need to batch link solver, it is entirely parallel
+	// Copy kernel parameters to GPU
+	IntegrateCB constBuffer;
+	
+	constBuffer.numNodes = m_vertexData.getNumVertices();
+	constBuffer.solverdt = solverdt;
+	
+	// Todo: factor this out. Number of nodes is static and sdt might be, too, we can update this just once on setup
+	D3D11_MAPPED_SUBRESOURCE MappedResource = {0};
+	m_dx11Context->Map( integrateKernel.constBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &MappedResource );
+	memcpy( MappedResource.pData, &constBuffer, sizeof(IntegrateCB) );	
+	m_dx11Context->Unmap( integrateKernel.constBuffer, 0 );
+	m_dx11Context->CSSetConstantBuffers( 0, 1, &integrateKernel.constBuffer );
+
+	// Set resources and dispatch: inverse mass is the only SRV; position, velocity, previous position and force accumulator are UAVs 0-3
+	m_dx11Context->CSSetShaderResources( 0, 1, &(m_vertexData.m_dx11VertexInverseMass.getSRV()) );
+
+	m_dx11Context->CSSetUnorderedAccessViews( 0, 1, &(m_vertexData.m_dx11VertexPosition.getUAV()), NULL );
+	m_dx11Context->CSSetUnorderedAccessViews( 1, 1, &(m_vertexData.m_dx11VertexVelocity.getUAV()), NULL );
+	m_dx11Context->CSSetUnorderedAccessViews( 2, 1, &(m_vertexData.m_dx11VertexPreviousPosition.getUAV()), NULL );
+	m_dx11Context->CSSetUnorderedAccessViews( 3, 1, &(m_vertexData.m_dx11VertexForceAccumulator.getUAV()), NULL );
+
+	// Execute the kernel
+	m_dx11Context->CSSetShader( integrateKernel.kernel, NULL, 0 );
+
+	int	numBlocks = (constBuffer.numNodes + (128-1)) / 128; // ceil(numNodes / 128); 128 presumably matches the shader's thread-group size - confirm
+	m_dx11Context->Dispatch(numBlocks, 1, 1 );
+
+	{
+		// Tidy up: unbind SRV slot 0, UAV slots 0-3 and constant buffer slot 0
+		ID3D11ShaderResourceView* pViewNULL = NULL;
+		m_dx11Context->CSSetShaderResources( 0, 1, &pViewNULL );
+
+		ID3D11UnorderedAccessView* pUAViewNULL = NULL;
+		m_dx11Context->CSSetUnorderedAccessViews( 0, 1, &pUAViewNULL, NULL );
+		m_dx11Context->CSSetUnorderedAccessViews( 1, 1, &pUAViewNULL, NULL );
+		m_dx11Context->CSSetUnorderedAccessViews( 2, 1, &pUAViewNULL, NULL );
+		m_dx11Context->CSSetUnorderedAccessViews( 3, 1, &pUAViewNULL, NULL );
+
+		ID3D11Buffer *pBufferNull = NULL;
+		m_dx11Context->CSSetConstantBuffers( 0, 1, &pBufferNull );
+	}	
+} // btDX11SoftBodySolver::integrate
+
+// Returns the length of the cross product of the two edges leaving vertex0.
+// Note: that is the area of the parallelogram spanned by the edges, i.e. twice the geometric area of the triangle.
+float btDX11SoftBodySolver::computeTriangleArea( 
+	const Vectormath::Aos::Point3 &vertex0,
+	const Vectormath::Aos::Point3 &vertex1,
+	const Vectormath::Aos::Point3 &vertex2 )
+{
+	Vectormath::Aos::Vector3 edgeToVertex1 = vertex1 - vertex0;
+	Vectormath::Aos::Vector3 edgeToVertex2 = vertex2 - vertex0;
+	return length( cross(edgeToVertex1, edgeToVertex2) );
+} // btDX11SoftBodySolver::computeTriangleArea
+
+
+// Seeds the per-cloth bounds, runs computeBounds(), reads the results back and hands decoded float bounds to every soft body interface.
+void btDX11SoftBodySolver::updateBounds()
+{	
+	using Vectormath::Aos::Point3;
+	// Interpretation structure for float and int
+	struct FPRep {
+		unsigned int mantissa  : 23;
+		unsigned int exponent : 8;
+		unsigned int sign    : 1;
+	};
+	union FloatAsInt
+	{
+		float floatValue;
+		int intValue;
+		unsigned int uintValue;
+		FPRep fpRep;
+	};
+
+	// Minimum starts at the largest unsigned value and maximum at 0, so any real value replaces them
+	// Update bounds array to min and max int values to allow easy atomics
+	for( int softBodyIndex = 0; softBodyIndex < m_softBodySet.size(); ++softBodyIndex )
+	{
+		m_perClothMinBounds[softBodyIndex] = UIntVector3( UINT_MAX, UINT_MAX, UINT_MAX );
+		m_perClothMaxBounds[softBodyIndex] = UIntVector3( 0, 0, 0 );
+	}
+	
+	m_dx11PerClothMinBounds.moveToGPU();
+	m_dx11PerClothMaxBounds.moveToGPU();
+
+	// computeBounds() is defined elsewhere; presumably it dispatches computeBoundsKernel - confirm
+	computeBounds( );
+
+
+	m_dx11PerClothMinBounds.moveFromGPU();
+	m_dx11PerClothMaxBounds.moveFromGPU();
+
+
+	// Decode the unsigned results component by component and pass them on
+	for( int softBodyIndex = 0; softBodyIndex < m_softBodySet.size(); ++softBodyIndex )
+	{
+		UIntVector3 minBoundUInt = m_perClothMinBounds[softBodyIndex];
+		UIntVector3 maxBoundUInt = m_perClothMaxBounds[softBodyIndex];
+				
+		// Convert back to float
+		FloatAsInt fai;
+		// The XOR mask flips only the top bit when the stored top bit is 1 and inverts all 32 bits when it is 0 (presumably undoing an order-preserving float-to-uint encoding done on the GPU - confirm against the bounds kernel)
+		btVector3 minBound;
+		fai.uintValue = minBoundUInt.x;
+	    fai.uintValue ^= (((fai.uintValue >> 31) - 1) | 0x80000000);
+		minBound.setX( fai.floatValue );
+		fai.uintValue = minBoundUInt.y;
+		fai.uintValue ^= (((fai.uintValue >> 31) - 1) | 0x80000000);
+		minBound.setY( fai.floatValue );
+		fai.uintValue = minBoundUInt.z;
+		fai.uintValue ^= (((fai.uintValue >> 31) - 1) | 0x80000000);
+		minBound.setZ( fai.floatValue );
+
+		btVector3 maxBound;
+		fai.uintValue = maxBoundUInt.x;
+		fai.uintValue ^= (((fai.uintValue >> 31) - 1) | 0x80000000);
+		maxBound.setX( fai.floatValue );
+		fai.uintValue = maxBoundUInt.y;
+		fai.uintValue ^= (((fai.uintValue >> 31) - 1) | 0x80000000);
+		maxBound.setY( fai.floatValue );
+		fai.uintValue = maxBoundUInt.z;
+		fai.uintValue ^= (((fai.uintValue >> 31) - 1) | 0x80000000);
+		maxBound.setZ( fai.floatValue );
+		
+		// And finally assign to the soft body
+		m_softBodySet[softBodyIndex]->updateBounds( minBound, maxBound );
+	}
+}
+
+// Recomputes the per-link constants (rest length, squared rest length, massLSC) while m_updateSolverConstants is set, then clears the flag.
+// timeStep is not used by this implementation.
+void btDX11SoftBodySolver::updateConstants( float timeStep )
+{
+	using namespace Vectormath::Aos;
+
+	// Nothing to do unless a recompute was requested
+	if( !m_updateSolverConstants )
+		return;
+	m_updateSolverConstants = false;
+
+	// Will have to redo this if we change the structure (tear, maybe) or various other possible changes
+	const int linkCount = m_linkData.getNumLinks();
+	for( int link = 0; link < linkCount; ++link )
+	{
+		btSoftBodyLinkData::LinkNodePair &ends( m_linkData.getVertexPair(link) );
+		// Rest length is the current distance between the two end vertices
+		m_linkData.getRestLength(link) = length((m_vertexData.getPosition( ends.vertex0 ) - m_vertexData.getPosition( ends.vertex1 )));
+		// massLSC = (sum of the end vertices' inverse masses) / linear stiffness coefficient
+		float inverseMassA = m_vertexData.getInverseMass(ends.vertex0);
+		float inverseMassB = m_vertexData.getInverseMass(ends.vertex1);
+		m_linkData.getMassLSC(link) = (inverseMassA + inverseMassB)/m_linkData.getLinearStiffnessCoefficient(link);
+		// Read the stored rest length back and keep its square alongside it
+		float storedRestLength = m_linkData.getRestLength(link);
+		m_linkData.getRestLengthSquared(link) = storedRestLength*storedRestLength;
+	}
+} // btDX11SoftBodySolver::updateConstants
+
+/**
+ * Sort the collision object details array by soft body identifier and generate, for each cloth, the
+ * [firstObject, endObject) index range into it. Cloths not reached by the scan keep the (-1, -1) marker.
+ */
+void btDX11SoftBodySolver::prepareCollisionConstraints()
+{
+	// Comparator ordering collision shapes by the soft body they belong to
+	class QuickSortCompare
+	{
+		public:
+
+		bool operator() ( const CollisionShapeDescription& a, const CollisionShapeDescription& b ) const
+		{
+			return ( a.softBodyIdentifier < b.softBodyIdentifier );
+		}
+	};
+
+	QuickSortCompare comparator;
+	m_collisionObjectDetails.quickSort( comparator );
+
+	// Generating indexing for perClothCollisionObjects
+	// First clear the previous values with the "no collision object for cloth" constant
+	for( int clothIndex = 0; clothIndex < m_perClothCollisionObjects.size(); ++clothIndex )
+	{
+		m_perClothCollisionObjects[clothIndex].firstObject = -1;
+		m_perClothCollisionObjects[clothIndex].endObject = -1;
+	}
+
+	// Without any cloth there is nothing to index, and the unconditional last-cloth update at the end
+	// would write to element 0 of an empty array.
+	if( m_perClothCollisionObjects.size() == 0 )
+		return;
+
+	int currentCloth = 0;
+	int startIndex = 0;
+	for( int collisionObject = 0; collisionObject < m_collisionObjectDetails.size(); ++collisionObject )
+	{
+		int nextCloth = m_collisionObjectDetails[collisionObject].softBodyIdentifier;
+		if( nextCloth != currentCloth )
+		{
+			// Changed cloth in the array
+			// Set the end index and the range is what we need for currentCloth
+			m_perClothCollisionObjects[currentCloth].firstObject = startIndex;
+			m_perClothCollisionObjects[currentCloth].endObject = collisionObject;
+			currentCloth = nextCloth;
+			startIndex = collisionObject;
+		}
+	}
+
+	// And update last cloth. With an empty details array this gives cloth 0 the empty range [0, 0).
+	m_perClothCollisionObjects[currentCloth].firstObject = startIndex;
+	m_perClothCollisionObjects[currentCloth].endObject =  m_collisionObjectDetails.size();
+} // btDX11SoftBodySolver::prepareCollisionConstraints
+
+
+// Runs one constraint-solving step on the GPU:
+//   1. prepareLinks, then m_numberOfVelocityIterations passes of solveLinksForVelocity over the link batches,
+//   2. prepareCollisionConstraints and a velocity update from the positions,
+//   3. m_numberOfPositionIterations passes of solveLinksForPosition, then solveCollisionsAndUpdateVelocities.
+void btDX11SoftBodySolver::solveConstraints( float solverdt )
+{
+
+	// kst and ti are passed unchanged to the link solvers below
+	float kst = 1.f;
+	float ti = 0.f;
+
+	// The reciprocal of the step is what the velocity-update and collision kernels receive; solverdt is not checked for zero here
+	const float inverseSolverdt = 1.f/solverdt;
+
+
+	m_dx11PerClothDampingFactor.moveToGPU();
+	m_dx11PerClothVelocityCorrectionCoefficient.moveToGPU();
+
+
+	// Ensure data is on accelerator
+	m_linkData.moveToAccelerator();
+	m_vertexData.moveToAccelerator();
+
+
+	prepareLinks();
+
+	// Velocity pass: the link batches are dispatched one after another
+	for( int iteration = 0; iteration < m_numberOfVelocityIterations ; ++iteration )
+	{
+		for( int i = 0; i < m_linkData.m_batchStartLengths.size(); ++i )
+		{
+			int startLink = m_linkData.m_batchStartLengths[i].start;
+			int numLinks = m_linkData.m_batchStartLengths[i].length;
+
+			solveLinksForVelocity( startLink, numLinks, kst );
+		}
+	}
+
+
+	// Sort the collision objects and rebuild the per-cloth index ranges
+	prepareCollisionConstraints();
+
+	// Update the velocities from the positions; which kernel runs depends on whether any velocity iterations were done.
+	// Also update the previous position so that our position computation is now based on the new position from the velocity solution
+	// rather than based directly on the original positions
+	if( m_numberOfVelocityIterations > 0 )
+	{
+		updateVelocitiesFromPositionsWithVelocities( inverseSolverdt );
+	} else {
+		updateVelocitiesFromPositionsWithoutVelocities( inverseSolverdt );
+	}
+
+
+	// Solve drift
+	for( int iteration = 0; iteration < m_numberOfPositionIterations ; ++iteration )
+	{
+		for( int i = 0; i < m_linkData.m_batchStartLengths.size(); ++i )
+		{
+			int startLink = m_linkData.m_batchStartLengths[i].start;
+			int numLinks = m_linkData.m_batchStartLengths[i].length;
+
+			solveLinksForPosition( startLink, numLinks, kst, ti );
+		}
+
+	} // for( int iteration = 0; iteration < m_numberOfPositionIterations ; ++iteration )
+
+	// At this point assume that the force array is blank - we will overwrite it
+	solveCollisionsAndUpdateVelocities( inverseSolverdt );
+} // btDX11SoftBodySolver::solveConstraints
+
+
+
+
+//////////////////////////////////////
+// Kernel dispatches
+// Dispatches prepareLinksKernel over all links: reads links, massLSC and previous vertex positions (SRVs 0-2), writes length ratio and CLength (UAVs 0-1).
+void btDX11SoftBodySolver::prepareLinks()
+{
+	// No need to batch link solver, it is entirely parallel
+	// Copy kernel parameters to GPU
+	PrepareLinksCB constBuffer;
+	constBuffer.numLinks = m_linkData.getNumLinks();
+	
+	D3D11_MAPPED_SUBRESOURCE MappedResource = {0};
+	m_dx11Context->Map( prepareLinksKernel.constBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &MappedResource );
+	memcpy( MappedResource.pData, &constBuffer, sizeof(PrepareLinksCB) );	
+	m_dx11Context->Unmap( prepareLinksKernel.constBuffer, 0 );
+	m_dx11Context->CSSetConstantBuffers( 0, 1, &prepareLinksKernel.constBuffer );
+
+	// Set resources and dispatch
+	m_dx11Context->CSSetShaderResources( 0, 1, &(m_linkData.m_dx11Links.getSRV()) );
+	m_dx11Context->CSSetShaderResources( 1, 1, &(m_linkData.m_dx11LinksMassLSC.getSRV()) );
+	m_dx11Context->CSSetShaderResources( 2, 1, &(m_vertexData.m_dx11VertexPreviousPosition.getSRV()) );
+
+	m_dx11Context->CSSetUnorderedAccessViews( 0, 1, &(m_linkData.m_dx11LinksLengthRatio.getUAV()), NULL );
+	m_dx11Context->CSSetUnorderedAccessViews( 1, 1, &(m_linkData.m_dx11LinksCLength.getUAV()), NULL );
+
+	// Execute the kernel
+	m_dx11Context->CSSetShader( prepareLinksKernel.kernel, NULL, 0 );
+
+	int	numBlocks = (constBuffer.numLinks + (128-1)) / 128; // ceil(numLinks / 128); 128 presumably matches the shader's thread-group size - confirm
+	m_dx11Context->Dispatch(numBlocks , 1, 1 );
+
+	{
+		// Tidy up: unbind SRV slots 0-2, UAV slots 0-1 and constant buffer slot 0
+		ID3D11ShaderResourceView* pViewNULL = NULL;
+		m_dx11Context->CSSetShaderResources( 0, 1, &pViewNULL );
+		m_dx11Context->CSSetShaderResources( 1, 1, &pViewNULL );
+		m_dx11Context->CSSetShaderResources( 2, 1, &pViewNULL );
+
+		ID3D11UnorderedAccessView* pUAViewNULL = NULL;
+		m_dx11Context->CSSetUnorderedAccessViews( 0, 1, &pUAViewNULL, NULL );
+		m_dx11Context->CSSetUnorderedAccessViews( 1, 1, &pUAViewNULL, NULL );
+		
+		ID3D11Buffer *pBufferNull = NULL;
+		m_dx11Context->CSSetConstantBuffers( 0, 1, &pBufferNull );
+	}
+} // btDX11SoftBodySolver::prepareLinks
+
+
+void btDX11SoftBodySolver::updatePositionsFromVelocities( float solverdt )
+{
+	// Runs the UpdatePositionsFromVelocities compute kernel in a single dispatch
+	// covering every vertex; solverdt is forwarded to the kernel as solverSDT.
+	UpdatePositionsFromVelocitiesCB kernelParams;
+	kernelParams.numNodes = m_vertexData.getNumVertices();
+	kernelParams.solverSDT = solverdt;
+
+	// Todo: factor this out. Number of nodes is static and sdt might be, too, we can update this just once on setup
+	D3D11_MAPPED_SUBRESOURCE mapped = {0};
+	m_dx11Context->Map( updatePositionsFromVelocitiesKernel.constBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped );
+	memcpy( mapped.pData, &kernelParams, sizeof(UpdatePositionsFromVelocitiesCB) );
+	m_dx11Context->Unmap( updatePositionsFromVelocitiesKernel.constBuffer, 0 );
+	m_dx11Context->CSSetConstantBuffers( 0, 1, &updatePositionsFromVelocitiesKernel.constBuffer );
+
+	// Shader resource view: vertex velocities
+	m_dx11Context->CSSetShaderResources( 0, 1, &(m_vertexData.m_dx11VertexVelocity.getSRV()) );
+
+	// Unordered access views: previous and current vertex positions
+	m_dx11Context->CSSetUnorderedAccessViews( 0, 1, &(m_vertexData.m_dx11VertexPreviousPosition.getUAV()), NULL );
+	m_dx11Context->CSSetUnorderedAccessViews( 1, 1, &(m_vertexData.m_dx11VertexPosition.getUAV()), NULL );
+
+	// Select the kernel and launch enough groups to cover all vertices.
+	// NOTE(review): 128 presumably matches the thread-group size declared in the HLSL - confirm.
+	m_dx11Context->CSSetShader( updatePositionsFromVelocitiesKernel.kernel, NULL, 0 );
+
+	const int threadsPerGroup = 128;
+	int	groupCount = (kernelParams.numNodes + (threadsPerGroup-1)) / threadsPerGroup;
+	m_dx11Context->Dispatch( groupCount, 1, 1 );
+
+	{
+		// Unbind everything that was bound above
+		ID3D11ShaderResourceView* nullSRV = NULL;
+		m_dx11Context->CSSetShaderResources( 0, 1, &nullSRV );
+
+		ID3D11UnorderedAccessView* nullUAV = NULL;
+		for( UINT slot = 0; slot < 2; ++slot )
+			m_dx11Context->CSSetUnorderedAccessViews( slot, 1, &nullUAV, NULL );
+
+		ID3D11Buffer *nullBuffer = NULL;
+		m_dx11Context->CSSetConstantBuffers( 0, 1, &nullBuffer );
+	}
+} // btDX11SoftBodySolver::updatePositionsFromVelocities
+
+void btDX11SoftBodySolver::solveLinksForPosition( int startLink, int numLinks, float kst, float ti )
+{
+	// Runs the SolvePositionsFromLinks compute kernel over one batch of links:
+	// numLinks links beginning at startLink. kst and ti are forwarded to the kernel unchanged.
+	SolvePositionsFromLinksKernelCB kernelParams;
+	kernelParams.startLink = startLink;
+	kernelParams.numLinks = numLinks;
+	kernelParams.kst = kst;
+	kernelParams.ti = ti;
+
+	// Upload the kernel parameters into the kernel's constant buffer and bind it to slot 0
+	D3D11_MAPPED_SUBRESOURCE mapped = {0};
+	m_dx11Context->Map( solvePositionsFromLinksKernel.constBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped );
+	memcpy( mapped.pData, &kernelParams, sizeof(SolvePositionsFromLinksKernelCB) );
+	m_dx11Context->Unmap( solvePositionsFromLinksKernel.constBuffer, 0 );
+	m_dx11Context->CSSetConstantBuffers( 0, 1, &solvePositionsFromLinksKernel.constBuffer );
+
+	// Shader resource views: links, link mass/LSC, squared rest lengths, vertex inverse masses
+	m_dx11Context->CSSetShaderResources( 0, 1, &(m_linkData.m_dx11Links.getSRV()) );
+	m_dx11Context->CSSetShaderResources( 1, 1, &(m_linkData.m_dx11LinksMassLSC.getSRV()) );
+	m_dx11Context->CSSetShaderResources( 2, 1, &(m_linkData.m_dx11LinksRestLengthSquared.getSRV()) );
+	m_dx11Context->CSSetShaderResources( 3, 1, &(m_vertexData.m_dx11VertexInverseMass.getSRV()) );
+
+	// Unordered access view: vertex positions
+	m_dx11Context->CSSetUnorderedAccessViews( 0, 1, &(m_vertexData.m_dx11VertexPosition.getUAV()), NULL );
+
+	// Select the kernel and launch enough groups to cover the batch.
+	// NOTE(review): 128 presumably matches the thread-group size declared in the HLSL - confirm.
+	m_dx11Context->CSSetShader( solvePositionsFromLinksKernel.kernel, NULL, 0 );
+
+	const int threadsPerGroup = 128;
+	int	groupCount = (kernelParams.numLinks + (threadsPerGroup-1)) / threadsPerGroup;
+	m_dx11Context->Dispatch( groupCount, 1, 1 );
+
+	{
+		// Unbind everything that was bound above
+		ID3D11ShaderResourceView* nullSRV = NULL;
+		for( UINT slot = 0; slot < 4; ++slot )
+			m_dx11Context->CSSetShaderResources( slot, 1, &nullSRV );
+
+		ID3D11UnorderedAccessView* nullUAV = NULL;
+		m_dx11Context->CSSetUnorderedAccessViews( 0, 1, &nullUAV, NULL );
+
+		ID3D11Buffer *nullBuffer = NULL;
+		m_dx11Context->CSSetConstantBuffers( 0, 1, &nullBuffer );
+	}
+
+} // btDX11SoftBodySolver::solveLinksForPosition
+
+void btDX11SoftBodySolver::solveLinksForVelocity( int startLink, int numLinks, float kst )
+{
+	// Runs the VSolveLinks compute kernel over one batch of links:
+	// numLinks links beginning at startLink. kst is forwarded to the kernel unchanged.
+	VSolveLinksCB kernelParams;
+	kernelParams.startLink = startLink;
+	kernelParams.numLinks = numLinks;
+	kernelParams.kst = kst;
+
+	// Upload the kernel parameters into the kernel's constant buffer and bind it to slot 0
+	D3D11_MAPPED_SUBRESOURCE mapped = {0};
+	m_dx11Context->Map( vSolveLinksKernel.constBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped );
+	memcpy( mapped.pData, &kernelParams, sizeof(VSolveLinksCB) );
+	m_dx11Context->Unmap( vSolveLinksKernel.constBuffer, 0 );
+	m_dx11Context->CSSetConstantBuffers( 0, 1, &vSolveLinksKernel.constBuffer );
+
+	// Shader resource views: links, link length ratios, link current lengths, vertex inverse masses
+	m_dx11Context->CSSetShaderResources( 0, 1, &(m_linkData.m_dx11Links.getSRV()) );
+	m_dx11Context->CSSetShaderResources( 1, 1, &(m_linkData.m_dx11LinksLengthRatio.getSRV()) );
+	m_dx11Context->CSSetShaderResources( 2, 1, &(m_linkData.m_dx11LinksCLength.getSRV()) );
+	m_dx11Context->CSSetShaderResources( 3, 1, &(m_vertexData.m_dx11VertexInverseMass.getSRV()) );
+
+	// Unordered access view: vertex velocities
+	m_dx11Context->CSSetUnorderedAccessViews( 0, 1, &(m_vertexData.m_dx11VertexVelocity.getUAV()), NULL );
+
+	// Select the kernel and launch enough groups to cover the batch.
+	// NOTE(review): 128 presumably matches the thread-group size declared in the HLSL - confirm.
+	m_dx11Context->CSSetShader( vSolveLinksKernel.kernel, NULL, 0 );
+
+	const int threadsPerGroup = 128;
+	int	groupCount = (kernelParams.numLinks + (threadsPerGroup-1)) / threadsPerGroup;
+	m_dx11Context->Dispatch( groupCount, 1, 1 );
+
+	{
+		// Unbind everything that was bound above
+		ID3D11ShaderResourceView* nullSRV = NULL;
+		for( UINT slot = 0; slot < 4; ++slot )
+			m_dx11Context->CSSetShaderResources( slot, 1, &nullSRV );
+
+		ID3D11UnorderedAccessView* nullUAV = NULL;
+		m_dx11Context->CSSetUnorderedAccessViews( 0, 1, &nullUAV, NULL );
+
+		ID3D11Buffer *nullBuffer = NULL;
+		m_dx11Context->CSSetConstantBuffers( 0, 1, &nullBuffer );
+	}
+} // btDX11SoftBodySolver::solveLinksForVelocity
+
+
+void btDX11SoftBodySolver::updateVelocitiesFromPositionsWithVelocities( float isolverdt )
+{
+	// Runs the updateVelocitiesFromPositionsWithVelocities compute kernel in a single
+	// dispatch covering every vertex; isolverdt is forwarded to the kernel unchanged.
+	UpdateVelocitiesFromPositionsWithVelocitiesCB kernelParams;
+	kernelParams.numNodes = m_vertexData.getNumVertices();
+	kernelParams.isolverdt = isolverdt;
+
+	// Upload the kernel parameters into the kernel's constant buffer and bind it to slot 0
+	D3D11_MAPPED_SUBRESOURCE mapped = {0};
+	m_dx11Context->Map( updateVelocitiesFromPositionsWithVelocitiesKernel.constBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped );
+	memcpy( mapped.pData, &kernelParams, sizeof(UpdateVelocitiesFromPositionsWithVelocitiesCB) );
+	m_dx11Context->Unmap( updateVelocitiesFromPositionsWithVelocitiesKernel.constBuffer, 0 );
+	m_dx11Context->CSSetConstantBuffers( 0, 1, &updateVelocitiesFromPositionsWithVelocitiesKernel.constBuffer );
+
+	// Shader resource views: positions, previous positions, cloth identifiers,
+	// per-cloth velocity correction coefficients, per-cloth damping factors
+	m_dx11Context->CSSetShaderResources( 0, 1, &(m_vertexData.m_dx11VertexPosition.getSRV()) );
+	m_dx11Context->CSSetShaderResources( 1, 1, &(m_vertexData.m_dx11VertexPreviousPosition.getSRV()) );
+	m_dx11Context->CSSetShaderResources( 2, 1, &(m_vertexData.m_dx11ClothIdentifier.getSRV()) );
+	m_dx11Context->CSSetShaderResources( 3, 1, &(m_dx11PerClothVelocityCorrectionCoefficient.getSRV()) );
+	m_dx11Context->CSSetShaderResources( 4, 1, &(m_dx11PerClothDampingFactor.getSRV()) );
+
+	// Unordered access views: vertex velocities and force accumulators
+	m_dx11Context->CSSetUnorderedAccessViews( 0, 1, &(m_vertexData.m_dx11VertexVelocity.getUAV()), NULL );
+	m_dx11Context->CSSetUnorderedAccessViews( 1, 1, &(m_vertexData.m_dx11VertexForceAccumulator.getUAV()), NULL );
+
+	// Select the kernel and launch enough groups to cover all vertices.
+	// NOTE(review): 128 presumably matches the thread-group size declared in the HLSL - confirm.
+	m_dx11Context->CSSetShader( updateVelocitiesFromPositionsWithVelocitiesKernel.kernel, NULL, 0 );
+
+	const int threadsPerGroup = 128;
+	int	groupCount = (kernelParams.numNodes + (threadsPerGroup-1)) / threadsPerGroup;
+	m_dx11Context->Dispatch( groupCount, 1, 1 );
+
+	{
+		// Unbind everything that was bound above
+		ID3D11ShaderResourceView* nullSRV = NULL;
+		for( UINT slot = 0; slot < 5; ++slot )
+			m_dx11Context->CSSetShaderResources( slot, 1, &nullSRV );
+
+		ID3D11UnorderedAccessView* nullUAV = NULL;
+		for( UINT slot = 0; slot < 2; ++slot )
+			m_dx11Context->CSSetUnorderedAccessViews( slot, 1, &nullUAV, NULL );
+
+		ID3D11Buffer *nullBuffer = NULL;
+		m_dx11Context->CSSetConstantBuffers( 0, 1, &nullBuffer );
+	}
+
+} // btDX11SoftBodySolver::updateVelocitiesFromPositionsWithVelocities
+
+void btDX11SoftBodySolver::updateVelocitiesFromPositionsWithoutVelocities( float isolverdt )
+{
+	// Runs the updateVelocitiesFromPositionsWithoutVelocities compute kernel in a single
+	// dispatch covering every vertex; isolverdt is forwarded to the kernel unchanged.
+	UpdateVelocitiesFromPositionsWithoutVelocitiesCB kernelParams;
+	kernelParams.numNodes = m_vertexData.getNumVertices();
+	kernelParams.isolverdt = isolverdt;
+
+	// Upload the kernel parameters into the kernel's constant buffer and bind it to slot 0
+	D3D11_MAPPED_SUBRESOURCE mapped = {0};
+	m_dx11Context->Map( updateVelocitiesFromPositionsWithoutVelocitiesKernel.constBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped );
+	memcpy( mapped.pData, &kernelParams, sizeof(UpdateVelocitiesFromPositionsWithoutVelocitiesCB) );
+	m_dx11Context->Unmap( updateVelocitiesFromPositionsWithoutVelocitiesKernel.constBuffer, 0 );
+	m_dx11Context->CSSetConstantBuffers( 0, 1, &updateVelocitiesFromPositionsWithoutVelocitiesKernel.constBuffer );
+
+	// Shader resource views: positions, previous positions, cloth identifiers, per-cloth damping factors
+	m_dx11Context->CSSetShaderResources( 0, 1, &(m_vertexData.m_dx11VertexPosition.getSRV()) );
+	m_dx11Context->CSSetShaderResources( 1, 1, &(m_vertexData.m_dx11VertexPreviousPosition.getSRV()) );
+	m_dx11Context->CSSetShaderResources( 2, 1, &(m_vertexData.m_dx11ClothIdentifier.getSRV()) );
+	m_dx11Context->CSSetShaderResources( 3, 1, &(m_dx11PerClothDampingFactor.getSRV()) );
+
+	// Unordered access views: vertex velocities and force accumulators
+	m_dx11Context->CSSetUnorderedAccessViews( 0, 1, &(m_vertexData.m_dx11VertexVelocity.getUAV()), NULL );
+	m_dx11Context->CSSetUnorderedAccessViews( 1, 1, &(m_vertexData.m_dx11VertexForceAccumulator.getUAV()), NULL );
+
+	// Select the kernel and launch enough groups to cover all vertices.
+	// NOTE(review): 128 presumably matches the thread-group size declared in the HLSL - confirm.
+	m_dx11Context->CSSetShader( updateVelocitiesFromPositionsWithoutVelocitiesKernel.kernel, NULL, 0 );
+
+	const int threadsPerGroup = 128;
+	int	groupCount = (kernelParams.numNodes + (threadsPerGroup-1)) / threadsPerGroup;
+	m_dx11Context->Dispatch( groupCount, 1, 1 );
+
+	{
+		// Unbind everything that was bound above
+		ID3D11ShaderResourceView* nullSRV = NULL;
+		for( UINT slot = 0; slot < 4; ++slot )
+			m_dx11Context->CSSetShaderResources( slot, 1, &nullSRV );
+
+		ID3D11UnorderedAccessView* nullUAV = NULL;
+		for( UINT slot = 0; slot < 2; ++slot )
+			m_dx11Context->CSSetUnorderedAccessViews( slot, 1, &nullUAV, NULL );
+
+		ID3D11Buffer *nullBuffer = NULL;
+		m_dx11Context->CSSetConstantBuffers( 0, 1, &nullBuffer );
+	}
+
+} // btDX11SoftBodySolver::updateVelocitiesFromPositionsWithoutVelocities
+
+
+void btDX11SoftBodySolver::computeBounds( )
+{
+	// Runs the ComputeBounds compute kernel in a single dispatch covering every vertex,
+	// with the per-cloth min/max bounds buffers bound as outputs.
+	ComputeBoundsCB kernelParams;
+
+	// Vertex data is moved to the accelerator before any of it is bound below
+	m_vertexData.moveToAccelerator();
+
+	kernelParams.numNodes = m_vertexData.getNumVertices();
+	kernelParams.numSoftBodies = m_softBodySet.size();
+
+	// Upload the kernel parameters into the kernel's constant buffer and bind it to slot 0
+	D3D11_MAPPED_SUBRESOURCE mapped = {0};
+	m_dx11Context->Map( computeBoundsKernel.constBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped );
+	memcpy( mapped.pData, &kernelParams, sizeof(ComputeBoundsCB) );
+	m_dx11Context->Unmap( computeBoundsKernel.constBuffer, 0 );
+	m_dx11Context->CSSetConstantBuffers( 0, 1, &computeBoundsKernel.constBuffer );
+
+	// Shader resource views: cloth identifiers and vertex positions
+	m_dx11Context->CSSetShaderResources( 0, 1, &(m_vertexData.m_dx11ClothIdentifier.getSRV()) );
+	m_dx11Context->CSSetShaderResources( 1, 1, &(m_vertexData.m_dx11VertexPosition.getSRV()) );
+
+	// Unordered access views: per-cloth minimum and maximum bounds
+	m_dx11Context->CSSetUnorderedAccessViews( 0, 1, &(m_dx11PerClothMinBounds.getUAV()), NULL );
+	m_dx11Context->CSSetUnorderedAccessViews( 1, 1, &(m_dx11PerClothMaxBounds.getUAV()), NULL );
+
+	// Select the kernel and launch enough groups to cover all vertices.
+	// NOTE(review): 128 presumably matches the thread-group size declared in the HLSL - confirm.
+	m_dx11Context->CSSetShader( computeBoundsKernel.kernel, NULL, 0 );
+
+	const int threadsPerGroup = 128;
+	int	groupCount = (kernelParams.numNodes + (threadsPerGroup-1)) / threadsPerGroup;
+	m_dx11Context->Dispatch( groupCount, 1, 1 );
+
+	{
+		// Unbind everything that was bound above
+		ID3D11ShaderResourceView* nullSRV = NULL;
+		for( UINT slot = 0; slot < 2; ++slot )
+			m_dx11Context->CSSetShaderResources( slot, 1, &nullSRV );
+
+		ID3D11UnorderedAccessView* nullUAV = NULL;
+		for( UINT slot = 0; slot < 2; ++slot )
+			m_dx11Context->CSSetUnorderedAccessViews( slot, 1, &nullUAV, NULL );
+
+		ID3D11Buffer *nullBuffer = NULL;
+		m_dx11Context->CSSetConstantBuffers( 0, 1, &nullBuffer );
+	}
+}
+
+void btDX11SoftBodySolver::solveCollisionsAndUpdateVelocities( float isolverdt )
+{
+	// Runs the SolveCollisionsAndUpdateVelocities compute kernel in a single dispatch
+	// covering every vertex; isolverdt is forwarded to the kernel unchanged.
+
+	// Make sure every buffer the kernel binds has been moved to the GPU first
+	m_vertexData.moveToAccelerator();
+	m_dx11PerClothFriction.moveToGPU();
+	m_dx11PerClothDampingFactor.moveToGPU();
+	m_dx11PerClothCollisionObjects.moveToGPU();
+	m_dx11CollisionObjectDetails.moveToGPU();
+
+	SolveCollisionsAndUpdateVelocitiesCB kernelParams;
+	kernelParams.numNodes = m_vertexData.getNumVertices();
+	kernelParams.isolverdt = isolverdt;
+
+	// Upload the kernel parameters into the kernel's constant buffer and bind it to slot 0
+	D3D11_MAPPED_SUBRESOURCE mapped = {0};
+	m_dx11Context->Map( solveCollisionsAndUpdateVelocitiesKernel.constBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped );
+	memcpy( mapped.pData, &kernelParams, sizeof(SolveCollisionsAndUpdateVelocitiesCB) );
+	m_dx11Context->Unmap( solveCollisionsAndUpdateVelocitiesKernel.constBuffer, 0 );
+	m_dx11Context->CSSetConstantBuffers( 0, 1, &solveCollisionsAndUpdateVelocitiesKernel.constBuffer );
+
+	// Shader resource views: cloth identifiers, previous positions, per-cloth friction,
+	// per-cloth damping, per-cloth collision object ranges, collision object details
+	m_dx11Context->CSSetShaderResources( 0, 1, &(m_vertexData.m_dx11ClothIdentifier.getSRV()) );
+	m_dx11Context->CSSetShaderResources( 1, 1, &(m_vertexData.m_dx11VertexPreviousPosition.getSRV()) );
+	m_dx11Context->CSSetShaderResources( 2, 1, &(m_dx11PerClothFriction.getSRV()) );
+	m_dx11Context->CSSetShaderResources( 3, 1, &(m_dx11PerClothDampingFactor.getSRV()) );
+	m_dx11Context->CSSetShaderResources( 4, 1, &(m_dx11PerClothCollisionObjects.getSRV()) );
+	m_dx11Context->CSSetShaderResources( 5, 1, &(m_dx11CollisionObjectDetails.getSRV()) );
+
+	// Unordered access views: force accumulators, velocities, positions
+	m_dx11Context->CSSetUnorderedAccessViews( 0, 1, &(m_vertexData.m_dx11VertexForceAccumulator.getUAV()), NULL );
+	m_dx11Context->CSSetUnorderedAccessViews( 1, 1, &(m_vertexData.m_dx11VertexVelocity.getUAV()), NULL );
+	m_dx11Context->CSSetUnorderedAccessViews( 2, 1, &(m_vertexData.m_dx11VertexPosition.getUAV()), NULL );
+
+	// Select the kernel and launch enough groups to cover all vertices.
+	// NOTE(review): 128 presumably matches the thread-group size declared in the HLSL - confirm.
+	m_dx11Context->CSSetShader( solveCollisionsAndUpdateVelocitiesKernel.kernel, NULL, 0 );
+
+	const int threadsPerGroup = 128;
+	int	groupCount = (kernelParams.numNodes + (threadsPerGroup-1)) / threadsPerGroup;
+	m_dx11Context->Dispatch( groupCount, 1, 1 );
+
+	{
+		// Unbind everything that was bound above
+		ID3D11ShaderResourceView* nullSRV = NULL;
+		for( UINT slot = 0; slot < 6; ++slot )
+			m_dx11Context->CSSetShaderResources( slot, 1, &nullSRV );
+
+		ID3D11UnorderedAccessView* nullUAV = NULL;
+		for( UINT slot = 0; slot < 3; ++slot )
+			m_dx11Context->CSSetUnorderedAccessViews( slot, 1, &nullUAV, NULL );
+
+		ID3D11Buffer *nullBuffer = NULL;
+		m_dx11Context->CSSetConstantBuffers( 0, 1, &nullBuffer );
+	}
+
+} // btDX11SoftBodySolver::solveCollisionsAndUpdateVelocities
+
+// End kernel dispatches
+/////////////////////////////////////
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+btDX11SoftBodySolver::btAcceleratedSoftBodyInterface *btDX11SoftBodySolver::findSoftBodyInterface( const btSoftBody* const softBody )
+{
+	// Linear search of m_softBodySet for the interface wrapping softBody.
+	// Returns 0 when no registered interface refers to it.
+	const int bodyCount = m_softBodySet.size();
+	for( int i = 0; i < bodyCount; ++i )
+	{
+		btAcceleratedSoftBodyInterface *candidate = m_softBodySet[i];
+		if( candidate->getSoftBody() != softBody )
+			continue;
+		return candidate;
+	}
+	return 0;
+}
+
+const btDX11SoftBodySolver::btAcceleratedSoftBodyInterface * const btDX11SoftBodySolver::findSoftBodyInterface( const btSoftBody* const softBody ) const
+{
+	// Const overload: linear search of m_softBodySet for the interface wrapping softBody.
+	// Returns 0 when no registered interface refers to it.
+	const int bodyCount = m_softBodySet.size();
+	for( int i = 0; i < bodyCount; ++i )
+	{
+		btAcceleratedSoftBodyInterface *candidate = m_softBodySet[i];
+		if( candidate->getSoftBody() != softBody )
+			continue;
+		return candidate;
+	}
+	return 0;
+}
+
+int btDX11SoftBodySolver::findSoftBodyIndex( const btSoftBody* const softBody )
+{
+	// Linear search of m_softBodySet for the interface wrapping softBody.
+	// Returns its index in m_softBodySet, or -1 when the body is not registered.
+	for( int softBodyIndex = 0; softBodyIndex < m_softBodySet.size(); ++softBodyIndex )
+	{
+		btAcceleratedSoftBodyInterface *softBodyInterface = m_softBodySet[softBodyIndex];
+		if( softBodyInterface->getSoftBody() == softBody )
+			return softBodyIndex;
+	}
+	// Not found. The sentinel must be negative: processCollision() tests the result with
+	// "softBodyIndex >= 0", and the previous value of 1 is indistinguishable from a valid index.
+	return -1;
+}
+
+
+void btSoftBodySolverOutputDXtoCPU::copySoftBodyToVertexBuffer( const btSoftBody * const softBody, btVertexBufferDescriptor *vertexBuffer )
+{
+	// Copies the positions (and, if the descriptor asks for them, normals) of softBody's
+	// vertices from the DX11 solver into a CPU-side vertex buffer.
+	// Does nothing when vertexBuffer is not a CPU_BUFFER or when softBody is not
+	// registered with its solver.
+
+	btSoftBodySolver *solver = softBody->getSoftBodySolver();
+	btAssert( solver->getSolverType() == btSoftBodySolver::DX_SOLVER || solver->getSolverType() == btSoftBodySolver::DX_SIMD_SOLVER );
+	btDX11SoftBodySolver *dxSolver = static_cast< btDX11SoftBodySolver * >( solver );
+
+	btDX11SoftBodySolver::btAcceleratedSoftBodyInterface * currentCloth = dxSolver->findSoftBodyInterface( softBody );
+	// findSoftBodyInterface returns 0 for an unknown body; do not dereference it
+	btAssert( currentCloth );
+	if( !currentCloth )
+		return;
+
+	btSoftBodyVertexDataDX11 &vertexData( dxSolver->m_vertexData );
+
+	// Half-open range [firstVertex, lastVertex) of this cloth within the solver's vertex arrays
+	const int firstVertex = currentCloth->getFirstVertex();
+	const int lastVertex = firstVertex + currentCloth->getNumVertices();
+
+	if( vertexBuffer->getBufferType() == btVertexBufferDescriptor::CPU_BUFFER )
+	{		
+		// If we're doing a CPU-buffer copy must copy the data back to the host first
+		vertexData.m_dx11VertexPosition.copyFromGPU();
+		vertexData.m_dx11VertexNormal.copyFromGPU();
+
+		const btCPUVertexBufferDescriptor *cpuVertexBuffer = static_cast< btCPUVertexBufferDescriptor* >(vertexBuffer);						
+		float *basePointer = cpuVertexBuffer->getBasePointer();						
+
+		if( vertexBuffer->hasVertexPositions() )
+		{
+			// Offset and stride are applied to a float pointer, i.e. they count floats
+			const int vertexOffset = cpuVertexBuffer->getVertexOffset();
+			const int vertexStride = cpuVertexBuffer->getVertexStride();
+			float *vertexPointer = basePointer + vertexOffset;
+
+			for( int vertexIndex = firstVertex; vertexIndex < lastVertex; ++vertexIndex )
+			{
+				Vectormath::Aos::Point3 position = vertexData.getPosition(vertexIndex);
+				*(vertexPointer + 0) = position.getX();
+				*(vertexPointer + 1) = position.getY();
+				*(vertexPointer + 2) = position.getZ();
+				vertexPointer += vertexStride;
+			}
+		}
+		if( vertexBuffer->hasNormals() )
+		{
+			// Offset and stride are applied to a float pointer, i.e. they count floats
+			const int normalOffset = cpuVertexBuffer->getNormalOffset();
+			const int normalStride = cpuVertexBuffer->getNormalStride();
+			float *normalPointer = basePointer + normalOffset;
+
+			for( int vertexIndex = firstVertex; vertexIndex < lastVertex; ++vertexIndex )
+			{
+				Vectormath::Aos::Vector3 normal = vertexData.getNormal(vertexIndex);
+				*(normalPointer + 0) = normal.getX();
+				*(normalPointer + 1) = normal.getY();
+				*(normalPointer + 2) = normal.getZ();
+				normalPointer += normalStride;
+			}
+		}
+	} 
+} // btSoftBodySolverOutputDXtoCPU::copySoftBodyToVertexBuffer
+
+
+
+bool btSoftBodySolverOutputDXtoDX::checkInitialized()
+{
+	// Builds the output shaders on first use. Returns true once they are available;
+	// a failed build leaves the flag false so the next call tries again.
+	if( !m_shadersInitialized && buildShaders() )
+	{
+		m_shadersInitialized = true;
+	}
+
+	return m_shadersInitialized;
+}
+
+void btSoftBodySolverOutputDXtoDX::releaseKernels()
+{
+	// Releases the shader and constant buffer of both output kernels and marks
+	// the shaders as not initialized.
+
+	// Kernel that writes positions and normals
+	SAFE_RELEASE( outputToVertexArrayWithNormalsKernel.kernel );
+	SAFE_RELEASE( outputToVertexArrayWithNormalsKernel.constBuffer );
+
+	// Kernel that writes positions only
+	SAFE_RELEASE( outputToVertexArrayWithoutNormalsKernel.kernel );
+	SAFE_RELEASE( outputToVertexArrayWithoutNormalsKernel.constBuffer );
+
+	m_shadersInitialized = false;
+}
+
+
+bool btSoftBodySolverOutputDXtoDX::buildShaders()
+{
+	// (Re)compiles the two output kernels from OutputToVertexArrayHLSLString.
+	// Returns true, and sets m_shadersInitialized, only if both kernels were built.
+
+	// Ensure current kernels are released first. releaseKernels() also clears
+	// m_shadersInitialized, so no "already initialized" early-out is possible after it.
+	releaseKernels();
+
+	bool returnVal = true;
+
+	// compileComputeShaderFromString hands back a null constBuffer on failure
+	outputToVertexArrayWithNormalsKernel = dxFunctions.compileComputeShaderFromString( OutputToVertexArrayHLSLString, "OutputToVertexArrayWithNormalsKernel", sizeof(OutputToVertexArrayCB) );
+	if( !outputToVertexArrayWithNormalsKernel.constBuffer)
+		returnVal = false;
+	outputToVertexArrayWithoutNormalsKernel = dxFunctions.compileComputeShaderFromString( OutputToVertexArrayHLSLString, "OutputToVertexArrayWithoutNormalsKernel", sizeof(OutputToVertexArrayCB) );
+	if( !outputToVertexArrayWithoutNormalsKernel.constBuffer )
+		returnVal = false;
+
+
+	if( returnVal )
+		m_shadersInitialized = true;
+
+	return returnVal;
+}
+
+
+void btSoftBodySolverOutputDXtoDX::copySoftBodyToVertexBuffer( const btSoftBody * const softBody, btVertexBufferDescriptor *vertexBuffer )
+{
+	// Copies softBody's vertex positions (and normals, if requested by the descriptor)
+	// into vertexBuffer. CPU_BUFFER targets are handed to the DX-to-CPU implementation;
+	// DX11_BUFFER targets are filled on the GPU by an output compute kernel.
+	// Other buffer types are ignored.
+
+	btSoftBodySolver *solver = softBody->getSoftBodySolver();
+	btAssert( solver->getSolverType() == btSoftBodySolver::DX_SOLVER || solver->getSolverType() == btSoftBodySolver::DX_SIMD_SOLVER );
+	btDX11SoftBodySolver *dxSolver = static_cast< btDX11SoftBodySolver * >( solver );
+	const bool shadersReady = checkInitialized();
+
+	if( vertexBuffer->getBufferType() == btVertexBufferDescriptor::CPU_BUFFER )
+	{		
+		// Delegate to the CPU read-back path. This previously named this very function
+		// and therefore recursed without end.
+		// NOTE(review): assumes btSoftBodySolverOutputDXtoCPU is the base class - confirm in the header.
+		btSoftBodySolverOutputDXtoCPU::copySoftBodyToVertexBuffer( softBody, vertexBuffer );
+	} else 	if( vertexBuffer->getBufferType() == btVertexBufferDescriptor::DX11_BUFFER )
+	{
+		// Do a DX11 copy shader DX to DX copy
+
+		// Without compiled kernels there is nothing valid to map or dispatch
+		if( !shadersReady )
+			return;
+
+		btDX11SoftBodySolver::btAcceleratedSoftBodyInterface * currentCloth = dxSolver->findSoftBodyInterface( softBody );
+		// findSoftBodyInterface returns 0 for an unknown body; do not dereference it
+		btAssert( currentCloth );
+		if( !currentCloth )
+			return;
+
+		btSoftBodyVertexDataDX11 &vertexData( dxSolver->m_vertexData );
+		const int firstVertex = currentCloth->getFirstVertex();
+
+		const btDX11VertexBufferDescriptor *dx11VertexBuffer = static_cast< btDX11VertexBufferDescriptor* >(vertexBuffer);	
+
+		// Copy kernel parameters to GPU. The positions-only kernel is the default;
+		// it is swapped for the with-normals kernel below when normals are requested.
+		OutputToVertexArrayCB constBuffer;
+		ID3D11ComputeShader* outputToVertexArrayShader = outputToVertexArrayWithoutNormalsKernel.kernel;
+		ID3D11Buffer* outputToVertexArrayConstBuffer = outputToVertexArrayWithoutNormalsKernel.constBuffer;
+		
+		constBuffer.startNode = firstVertex;
+		constBuffer.numNodes = currentCloth->getNumVertices();
+		constBuffer.positionOffset = vertexBuffer->getVertexOffset();
+		constBuffer.positionStride = vertexBuffer->getVertexStride();
+		// Give the normal fields a defined value so no indeterminate bytes are uploaded
+		// when the buffer has no normals
+		constBuffer.normalOffset = 0;
+		constBuffer.normalStride = 0;
+		if( vertexBuffer->hasNormals() )
+		{
+			constBuffer.normalOffset = vertexBuffer->getNormalOffset();
+			constBuffer.normalStride = vertexBuffer->getNormalStride();
+			outputToVertexArrayShader = outputToVertexArrayWithNormalsKernel.kernel;
+			outputToVertexArrayConstBuffer = outputToVertexArrayWithNormalsKernel.constBuffer;
+		}	
+		
+		// TODO: factor this out. Number of nodes is static and sdt might be, too, we can update this just once on setup
+		D3D11_MAPPED_SUBRESOURCE MappedResource = {0};
+		dxFunctions.m_dx11Context->Map( outputToVertexArrayConstBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &MappedResource );
+		memcpy( MappedResource.pData, &constBuffer, sizeof(OutputToVertexArrayCB) );	
+		dxFunctions.m_dx11Context->Unmap( outputToVertexArrayConstBuffer, 0 );
+		dxFunctions.m_dx11Context->CSSetConstantBuffers( 0, 1, &outputToVertexArrayConstBuffer );
+
+		// Set resources and dispatch
+		dxFunctions.m_dx11Context->CSSetShaderResources( 0, 1, &(vertexData.m_dx11VertexPosition.getSRV()) );
+		dxFunctions.m_dx11Context->CSSetShaderResources( 1, 1, &(vertexData.m_dx11VertexNormal.getSRV()) );
+
+		ID3D11UnorderedAccessView* dx11UAV = dx11VertexBuffer->getDX11UAV();
+		dxFunctions.m_dx11Context->CSSetUnorderedAccessViews( 0, 1, &(dx11UAV), NULL );
+
+		// Execute the kernel
+		dxFunctions.m_dx11Context->CSSetShader( outputToVertexArrayShader, NULL, 0 );
+
+		// NOTE(review): 128 presumably matches the thread-group size declared in the HLSL - confirm.
+		int	numBlocks = (constBuffer.numNodes + (128-1)) / 128;
+		dxFunctions.m_dx11Context->Dispatch(numBlocks, 1, 1 );
+
+		{
+			// Tidy up 
+			ID3D11ShaderResourceView* pViewNULL = NULL;
+			dxFunctions.m_dx11Context->CSSetShaderResources( 0, 1, &pViewNULL );
+			dxFunctions.m_dx11Context->CSSetShaderResources( 1, 1, &pViewNULL );
+
+			ID3D11UnorderedAccessView* pUAViewNULL = NULL;
+			dxFunctions.m_dx11Context->CSSetUnorderedAccessViews( 0, 1, &pUAViewNULL, NULL );
+
+			ID3D11Buffer *pBufferNull = NULL;
+			dxFunctions.m_dx11Context->CSSetConstantBuffers( 0, 1, &pBufferNull );
+		}	
+	}
+} // btSoftBodySolverOutputDXtoDX::copySoftBodyToVertexBuffer
+
+
+
+
+DXFunctions::KernelDesc DXFunctions::compileComputeShaderFromString( const char* shaderString, const char* shaderName, int constBufferSize, D3D10_SHADER_MACRO *compileMacros )
+{
+	// Compiles the entry point shaderName from the HLSL text shaderString as a cs_5_0
+	// compute shader and, when constBufferSize > 0, creates a dynamic CPU-writable
+	// constant buffer of that many bytes for it.
+	// Returns a descriptor holding both objects. On any failure both descriptor
+	// fields are 0 and nothing created along the way is left allocated.
+	const char *cs5String = "cs_5_0";
+
+	// Descriptor handed back on every failure path
+	DXFunctions::KernelDesc failedDescriptor;
+	failedDescriptor.kernel = 0;
+	failedDescriptor.constBuffer = 0;
+
+	HRESULT hr = S_OK;
+	ID3DBlob* pErrorBlob = NULL;
+	ID3DBlob* pBlob = NULL;
+	ID3D11ComputeShader*		kernelPointer = 0;
+
+	hr = m_dx11CompileFromMemory( 
+		shaderString,
+		strlen(shaderString),
+		shaderName,
+		compileMacros,
+		NULL,
+		shaderName,
+		cs5String,
+		D3D10_SHADER_ENABLE_STRICTNESS,
+		NULL,
+		NULL,
+		&pBlob,
+		&pErrorBlob,
+		NULL
+		);
+
+	if( FAILED(hr) )
+	{
+		if( pErrorBlob ) {
+			// NOTE(review): a string literal is always non-null, so this assert can never fire.
+			// Left as-is so a compile failure keeps being reported through the return value.
+			btAssert( "Compilation of compute shader failed\n" );
+			char *debugString = (char*)pErrorBlob->GetBufferPointer();
+			OutputDebugStringA( debugString );
+		}
+	
+		SAFE_RELEASE( pErrorBlob );
+		SAFE_RELEASE( pBlob );    
+
+		return failedDescriptor;
+	}    
+
+	// Create the Compute Shader
+	hr = m_dx11Device->CreateComputeShader( pBlob->GetBufferPointer(), pBlob->GetBufferSize(), NULL, &kernelPointer );
+
+	// The blobs are not used past this point; release them on success and failure alike
+	SAFE_RELEASE( pErrorBlob );
+	SAFE_RELEASE( pBlob );
+
+	if( FAILED( hr ) )
+	{
+		return failedDescriptor;
+	}
+
+	ID3D11Buffer* constBuffer = 0;
+	if( constBufferSize > 0 )
+	{
+		// Create the constant buffer
+		D3D11_BUFFER_DESC constant_buffer_desc;
+		ZeroMemory(&constant_buffer_desc, sizeof(constant_buffer_desc));
+		constant_buffer_desc.ByteWidth = constBufferSize;
+		constant_buffer_desc.Usage = D3D11_USAGE_DYNAMIC;
+		constant_buffer_desc.BindFlags = D3D11_BIND_CONSTANT_BUFFER;
+		constant_buffer_desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
+		// Capture the result; the check below used to test the stale HRESULT from
+		// CreateComputeShader and could never detect a failed buffer creation
+		hr = m_dx11Device->CreateBuffer(&constant_buffer_desc, NULL, &constBuffer);
+		if( FAILED( hr ) )
+		{
+			// Do not leak the shader that was already created
+			SAFE_RELEASE( kernelPointer );
+			return failedDescriptor;
+		}
+	}
+
+	DXFunctions::KernelDesc descriptor;
+	descriptor.kernel = kernelPointer;
+	descriptor.constBuffer = constBuffer;
+	return descriptor;
+} // compileComputeShader
+
+
+
+bool btDX11SoftBodySolver::buildShaders()
+{
+	// (Re)compiles every compute kernel used by the solver.
+	// Returns true, and sets m_shadersInitialized, only if all kernels were built.
+	// A kernel counts as failed when its descriptor comes back with a null constBuffer.
+
+	// Ensure current kernels are released first
+	releaseKernels();
+
+	bool returnVal = true;
+
+	if( m_shadersInitialized )
+		return true;
+
+	prepareLinksKernel = dxFunctions.compileComputeShaderFromString( PrepareLinksHLSLString, "PrepareLinksKernel", sizeof(PrepareLinksCB) );
+	if( !prepareLinksKernel.constBuffer )
+		returnVal = false;
+	updatePositionsFromVelocitiesKernel = dxFunctions.compileComputeShaderFromString( UpdatePositionsFromVelocitiesHLSLString, "UpdatePositionsFromVelocitiesKernel", sizeof(UpdatePositionsFromVelocitiesCB) );
+	if( !updatePositionsFromVelocitiesKernel.constBuffer )
+		returnVal = false;
+	solvePositionsFromLinksKernel = dxFunctions.compileComputeShaderFromString( SolvePositionsHLSLString, "SolvePositionsFromLinksKernel", sizeof(SolvePositionsFromLinksKernelCB) );
+	// Check the kernel that was just built; this previously re-tested
+	// updatePositionsFromVelocitiesKernel, so a failure here went unnoticed
+	if( !solvePositionsFromLinksKernel.constBuffer )
+		returnVal = false;
+	vSolveLinksKernel = dxFunctions.compileComputeShaderFromString( VSolveLinksHLSLString, "VSolveLinksKernel", sizeof(VSolveLinksCB) );
+	if( !vSolveLinksKernel.constBuffer )
+		returnVal = false;
+	updateVelocitiesFromPositionsWithVelocitiesKernel = dxFunctions.compileComputeShaderFromString( UpdateNodesHLSLString, "updateVelocitiesFromPositionsWithVelocitiesKernel", sizeof(UpdateVelocitiesFromPositionsWithVelocitiesCB) );
+	if( !updateVelocitiesFromPositionsWithVelocitiesKernel.constBuffer )
+		returnVal = false;
+	updateVelocitiesFromPositionsWithoutVelocitiesKernel = dxFunctions.compileComputeShaderFromString( UpdatePositionsHLSLString, "updateVelocitiesFromPositionsWithoutVelocitiesKernel", sizeof(UpdateVelocitiesFromPositionsWithoutVelocitiesCB) );
+	if( !updateVelocitiesFromPositionsWithoutVelocitiesKernel.constBuffer )
+		returnVal = false;
+	integrateKernel = dxFunctions.compileComputeShaderFromString( IntegrateHLSLString, "IntegrateKernel", sizeof(IntegrateCB) );
+	if( !integrateKernel.constBuffer )
+		returnVal = false;
+	applyForcesKernel = dxFunctions.compileComputeShaderFromString( ApplyForcesHLSLString, "ApplyForcesKernel", sizeof(ApplyForcesCB) );
+	if( !applyForcesKernel.constBuffer )
+		returnVal = false;
+	solveCollisionsAndUpdateVelocitiesKernel = dxFunctions.compileComputeShaderFromString( SolveCollisionsAndUpdateVelocitiesHLSLString, "SolveCollisionsAndUpdateVelocitiesKernel", sizeof(SolveCollisionsAndUpdateVelocitiesCB) );
+	if( !solveCollisionsAndUpdateVelocitiesKernel.constBuffer )
+		returnVal = false;
+
+	// The next three kernels are all compiled from UpdateNormalsHLSLString
+	// TODO: Rename to UpdateSoftBodies
+	resetNormalsAndAreasKernel = dxFunctions.compileComputeShaderFromString( UpdateNormalsHLSLString, "ResetNormalsAndAreasKernel", sizeof(UpdateSoftBodiesCB) );
+	if( !resetNormalsAndAreasKernel.constBuffer )
+		returnVal = false;
+	normalizeNormalsAndAreasKernel = dxFunctions.compileComputeShaderFromString( UpdateNormalsHLSLString, "NormalizeNormalsAndAreasKernel", sizeof(UpdateSoftBodiesCB) );
+	if( !normalizeNormalsAndAreasKernel.constBuffer )
+		returnVal = false;
+	updateSoftBodiesKernel = dxFunctions.compileComputeShaderFromString( UpdateNormalsHLSLString, "UpdateSoftBodiesKernel", sizeof(UpdateSoftBodiesCB) );
+	if( !updateSoftBodiesKernel.constBuffer )
+		returnVal = false;
+
+	computeBoundsKernel = dxFunctions.compileComputeShaderFromString( ComputeBoundsHLSLString, "ComputeBoundsKernel", sizeof(ComputeBoundsCB) );
+	if( !computeBoundsKernel.constBuffer )
+		returnVal = false;
+
+
+
+	if( returnVal )
+		m_shadersInitialized = true;
+
+	return returnVal;
+}
+
+
+static Vectormath::Aos::Transform3 toTransform3( const btTransform &transform )
+{
+	// Converts a btTransform into a Vectormath Transform3: the three basis columns
+	// go to columns 0-2 and the origin goes to column 3.
+	Vectormath::Aos::Transform3 result;
+	for( int column = 0; column < 3; ++column )
+	{
+		result.setCol( column, toVector3( transform.getBasis().getColumn( column ) ) );
+	}
+	result.setCol( 3, toVector3( transform.getOrigin() ) );
+	return result;
+}
+
+
+void btDX11SoftBodySolver::btAcceleratedSoftBodyInterface::updateBounds( const btVector3 &lowerBound, const btVector3 &upperBound )
+{
+	const float margin = m_softBody->getCollisionShape()->getMargin();	// margin of the soft body's own collision shape
+	const btVector3 padding( margin, margin, margin );	// same margin on every axis
+	m_softBody->m_bounds[0] = lowerBound - padding;	// stored lower corner: given lower bound minus margin
+	m_softBody->m_bounds[1] = upperBound + padding;	// stored upper corner: given upper bound plus margin
+}
+
+void btDX11SoftBodySolver::processCollision( btSoftBody*, btSoftBody* )
+{
+	// No-op: the overload taking two soft bodies has an empty body in this solver.
+}
+
+// Add the collision object to the set to deal with for a particular soft body.
+// Only capsule shapes are recorded; other shape types are reported in _DEBUG builds and otherwise ignored.
+void btDX11SoftBodySolver::processCollision( btSoftBody *softBody, const btCollisionObjectWrapper* collisionObject )
+{
+	int softBodyIndex = findSoftBodyIndex( softBody );
+	if( softBodyIndex >= 0 )
+	{
+		const btCollisionShape *collisionShape = collisionObject->getCollisionShape();
+		float friction = collisionObject->getCollisionObject()->getFriction();
+		int shapeType = collisionShape->getShapeType();
+		if( shapeType == CAPSULE_SHAPE_PROXYTYPE )
+		{
+			// Add to the list of expected collision objects
+			CollisionShapeDescription newCollisionShapeDescription;
+			newCollisionShapeDescription.softBodyIdentifier = softBodyIndex;
+			newCollisionShapeDescription.collisionShapeType = shapeType;
+			// TODO: May need to transpose this matrix either here or in HLSL
+			newCollisionShapeDescription.shapeTransform = toTransform3(collisionObject->getWorldTransform());
+			const btCapsuleShape *capsule = static_cast<const btCapsuleShape*>( collisionShape );
+			newCollisionShapeDescription.radius = capsule->getRadius();
+			newCollisionShapeDescription.halfHeight = capsule->getHalfHeight();
+			newCollisionShapeDescription.margin = capsule->getMargin();
+			newCollisionShapeDescription.friction = friction;
+			// Not every collision object is a rigid body: upcast() returns 0 for those, and they get zero velocities.
+			const btRigidBody* body = btRigidBody::upcast( collisionObject->getCollisionObject() );
+			newCollisionShapeDescription.linearVelocity = body ? toVector3(body->getLinearVelocity()) : Vectormath::Aos::Vector3( 0.f, 0.f, 0.f );
+			newCollisionShapeDescription.angularVelocity = body ? toVector3(body->getAngularVelocity()) : Vectormath::Aos::Vector3( 0.f, 0.f, 0.f );
+			m_collisionObjectDetails.push_back( newCollisionShapeDescription );
+		} else {
+#ifdef _DEBUG
+			printf("Unsupported collision shape type\n");
+#endif
+		}
+	} else {
+		btAssert( 0 && "Unknown soft body" );	// a bare string literal is always true and would never trigger the assert
+	}
+} // btDX11SoftBodySolver::processCollision
+
+
+
+void btDX11SoftBodySolver::predictMotion( float timeStep )
+{
+	// Flag the DX11 collision buffers as changed on the CPU so that they are
+	// updated correctly, then empty the collision shape array for the next frame.
+	m_dx11CollisionObjectDetails.changedOnCPU();
+	m_dx11PerClothCollisionObjects.changedOnCPU();
+	m_collisionObjectDetails.clear();
+
+	// Refresh the per-cloth wind velocity from every soft body held by the solver.
+	const int numSoftBodies = m_softBodySet.size();
+	m_perClothWindVelocity.resize( numSoftBodies );
+	for( int cloth = 0; cloth < numSoftBodies; ++cloth )
+	{
+		m_perClothWindVelocity[cloth] = toVector3( m_softBodySet[cloth]->getSoftBody()->getWindVelocity() );
+	}
+	m_dx11PerClothWindVelocity.changedOnCPU();
+
+	// Apply forces that we know about to the cloths.
+	applyForces( timeStep * getTimeScale() );
+
+	// Integrate motion for all soft bodies dealt with by the solver.
+	integrate( timeStep * getTimeScale() );
+
+	// The bounds pass is optional: when enabled it updates the bounds for all soft
+	// bodies dealt with by the solver and sets the values in the btSoftBody objects.
+	if( m_enableUpdateBounds )
+	{
+		updateBounds();
+	}
+
+	// End prediction work for solvers
+}
+
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolver_DX11.h b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolver_DX11.h
new file mode 100644
index 000000000..0f50ecf79
--- /dev/null
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolver_DX11.h
@@ -0,0 +1,691 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_ACCELERATED_SOFT_BODY_DX11_SOLVER_H
+#define BT_ACCELERATED_SOFT_BODY_DX11_SOLVER_H
+
+
+#include "vectormath/vmInclude.h"
+#include "BulletSoftBody/btSoftBodySolvers.h"
+#include "btSoftBodySolverVertexBuffer_DX11.h"
+#include "btSoftBodySolverLinkData_DX11.h"
+#include "btSoftBodySolverVertexData_DX11.h"
+#include "btSoftBodySolverTriangleData_DX11.h"
+
+
+
+class DXFunctions
+{
+public:
+	/** Pointer type of the shader compile entry point; the solvers default it to &D3DX11CompileFromMemory. */
+	typedef HRESULT (WINAPI * CompileFromMemoryFunc)(LPCSTR,SIZE_T,LPCSTR,const D3D10_SHADER_MACRO*,LPD3D10INCLUDE,LPCSTR,LPCSTR,UINT,UINT,ID3DX11ThreadPump*,ID3D10Blob**,ID3D10Blob**,HRESULT*);
+
+	/** Device, device context and compile function exactly as handed to the constructor. */
+	ID3D11Device *		 m_dx11Device;
+	ID3D11DeviceContext* m_dx11Context;
+	CompileFromMemoryFunc m_dx11CompileFromMemory;
+
+	/** Stores the three arguments; the constructor does nothing else. */
+	DXFunctions(ID3D11Device *dx11Device, ID3D11DeviceContext* dx11Context, CompileFromMemoryFunc dx11CompileFromMemory) :
+		m_dx11Device( dx11Device ),
+		m_dx11Context( dx11Context ),
+		m_dx11CompileFromMemory( dx11CompileFromMemory )
+	{
+	}
+
+	/** A compute shader pointer together with a constant buffer pointer; returned by value from compileComputeShaderFromString. */
+	class KernelDesc
+	{
+	public:
+		ID3D11ComputeShader* kernel;
+		ID3D11Buffer* constBuffer;
+
+		/** Both pointers start out as null. */
+		KernelDesc() :
+			kernel( 0 ),
+			constBuffer( 0 )
+		{
+		}
+
+		virtual ~KernelDesc()
+		{
+			// TODO: this should probably destroy its kernel but we need to be careful
+			// in case KernelDescs are copied
+		}
+	};
+
+	/**
+	 * Compile a compute shader kernel from a string and return the appropriate KernelDesc object.
+	 */
+	KernelDesc compileComputeShaderFromString( const char* shaderString, const char* shaderName, int constBufferSize, D3D10_SHADER_MACRO *compileMacros = 0 );
+
+};
+
+class btDX11SoftBodySolver : public btSoftBodySolver
+{
+protected:
+	/**
+	 * Entry in the collision shape array.
+	 * Specifies the shape type, the transform matrix and the necessary details of the collisionShape.
+	 */
+	struct CollisionShapeDescription
+	{
+		Vectormath::Aos::Transform3 shapeTransform;	// world transform of the collision object
+		Vectormath::Aos::Vector3 linearVelocity;
+		Vectormath::Aos::Vector3 angularVelocity;
+
+		int softBodyIdentifier;	// index of the soft body within the solver (as returned by findSoftBodyIndex)
+		int collisionShapeType;	// value of btCollisionShape::getShapeType()
+
+		// Both needed for capsule
+		float radius;
+		float halfHeight;
+
+		float margin;
+		float friction;
+		// Zero every scalar field so none is left indeterminate; the transform and velocities are filled in by the caller.
+		CollisionShapeDescription() :
+			softBodyIdentifier( 0 ), collisionShapeType( 0 ),
+			radius( 0 ), halfHeight( 0 ),
+			margin( 0 ), friction( 0 )
+		{
+		}
+	};
+
+	struct UIntVector3
+	{
+		// Three unsigned components followed by one padding word that both constructors zero.
+		unsigned int x;
+		unsigned int y;
+		unsigned int z;
+		unsigned int _padding;
+
+		// Default: every component is zero.
+		UIntVector3() :
+			x( 0 ),
+			y( 0 ),
+			z( 0 ),
+			_padding( 0 )
+		{
+		}
+
+		// Component-wise construction; the padding word is still zeroed.
+		UIntVector3( unsigned int x_, unsigned int y_, unsigned int z_ ) :
+			x( x_ ), y( y_ ), z( z_ ), _padding( 0 )
+		{
+		}
+	};
+
+
+
+public:
+	/**
+	 * SoftBody class to maintain information about a soft body instance
+	 * within a solver.
+	 * This data addresses the main solver arrays.
+	 */
+	class btAcceleratedSoftBodyInterface
+	{
+	protected:
+		/** Current number of vertices that are part of this cloth */
+		int m_numVertices;
+		/** Maximum number of vertices allocated to be part of this cloth */
+		int m_maxVertices;
+		/** Current number of triangles that are part of this cloth */
+		int m_numTriangles;
+		/** Maximum number of triangles allocated to be part of this cloth */
+		int m_maxTriangles;
+		/** Index of first vertex in the world allocated to this cloth */
+		int m_firstVertex;
+		/** Index of first triangle in the world allocated to this cloth */
+		int m_firstTriangle;
+		/** Index of first link in the world allocated to this cloth */
+		int m_firstLink;
+		/** Maximum number of links allocated to this cloth */
+		int m_maxLinks;
+		/** Current number of links allocated to this cloth */
+		int m_numLinks;
+
+		/** The actual soft body this data represents */
+		btSoftBody *m_softBody;
+
+
+	public:
+		btAcceleratedSoftBodyInterface( btSoftBody *softBody ) :	// stores the pointer; every count and offset starts at 0
+		  m_softBody( softBody )
+		{
+			m_numVertices = 0;
+			m_maxVertices = 0;
+			m_numTriangles = 0;
+			m_maxTriangles = 0;
+			m_firstVertex = 0;
+			m_firstTriangle = 0;
+			m_firstLink = 0;
+			m_maxLinks = 0;
+			m_numLinks = 0;
+		}
+		int getNumVertices() const
+		{
+			return m_numVertices;
+		}
+
+		int getNumTriangles() const
+		{
+			return m_numTriangles;
+		}
+
+		int getMaxVertices() const
+		{
+			return m_maxVertices;
+		}
+
+		int getMaxTriangles() const
+		{
+			return m_maxTriangles;
+		}
+
+		int getFirstVertex() const
+		{
+			return m_firstVertex;
+		}
+
+		int getFirstTriangle() const
+		{
+			return m_firstTriangle;
+		}
+
+
+		/**
+		 * Update the bounds in the btSoftBody object
+		 */
+		void updateBounds( const btVector3 &lowerBound, const btVector3 &upperBound );
+
+
+		// TODO: All of these set functions will have to do checks and
+		// update the world because restructuring of the arrays will be necessary
+		// Reasonable use of "friend"?
+		void setNumVertices( int numVertices )
+		{
+			m_numVertices = numVertices;
+		}
+
+		void setNumTriangles( int numTriangles )
+		{
+			m_numTriangles = numTriangles;
+		}
+
+		void setMaxVertices( int maxVertices )
+		{
+			m_maxVertices = maxVertices;
+		}
+
+		void setMaxTriangles( int maxTriangles )
+		{
+			m_maxTriangles = maxTriangles;
+		}
+
+		void setFirstVertex( int firstVertex )
+		{
+			m_firstVertex = firstVertex;
+		}
+
+		void setFirstTriangle( int firstTriangle )
+		{
+			m_firstTriangle = firstTriangle;
+		}
+
+		void setMaxLinks( int maxLinks )
+		{
+			m_maxLinks = maxLinks;
+		}
+
+		void setNumLinks( int numLinks )
+		{
+			m_numLinks = numLinks;
+		}
+
+		void setFirstLink( int firstLink )
+		{
+			m_firstLink = firstLink;
+		}
+		// The link getters and getSoftBody() are const, like the vertex/triangle getters, so a const interface pointer can use them.
+		int getMaxLinks() const
+		{
+			return m_maxLinks;
+		}
+
+		int getNumLinks() const
+		{
+			return m_numLinks;
+		}
+
+		int getFirstLink() const
+		{
+			return m_firstLink;
+		}
+
+		btSoftBody* getSoftBody() const
+		{
+			return m_softBody;
+		}
+
+	};
+
+	
+	struct CollisionObjectIndices
+	{
+		int firstObject;
+		int endObject;
+		// Stores the pair as given: f becomes firstObject, e becomes endObject.
+		CollisionObjectIndices( int f, int e ) :
+			firstObject( f ),
+			endObject( e )
+		{
+		}
+	};
+
+
+
+
+
+	struct PrepareLinksCB	// NOTE(review): presumably the constant buffer layout for prepareLinksKernel - confirm in the .cpp
+	{		
+		int numLinks;
+		int padding0;	// unused filler; with the three padding ints the struct holds four 4-byte fields (assuming 4-byte int)
+		int padding1;
+		int padding2;
+	};
+
+	struct SolvePositionsFromLinksKernelCB	// NOTE(review): presumably the constant buffer layout for solvePositionsFromLinksKernel - confirm
+	{		
+		int startLink;	// field names mirror the parameters of solveLinksForPosition( startLink, numLinks, kst, ti )
+		int numLinks;
+		float kst;
+		float ti;
+	};
+
+	struct IntegrateCB	// sizeof(IntegrateCB) is passed as the constant buffer size when "IntegrateKernel" is compiled
+	{
+		int numNodes;
+		float solverdt;	// same name as the argument of integrate( float solverdt )
+		int padding1;	// unused filler; the struct holds four 4-byte fields (assuming 4-byte int/float)
+		int padding2;
+	};
+
+	struct UpdatePositionsFromVelocitiesCB	// NOTE(review): presumably the constant buffer layout for updatePositionsFromVelocitiesKernel - confirm
+	{
+		int numNodes;
+		float solverSDT;
+		int padding1;	// unused filler; the struct holds four 4-byte fields (assuming 4-byte int/float)
+		int padding2;
+	};
+
+	struct UpdateVelocitiesFromPositionsWithoutVelocitiesCB	// NOTE(review): presumably for updateVelocitiesFromPositionsWithoutVelocitiesKernel - confirm
+	{
+		int numNodes;
+		float isolverdt;	// same name as the argument of updateVelocitiesFromPositionsWithoutVelocities( float isolverdt )
+		int padding1;	// unused filler; the struct holds four 4-byte fields (assuming 4-byte int/float)
+		int padding2;
+	};
+
+	struct UpdateVelocitiesFromPositionsWithVelocitiesCB	// NOTE(review): presumably for updateVelocitiesFromPositionsWithVelocitiesKernel - confirm
+	{
+		int numNodes;
+		float isolverdt;	// same name as the argument of updateVelocitiesFromPositionsWithVelocities( float isolverdt )
+		int padding1;	// unused filler; the struct holds four 4-byte fields (assuming 4-byte int/float)
+		int padding2;
+	};
+
+	struct UpdateSoftBodiesCB	// constant buffer size for the ResetNormalsAndAreas, NormalizeNormalsAndAreas and UpdateSoftBodies kernels
+	{
+		int numNodes;
+		int startFace;
+		int numFaces;
+		float epsilon;	// no padding members here: the four fields already fill the struct
+	};
+
+
+	struct ApplyForcesCB	// sizeof(ApplyForcesCB) is passed as the constant buffer size when "ApplyForcesKernel" is compiled
+	{
+		unsigned int numNodes;
+		float solverdt;	// same name as the argument of applyForces( float solverdt )
+		float epsilon;
+		int padding3;	// unused filler; the struct holds four 4-byte fields (assuming 4-byte int/float)
+	};
+
+	struct AddVelocityCB	// NOTE(review): presumably the constant buffer layout for addVelocityKernel - confirm in the .cpp
+	{
+		int startNode;
+		int lastNode;
+		float velocityX;	// the velocity is stored as three separate float components
+		float velocityY;
+		float velocityZ;
+		int padding1;	// unused filler; with the three padding ints the struct holds eight 4-byte fields (assuming 4-byte int/float)
+		int padding2;
+		int padding3;
+	};
+
+	struct VSolveLinksCB	// NOTE(review): presumably the constant buffer layout for vSolveLinksKernel - confirm in the .cpp
+	{
+		int startLink;	// field names mirror the parameters of solveLinksForVelocity( startLink, numLinks, kst )
+		int numLinks;
+		float kst;
+		int padding;	// unused filler; the struct holds four 4-byte fields (assuming 4-byte int/float)
+	};
+
+	struct ComputeBoundsCB	// sizeof(ComputeBoundsCB) is passed as the constant buffer size when "ComputeBoundsKernel" is compiled
+	{
+		int numNodes;
+		int numSoftBodies;
+		int padding1;	// unused filler; the struct holds four 4-byte fields (assuming 4-byte int)
+		int padding2;
+	};
+
+	struct SolveCollisionsAndUpdateVelocitiesCB	// constant buffer size used when "SolveCollisionsAndUpdateVelocitiesKernel" is compiled
+	{
+		unsigned int numNodes;
+		float isolverdt;	// same name as the argument of solveCollisionsAndUpdateVelocities( float isolverdt )
+		int padding0;	// unused filler; the struct holds four 4-byte fields (assuming 4-byte int/float)
+		int padding1;
+	};
+
+	
+
+
+protected:
+	ID3D11Device *		 m_dx11Device;
+	ID3D11DeviceContext* m_dx11Context;
+	
+	DXFunctions dxFunctions;
+public:
+	/** Link data for all cloths. Note that this will be sorted batch-wise for efficient computation and m_linkAddresses will maintain the addressing. */
+	btSoftBodyLinkDataDX11 m_linkData;
+	btSoftBodyVertexDataDX11 m_vertexData;
+	btSoftBodyTriangleDataDX11 m_triangleData;
+
+protected:
+
+	/** Variable to define whether we need to update solver constants on the next iteration */
+	bool m_updateSolverConstants;
+
+	bool m_shadersInitialized;
+
+	/** 
+	 * Cloths owned by this solver.
+	 * Only our cloths are in this array.
+	 */
+	btAlignedObjectArray< btAcceleratedSoftBodyInterface * > m_softBodySet;
+
+	/** Acceleration value to be applied to all non-static vertices in the solver. 
+	 * Index n is cloth n, array sized by number of cloths in the world not the solver. 
+	 */
+	btAlignedObjectArray< Vectormath::Aos::Vector3 >	m_perClothAcceleration;
+	btDX11Buffer<Vectormath::Aos::Vector3>				m_dx11PerClothAcceleration;
+
+	/** Wind velocity to be applied normal to all non-static vertices in the solver. 
+	 * Index n is cloth n, array sized by number of cloths in the world not the solver. 
+	 */
+	btAlignedObjectArray< Vectormath::Aos::Vector3 >	m_perClothWindVelocity;
+	btDX11Buffer<Vectormath::Aos::Vector3>				m_dx11PerClothWindVelocity;
+
+	/** Velocity damping factor */
+	btAlignedObjectArray< float >						m_perClothDampingFactor;
+	btDX11Buffer<float>									m_dx11PerClothDampingFactor;
+
+	/** Velocity correction coefficient */
+	btAlignedObjectArray< float >						m_perClothVelocityCorrectionCoefficient;
+	btDX11Buffer<float>									m_dx11PerClothVelocityCorrectionCoefficient;
+
+	/** Lift parameter for wind effect on cloth. */
+	btAlignedObjectArray< float >						m_perClothLiftFactor;
+	btDX11Buffer<float>									m_dx11PerClothLiftFactor;
+	
+	/** Drag parameter for wind effect on cloth. */
+	btAlignedObjectArray< float >						m_perClothDragFactor;
+	btDX11Buffer<float>									m_dx11PerClothDragFactor;
+
+	/** Density of the medium in which each cloth sits */
+	btAlignedObjectArray< float >						m_perClothMediumDensity;
+	btDX11Buffer<float>									m_dx11PerClothMediumDensity;
+
+	
+	/** 
+	 * Collision shape details: pair of index of first collision shape for the cloth and number of collision objects.
+	 */
+	btAlignedObjectArray< CollisionObjectIndices >		m_perClothCollisionObjects;
+	btDX11Buffer<CollisionObjectIndices>				m_dx11PerClothCollisionObjects;
+
+	/** 
+	 * Collision shapes being passed across to the cloths in this solver.
+	 */
+	btAlignedObjectArray< CollisionShapeDescription >	m_collisionObjectDetails;
+	btDX11Buffer< CollisionShapeDescription >			m_dx11CollisionObjectDetails;
+
+	/** 
+	 * Minimum bounds for each cloth.
+	 * Updated by GPU and returned for use by broad phase.
+	 * These are int vectors as a reminder that they store the int representation of a float, not a float.
+	 * Bit 31 is inverted - i.e. floats are stored with int-sortable values.
+	 */
+	btAlignedObjectArray< UIntVector3 >	m_perClothMinBounds;
+	btDX11Buffer< UIntVector3 >			m_dx11PerClothMinBounds;
+
+	/** 
+	 * Maximum bounds for each cloth.
+	 * Updated by GPU and returned for use by broad phase.
+	 * These are int vectors as a reminder that they store the int representation of a float, not a float.
+	 * Bit 31 is inverted - i.e. floats are stored with int-sortable values.
+	 */
+	btAlignedObjectArray< UIntVector3 >	m_perClothMaxBounds;
+	btDX11Buffer< UIntVector3 >			m_dx11PerClothMaxBounds;
+
+	
+	/** 
+	 * Friction coefficient for each cloth
+	 */
+	btAlignedObjectArray< float >	m_perClothFriction;
+	btDX11Buffer< float >			m_dx11PerClothFriction;
+
+	DXFunctions::KernelDesc		prepareLinksKernel;
+	DXFunctions::KernelDesc		solvePositionsFromLinksKernel;
+	DXFunctions::KernelDesc		vSolveLinksKernel;
+	DXFunctions::KernelDesc		integrateKernel;
+	DXFunctions::KernelDesc		addVelocityKernel;
+	DXFunctions::KernelDesc		updatePositionsFromVelocitiesKernel;
+	DXFunctions::KernelDesc		updateVelocitiesFromPositionsWithoutVelocitiesKernel;
+	DXFunctions::KernelDesc		updateVelocitiesFromPositionsWithVelocitiesKernel;
+	DXFunctions::KernelDesc		solveCollisionsAndUpdateVelocitiesKernel;
+	DXFunctions::KernelDesc		resetNormalsAndAreasKernel;
+	DXFunctions::KernelDesc		normalizeNormalsAndAreasKernel;
+	DXFunctions::KernelDesc		computeBoundsKernel;
+	DXFunctions::KernelDesc		updateSoftBodiesKernel;
+
+	DXFunctions::KernelDesc		applyForcesKernel;
+
+	bool	m_enableUpdateBounds;
+
+	/**
+	 * Integrate motion on the solver.
+	 */
+	virtual void integrate( float solverdt );
+	float computeTriangleArea( 
+		const Vectormath::Aos::Point3 &vertex0,
+		const Vectormath::Aos::Point3 &vertex1,
+		const Vectormath::Aos::Point3 &vertex2 );
+
+
+	virtual bool buildShaders();
+
+	void resetNormalsAndAreas( int numVertices );
+
+	void normalizeNormalsAndAreas( int numVertices );
+
+	void executeUpdateSoftBodies( int firstTriangle, int numTriangles );
+
+	void prepareCollisionConstraints();
+
+	Vectormath::Aos::Vector3 ProjectOnAxis( const Vectormath::Aos::Vector3 &v, const Vectormath::Aos::Vector3 &a );
+
+	void ApplyClampedForce( float solverdt, const Vectormath::Aos::Vector3 &force, const Vectormath::Aos::Vector3 &vertexVelocity, float inverseMass, Vectormath::Aos::Vector3 &vertexForce );
+
+	virtual void applyForces( float solverdt );
+	
+	virtual void updateConstants( float timeStep );
+	int findSoftBodyIndex( const btSoftBody* const softBody );
+
+	//////////////////////////////////////
+	// Kernel dispatches
+	virtual void prepareLinks();
+
+	void updatePositionsFromVelocities( float solverdt );
+	void solveLinksForPosition( int startLink, int numLinks, float kst, float ti );
+	void solveLinksForVelocity( int startLink, int numLinks, float kst );
+	
+	void updateVelocitiesFromPositionsWithVelocities( float isolverdt );
+	void updateVelocitiesFromPositionsWithoutVelocities( float isolverdt );
+	void computeBounds( );
+	void solveCollisionsAndUpdateVelocities( float isolverdt );
+
+	// End kernel dispatches
+	/////////////////////////////////////
+
+	void updateBounds();
+
+	
+	void releaseKernels();
+
+public:
+	btDX11SoftBodySolver(ID3D11Device * dx11Device, ID3D11DeviceContext* dx11Context, DXFunctions::CompileFromMemoryFunc dx11CompileFromMemory = &D3DX11CompileFromMemory);
+
+	virtual ~btDX11SoftBodySolver();
+	
+	
+	virtual SolverTypes getSolverType() const
+	{
+		return DX_SOLVER;	// this class always reports the DX_SOLVER type
+	}
+
+	void	setEnableUpdateBounds(bool enableBounds)
+	{
+		m_enableUpdateBounds = enableBounds;	// when true, btDX11SoftBodySolver::predictMotion() calls updateBounds()
+	}
+	bool getEnableUpdateBounds() const
+	{
+		return  m_enableUpdateBounds;	// current value of the flag written by setEnableUpdateBounds()
+	}
+
+
+
+	virtual btSoftBodyLinkData &getLinkData();
+
+	virtual btSoftBodyVertexData &getVertexData();
+
+	virtual btSoftBodyTriangleData &getTriangleData();
+
+
+
+	
+
+	btAcceleratedSoftBodyInterface *findSoftBodyInterface( const btSoftBody* const softBody );
+	const btAcceleratedSoftBodyInterface * const findSoftBodyInterface( const btSoftBody* const softBody ) const;
+
+	virtual bool checkInitialized();
+
+	virtual void updateSoftBodies( );
+
+	virtual void optimize( btAlignedObjectArray< btSoftBody * > &softBodies , bool forceUpdate=false);
+
+	virtual void copyBackToSoftBodies(bool bMove = true);
+
+	virtual void solveConstraints( float solverdt );
+
+	virtual void predictMotion( float solverdt );
+
+	
+	virtual void processCollision( btSoftBody *, const btCollisionObjectWrapper* );
+
+	virtual void processCollision( btSoftBody*, btSoftBody* );
+
+};
+
+
+
+/** 
+ * Class to manage movement of data from a solver to a given target.
+ * This version is the DX to CPU version.
+ */
+class btSoftBodySolverOutputDXtoCPU : public btSoftBodySolverOutput
+{
+protected:
+	// (nothing is declared in the protected section)
+public:
+	btSoftBodySolverOutputDXtoCPU()
+	{
+	}
+	// NOTE(review): the declaration below takes one soft body and one descriptor, although its comment says "all cloths" - confirm.
+	/** Output current computed vertex data to the vertex buffers for all cloths in the solver. */
+	virtual void copySoftBodyToVertexBuffer( const btSoftBody * const softBody, btVertexBufferDescriptor *vertexBuffer );
+};
+
+/** 
+ * Class to manage movement of data from a solver to a given target.
+ * This version is the DX to DX version and subclasses DX to CPU so that it works for that too.
+ */
+class btSoftBodySolverOutputDXtoDX : public btSoftBodySolverOutputDXtoCPU
+{
+protected:
+	/** Eight ints, the last two being padding. NOTE(review): presumably the constant buffer of the output kernels - confirm. */
+	struct OutputToVertexArrayCB
+	{
+		int startNode;
+		int numNodes;
+		int positionOffset;
+		int positionStride;
+		int normalOffset;
+		int normalStride;
+		int padding1;
+		int padding2;
+	};
+
+	DXFunctions dxFunctions;
+	DXFunctions::KernelDesc outputToVertexArrayWithNormalsKernel;
+	DXFunctions::KernelDesc outputToVertexArrayWithoutNormalsKernel;
+
+	/** False after construction. NOTE(review): presumably set once buildShaders() has succeeded - confirm in the .cpp. */
+	bool m_shadersInitialized;
+
+	bool checkInitialized();
+	bool buildShaders();
+	void releaseKernels();
+
+public:
+	btSoftBodySolverOutputDXtoDX(ID3D11Device *dx11Device, ID3D11DeviceContext* dx11Context, DXFunctions::CompileFromMemoryFunc dx11CompileFromMemory = &D3DX11CompileFromMemory) :
+	  dxFunctions( dx11Device, dx11Context, dx11CompileFromMemory ),
+	  m_shadersInitialized( false )	// the constructor itself builds no shaders
+	{
+	}
+	/** Calls releaseKernels() on destruction. */
+	~btSoftBodySolverOutputDXtoDX()
+	{
+		releaseKernels();
+	}
+
+	/** Output current computed vertex data to the vertex buffers for all cloths in the solver. */
+	virtual void copySoftBodyToVertexBuffer( const btSoftBody * const softBody, btVertexBufferDescriptor *vertexBuffer );
+};
+
+#endif // #ifndef BT_ACCELERATED_SOFT_BODY_DX11_SOLVER_H
+
+
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolver_DX11SIMDAware.cpp b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolver_DX11SIMDAware.cpp
new file mode 100644
index 000000000..5c73ee5d2
--- /dev/null
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolver_DX11SIMDAware.cpp
@@ -0,0 +1,1051 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include <cstdio>
+
+
+#define WAVEFRONT_SIZE 32
+#define WAVEFRONT_BLOCK_MULTIPLIER 2
+#define GROUP_SIZE (WAVEFRONT_SIZE*WAVEFRONT_BLOCK_MULTIPLIER)
+#define LINKS_PER_SIMD_LANE 16
+
+#define STRINGIFY( S ) STRINGIFY2( S )
+#define STRINGIFY2( S ) #S
+
+#include "BulletCollision/CollisionShapes/btTriangleIndexVertexArray.h"
+#include "vectormath/vmInclude.h"
+
+#include "btSoftBodySolverLinkData_DX11SIMDAware.h"
+#include "btSoftBodySolver_DX11SIMDAware.h"
+#include "btSoftBodySolverVertexBuffer_DX11.h"
+#include "BulletSoftBody/btSoftBody.h"
+#include "BulletCollision/CollisionShapes/btCapsuleShape.h"
+
+#define MSTRINGIFY(A) #A	// stringifies its argument; NOTE(review): the declarations below carry no initializer or ';' of their own, so presumably each .hlsl supplies MSTRINGIFY( ... ); - confirm
+static char* UpdatePositionsFromVelocitiesHLSLString = 
+#include "HLSL/UpdatePositionsFromVelocities.hlsl"
+static char* SolvePositionsSIMDBatchedHLSLString = 
+#include "HLSL/SolvePositionsSIMDBatched.hlsl"
+static char* UpdateNodesHLSLString = 
+#include "HLSL/UpdateNodes.hlsl"
+static char* UpdatePositionsHLSLString = 
+#include "HLSL/UpdatePositions.hlsl"
+static char* UpdateConstantsHLSLString = 
+#include "HLSL/UpdateConstants.hlsl"
+static char* IntegrateHLSLString = 
+#include "HLSL/Integrate.hlsl"
+static char* ApplyForcesHLSLString = 
+#include "HLSL/ApplyForces.hlsl"
+static char* UpdateNormalsHLSLString = 
+#include "HLSL/UpdateNormals.hlsl"
+static char* OutputToVertexArrayHLSLString = 
+#include "HLSL/OutputToVertexArray.hlsl"
+static char* VSolveLinksHLSLString = 
+#include "HLSL/VSolveLinks.hlsl"
+static char* ComputeBoundsHLSLString = 
+#include "HLSL/ComputeBounds.hlsl"
+static char* SolveCollisionsAndUpdateVelocitiesHLSLString =
+#include "HLSL/solveCollisionsAndUpdateVelocitiesSIMDBatched.hlsl"
+
+
+
+btSoftBodyLinkDataDX11SIMDAware::btSoftBodyLinkDataDX11SIMDAware( ID3D11Device *d3dDevice, ID3D11DeviceContext *d3dDeviceContext ) : 
+		m_d3dDevice( d3dDevice ),
+		m_d3dDeviceContext( d3dDeviceContext ),
+		m_wavefrontSize( WAVEFRONT_SIZE ),
+		m_linksPerWorkItem( LINKS_PER_SIMD_LANE ),
+		m_maxBatchesWithinWave( 0 ),
+		m_maxLinksPerWavefront( WAVEFRONT_SIZE * LINKS_PER_SIMD_LANE ),	// computed from the constants so member declaration order cannot matter
+		m_numWavefronts( 0 ),
+		m_maxVertex( 0 ),
+		m_dx11NumBatchesAndVerticesWithinWaves( d3dDevice, d3dDeviceContext, &m_numBatchesAndVerticesWithinWaves, true ),
+		m_dx11WavefrontVerticesGlobalAddresses( d3dDevice, d3dDeviceContext, &m_wavefrontVerticesGlobalAddresses, true ),
+		m_dx11LinkVerticesLocalAddresses( d3dDevice, d3dDeviceContext, &m_linkVerticesLocalAddresses, true ),
+		m_dx11LinkStrength( d3dDevice, d3dDeviceContext, &m_linkStrength, true ),
+		m_dx11LinksMassLSC( d3dDevice, d3dDeviceContext, &m_linksMassLSC, true ),
+		m_dx11LinksRestLengthSquared( d3dDevice, d3dDeviceContext, &m_linksRestLengthSquared, true ),
+		m_dx11LinksRestLength( d3dDevice, d3dDeviceContext, &m_linksRestLength, true ),
+		m_dx11LinksMaterialLinearStiffnessCoefficient( d3dDevice, d3dDeviceContext, &m_linksMaterialLinearStiffnessCoefficient, true )
+{
+	// Nothing has been moved to the GPU yet; give the flag returned by onAccelerator() a defined initial value.
+	m_onGPU = false;
+}
+
+btSoftBodyLinkDataDX11SIMDAware::~btSoftBodyLinkDataDX11SIMDAware()
+{	// Empty body: no explicit cleanup is written here.
+}
+
+static Vectormath::Aos::Vector3 toVector3( const btVector3 &vec )
+{
+	// Component-wise copy of x, y and z into a Vectormath vector.
+	return Vectormath::Aos::Vector3( vec.getX(), vec.getY(), vec.getZ() );
+}
+
+void btSoftBodyLinkDataDX11SIMDAware::createLinks( int numLinks )
+{
+	// Target size of the address table: the links present before this call plus the new ones.
+	const int grownLinkCount = m_links.size() + numLinks;
+
+	// Let the base class create the links themselves.
+	btSoftBodyLinkData::createLinks( numLinks );
+
+	m_linkAddresses.resize( grownLinkCount );
+}
+
+void btSoftBodyLinkDataDX11SIMDAware::setLinkAt( const btSoftBodyLinkData::LinkDescription &link, int linkIndex )
+{
+	btSoftBodyLinkData::setLinkAt( link, linkIndex );
+
+	// Keep m_maxVertex at the largest vertex index seen on either end of any link set so far.
+	const int endpoints[2] = { link.getVertex0(), link.getVertex1() };
+	for( int i = 0; i < 2; ++i )
+		if( endpoints[i] > m_maxVertex )
+			m_maxVertex = endpoints[i];
+
+	m_linkAddresses[linkIndex] = linkIndex;	// each link initially addresses itself
+}
+
+bool btSoftBodyLinkDataDX11SIMDAware::onAccelerator()
+{
+	return m_onGPU;	// set by a successful moveToAccelerator(), cleared by a successful moveFromAccelerator()
+}
+
+bool btSoftBodyLinkDataDX11SIMDAware::moveToAccelerator()
+{
+	// Move buffer by buffer; && short-circuits, so nothing further is attempted after the first failure.
+	const bool allMoved =
+		m_dx11NumBatchesAndVerticesWithinWaves.moveToGPU() &&
+		m_dx11WavefrontVerticesGlobalAddresses.moveToGPU() &&
+		m_dx11LinkVerticesLocalAddresses.moveToGPU() &&
+		m_dx11LinkStrength.moveToGPU() &&
+		m_dx11LinksMassLSC.moveToGPU() &&
+		m_dx11LinksRestLengthSquared.moveToGPU() &&
+		m_dx11LinksRestLength.moveToGPU() &&
+		m_dx11LinksMaterialLinearStiffnessCoefficient.moveToGPU();
+
+	if( allMoved )
+		m_onGPU = true;
+
+	return allMoved;
+}
+
+bool btSoftBodyLinkDataDX11SIMDAware::moveFromAccelerator()
+{
+	// Move buffer by buffer; && short-circuits, so nothing further is attempted after the first failure.
+	const bool allMoved =
+		m_dx11NumBatchesAndVerticesWithinWaves.moveFromGPU() &&
+		m_dx11WavefrontVerticesGlobalAddresses.moveFromGPU() &&
+		m_dx11LinkVerticesLocalAddresses.moveFromGPU() &&
+		m_dx11LinkStrength.moveFromGPU() &&
+		m_dx11LinksMassLSC.moveFromGPU() &&
+		m_dx11LinksRestLengthSquared.moveFromGPU() &&
+		m_dx11LinksRestLength.moveFromGPU() &&
+		m_dx11LinksMaterialLinearStiffnessCoefficient.moveFromGPU();
+
+	if( allMoved )
+		m_onGPU = false;
+	return allMoved;
+}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+btDX11SIMDAwareSoftBodySolver::btDX11SIMDAwareSoftBodySolver(ID3D11Device * dx11Device, ID3D11DeviceContext* dx11Context, DXFunctions::CompileFromMemoryFunc dx11CompileFromMemory) :
+	btDX11SoftBodySolver( dx11Device, dx11Context, dx11CompileFromMemory ),
+	m_linkData(m_dx11Device, m_dx11Context)	// built from members, not from the arguments; presumably the base class has set them by now - confirm
+{
+	// Initially we will clearly need to update solver constants.
+	// For now this is global for the cloths linked with this solver - we should probably make this body specific 
+	// for performance in future once we understand more clearly when constants need to be updated
+	m_updateSolverConstants = true;
+	// No shaders are built by the constructor.
+	m_shadersInitialized = false;
+}
+
+btDX11SIMDAwareSoftBodySolver::~btDX11SIMDAwareSoftBodySolver()
+{
+	releaseKernels();	// the only explicit cleanup performed by this destructor
+}
+
+
+btSoftBodyLinkData &btDX11SIMDAwareSoftBodySolver::getLinkData()
+{
+	// TODO: Consider setting link data to "changed" here
+	return m_linkData;	// the m_linkData member initialised in this class's constructor, returned as its base type
+}
+
+
+
+void btDX11SIMDAwareSoftBodySolver::optimize( btAlignedObjectArray< btSoftBody * > &softBodies , bool forceUpdate)
+{
+	if(forceUpdate || m_softBodySet.size() != softBodies.size() )
+	{
+		// Have a change in the soft body set so update, reloading all the data
+		getVertexData().clear();
+		getTriangleData().clear();
+		getLinkData().clear();
+		m_softBodySet.resize(0);
+
+
+		for( int softBodyIndex = 0; softBodyIndex < softBodies.size(); ++softBodyIndex )
+		{
+			btSoftBody *softBody = softBodies[ softBodyIndex ];
+			using Vectormath::Aos::Matrix3;
+			using Vectormath::Aos::Point3;
+
+			// Create SoftBody that will store the information within the solver
+			btAcceleratedSoftBodyInterface *newSoftBody = new btAcceleratedSoftBodyInterface( softBody );
+			m_softBodySet.push_back( newSoftBody );
+
+			m_perClothAcceleration.push_back( toVector3(softBody->getWorldInfo()->m_gravity) );
+			m_perClothDampingFactor.push_back(softBody->m_cfg.kDP);
+			m_perClothVelocityCorrectionCoefficient.push_back( softBody->m_cfg.kVCF );
+			m_perClothLiftFactor.push_back( softBody->m_cfg.kLF );
+			m_perClothDragFactor.push_back( softBody->m_cfg.kDG );
+			m_perClothMediumDensity.push_back(softBody->getWorldInfo()->air_density);
+			// Simple init values. Actually we'll put 0 and -1 into them at the appropriate time
+			m_perClothMinBounds.push_back( UIntVector3( 0, 0, 0 ) );
+			m_perClothMaxBounds.push_back( UIntVector3( UINT_MAX, UINT_MAX, UINT_MAX ) );
+			m_perClothFriction.push_back( softBody->getFriction() );
+			m_perClothCollisionObjects.push_back( CollisionObjectIndices(-1, -1) );
+
+			// Add space for new vertices and triangles in the default solver for now
+			// TODO: Include space here for tearing too later
+			int firstVertex = getVertexData().getNumVertices();
+			int numVertices = softBody->m_nodes.size();
+			// Round maxVertices to a multiple of the workgroup size so we know we're safe to run over in a given group
+			// maxVertices can be increased to allow tearing, but should be used sparingly because these extra verts will always be processed
+			int maxVertices = GROUP_SIZE*((numVertices+GROUP_SIZE)/GROUP_SIZE);
+			// Allocate space for new vertices in all the vertex arrays
+			getVertexData().createVertices( numVertices, softBodyIndex, maxVertices );
+
+			int firstTriangle = getTriangleData().getNumTriangles();
+			int numTriangles = softBody->m_faces.size();
+			int maxTriangles = numTriangles;
+			getTriangleData().createTriangles( maxTriangles );
+
+			// Copy vertices from softbody into the solver
+			for( int vertex = 0; vertex < numVertices; ++vertex )
+			{
+				Point3 multPoint(softBody->m_nodes[vertex].m_x.getX(), softBody->m_nodes[vertex].m_x.getY(), softBody->m_nodes[vertex].m_x.getZ());
+				btSoftBodyVertexData::VertexDescription desc;
+
+				// TODO: Position in the softbody might be pre-transformed
+				// or we may need to adapt for the pose.
+				//desc.setPosition( cloth.getMeshTransform()*multPoint );
+				desc.setPosition( multPoint );
+
+				float vertexInverseMass = softBody->m_nodes[vertex].m_im;
+				desc.setInverseMass(vertexInverseMass);
+				getVertexData().setVertexAt( desc, firstVertex + vertex );
+			}
+
+			// Copy triangles similarly
+			// We're assuming here that vertex indices are based on the firstVertex rather than the entire scene
+			for( int triangle = 0; triangle < numTriangles; ++triangle )
+			{
+				// Note that large array storage is relative to the array not to the cloth
+				// So we need to add firstVertex to each value
+				int vertexIndex0 = (softBody->m_faces[triangle].m_n[0] - &(softBody->m_nodes[0]));
+				int vertexIndex1 = (softBody->m_faces[triangle].m_n[1] - &(softBody->m_nodes[0]));
+				int vertexIndex2 = (softBody->m_faces[triangle].m_n[2] - &(softBody->m_nodes[0]));
+				btSoftBodyTriangleData::TriangleDescription newTriangle(vertexIndex0 + firstVertex, vertexIndex1 + firstVertex, vertexIndex2 + firstVertex);
+				getTriangleData().setTriangleAt( newTriangle, firstTriangle + triangle );
+				
+				// Increase vertex triangle counts for this triangle		
+				getVertexData().getTriangleCount(newTriangle.getVertexSet().vertex0)++;
+				getVertexData().getTriangleCount(newTriangle.getVertexSet().vertex1)++;
+				getVertexData().getTriangleCount(newTriangle.getVertexSet().vertex2)++;
+			}
+
+			int firstLink = getLinkData().getNumLinks();
+			int numLinks = softBody->m_links.size();
+			int maxLinks = numLinks;
+			
+			// Allocate space for the links
+			getLinkData().createLinks( numLinks );
+
+			// Add the links
+			for( int link = 0; link < numLinks; ++link )
+			{
+				int vertexIndex0 = softBody->m_links[link].m_n[0] - &(softBody->m_nodes[0]);
+				int vertexIndex1 = softBody->m_links[link].m_n[1] - &(softBody->m_nodes[0]);
+
+				btSoftBodyLinkData::LinkDescription newLink(vertexIndex0 + firstVertex, vertexIndex1 + firstVertex, softBody->m_links[link].m_material->m_kLST);
+				newLink.setLinkStrength(1.f);
+				getLinkData().setLinkAt(newLink, firstLink + link);
+			}
+			
+			newSoftBody->setFirstVertex( firstVertex );
+			newSoftBody->setFirstTriangle( firstTriangle );
+			newSoftBody->setNumVertices( numVertices );
+			newSoftBody->setMaxVertices( maxVertices );
+			newSoftBody->setNumTriangles( numTriangles );
+			newSoftBody->setMaxTriangles( maxTriangles );
+			newSoftBody->setFirstLink( firstLink );
+			newSoftBody->setNumLinks( numLinks );
+		}
+
+
+
+		updateConstants(0.f);
+
+
+		m_linkData.generateBatches();		
+		m_triangleData.generateBatches();
+
+		
+		// Build the shaders to match the batching parameters
+		buildShaders();
+	}
+
+}
+
+
+
+// Runs the constraint solve for one step: uploads the per-cloth damping and
+// velocity-correction data, makes sure link and vertex data are on the
+// accelerator, prepares collision constraints, relaxes the links for
+// m_numberOfPositionIterations passes and finally resolves collisions and
+// updates velocities.
+//  solverdt - solver time step. Its reciprocal is passed on to
+//             solveCollisionsAndUpdateVelocities(), so it is expected to be non-zero.
+void btDX11SIMDAwareSoftBodySolver::solveConstraints( float solverdt )
+{
+
+	// Constants forwarded unchanged to every link-solving dispatch
+	const float kst = 1.f;
+	const float ti = 0.f;
+
+
+	m_dx11PerClothDampingFactor.moveToGPU();
+	m_dx11PerClothVelocityCorrectionCoefficient.moveToGPU();
+
+
+
+	// Ensure data is on accelerator
+	m_linkData.moveToAccelerator();
+	m_vertexData.moveToAccelerator();
+
+
+
+	prepareCollisionConstraints();
+
+
+	// Solve drift
+	for( int iteration = 0; iteration < m_numberOfPositionIterations ; ++iteration )
+	{
+
+		// Every batch of wavefronts gets its own dispatch; the (start, length)
+		// pairs are the ones recorded in m_wavefrontBatchStartLengths.
+		for( int i = 0; i < m_linkData.m_wavefrontBatchStartLengths.size(); ++i )
+		{
+			int startWave = m_linkData.m_wavefrontBatchStartLengths[i].start;
+			int numWaves = m_linkData.m_wavefrontBatchStartLengths[i].length;
+
+			solveLinksForPosition( startWave, numWaves, kst, ti );
+		}
+
+	} // for( int iteration = 0; iteration < m_numberOfPositionIterations ; ++iteration )
+
+
+
+
+	// At this point assume that the force array is blank - we will overwrite it
+	solveCollisionsAndUpdateVelocities( 1.f/solverdt );
+
+} // btDX11SIMDAwareSoftBodySolver::solveConstraints
+
+
+// Recomputes the cached per-link constants (rest length, rest length squared and
+// the mass/stiffness term) from the current vertex positions and inverse masses.
+// Work is only done while m_updateSolverConstants is set; the flag is cleared here.
+//  timeStep - not read by this implementation.
+void btDX11SIMDAwareSoftBodySolver::updateConstants( float timeStep )
+{
+	using namespace Vectormath::Aos;
+
+	// Nothing to refresh unless a change has been flagged
+	if( !m_updateSolverConstants )
+		return;
+	m_updateSolverConstants = false;
+
+	// Will have to redo this if we change the structure (tear, maybe) or various other possible changes
+	for( int link = 0, linkCount = m_linkData.getNumLinks(); link < linkCount; ++link )
+	{
+		btSoftBodyLinkData::LinkNodePair &ends( m_linkData.getVertexPair(link) );
+		// Rest length is the current distance between the two end points
+		m_linkData.getRestLength(link) = length((m_vertexData.getPosition( ends.vertex0 ) - m_vertexData.getPosition( ends.vertex1 )));
+		const float storedRestLength = m_linkData.getRestLength(link);
+		m_linkData.getRestLengthSquared(link) = storedRestLength*storedRestLength;
+
+		// Summed inverse masses divided by the link's linear stiffness coefficient
+		const float inverseMassSum = m_vertexData.getInverseMass(ends.vertex0) + m_vertexData.getInverseMass(ends.vertex1);
+		m_linkData.getMassLSC(link) = inverseMassSum / m_linkData.getLinearStiffnessCoefficient(link);
+	}
+} // btDX11SIMDAwareSoftBodySolver::updateConstants
+
+//////////////////////////////////////
+// Kernel dispatches
+
+
+// Dispatches the link-solving compute kernel for one batch of wavefronts.
+//  startWave, numWaves - first wavefront of the batch and the number of wavefronts in it
+//  kst, ti             - values copied unchanged into the kernel's constant buffer
+// The dispatch is skipped when the constant buffer cannot be mapped.
+void btDX11SIMDAwareSoftBodySolver::solveLinksForPosition( int startWave, int numWaves, float kst, float ti )
+{
+	m_vertexData.moveToAccelerator();
+	m_linkData.moveToAccelerator();
+
+	// Copy kernel parameters to GPU
+	SolvePositionsFromLinksKernelCB constBuffer;
+
+	// Set the first wave of the batch and the number of waves
+	constBuffer.startWave = startWave;
+	constBuffer.numWaves = numWaves;
+
+	constBuffer.kst = kst;
+	constBuffer.ti = ti;
+	
+	// Map() reports failure through its HRESULT and then leaves no valid pointer to
+	// write through, so bail out before anything is bound rather than memcpy to NULL.
+	D3D11_MAPPED_SUBRESOURCE MappedResource = {0};
+	HRESULT mapResult = m_dx11Context->Map( solvePositionsFromLinksKernel.constBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &MappedResource );
+	if( FAILED( mapResult ) || MappedResource.pData == NULL )
+		return;
+	memcpy( MappedResource.pData, &constBuffer, sizeof(SolvePositionsFromLinksKernelCB) );
+	m_dx11Context->Unmap( solvePositionsFromLinksKernel.constBuffer, 0 );
+	m_dx11Context->CSSetConstantBuffers( 0, 1, &solvePositionsFromLinksKernel.constBuffer );
+
+	// Set resources and dispatch
+	m_dx11Context->CSSetShaderResources( 0, 1, &(m_linkData.m_dx11NumBatchesAndVerticesWithinWaves.getSRV()) );
+	m_dx11Context->CSSetShaderResources( 1, 1, &(m_linkData.m_dx11WavefrontVerticesGlobalAddresses.getSRV()) );
+	m_dx11Context->CSSetShaderResources( 2, 1, &(m_vertexData.m_dx11VertexInverseMass.getSRV()) );
+	m_dx11Context->CSSetShaderResources( 3, 1, &(m_linkData.m_dx11LinkVerticesLocalAddresses.getSRV()) );
+	m_dx11Context->CSSetShaderResources( 4, 1, &(m_linkData.m_dx11LinksMassLSC.getSRV()) );
+	m_dx11Context->CSSetShaderResources( 5, 1, &(m_linkData.m_dx11LinksRestLengthSquared.getSRV()) );
+	m_dx11Context->CSSetUnorderedAccessViews( 0, 1, &(m_vertexData.m_dx11VertexPosition.getUAV()), NULL );
+
+	// Execute the kernel
+	m_dx11Context->CSSetShader( solvePositionsFromLinksKernel.kernel, NULL, 0 );
+	// numWaves is rounded up to a whole number of WAVEFRONT_BLOCK_MULTIPLIER-sized blocks
+	int	numBlocks = ((constBuffer.numWaves + WAVEFRONT_BLOCK_MULTIPLIER - 1) / WAVEFRONT_BLOCK_MULTIPLIER );
+	m_dx11Context->Dispatch(numBlocks , 1, 1 );
+
+	{
+		// Tidy up: unbind the six shader resource slots, the UAV and the constant buffer
+		ID3D11ShaderResourceView* pViewNULL = NULL;
+		for( unsigned int slot = 0; slot < 6; ++slot )
+			m_dx11Context->CSSetShaderResources( slot, 1, &pViewNULL );
+
+		ID3D11UnorderedAccessView* pUAViewNULL = NULL;
+		m_dx11Context->CSSetUnorderedAccessViews( 0, 1, &pUAViewNULL, NULL );
+		ID3D11Buffer *pBufferNull = NULL;
+		m_dx11Context->CSSetConstantBuffers( 0, 1, &pBufferNull );
+	}
+} // btDX11SIMDAwareSoftBodySolver::solveLinksForPosition
+
+
+
+// End kernel dispatches
+/////////////////////////////////////
+
+
+
+
+
+
+
+
+
+// (Re)compiles every compute kernel used by this solver and records whether all
+// of them produced a constant buffer.
+// Returns true when all kernels were built (or were already flagged as built),
+// false when at least one kernel has no constant buffer after compilation.
+bool btDX11SIMDAwareSoftBodySolver::buildShaders()
+{
+	// Ensure current kernels are released first
+	releaseKernels();
+
+	bool returnVal = true;
+
+	// NOTE(review): this early-out runs after releaseKernels(); confirm that releaseKernels()
+	// clears m_shadersInitialized, otherwise released kernels would be reported as built.
+	if( m_shadersInitialized )
+		return true;
+
+	updatePositionsFromVelocitiesKernel = dxFunctions.compileComputeShaderFromString( UpdatePositionsFromVelocitiesHLSLString, "UpdatePositionsFromVelocitiesKernel", sizeof(UpdatePositionsFromVelocitiesCB) );
+	if( !updatePositionsFromVelocitiesKernel.constBuffer )
+		returnVal = false;
+	// Decimal text of the batching parameters, passed to the link solver shader as macro values
+	char maxVerticesPerWavefront[20];
+	char maxBatchesPerWavefront[20];
+	char waveFrontSize[20];
+	char waveFrontBlockMultiplier[20];
+	char blockSize[20];
+	sprintf(maxVerticesPerWavefront, "%d", m_linkData.getMaxVerticesPerWavefront());
+	sprintf(maxBatchesPerWavefront, "%d", m_linkData.getMaxBatchesPerWavefront());
+	sprintf(waveFrontSize, "%d", m_linkData.getWavefrontSize());	
+	sprintf(waveFrontBlockMultiplier, "%d", WAVEFRONT_BLOCK_MULTIPLIER);
+	sprintf(blockSize, "%d", WAVEFRONT_BLOCK_MULTIPLIER*m_linkData.getWavefrontSize());
+	// Five name/value pairs followed by a null pair that terminates the list
+	D3D10_SHADER_MACRO solvePositionsMacros[6] = { "MAX_NUM_VERTICES_PER_WAVE", maxVerticesPerWavefront, "MAX_BATCHES_PER_WAVE", maxBatchesPerWavefront, "WAVEFRONT_SIZE", waveFrontSize, "WAVEFRONT_BLOCK_MULTIPLIER", waveFrontBlockMultiplier, "BLOCK_SIZE", blockSize, 0, 0 };
+	solvePositionsFromLinksKernel = dxFunctions.compileComputeShaderFromString( SolvePositionsSIMDBatchedHLSLString, "SolvePositionsFromLinksKernel", sizeof(SolvePositionsFromLinksKernelCB), solvePositionsMacros );
+	if( !solvePositionsFromLinksKernel.constBuffer )
+		returnVal = false;
+
+	updateVelocitiesFromPositionsWithVelocitiesKernel = dxFunctions.compileComputeShaderFromString( UpdateNodesHLSLString, "updateVelocitiesFromPositionsWithVelocitiesKernel", sizeof(UpdateVelocitiesFromPositionsWithVelocitiesCB) );
+	if( !updateVelocitiesFromPositionsWithVelocitiesKernel.constBuffer )
+		returnVal = false;
+	updateVelocitiesFromPositionsWithoutVelocitiesKernel = dxFunctions.compileComputeShaderFromString( UpdatePositionsHLSLString, "updateVelocitiesFromPositionsWithoutVelocitiesKernel", sizeof(UpdateVelocitiesFromPositionsWithoutVelocitiesCB));
+	if( !updateVelocitiesFromPositionsWithoutVelocitiesKernel.constBuffer )
+		returnVal = false;
+	integrateKernel = dxFunctions.compileComputeShaderFromString( IntegrateHLSLString, "IntegrateKernel", sizeof(IntegrateCB) );
+	if( !integrateKernel.constBuffer )
+		returnVal = false;
+	applyForcesKernel = dxFunctions.compileComputeShaderFromString( ApplyForcesHLSLString, "ApplyForcesKernel", sizeof(ApplyForcesCB) );
+	if( !applyForcesKernel.constBuffer )
+		returnVal = false;
+	solveCollisionsAndUpdateVelocitiesKernel = dxFunctions.compileComputeShaderFromString( SolveCollisionsAndUpdateVelocitiesHLSLString, "SolveCollisionsAndUpdateVelocitiesKernel", sizeof(SolveCollisionsAndUpdateVelocitiesCB) );
+	if( !solveCollisionsAndUpdateVelocitiesKernel.constBuffer )
+		returnVal = false;
+	resetNormalsAndAreasKernel = dxFunctions.compileComputeShaderFromString( UpdateNormalsHLSLString, "ResetNormalsAndAreasKernel", sizeof(UpdateSoftBodiesCB) );
+	if( !resetNormalsAndAreasKernel.constBuffer )
+		returnVal = false;
+	normalizeNormalsAndAreasKernel = dxFunctions.compileComputeShaderFromString( UpdateNormalsHLSLString, "NormalizeNormalsAndAreasKernel", sizeof(UpdateSoftBodiesCB) );
+	if( !normalizeNormalsAndAreasKernel.constBuffer )
+		returnVal = false;
+	updateSoftBodiesKernel = dxFunctions.compileComputeShaderFromString( UpdateNormalsHLSLString, "UpdateSoftBodiesKernel", sizeof(UpdateSoftBodiesCB) );
+	if( !updateSoftBodiesKernel.constBuffer )
+		returnVal = false;
+	computeBoundsKernel = dxFunctions.compileComputeShaderFromString( ComputeBoundsHLSLString, "ComputeBoundsKernel", sizeof(ComputeBoundsCB) );
+	if( !computeBoundsKernel.constBuffer )
+		returnVal = false;
+
+	if( returnVal )
+		m_shadersInitialized = true;
+	return returnVal;
+} // btDX11SIMDAwareSoftBodySolver::buildShaders
+
+// Copies the three basis columns and the origin of a btTransform into a Vectormath Transform3
+static Vectormath::Aos::Transform3 toTransform3( const btTransform &transform )
+{
+	Vectormath::Aos::Transform3 result;
+	for( int column = 0; column < 3; ++column )
+		result.setCol( column, toVector3( transform.getBasis().getColumn(column) ) );
+	result.setCol( 3, toVector3( transform.getOrigin() ) );
+	return result;
+}
+
+
+
+
+
+
+
+
+
+
+
+
+// Greedily groups wavefronts into batches so that no two wavefronts in one batch
+// reference a common vertex; each wavefront goes into the first batch it does not
+// conflict with, or into a new batch when every existing one conflicts.
+//  linksForWavefronts - for each wavefront, the indices of the links it contains
+//  linkData           - supplies the vertex pair of each link
+//  numVertices        - each per-batch vertex map is sized numVertices+1
+//  wavefrontBatches   - receives, per batch, the indices of the wavefronts placed in it
+static void generateBatchesOfWavefronts( btAlignedObjectArray < btAlignedObjectArray <int> > &linksForWavefronts, btSoftBodyLinkData &linkData, int numVertices, btAlignedObjectArray < btAlignedObjectArray <int> > &wavefrontBatches )
+{
+	// A per-batch map of truth values stating whether a given vertex is in that batch
+	// This allows us to significantly optimize the batching
+	btAlignedObjectArray <btAlignedObjectArray<bool> > mapOfVerticesInBatches;
+
+	for( int waveIndex = 0; waveIndex < linksForWavefronts.size(); ++waveIndex )
+	{
+		btAlignedObjectArray <int> &wavefront( linksForWavefronts[waveIndex] );
+		const int linksInWave = wavefront.size();
+
+		// Find the first existing batch that shares no vertex with this wave
+		int batch = 0;
+		for( ; batch < wavefrontBatches.size(); ++batch )
+		{
+			btAlignedObjectArray<bool> &usedVertices( mapOfVerticesInBatches[batch] );
+
+			// One conflict is enough to reject the batch, so stop scanning at the first hit
+			bool foundSharedVertex = false;
+			for( int link = 0; link < linksInWave && !foundSharedVertex; ++link )
+			{
+				const btSoftBodyLinkData::LinkNodePair &vertices = linkData.getVertexPair( wavefront[link] );
+				foundSharedVertex = usedVertices[vertices.vertex0] || usedVertices[vertices.vertex1];
+			}
+
+			if( !foundSharedVertex )
+				break;
+		}
+
+		// Every existing batch conflicts: open a new one with an all-false vertex map
+		if( batch == wavefrontBatches.size() )
+		{
+			wavefrontBatches.resize( batch + 1 );
+
+			// And resize map as well
+			mapOfVerticesInBatches.resize( batch + 1 );
+
+			// Resize maps with total number of vertices
+			mapOfVerticesInBatches[batch].resize( numVertices+1, false );
+		}
+
+		// Record the wave in the chosen batch and mark its vertices as used there
+		wavefrontBatches[batch].push_back( waveIndex );
+
+		btAlignedObjectArray<bool> &claimedVertices( mapOfVerticesInBatches[batch] );
+		for( int link = 0; link < linksInWave; ++link )
+		{
+			const btSoftBodyLinkData::LinkNodePair &vertices = linkData.getVertexPair( wavefront[link] );
+			claimedVertices[vertices.vertex0] = true;
+			claimedVertices[vertices.vertex1] = true;
+		}
+	}
+	mapOfVerticesInBatches.clear();
+}
+
+// Erases the element at indexToRemove, keeping the remaining elements in their original order
+template< typename T > static void removeFromVector( btAlignedObjectArray< T > &vectorToUpdate, int indexToRemove )
+{
+	const int lastIndex = vectorToUpdate.size() - 1;
+	// Slide every later element one slot towards the front
+	for( int target = indexToRemove; target < lastIndex; ++target )
+		vectorToUpdate[target] = vectorToUpdate[target + 1];
+	// An empty array has nothing to drop
+	if( lastIndex >= 0 )
+		vectorToUpdate.resize( lastIndex );
+}
+
+/**
+ * Grows vectorToUpdate by one slot and stores element at position index,
+ * moving the elements previously at index and beyond one slot towards the back.
+ */
+template< typename T > static void insertAtIndex( btAlignedObjectArray< T > &vectorToUpdate, int index, T element )
+{
+	const int previousSize = vectorToUpdate.size();
+	vectorToUpdate.resize( previousSize + 1 );
+	for( int slot = previousSize; slot > index; --slot )
+		vectorToUpdate[slot] = vectorToUpdate[slot - 1];
+	vectorToUpdate[index] = element;
+}
+
+/** 
+ * Treats vectorToUpdate as an ascending ordered set: element is inserted at its
+ * sorted position unless an equal entry is already stored there.
+ */
+template< typename T > static void insertUniqueAndOrderedIntoVector( btAlignedObjectArray<T> &vectorToUpdate, T element )
+{
+	const int count = vectorToUpdate.size();
+	// Skip past every entry that orders before element
+	int slot = 0;
+	for( ; slot < count && vectorToUpdate[slot] < element; ++slot ) {}
+	const bool alreadyStored = ( slot < count ) && !( vectorToUpdate[slot] != element );
+	if( !alreadyStored )
+		insertAtIndex( vectorToUpdate, slot, element );
+}
+
+// Builds, for every vertex, the list of links that touch it.
+//  numVertices          - number of vertices covered by the tables
+//  linkData             - supplies the vertex pair of each link
+//  listOfLinksPerVertex - output table; the links of vertex v start at v * maxLinks
+//  numLinksPerVertex    - incremented once for every link end that references the vertex (not reset here)
+//  maxLinks             - receives the largest per-vertex link count, which is also the table's row stride
+static void generateLinksPerVertex( int numVertices, btSoftBodyLinkData &linkData, btAlignedObjectArray< int > &listOfLinksPerVertex, btAlignedObjectArray <int> &numLinksPerVertex, int &maxLinks )
+{
+	const int linkCount = linkData.getNumLinks();
+
+	// First pass: count how many links touch each vertex
+	for( int link = 0; link < linkCount; ++link )
+	{
+		const btSoftBodyLinkData::LinkNodePair &ends = linkData.getVertexPair(link);
+		++numLinksPerVertex[ends.vertex0];
+		++numLinksPerVertex[ends.vertex1];
+	}
+
+	// The widest row decides the stride of the table
+	int rowStride = 0;
+	for( int vertex = 0; vertex < numVertices; ++vertex )
+		rowStride = btMax( numLinksPerVertex[vertex], rowStride );
+	maxLinks = rowStride;
+
+	listOfLinksPerVertex.resize( rowStride * numVertices );
+
+	// Next free column in each vertex's row
+	btAlignedObjectArray< int > nextColumn;
+	nextColumn.resize( numVertices, 0 );
+
+	// Second pass: append each link to the rows of both of its end points
+	for( int link = 0; link < linkCount; ++link )
+	{
+		const btSoftBodyLinkData::LinkNodePair &ends = linkData.getVertexPair(link);
+		const int endVertices[2] = { ends.vertex0, ends.vertex1 };
+
+		for( int end = 0; end < 2; ++end )
+		{
+			const int vertex = endVertices[end];
+			const int column = nextColumn[vertex];
+			listOfLinksPerVertex[vertex * rowStride + column] = link;
+			nextColumn[vertex] = column + 1;
+		}
+	}
+}
+
+// Splits the links of linkData into wavefronts and, inside every wavefront, into
+// batches of links that share no vertex.
+//  wavefrontSize        - maximum number of links placed in one batch of a wavefront
+//  linksPerWorkItem     - not read by this function
+//  maxLinksPerWavefront - maximum number of links gathered into one wavefront
+//  Outputs: linksForWavefronts (links per wavefront), batchesWithinWaves (wave, batch, links)
+//  and verticesForWavefronts (ordered, duplicate-free vertex list per wavefront).
+static void computeBatchingIntoWavefronts( 
+	btSoftBodyLinkData &linkData, 
+	int wavefrontSize, 
+	int linksPerWorkItem, 
+	int maxLinksPerWavefront, 
+	btAlignedObjectArray < btAlignedObjectArray <int> > &linksForWavefronts, 
+	btAlignedObjectArray< btAlignedObjectArray < btAlignedObjectArray <int> > > &batchesWithinWaves, /* wave, batch, links in batch */
+	btAlignedObjectArray< btAlignedObjectArray< int > > &verticesForWavefronts /* wavefront, vertex */
+	)
+{
+	// Attempt generation of larger batches of links.
+	btAlignedObjectArray< bool > processedLink;
+	processedLink.resize( linkData.getNumLinks() );
+	btAlignedObjectArray< int > listOfLinksPerVertex;
+	int maxLinksPerVertex = 0;
+
+	// Count num vertices
+	int numVertices = 0;
+	for( int linkIndex = 0; linkIndex < linkData.getNumLinks(); ++linkIndex )
+	{
+		btSoftBodyLinkData::LinkNodePair nodes( linkData.getVertexPair(linkIndex) );
+		numVertices = btMax( numVertices, nodes.vertex0 + 1 );
+		numVertices = btMax( numVertices, nodes.vertex1 + 1 );
+	}
+
+	// Need list of links per vertex
+	// Compute valence of each vertex
+	btAlignedObjectArray <int> numLinksPerVertex;
+	numLinksPerVertex.resize(0);
+	numLinksPerVertex.resize( numVertices, 0 );
+
+	generateLinksPerVertex( numVertices, linkData, listOfLinksPerVertex, numLinksPerVertex, maxLinksPerVertex );
+	// At this point we know what links we have for each vertex so we can start batching
+	// We want a vertex to start with, let's go with 0
+	int currentVertex = 0;
+	int linksProcessed = 0;
+	// Queue of vertices still to visit: filled at the back, consumed from the front
+	btAlignedObjectArray <int> verticesToProcess;
+
+	while( linksProcessed < linkData.getNumLinks() )
+	{
+		// Next wavefront
+		int nextWavefront = linksForWavefronts.size();
+		linksForWavefronts.resize( nextWavefront + 1 );
+		btAlignedObjectArray <int> &linksForWavefront(linksForWavefronts[nextWavefront]);
+		verticesForWavefronts.resize( nextWavefront + 1 );
+		btAlignedObjectArray<int> &vertexSet( verticesForWavefronts[nextWavefront] );
+
+		linksForWavefront.resize(0);
+
+		// Loop to find enough links to fill the wavefront
+		// Stopping if we either run out of links, or fill it
+		while( linksProcessed < linkData.getNumLinks() && linksForWavefront.size() < maxLinksPerWavefront )
+		{
+			// Go through the links for the current vertex
+			for( int link = 0; link < numLinksPerVertex[currentVertex] && linksForWavefront.size() < maxLinksPerWavefront; ++link )
+			{
+				int linkAddress = currentVertex * maxLinksPerVertex + link;
+				int linkIndex = listOfLinksPerVertex[linkAddress];
+				
+				// If we have not already processed this link, add it to the wavefront
+				// Claim it as another processed link
+				// Add the vertex at the far end to the list of vertices to process.
+				if( !processedLink[linkIndex] )
+				{
+					linksForWavefront.push_back( linkIndex );
+					linksProcessed++;
+					processedLink[linkIndex] = true;
+					int v0 = linkData.getVertexPair(linkIndex).vertex0;
+					int v1 = linkData.getVertexPair(linkIndex).vertex1;
+					if( v0 == currentVertex )
+						verticesToProcess.push_back( v1 );
+					else
+						verticesToProcess.push_back( v0 );
+				}
+			}
+			if( verticesToProcess.size() > 0 )
+			{
+				// Get the element on the front of the queue and remove it
+				currentVertex = verticesToProcess[0];
+				removeFromVector( verticesToProcess, 0 );
+			} else {		
+				// If we've not yet processed all the links, find the first unprocessed one
+				// and select one of its vertices as the current vertex
+				if( linksProcessed < linkData.getNumLinks() )
+				{
+					int searchLink = 0;
+					while( processedLink[searchLink] )
+						searchLink++;
+					currentVertex = linkData.getVertexPair(searchLink).vertex0;
+				}	
+			}
+		}
+
+		// We have either finished or filled a wavefront
+		for( int link = 0; link < linksForWavefront.size(); ++link )
+		{
+			int v0 = linkData.getVertexPair( linksForWavefront[link] ).vertex0;
+			int v1 = linkData.getVertexPair( linksForWavefront[link] ).vertex1;
+			insertUniqueAndOrderedIntoVector( vertexSet, v0 );
+			insertUniqueAndOrderedIntoVector( vertexSet, v1 );
+		}
+		// Iterate over links mapped to the wave and batch those
+		// We can run a batch on each cycle trivially
+		
+		batchesWithinWaves.resize( batchesWithinWaves.size() + 1 );
+		btAlignedObjectArray < btAlignedObjectArray <int> > &batchesWithinWave( batchesWithinWaves[batchesWithinWaves.size()-1] );
+		// Place each link of the wave into the first batch that is not full and shares no vertex with it
+		for( int link = 0; link < linksForWavefront.size(); ++link )
+		{
+			int linkIndex = linksForWavefront[link];
+			btSoftBodyLinkData::LinkNodePair vertices = linkData.getVertexPair( linkIndex );
+			
+			int batch = 0;
+			bool placed = false;
+			while( batch < batchesWithinWave.size() && !placed )
+			{
+				bool foundSharedVertex = false;
+				if( batchesWithinWave[batch].size() >= wavefrontSize )
+				{
+					// If we have already filled this batch, move on to another
+					foundSharedVertex = true;
+				} else {
+					for( int link2 = 0; link2 < batchesWithinWave[batch].size(); ++link2 )
+					{
+						btSoftBodyLinkData::LinkNodePair vertices2 = linkData.getVertexPair( (batchesWithinWave[batch])[link2] );
+
+						if( vertices.vertex0 == vertices2.vertex0 ||
+							vertices.vertex1 == vertices2.vertex0 ||
+							vertices.vertex0 == vertices2.vertex1 ||
+							vertices.vertex1 == vertices2.vertex1 )
+						{
+							foundSharedVertex = true;
+							break;
+						}
+					}
+				}
+				if( !foundSharedVertex )
+				{
+					batchesWithinWave[batch].push_back( linkIndex );
+					placed = true;
+				} else {
+					++batch;
+				}
+			}
+			if( batch == batchesWithinWave.size() && !placed )
+			{
+				batchesWithinWave.resize( batch + 1 );
+				batchesWithinWave[batch].push_back( linkIndex );
+			}
+		}
+	}
+
+}
+
+// Reorders the link arrays into the wavefront/batch layout used by the SIMD-aware solver:
+// links are grouped into wavefronts, the wavefronts into batches that share no vertex,
+// and every batch inside a wavefront is padded to m_wavefrontSize entries.
+// Also fills m_wavefrontBatchStartLengths, m_numBatchesAndVerticesWithinWaves,
+// m_wavefrontVerticesGlobalAddresses and m_linkVerticesLocalAddresses, and updates
+// m_numWavefronts, m_maxBatchesWithinWave and m_maxVerticesWithinWave.
+void btSoftBodyLinkDataDX11SIMDAware::generateBatches()
+{
+	btAlignedObjectArray < btAlignedObjectArray <int> > linksForWavefronts;
+	btAlignedObjectArray < btAlignedObjectArray <int> > wavefrontBatches;
+	btAlignedObjectArray< btAlignedObjectArray < btAlignedObjectArray <int> > > batchesWithinWaves;
+	btAlignedObjectArray< btAlignedObjectArray< int > > verticesForWavefronts; // wavefronts, vertices in wavefront as an ordered set
+
+	// Group the links into wavefronts
+	computeBatchingIntoWavefronts( *this, m_wavefrontSize, m_linksPerWorkItem, m_maxLinksPerWavefront, linksForWavefronts, batchesWithinWaves, verticesForWavefronts );
+
+	// Batch the wavefronts
+	generateBatchesOfWavefronts( linksForWavefronts, *this, m_maxVertex, wavefrontBatches );
+
+	m_numWavefronts = linksForWavefronts.size();
+
+	// At this point we have a description of which links we need to process in each wavefront
+
+	// First correctly fill the batch ranges vector
+	int numBatches = wavefrontBatches.size();
+	m_wavefrontBatchStartLengths.resize(0);
+	int prefixSum = 0;
+	for( int batchIndex = 0; batchIndex < numBatches; ++batchIndex )
+	{
+		int wavesInBatch = wavefrontBatches[batchIndex].size();
+		int nextPrefixSum = prefixSum + wavesInBatch;
+		m_wavefrontBatchStartLengths.push_back( BatchPair( prefixSum, nextPrefixSum - prefixSum ) );
+
+		prefixSum += wavesInBatch;
+	}
+	
+	// Also find max number of batches within a wave
+	m_maxBatchesWithinWave = 0;
+	m_maxVerticesWithinWave = 0;
+	m_numBatchesAndVerticesWithinWaves.resize( m_numWavefronts );
+	for( int waveIndex = 0; waveIndex < m_numWavefronts; ++waveIndex )
+	{
+		// See if the number of batches in this wave is greater than the current maximum
+		int batchesInCurrentWave = batchesWithinWaves[waveIndex].size();
+		int verticesInCurrentWave = verticesForWavefronts[waveIndex].size();
+		m_maxBatchesWithinWave = btMax( batchesInCurrentWave, m_maxBatchesWithinWave );
+		m_maxVerticesWithinWave = btMax( verticesInCurrentWave, m_maxVerticesWithinWave );
+	}
+	
+	// Add padding values both for alignment and as dud addresses within LDS to compute junk rather than branch around
+	m_maxVerticesWithinWave = 16*((m_maxVerticesWithinWave/16)+2);
+
+	// Now we know the maximum number of vertices per-wave we can resize the global vertices array
+	m_wavefrontVerticesGlobalAddresses.resize( m_maxVerticesWithinWave * m_numWavefronts );
+
+	// Grab backup copies of all the link data arrays for the sorting process
+	btAlignedObjectArray<btSoftBodyLinkData::LinkNodePair>				m_links_Backup(m_links);
+	btAlignedObjectArray<float>											m_linkStrength_Backup(m_linkStrength);
+	btAlignedObjectArray<float>											m_linksMassLSC_Backup(m_linksMassLSC);
+	btAlignedObjectArray<float>											m_linksRestLengthSquared_Backup(m_linksRestLengthSquared);
+	//btAlignedObjectArray<Vectormath::Aos::Vector3>						m_linksCLength_Backup(m_linksCLength);
+	//btAlignedObjectArray<float>											m_linksLengthRatio_Backup(m_linksLengthRatio);
+	btAlignedObjectArray<float>											m_linksRestLength_Backup(m_linksRestLength);
+	btAlignedObjectArray<float>											m_linksMaterialLinearStiffnessCoefficient_Backup(m_linksMaterialLinearStiffnessCoefficient);
+
+	// Resize to a wavefront sized batch per batch per wave so we get perfectly coherent memory accesses.
+	m_links.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts );
+	m_linkVerticesLocalAddresses.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts );
+	m_linkStrength.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts );
+	m_linksMassLSC.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts );
+	m_linksRestLengthSquared.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts );
+	m_linksRestLength.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts );
+	m_linksMaterialLinearStiffnessCoefficient.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts );	
+		
+	// Then re-order links into wavefront blocks
+
+	// Total number of wavefronts moved. This will decide the ordering of sorted wavefronts.
+	int wavefrontCount = 0;
+
+	// Iterate over batches of wavefronts, then wavefronts in the batch
+	for( int batchIndex = 0; batchIndex < numBatches; ++batchIndex )
+	{
+		btAlignedObjectArray <int> &batch( wavefrontBatches[batchIndex] );
+		int wavefrontsInBatch = batch.size();
+
+		for( int wavefrontIndex = 0; wavefrontIndex < wavefrontsInBatch; ++wavefrontIndex )
+		{	
+
+			int originalWavefrontIndex = batch[wavefrontIndex];
+			btAlignedObjectArray< int > &wavefrontVertices( verticesForWavefronts[originalWavefrontIndex] );
+			int verticesUsedByWavefront = wavefrontVertices.size();
+
+			// Copy the set of vertices into the correctly structured array for use on the device
+			// Fill the non-vertices with -1s
+			// so we can mask out those reads
+			for( int vertex = 0; vertex < verticesUsedByWavefront; ++vertex )
+			{
+				m_wavefrontVerticesGlobalAddresses[m_maxVerticesWithinWave * wavefrontCount + vertex] = wavefrontVertices[vertex];
+			}
+			for( int vertex = verticesUsedByWavefront; vertex < m_maxVerticesWithinWave; ++vertex )
+			{
+				m_wavefrontVerticesGlobalAddresses[m_maxVerticesWithinWave * wavefrontCount + vertex] = -1;
+			}
+
+			// Obtain the set of batches within the current wavefront
+			btAlignedObjectArray < btAlignedObjectArray <int> > &batchesWithinWavefront( batchesWithinWaves[originalWavefrontIndex] );
+			// Set the size of the batches for use in the solver, correctly ordered
+			NumBatchesVerticesPair batchesAndVertices;
+			batchesAndVertices.numBatches = batchesWithinWavefront.size();
+			batchesAndVertices.numVertices = verticesUsedByWavefront;
+			m_numBatchesAndVerticesWithinWaves[wavefrontCount] = batchesAndVertices;
+			
+			// Now iterate over batches within the wavefront to structure the links correctly
+			for( int wavefrontBatch = 0; wavefrontBatch < batchesWithinWavefront.size(); ++wavefrontBatch )
+			{
+				btAlignedObjectArray <int> &linksInBatch( batchesWithinWavefront[wavefrontBatch] );
+				int wavefrontBatchSize = linksInBatch.size();
+
+				int batchAddressInTarget = m_maxBatchesWithinWave * m_wavefrontSize * wavefrontCount + m_wavefrontSize * wavefrontBatch;
+
+				for( int linkIndex = 0; linkIndex < wavefrontBatchSize; ++linkIndex )
+				{
+					int originalLinkAddress = linksInBatch[linkIndex];
+					// Reorder simple arrays trivially
+					m_links[batchAddressInTarget + linkIndex] = m_links_Backup[originalLinkAddress];
+					m_linkStrength[batchAddressInTarget + linkIndex] = m_linkStrength_Backup[originalLinkAddress];
+					m_linksMassLSC[batchAddressInTarget + linkIndex] = m_linksMassLSC_Backup[originalLinkAddress];
+					m_linksRestLengthSquared[batchAddressInTarget + linkIndex] = m_linksRestLengthSquared_Backup[originalLinkAddress];
+					m_linksRestLength[batchAddressInTarget + linkIndex] = m_linksRestLength_Backup[originalLinkAddress];
+					m_linksMaterialLinearStiffnessCoefficient[batchAddressInTarget + linkIndex] = m_linksMaterialLinearStiffnessCoefficient_Backup[originalLinkAddress];
+
+					// The local address is more complicated. We need to work out where a given vertex will end up
+					// by searching the set of vertices for this link and using the index as the local address
+					btSoftBodyLinkData::LinkNodePair localPair;
+					btSoftBodyLinkData::LinkNodePair globalPair = m_links[batchAddressInTarget + linkIndex];
+					localPair.vertex0 = wavefrontVertices.findLinearSearch( globalPair.vertex0 );
+					localPair.vertex1 = wavefrontVertices.findLinearSearch( globalPair.vertex1 );
+					m_linkVerticesLocalAddresses[batchAddressInTarget + linkIndex] = localPair;
+				}
+				for( int linkIndex = wavefrontBatchSize; linkIndex < m_wavefrontSize; ++linkIndex )
+				{
+					// Put 0s into these arrays for padding for cleanliness
+					m_links[batchAddressInTarget + linkIndex] = btSoftBodyLinkData::LinkNodePair(0, 0);
+					m_linkStrength[batchAddressInTarget + linkIndex] = 0.f;
+					m_linksMassLSC[batchAddressInTarget + linkIndex] = 0.f;
+					m_linksRestLengthSquared[batchAddressInTarget + linkIndex] = 0.f;
+					m_linksRestLength[batchAddressInTarget + linkIndex] = 0.f;
+					m_linksMaterialLinearStiffnessCoefficient[batchAddressInTarget + linkIndex] = 0.f;
+
+					// For local addresses of junk data choose a set of addresses just above the range of valid ones 
+					// and cycling through % 16 so that we don't have bank conflicts between all dud addresses
+					// The valid addresses will do scatter and gather in the valid range, the junk ones should happily work
+					// off the end of that range so we need no control
+					btSoftBodyLinkData::LinkNodePair localPair;
+					localPair.vertex0 = verticesUsedByWavefront + (linkIndex % 16);
+					localPair.vertex1 = verticesUsedByWavefront + (linkIndex % 16);
+					m_linkVerticesLocalAddresses[batchAddressInTarget + linkIndex] = localPair;
+				}
+
+			}
+
+			wavefrontCount++;
+		}
+
+	}
+
+} // void btSoftBodyLinkDataDX11SIMDAware::generateBatches()
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolver_DX11SIMDAware.h b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolver_DX11SIMDAware.h
new file mode 100644
index 000000000..348819738
--- /dev/null
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/btSoftBodySolver_DX11SIMDAware.h
@@ -0,0 +1,81 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "vectormath/vmInclude.h"
+#include "btSoftBodySolver_DX11.h"
+#include "btSoftBodySolverVertexBuffer_DX11.h"
+#include "btSoftBodySolverLinkData_DX11SIMDAware.h"
+#include "btSoftBodySolverVertexData_DX11.h"
+#include "btSoftBodySolverTriangleData_DX11.h"
+
+
+#ifndef BT_SOFT_BODY_DX11_SOLVER_SIMDAWARE_H
+#define BT_SOFT_BODY_DX11_SOLVER_SIMDAWARE_H
+/** DX11 soft body solver specialisation of btDX11SoftBodySolver that keeps its links in a btSoftBodyLinkDataDX11SIMDAware. */
+class btDX11SIMDAwareSoftBodySolver : public btDX11SoftBodySolver
+{
+protected:
+	struct SolvePositionsFromLinksKernelCB
+	{		// NOTE(review): presumably the constant-buffer layout for the link position solve; confirm against the shader source
+		int startWave;
+		int numWaves;
+		float kst;
+		float ti;
+	};
+
+
+	/** Link data for all cloths. Note that this will be sorted batch-wise for efficient computation and m_linkAddresses will maintain the addressing. */
+	btSoftBodyLinkDataDX11SIMDAware m_linkData;
+		
+	/** Variable to define whether we need to update solver constants on the next iteration */
+	bool m_updateSolverConstants;
+
+	/** Virtual shader build hook. NOTE(review): the bool result presumably reports success; confirm in the .cpp. */
+	virtual bool buildShaders();
+	/** NOTE(review): presumably refreshes per-step solver constants (see m_updateSolverConstants); confirm in the .cpp. */
+	void updateConstants( float timeStep );
+
+
+	//////////////////////////////////////
+	// Kernel dispatches
+	
+	// NOTE(review): parameters are named startLink/numLinks while SolvePositionsFromLinksKernelCB holds startWave/numWaves; confirm which unit callers pass.
+	void solveLinksForPosition( int startLink, int numLinks, float kst, float ti );
+
+	// End kernel dispatches
+	/////////////////////////////////////
+
+
+
+public:
+	btDX11SIMDAwareSoftBodySolver(ID3D11Device * dx11Device, ID3D11DeviceContext* dx11Context, DXFunctions::CompileFromMemoryFunc dx11CompileFromMemory = &D3DX11CompileFromMemory);
+
+	virtual ~btDX11SIMDAwareSoftBodySolver();
+
+	virtual btSoftBodyLinkData &getLinkData();
+
+	virtual void optimize( btAlignedObjectArray< btSoftBody * > &softBodies , bool forceUpdate=false);
+
+	virtual void solveConstraints( float solverdt );
+	/** Identifies this solver as DX_SIMD_SOLVER. */
+	virtual SolverTypes getSolverType() const
+	{
+		return DX_SIMD_SOLVER;
+	}
+	
+};
+
+#endif // #ifndef BT_SOFT_BODY_DX11_SOLVER_SIMDAWARE_H
+
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/premake4.lua b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/premake4.lua
new file mode 100644
index 000000000..4625306dc
--- /dev/null
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/DX11/premake4.lua
@@ -0,0 +1,23 @@
+	
+hasDX11 = findDirectX11()
+
+-- Define the project only when findDirectX11() returned a truthy value.
+if hasDX11 then
+
+	project "BulletSoftBodyDX11Solvers"
+
+	initDirectX11()
+
+	kind "StaticLib"
+
+	targetdir "../../../../lib"
+
+	-- Header search paths.
+	includedirs { ".", "../../.." }
+
+	-- Every .cpp/.h file matched by the ** patterns is added to the project.
+	files {
+		"**.cpp",
+		"**.h",
+	}
+end
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/AMD/CMakeLists.txt b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/AMD/CMakeLists.txt
new file mode 100644
index 000000000..9826f6378
--- /dev/null
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/AMD/CMakeLists.txt
@@ -0,0 +1,62 @@
+
+# Header search path: the Bullet src tree plus ${AMD_OPENCL_INCLUDES} (set outside this file).
+include_directories(
+	${BULLET_PHYSICS_SOURCE_DIR}/src
+	${AMD_OPENCL_INCLUDES}
+)
+
+# Compile definitions applied to targets created in this directory.
+add_definitions(-DUSE_AMD_OPENCL -DCL_PLATFORM_AMD)
+
+set(BulletSoftBodyOpenCLSolvers_SRCS
+	../btSoftBodySolver_OpenCL.cpp
+	../btSoftBodySolver_OpenCLSIMDAware.cpp
+	../btSoftBodySolverOutputCLtoGL.cpp
+)
+
+set(BulletSoftBodyOpenCLSolvers_HDRS
+	../btSoftBodySolver_OpenCL.h
+	../btSoftBodySolver_OpenCLSIMDAware.h
+	../../Shared/btSoftBodySolverData.h
+	../btSoftBodySolverVertexData_OpenCL.h
+	../btSoftBodySolverTriangleData_OpenCL.h
+	../btSoftBodySolverLinkData_OpenCL.h
+	../btSoftBodySolverLinkData_OpenCLSIMDAware.h
+	../btSoftBodySolverBuffer_OpenCL.h
+	../btSoftBodySolverVertexBuffer_OpenGL.h
+	../btSoftBodySolverOutputCLtoGL.h
+)
+
+add_library(BulletSoftBodySolvers_OpenCL_AMD
+	${BulletSoftBodyOpenCLSolvers_SRCS}
+	${BulletSoftBodyOpenCLSolvers_HDRS}
+)
+
+# Library version and soname version both track BULLET_VERSION.
+set_target_properties(BulletSoftBodySolvers_OpenCL_AMD PROPERTIES
+	VERSION ${BULLET_VERSION}
+	SOVERSION ${BULLET_VERSION}
+)
+
+# Only shared builds link against BulletSoftBody here.
+if(BUILD_SHARED_LIBS)
+	target_link_libraries(BulletSoftBodySolvers_OpenCL_AMD BulletSoftBody)
+endif()
+
+if(INSTALL_LIBS AND NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES)
+	# Install rules are only emitted when the CMake major.minor number is greater than 2.5.
+	if(${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5)
+		if(APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
+			install(TARGETS BulletSoftBodySolvers_OpenCL_AMD DESTINATION .)
+		else()
+			# Headers are already installed by the BulletMultiThreaded library.
+			install(TARGETS BulletSoftBodySolvers_OpenCL_AMD DESTINATION lib${LIB_SUFFIX})
+		endif()
+	endif()
+
+	# Mark the target as a framework and attach the header list as its public headers.
+	if(APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
+		set_target_properties(BulletSoftBodySolvers_OpenCL_AMD PROPERTIES FRAMEWORK true)
+		set_target_properties(BulletSoftBodySolvers_OpenCL_AMD PROPERTIES PUBLIC_HEADER "${BulletSoftBodyOpenCLSolvers_HDRS}")
+	endif()
+endif()
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/AMD/premake4.lua b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/AMD/premake4.lua
new file mode 100644
index 000000000..8c663a8cb
--- /dev/null
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/AMD/premake4.lua
@@ -0,0 +1,27 @@
+	
+hasCL = findOpenCL_AMD()
+
+-- Skip the whole project unless findOpenCL_AMD() returned a truthy value.
+if hasCL then
+
+	project "BulletSoftBodySolvers_OpenCL_AMD"
+
+	defines {
+		"USE_AMD_OPENCL",
+		"CL_PLATFORM_AMD",
+	}
+
+	initOpenCL_AMD()
+
+	kind "StaticLib"
+	targetdir "../../../../../lib"
+
+	includedirs { ".", "../../../..", "../../../../../Glut" }
+
+	-- Sources come from the parent OpenCL directory.
+	files {
+		"../btSoftBodySolver_OpenCL.cpp",
+		"../btSoftBodySolver_OpenCLSIMDAware.cpp",
+		"../btSoftBodySolverOutputCLtoGL.cpp",
+	}
+end
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/Apple/CMakeLists.txt b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/Apple/CMakeLists.txt
new file mode 100644
index 000000000..35dd4eb2d
--- /dev/null
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/Apple/CMakeLists.txt
@@ -0,0 +1,77 @@
+
+INCLUDE_DIRECTORIES(
+${BULLET_PHYSICS_SOURCE_DIR}/src
+)
+
+# Solver implementation files, located one directory up.
+SET(BulletSoftBodyOpenCLSolvers_SRCS
+	../btSoftBodySolver_OpenCL.cpp
+	../btSoftBodySolver_OpenCLSIMDAware.cpp
+)
+
+# Headers for the sources above. The SIMD-aware headers are listed as well so that they
+# are part of the PUBLIC_HEADER set used for framework builds further down.
+SET(BulletSoftBodyOpenCLSolvers_HDRS
+	../btSoftBodySolver_OpenCL.h
+	../btSoftBodySolver_OpenCLSIMDAware.h
+	../../Shared/btSoftBodySolverData.h
+	../btSoftBodySolverVertexData_OpenCL.h
+	../btSoftBodySolverTriangleData_OpenCL.h
+	../btSoftBodySolverLinkData_OpenCL.h
+	../btSoftBodySolverLinkData_OpenCLSIMDAware.h
+	../btSoftBodySolverBuffer_OpenCL.h
+)
+
+# OpenCL kernel sources (base names of the .cl files under ../OpenCLC10).
+# They are only appended to the library's source list below; this file defines
+# no rule that stringifies them.
+SET(BulletSoftBodyOpenCLSolvers_Shaders
+#	OutputToVertexArray
+	UpdateNormals
+	Integrate
+	UpdatePositions
+	UpdateNodes
+	SolvePositions
+	UpdatePositionsFromVelocities
+	ApplyForces
+	PrepareLinks
+	VSolveLinks
+)
+
+foreach(f ${BulletSoftBodyOpenCLSolvers_Shaders})
+    LIST(APPEND BulletSoftBodyOpenCLSolvers_OpenCLC "../OpenCLC10/${f}.cl")
+endforeach(f) 
+
+ADD_LIBRARY(BulletSoftBodySolvers_OpenCL_Apple
+	${BulletSoftBodyOpenCLSolvers_SRCS} 
+	${BulletSoftBodyOpenCLSolvers_HDRS} 
+	${BulletSoftBodyOpenCLSolvers_OpenCLC}
+)
+
+SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL_Apple PROPERTIES VERSION ${BULLET_VERSION})
+SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL_Apple PROPERTIES SOVERSION ${BULLET_VERSION})
+IF (BUILD_SHARED_LIBS)
+	# BUILD_SHARED_LIBS is already known to be true here, so testing APPLE alone is sufficient.
+	IF (APPLE)
+		SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL_Apple PROPERTIES LINK_FLAGS "-framework OpenCL")
+	ENDIF (APPLE)
+	TARGET_LINK_LIBRARIES(BulletSoftBodySolvers_OpenCL_Apple BulletSoftBody)
+ENDIF (BUILD_SHARED_LIBS)
+
+IF (INSTALL_LIBS)
+	IF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES)
+		IF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5)
+			IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
+				INSTALL(TARGETS BulletSoftBodySolvers_OpenCL_Apple  DESTINATION .)
+			ELSE (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
+				INSTALL(TARGETS BulletSoftBodySolvers_OpenCL_Apple  DESTINATION lib${LIB_SUFFIX})
+#headers are already installed by BulletMultiThreaded library
+			ENDIF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
+		ENDIF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5)
+
+		IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
+			SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL_Apple PROPERTIES FRAMEWORK true)
+			SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL_Apple PROPERTIES PUBLIC_HEADER "${BulletSoftBodyOpenCLSolvers_HDRS}")
+		ENDIF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
+	ENDIF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES)
+ENDIF (INSTALL_LIBS)
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/CMakeLists.txt b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/CMakeLists.txt
new file mode 100644
index 000000000..cf9a0be28
--- /dev/null
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/CMakeLists.txt
@@ -0,0 +1,17 @@
+ADD_SUBDIRECTORY(MiniCL) # added unconditionally
+# Each vendor-specific directory below is only added when its condition holds.
+IF(BUILD_INTEL_OPENCL_DEMOS)
+	ADD_SUBDIRECTORY(Intel)
+ENDIF()
+
+IF(BUILD_AMD_OPENCL_DEMOS)
+	ADD_SUBDIRECTORY(AMD)
+ENDIF()
+
+IF(BUILD_NVIDIA_OPENCL_DEMOS)
+	ADD_SUBDIRECTORY(NVidia)
+ENDIF()
+
+IF(APPLE AND OPENCL_LIBRARY) 
+	ADD_SUBDIRECTORY(Apple)
+ENDIF()
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/Intel/CMakeLists.txt b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/Intel/CMakeLists.txt
new file mode 100644
index 000000000..9503a5f40
--- /dev/null
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/Intel/CMakeLists.txt
@@ -0,0 +1,82 @@
+
+# Header search path: the Bullet src tree plus ${INTEL_OPENCL_INCLUDES} (set outside this file).
+include_directories(
+	${BULLET_PHYSICS_SOURCE_DIR}/src
+	${INTEL_OPENCL_INCLUDES}
+)
+
+# Compile definitions applied to targets created in this directory.
+add_definitions(-DUSE_INTEL_OPENCL -DCL_PLATFORM_INTEL)
+
+set(BulletSoftBodyOpenCLSolvers_SRCS
+	../btSoftBodySolver_OpenCL.cpp
+	../btSoftBodySolver_OpenCLSIMDAware.cpp
+	../btSoftBodySolverOutputCLtoGL.cpp
+)
+
+set(BulletSoftBodyOpenCLSolvers_HDRS
+	../btSoftBodySolver_OpenCL.h
+	../btSoftBodySolver_OpenCLSIMDAware.h
+	../../Shared/btSoftBodySolverData.h
+	../btSoftBodySolverVertexData_OpenCL.h
+	../btSoftBodySolverTriangleData_OpenCL.h
+	../btSoftBodySolverLinkData_OpenCL.h
+	../btSoftBodySolverLinkData_OpenCLSIMDAware.h
+	../btSoftBodySolverBuffer_OpenCL.h
+	../btSoftBodySolverVertexBuffer_OpenGL.h
+	../btSoftBodySolverOutputCLtoGL.h
+)
+
+# Base names of the OpenCL kernel files under ../OpenCLC10; the loop below
+# expands each one to a relative .cl path that is added to the library sources.
+set(BulletSoftBodyOpenCLSolvers_Shaders
+#	OutputToVertexArray
+	UpdateNormals
+	Integrate
+	UpdatePositions
+	UpdateNodes
+	SolvePositions
+	UpdatePositionsFromVelocities
+	ApplyForces
+	PrepareLinks
+	VSolveLinks
+)
+
+foreach(f ${BulletSoftBodyOpenCLSolvers_Shaders})
+	list(APPEND BulletSoftBodyOpenCLSolvers_OpenCLC "../OpenCLC10/${f}.cl")
+endforeach()
+
+add_library(BulletSoftBodySolvers_OpenCL_Intel
+	${BulletSoftBodyOpenCLSolvers_SRCS}
+	${BulletSoftBodyOpenCLSolvers_HDRS}
+	${BulletSoftBodyOpenCLSolvers_OpenCLC}
+)
+
+# Library version and soname version both track BULLET_VERSION.
+set_target_properties(BulletSoftBodySolvers_OpenCL_Intel PROPERTIES
+	VERSION ${BULLET_VERSION}
+	SOVERSION ${BULLET_VERSION}
+)
+
+# Only shared builds link against BulletSoftBody here.
+if(BUILD_SHARED_LIBS)
+	target_link_libraries(BulletSoftBodySolvers_OpenCL_Intel BulletSoftBody)
+endif()
+
+if(INSTALL_LIBS AND NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES)
+	# Install rules are only emitted when the CMake major.minor number is greater than 2.5.
+	if(${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5)
+		if(APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
+			install(TARGETS BulletSoftBodySolvers_OpenCL_Intel DESTINATION .)
+		else()
+			# Headers are already installed by the BulletMultiThreaded library.
+			install(TARGETS BulletSoftBodySolvers_OpenCL_Intel DESTINATION lib${LIB_SUFFIX})
+		endif()
+	endif()
+
+	# Mark the target as a framework and attach the header list as its public headers.
+	if(APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
+		set_target_properties(BulletSoftBodySolvers_OpenCL_Intel PROPERTIES FRAMEWORK true)
+		set_target_properties(BulletSoftBodySolvers_OpenCL_Intel PROPERTIES PUBLIC_HEADER "${BulletSoftBodyOpenCLSolvers_HDRS}")
+	endif()
+endif()
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/Intel/premake4.lua b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/Intel/premake4.lua
new file mode 100644
index 000000000..668886d17
--- /dev/null
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/Intel/premake4.lua
@@ -0,0 +1,27 @@
+	
+hasCL = findOpenCL_Intel()
+
+-- Skip the whole project unless findOpenCL_Intel() returned a truthy value.
+if hasCL then
+
+	project "BulletSoftBodySolvers_OpenCL_Intel"
+
+	defines {
+		"USE_INTEL_OPENCL",
+		"CL_PLATFORM_INTEL",
+	}
+
+	initOpenCL_Intel()
+
+	kind "StaticLib"
+	targetdir "../../../../../lib"
+
+	includedirs { ".", "../../../..", "../../../../../Glut" }
+
+	-- Sources come from the parent OpenCL directory.
+	files {
+		"../btSoftBodySolver_OpenCL.cpp",
+		"../btSoftBodySolver_OpenCLSIMDAware.cpp",
+		"../btSoftBodySolverOutputCLtoGL.cpp",
+	}
+end
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/MiniCL/CMakeLists.txt b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/MiniCL/CMakeLists.txt
new file mode 100644
index 000000000..2ca3ca087
--- /dev/null
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/MiniCL/CMakeLists.txt
@@ -0,0 +1,75 @@
+
+# Only the Bullet src tree is added to the header search path.
+include_directories(
+	${BULLET_PHYSICS_SOURCE_DIR}/src
+)
+
+# Compile definition applied to targets created in this directory.
+add_definitions(-DUSE_MINICL)
+
+set(BulletSoftBodyOpenCLSolvers_SRCS
+	../btSoftBodySolver_OpenCL.cpp
+)
+
+set(BulletSoftBodyOpenCLSolvers_HDRS
+	../btSoftBodySolver_OpenCL.h
+	../../Shared/btSoftBodySolverData.h
+	../btSoftBodySolverVertexData_OpenCL.h
+	../btSoftBodySolverTriangleData_OpenCL.h
+	../btSoftBodySolverLinkData_OpenCL.h
+	../btSoftBodySolverBuffer_OpenCL.h
+)
+
+# Base names of the OpenCL kernel files under ../OpenCLC10; the loop below
+# expands each one to a relative .cl path that is added to the library sources.
+set(BulletSoftBodyOpenCLSolvers_Shaders
+#	OutputToVertexArray
+	UpdateNormals
+	Integrate
+	UpdatePositions
+	UpdateNodes
+	SolvePositions
+	UpdatePositionsFromVelocities
+	ApplyForces
+	PrepareLinks
+	VSolveLinks
+)
+
+foreach(f ${BulletSoftBodyOpenCLSolvers_Shaders})
+	list(APPEND BulletSoftBodyOpenCLSolvers_OpenCLC "../OpenCLC10/${f}.cl")
+endforeach()
+
+add_library(BulletSoftBodySolvers_OpenCL_Mini
+	${BulletSoftBodyOpenCLSolvers_SRCS}
+	${BulletSoftBodyOpenCLSolvers_HDRS}
+	${BulletSoftBodyOpenCLSolvers_OpenCLC}
+)
+
+# Library version and soname version both track BULLET_VERSION.
+set_target_properties(BulletSoftBodySolvers_OpenCL_Mini PROPERTIES
+	VERSION ${BULLET_VERSION}
+	SOVERSION ${BULLET_VERSION}
+)
+
+# Only shared builds link against MiniCL, BulletMultiThreaded and BulletSoftBody here.
+if(BUILD_SHARED_LIBS)
+	target_link_libraries(BulletSoftBodySolvers_OpenCL_Mini MiniCL BulletMultiThreaded BulletSoftBody)
+endif()
+
+if(INSTALL_LIBS AND NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES)
+	# Install rules are only emitted when the CMake major.minor number is greater than 2.5.
+	if(${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5)
+		if(APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
+			install(TARGETS BulletSoftBodySolvers_OpenCL_Mini DESTINATION .)
+		else()
+			# Headers are already installed by the BulletMultiThreaded library.
+			install(TARGETS BulletSoftBodySolvers_OpenCL_Mini DESTINATION lib${LIB_SUFFIX})
+		endif()
+	endif()
+
+	# Mark the target as a framework and attach the header list as its public headers.
+	if(APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
+		set_target_properties(BulletSoftBodySolvers_OpenCL_Mini PROPERTIES FRAMEWORK true)
+		set_target_properties(BulletSoftBodySolvers_OpenCL_Mini PROPERTIES PUBLIC_HEADER "${BulletSoftBodyOpenCLSolvers_HDRS}")
+	endif()
+endif()
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/MiniCL/MiniCLTaskWrap.cpp b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/MiniCL/MiniCLTaskWrap.cpp
new file mode 100644
index 000000000..dfa60e66c
--- /dev/null
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/MiniCL/MiniCLTaskWrap.cpp
@@ -0,0 +1,249 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2007 Erwin Coumans  http://bulletphysics.com
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include <MiniCL/cl_MiniCL_Defs.h>
+
+#define MSTRINGIFY(A) A
+#include "../OpenCLC10/ApplyForces.cl"
+#include "../OpenCLC10/Integrate.cl"
+#include "../OpenCLC10/PrepareLinks.cl"
+#include "../OpenCLC10/SolvePositions.cl"
+#include "../OpenCLC10/UpdateNodes.cl"
+#include "../OpenCLC10/UpdateNormals.cl"
+#include "../OpenCLC10/UpdatePositions.cl"
+#include "../OpenCLC10/UpdatePositionsFromVelocities.cl"
+#include "../OpenCLC10/VSolveLinks.cl"
+#include "../OpenCLC10/UpdateFixedVertexPositions.cl"
+//#include "../OpenCLC10/SolveCollisionsAndUpdateVelocities.cl"
+
+// Kernel registrations. NOTE(review): MINICL_REGISTER and the kernels named here presumably come from the MiniCL header and the .cl files included above; confirm.
+MINICL_REGISTER(PrepareLinksKernel)
+MINICL_REGISTER(VSolveLinksKernel)
+MINICL_REGISTER(UpdatePositionsFromVelocitiesKernel)
+MINICL_REGISTER(SolvePositionsFromLinksKernel)
+MINICL_REGISTER(updateVelocitiesFromPositionsWithVelocitiesKernel)
+MINICL_REGISTER(updateVelocitiesFromPositionsWithoutVelocitiesKernel)
+MINICL_REGISTER(IntegrateKernel)
+MINICL_REGISTER(ApplyForcesKernel)
+MINICL_REGISTER(ResetNormalsAndAreasKernel)
+MINICL_REGISTER(NormalizeNormalsAndAreasKernel)
+MINICL_REGISTER(UpdateSoftBodiesKernel)
+MINICL_REGISTER(UpdateFixedVertexPositions)
+// Dot product of the x, y and z lanes of a and b; the w lanes are ignored.
+float mydot3a(float4 a, float4 b)
+{
+   return a.x*b.x + a.y*b.y + a.z*b.z;
+}
+
+// Range of collision object entries that belong to one cloth; the kernel below iterates firstObject <= i < endObject.
+typedef struct 
+{
+	int firstObject;
+	int endObject;
+} CollisionObjectIndices;
+// One collision object as read by the collision kernel below. NOTE(review): presumably must match the layout written by the host solver code; confirm.
+typedef struct 
+{
+	float4 shapeTransform[4]; // column major 4x4 matrix
+	float4 linearVelocity;
+	float4 angularVelocity;
+
+	int softBodyIdentifier;
+	int collisionShapeType; // compared against CAPSULE_SHAPE_PROXYTYPE by the kernel
+	
+
+	// Shape information
+	// Compressed from the union
+	float radius;
+	float halfHeight;
+	int upAxis; // 0, 1 or 2 selects x, y or z as the capsule axis in the kernel
+		
+	float margin; // added to radius to form the minimum allowed distance
+	float friction;
+
+	int padding0;
+	
+} CollisionShapeDescription;
+
+// From btBroadphaseProxy.h; the only collisionShapeType the collision kernel below handles
+__constant int CAPSULE_SHAPE_PROXYTYPE = 10;
+
+// Returns matrix * vector for a 4x4 matrix stored as four column vectors (matrix[i] is column i)
+float4 matrixVectorMul( float4 matrix[4], float4 vector )
+{
+	float4 rowX = float4(matrix[0].x, matrix[1].x, matrix[2].x, matrix[3].x);
+	float4 rowY = float4(matrix[0].y, matrix[1].y, matrix[2].y, matrix[3].y);
+	float4 rowZ = float4(matrix[0].z, matrix[1].z, matrix[2].z, matrix[3].z);
+	float4 rowW = float4(matrix[0].w, matrix[1].w, matrix[2].w, matrix[3].w);
+	float4 product;
+	product.x = dot(rowX, vector);
+	product.y = dot(rowY, vector);
+	product.z = dot(rowZ, vector);
+	product.w = dot(rowW, vector);
+	return product;
+}
+// Per-vertex capsule collision pass: pushes penetrating cloth vertices back out of the capsules assigned to their cloth, then writes velocity, friction force and position.
+__kernel void 
+SolveCollisionsAndUpdateVelocitiesKernel( 
+	const int numNodes,
+	const float isolverdt,
+	__global int *g_vertexClothIdentifier,
+	__global float4 *g_vertexPreviousPositions,
+	__global float * g_perClothFriction,
+	__global float * g_clothDampingFactor,
+	__global CollisionObjectIndices * g_perClothCollisionObjectIndices,
+	__global CollisionShapeDescription * g_collisionObjectDetails,
+	__global float4 * g_vertexForces,
+	__global float4 *g_vertexVelocities,
+	__global float4 *g_vertexPositions GUID_ARG)
+{
+	int nodeID = get_global_id(0);
+	float4 forceOnVertex = (float4)(0.f, 0.f, 0.f, 0.f);
+	// Work items whose id is not below numNodes do nothing
+	if( get_global_id(0) < numNodes )
+	{	
+		int clothIdentifier = g_vertexClothIdentifier[nodeID];
+		
+		// Abort if this is not a valid cloth
+		if( clothIdentifier < 0 )
+			return;
+
+
+		float4 position (g_vertexPositions[nodeID].xyz, 1.f);
+		float4 previousPosition (g_vertexPreviousPositions[nodeID].xyz, 1.f);
+		// Velocity is the position delta scaled by (1 - damping) and by isolverdt (presumably the inverse solver time step - confirm)
+		float clothFriction = g_perClothFriction[clothIdentifier];
+		float dampingFactor = g_clothDampingFactor[clothIdentifier];
+		float velocityCoefficient = (1.f - dampingFactor);		
+		float4 difference = position - previousPosition;
+		float4 velocity = difference*velocityCoefficient*isolverdt;
+		
+		CollisionObjectIndices collisionObjectIndices = g_perClothCollisionObjectIndices[clothIdentifier];
+		// Collision objects of this cloth occupy the half-open index range [firstObject, endObject)
+		int numObjects = collisionObjectIndices.endObject - collisionObjectIndices.firstObject;
+		
+		if( numObjects > 0 )
+		{
+			// We have some possible collisions to deal with
+			for( int collision = collisionObjectIndices.firstObject; collision < collisionObjectIndices.endObject; ++collision )
+			{
+				CollisionShapeDescription shapeDescription = g_collisionObjectDetails[collision];
+				float colliderFriction = shapeDescription.friction;
+
+				if( shapeDescription.collisionShapeType == CAPSULE_SHAPE_PROXYTYPE )
+				{
+					// Colliding with a capsule
+
+					float capsuleHalfHeight = shapeDescription.halfHeight;
+					float capsuleRadius = shapeDescription.radius;
+					float capsuleMargin = shapeDescription.margin;
+					int capsuleupAxis = shapeDescription.upAxis;
+
+					// Four columns of worldTransform matrix
+					float4 worldTransform[4];
+					worldTransform[0] = shapeDescription.shapeTransform[0];
+					worldTransform[1] = shapeDescription.shapeTransform[1];
+					worldTransform[2] = shapeDescription.shapeTransform[2];
+					worldTransform[3] = shapeDescription.shapeTransform[3];
+					// Local end points c1/c2 sit at -/+ capsuleHalfHeight along the up axis (0 = x, 1 = y, 2 = z)
+					// Correctly define capsule centerline vector 
+					float4 c1 (0.f, 0.f, 0.f, 1.f); 
+					float4 c2 (0.f, 0.f, 0.f, 1.f);
+					c1.x = select( 0.f, -capsuleHalfHeight, capsuleupAxis == 0 );
+					c1.y = select( 0.f, -capsuleHalfHeight, capsuleupAxis == 1 );
+					c1.z = select( 0.f, -capsuleHalfHeight, capsuleupAxis == 2 );
+					c2.x = -c1.x;
+					c2.y = -c1.y;
+					c2.z = -c1.z;
+
+					// Transform both end points into world space
+					float4 worldC1 = matrixVectorMul(worldTransform, c1);
+					float4 worldC2 = matrixVectorMul(worldTransform, c2);
+					float4 segment = (worldC2 - worldC1);
+					// NOTE(review): the division below has a zero denominator when capsuleHalfHeight is 0 (worldC1 == worldC2)
+					// compute distance of tangent to vertex along line segment in capsule
+					float distanceAlongSegment = -( mydot3a( (worldC1 - position), segment ) / mydot3a(segment, segment) );
+					// distanceAlongSegment is a parameter along C1->C2: below 0 is before C1, above 1 is past C2
+					float4 closestPoint = (worldC1 + (segment * distanceAlongSegment));
+					float distanceFromLine = length(position - closestPoint);
+					float distanceFromC1 = length(worldC1 - position);
+					float distanceFromC2 = length(worldC2 - position);
+					
+					// Final distance from collision, point to push from, direction to push in
+					// for impulse force
+					float dist;
+					float4 normalVector;
+					if( distanceAlongSegment < 0 )
+					{
+						dist = distanceFromC1;
+						normalVector = float4(normalize(position - worldC1).xyz, 0.f);
+					} else if( distanceAlongSegment > 1.f ) {
+						dist = distanceFromC2;
+						normalVector = float4(normalize(position - worldC2).xyz, 0.f);	
+					} else {
+						dist = distanceFromLine;
+						normalVector = float4(normalize(position - closestPoint).xyz, 0.f);
+					}
+					// NOTE(review): with column-major columns the translation would be worldTransform[3].xyz; the .w lanes of columns 0..2 are read below - confirm against the host-side layout
+					float4 colliderLinearVelocity = shapeDescription.linearVelocity;
+					float4 colliderAngularVelocity = shapeDescription.angularVelocity;
+					float4 velocityOfSurfacePoint = colliderLinearVelocity + cross(colliderAngularVelocity, position - float4(worldTransform[0].w, worldTransform[1].w, worldTransform[2].w, 0.f));
+
+					float minDistance = capsuleRadius + capsuleMargin;
+					
+					// In case of no collision, this is the value of velocity
+					velocity = (position - previousPosition) * velocityCoefficient * isolverdt;
+					
+					
+					// Check for a collision
+					if( dist < minDistance )
+					{
+						// Project back towards the surface along the normal (0.9 of the penetration depth)
+						position = position + float4(normalVector*(minDistance - dist)*0.9f);
+						velocity = (position - previousPosition) * velocityCoefficient * isolverdt;
+						float4 relativeVelocity = velocity - velocityOfSurfacePoint;
+						// p1 and p2 are both perpendicular to the contact normal
+						float4 p1 = normalize(cross(normalVector, segment));
+						float4 p2 = normalize(cross(p1, normalVector));
+						// Full friction is sum of velocities in each direction of plane
+						float4 frictionVector = p1*mydot3a(relativeVelocity, p1) + p2*mydot3a(relativeVelocity, p2);
+
+						// Real friction is peak friction corrected by friction coefficients
+						frictionVector = frictionVector * (colliderFriction*clothFriction);
+
+						float approachSpeed = dot(relativeVelocity, normalVector);
+						// Friction is only accumulated when approachSpeed is not positive
+						if( approachSpeed <= 0.0f )
+							forceOnVertex -= frictionVector;
+					}
+				}
+			}
+		}
+		// Results are written back with w forced to 0
+		g_vertexVelocities[nodeID] = float4(velocity.xyz, 0.f);	
+
+		// Update external force
+		g_vertexForces[nodeID] = float4(forceOnVertex.xyz, 0.f);
+
+		g_vertexPositions[nodeID] = float4(position.xyz, 0.f);
+	}
+}
+
+
+MINICL_REGISTER(SolveCollisionsAndUpdateVelocitiesKernel);
+
+
+
+
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/NVidia/CMakeLists.txt b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/NVidia/CMakeLists.txt
new file mode 100644
index 000000000..ea0406390
--- /dev/null
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/NVidia/CMakeLists.txt
@@ -0,0 +1,81 @@
+
+ADD_DEFINITIONS(-DUSE_NVIDIA_OPENCL)
+ADD_DEFINITIONS(-DCL_PLATFORM_NVIDIA)
+
+INCLUDE_DIRECTORIES(
+	${BULLET_PHYSICS_SOURCE_DIR}/src
+	${NVIDIA_OPENCL_INCLUDES}
+)
+
+# Solver implementation files, located one directory up.
+SET(BulletSoftBodyOpenCLSolvers_SRCS
+	../btSoftBodySolver_OpenCL.cpp
+	../btSoftBodySolver_OpenCLSIMDAware.cpp
+	../btSoftBodySolverOutputCLtoGL.cpp
+)
+
+# Headers for the sources above, including the header that pairs with btSoftBodySolver_OpenCLSIMDAware.cpp.
+SET(BulletSoftBodyOpenCLSolvers_HDRS
+	../btSoftBodySolver_OpenCL.h
+	../btSoftBodySolver_OpenCLSIMDAware.h
+	../../Shared/btSoftBodySolverData.h
+	../btSoftBodySolverVertexData_OpenCL.h
+	../btSoftBodySolverTriangleData_OpenCL.h
+	../btSoftBodySolverLinkData_OpenCL.h
+	../btSoftBodySolverLinkData_OpenCLSIMDAware.h
+	../btSoftBodySolverBuffer_OpenCL.h
+	../btSoftBodySolverVertexBuffer_OpenGL.h
+	../btSoftBodySolverOutputCLtoGL.h
+)
+
+# OpenCL kernel sources (base names of the .cl files under ../OpenCLC10).
+# They are only appended to the library's source list below; this file defines
+# no rule that stringifies them.
+SET(BulletSoftBodyOpenCLSolvers_Shaders
+#	OutputToVertexArray
+	UpdateNormals
+	Integrate
+	UpdatePositions
+	UpdateNodes
+	SolvePositions
+	UpdatePositionsFromVelocities
+	ApplyForces
+	PrepareLinks
+	VSolveLinks
+)
+
+foreach(f ${BulletSoftBodyOpenCLSolvers_Shaders})
+    LIST(APPEND BulletSoftBodyOpenCLSolvers_OpenCLC "../OpenCLC10/${f}.cl")
+endforeach(f) 
+
+
+ADD_LIBRARY(BulletSoftBodySolvers_OpenCL_NVidia
+	${BulletSoftBodyOpenCLSolvers_SRCS} 
+	${BulletSoftBodyOpenCLSolvers_HDRS} 
+	${BulletSoftBodyOpenCLSolvers_OpenCLC}
+)
+
+SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL_NVidia PROPERTIES VERSION ${BULLET_VERSION})
+SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL_NVidia PROPERTIES SOVERSION ${BULLET_VERSION})
+IF (BUILD_SHARED_LIBS)
+	TARGET_LINK_LIBRARIES(BulletSoftBodySolvers_OpenCL_NVidia BulletSoftBody BulletDynamics)
+ENDIF (BUILD_SHARED_LIBS)
+
+
+IF (INSTALL_LIBS)
+	IF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES)
+		IF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5)
+			IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
+				INSTALL(TARGETS BulletSoftBodySolvers_OpenCL_NVidia DESTINATION .)
+			ELSE (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
+				INSTALL(TARGETS BulletSoftBodySolvers_OpenCL_NVidia DESTINATION lib${LIB_SUFFIX})
+#headers are already installed by BulletMultiThreaded library
+			ENDIF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
+		ENDIF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5)
+
+		IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
+			SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL_NVidia PROPERTIES FRAMEWORK true)
+			SET_TARGET_PROPERTIES(BulletSoftBodySolvers_OpenCL_NVidia PROPERTIES PUBLIC_HEADER "${BulletSoftBodyOpenCLSolvers_HDRS}")
+		ENDIF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
+	ENDIF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES)
+ENDIF (INSTALL_LIBS)
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/NVidia/premake4.lua b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/NVidia/premake4.lua
new file mode 100644
index 000000000..0bab1e30f
--- /dev/null
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/NVidia/premake4.lua
@@ -0,0 +1,27 @@
+	
+hasCL = findOpenCL_NVIDIA()
+
+-- Skip the whole project unless findOpenCL_NVIDIA() returned a truthy value.
+if hasCL then
+
+	project "BulletSoftBodySolvers_OpenCL_NVIDIA"
+
+	defines {
+		"USE_NVIDIA_OPENCL",
+		"CL_PLATFORM_NVIDIA",
+	}
+
+	initOpenCL_NVIDIA()
+
+	kind "StaticLib"
+	targetdir "../../../../../lib"
+
+	includedirs { ".", "../../../..", "../../../../../Glut" }
+
+	-- Sources come from the parent OpenCL directory.
+	files {
+		"../btSoftBodySolver_OpenCL.cpp",
+		"../btSoftBodySolver_OpenCLSIMDAware.cpp",
+		"../btSoftBodySolverOutputCLtoGL.cpp",
+	}
+end
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/ApplyForces.cl b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/ApplyForces.cl
new file mode 100644
index 000000000..f9bcb88ea
--- /dev/null
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/ApplyForces.cl
@@ -0,0 +1,102 @@
+MSTRINGIFY(
+
+// Three-component dot product: sums the x, y and z lane products and ignores w.
+float adot3(float4 a, float4 b)
+{
+   float xyProducts = a.x*b.x + a.y*b.y; return xyProducts + a.z*b.z;
+}
+// Euclidean length of the xyz part of a; the w lane is replaced by zero before measuring.
+float alength3(float4 a)
+{
+	float4 xyzOnly = (float4)(a.xyz, 0.f);
+	return length(xyzOnly);
+}
+// Normalises the xyz part of a; the w lane is replaced by zero first, so the result also has w equal to zero.
+float4 anormalize3(float4 a)
+{
+	float4 xyzOnly = (float4)(a.xyz, 0.f);
+	return normalize(xyzOnly);
+}
+// Scales axis a by the three-component dot product of v and a.
+float4 projectOnAxis( float4 v, float4 a ) {
+	float alongAxis = adot3(v, a);
+	return (a*alongAxis);
+}
+// Per-vertex kernel: adds the per-cloth acceleration to each movable vertex velocity and accumulates aerodynamic drag and lift into its force accumulator.
+__kernel void 
+ApplyForcesKernel(
+	const uint numNodes,	// vertex count; work items at or beyond it do nothing
+	const float solverdt,	// time step that scales the per-cloth acceleration
+	const float epsilon,	// aerodynamics run only when the squared relative speed exceeds this
+	__global int * g_vertexClothIdentifier,	// per vertex: index into the g_cloth* arrays
+	__global float4 * g_vertexNormal,
+	__global float * g_vertexArea,
+	__global float * g_vertexInverseMass,
+	__global float * g_clothLiftFactor,
+	__global float * g_clothDragFactor,
+	__global float4 * g_clothWindVelocity,
+	__global float4 * g_clothAcceleration,
+	__global float * g_clothMediumDensity,
+	__global float4 * g_vertexForceAccumulator,	// read and updated in place
+	__global float4 * g_vertexVelocity GUID_ARG)	// read and updated in place
+{
+	unsigned int nodeID = get_global_id(0);
+	if( nodeID < numNodes )
+	{		
+		int clothId  = g_vertexClothIdentifier[nodeID];
+		float nodeIM = g_vertexInverseMass[nodeID];
+		// Skip vertices without positive inverse mass and negative cloth ids, which cannot index the per-cloth arrays.
+		if( nodeIM > 0.0f && clothId >= 0 )
+		{
+			float4 nodeV  = g_vertexVelocity[nodeID];
+			float4 normal = g_vertexNormal[nodeID];
+			float area    = g_vertexArea[nodeID];
+			float4 nodeF  = g_vertexForceAccumulator[nodeID];
+			
+			// Read per-cloth values
+			float4 clothAcceleration = g_clothAcceleration[clothId];
+			float4 clothWindVelocity = g_clothWindVelocity[clothId];
+			float liftFactor = g_clothLiftFactor[clothId];
+			float dragFactor = g_clothDragFactor[clothId];
+			float mediumDensity = g_clothMediumDensity[clothId];
+		
+			// Apply the acceleration to the cloth rather than do this via a force
+			nodeV += (clothAcceleration*solverdt);
+			// The updated velocity is stored even when the aerodynamic step below is skipped.
+			g_vertexVelocity[nodeID] = nodeV;
+
+			// Aerodynamics
+			float4 rel_v = nodeV - clothWindVelocity;
+			float rel_v_len = alength3(rel_v);
+			float rel_v2 = adot3(rel_v, rel_v);
+			// All dot products use adot3 so the w lanes cannot leak into the result, matching alength3 and anormalize3.
+			if( rel_v2 > epsilon )
+			{
+				float4 rel_v_nrm = anormalize3(rel_v);
+				float4 nrm = normal;
+				// Orient the normal so it does not oppose the relative velocity.
+				nrm = nrm * (adot3(nrm, rel_v) < 0 ? -1.f : 1.f);
+
+				float4 fDrag = (float4)(0.f, 0.f, 0.f, 0.f);
+				float4 fLift = (float4)(0.f, 0.f, 0.f, 0.f);
+
+				float n_dot_v = adot3(nrm, rel_v_nrm);
+
+				// drag force
+				if ( dragFactor > 0.f )
+					fDrag = 0.5f * dragFactor * mediumDensity * rel_v2 * area * n_dot_v * (-1.0f) * rel_v_nrm;
+
+				// lift force
+				// Check angle of attack
+				// cos(10 degrees) = 0.98480
+				if ( 0 < n_dot_v && n_dot_v < 0.98480f)
+					fLift = 0.5f * liftFactor * mediumDensity * rel_v_len * area * sqrt(1.0f-n_dot_v*n_dot_v) * (cross(cross(nrm, rel_v_nrm), rel_v_nrm));
+				
+				nodeF += fDrag + fLift;
+				g_vertexForceAccumulator[nodeID] = nodeF;	
+			}
+		}
+	}
+}
+
+);
\ No newline at end of file
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/ComputeBounds.cl b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/ComputeBounds.cl
new file mode 100644
index 000000000..2ae7148ad
--- /dev/null
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/ComputeBounds.cl
@@ -0,0 +1,82 @@
+MSTRINGIFY(
+#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n
+#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable\n
+// Reduces vertex positions to per-soft-body bounds: each work-group accumulates min/max in local memory, then merges them into the global arrays with atomics.
+__kernel void
+ComputeBoundsKernel( 
+	const int numNodes,	// vertex count; work items at or beyond it skip the vertex step
+	const int numSoftBodies,	// number of 4-uint entries used in each bounds array
+	__global int * g_vertexClothIdentifier,	// per vertex soft body index; negative entries are skipped
+	__global float4 * g_vertexPositions,
+	/* Unfortunately, to get the atomics below to work these arrays cannot be */
+	/* uint4, though that is the layout of the data */
+	/* Therefore this is little-endian-only code */
+	volatile __global uint * g_clothMinBounds,
+	volatile __global uint * g_clothMaxBounds,
+	volatile __local uint * clothMinBounds,
+	volatile __local uint * clothMaxBounds)
+{
+	// Init min and max bounds arrays
+	for( int body = get_local_id(0); body < numSoftBodies; body += get_local_size(0) )
+	{
+		// Strided over the work-group so entries are initialised even when there are more soft bodies than work items.
+		clothMinBounds[body*4] = UINT_MAX;
+		clothMinBounds[body*4+1] = UINT_MAX;
+		clothMinBounds[body*4+2] = UINT_MAX;
+		clothMinBounds[body*4+3] = UINT_MAX;
+		clothMaxBounds[body*4] = 0;
+		clothMaxBounds[body*4+1] = 0;
+		clothMaxBounds[body*4+2] = 0;
+		clothMaxBounds[body*4+3] = 0;
+
+	}
+
+	barrier(CLK_LOCAL_MEM_FENCE);
+
+	int nodeID = get_global_id(0);
+	if( nodeID < numNodes )
+	{	
+		int clothIdentifier = g_vertexClothIdentifier[nodeID];
+		if( clothIdentifier >= 0 )
+		{
+			// Only xyz take part; lane w of each bounds entry is initialised but never updated.
+			float4 position = (float4)(g_vertexPositions[nodeID].xyz, 0.f);
+
+			/* Reinterpret position as uint */
+			uint4 positionUInt = (uint4)(as_uint(position.x), as_uint(position.y), as_uint(position.z), 0);
+		
+			/* Invert sign bit of positives and whole of negatives to allow comparison as unsigned ints */
+			positionUInt.x ^= (1+~(positionUInt.x >> 31) | 0x80000000);
+			positionUInt.y ^= (1+~(positionUInt.y >> 31) | 0x80000000);		
+			positionUInt.z ^= (1+~(positionUInt.z >> 31) | 0x80000000);
+		
+			// Min/max with the LDS values
+			atom_min(&(clothMinBounds[clothIdentifier*4]), positionUInt.x);
+			atom_min(&(clothMinBounds[clothIdentifier*4+1]), positionUInt.y);
+			atom_min(&(clothMinBounds[clothIdentifier*4+2]), positionUInt.z);
+
+			atom_max(&(clothMaxBounds[clothIdentifier*4]), positionUInt.x);
+			atom_max(&(clothMaxBounds[clothIdentifier*4+1]), positionUInt.y);
+			atom_max(&(clothMaxBounds[clothIdentifier*4+2]), positionUInt.z);
+		}
+	}
+	
+	barrier(CLK_LOCAL_MEM_FENCE);
+
+	// The global values stay in the order-preserving uint encoding produced above.
+	/* Use global atomics to update the global versions of the data */
+	for( int body = get_local_id(0); body < numSoftBodies; body += get_local_size(0) )
+	{
+		// Same stride as the initialisation loop, so every initialised entry is flushed.
+		atom_min(&(g_clothMinBounds[body*4]), clothMinBounds[body*4]);
+		atom_min(&(g_clothMinBounds[body*4+1]), clothMinBounds[body*4+1]);
+		atom_min(&(g_clothMinBounds[body*4+2]), clothMinBounds[body*4+2]);
+
+		atom_max(&(g_clothMaxBounds[body*4]), clothMaxBounds[body*4]);		
+		atom_max(&(g_clothMaxBounds[body*4+1]), clothMaxBounds[body*4+1]);
+		atom_max(&(g_clothMaxBounds[body*4+2]), clothMaxBounds[body*4+2]);
+	}
+}
+
+
+);
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/Integrate.cl b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/Integrate.cl
new file mode 100644
index 000000000..bb2d98a53
--- /dev/null
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/Integrate.cl
@@ -0,0 +1,35 @@
+MSTRINGIFY(
+
+// Node indices for each link
+
+
+// Integration step for one vertex per work item: velocity gains force * inverse mass * dt, position gains the new velocity * dt, and the force is cleared.
+__kernel void
+IntegrateKernel( 
+	const int numNodes,
+	const float solverdt,
+	__global float * g_vertexInverseMasses,
+	__global float4 * g_vertexPositions,
+	__global float4 * g_vertexVelocity,
+	__global float4 * g_vertexPreviousPositions,
+	__global float4 * g_vertexForceAccumulator GUID_ARG)
+{
+	const int vertexIndex = get_global_id(0);
+	// Work items past the last vertex have nothing to integrate.
+	if( vertexIndex >= numNodes )
+		return;
+
+	const float4 oldPosition = g_vertexPositions[vertexIndex];
+	const float4 oldVelocity = g_vertexVelocity[vertexIndex];
+	const float4 accumulatedForce = g_vertexForceAccumulator[vertexIndex];
+	const float invMass = g_vertexInverseMasses[vertexIndex];
+
+	// Keep the pre-step position, then advance the velocity first so the position update uses the new velocity.
+	g_vertexPreviousPositions[vertexIndex] = oldPosition;
+	const float4 newVelocity = oldVelocity + accumulatedForce * invMass * solverdt;
+	g_vertexForceAccumulator[vertexIndex] = (float4)(0.f, 0.f, 0.f, 0.0f);
+	g_vertexPositions[vertexIndex] = oldPosition + newVelocity * solverdt;
+	g_vertexVelocity[vertexIndex] = newVelocity;	
+}
+
+);
\ No newline at end of file
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/OutputToVertexArray.cl b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/OutputToVertexArray.cl
new file mode 100644
index 000000000..989137777
--- /dev/null
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/OutputToVertexArray.cl
@@ -0,0 +1,46 @@
+MSTRINGIFY(
+// Copies the xyz of positions and normals of numNodes vertices, starting at startNode, into g_vertexBuffer.
+__kernel void 
+OutputToVertexArrayWithNormalsKernel( 
+	const int startNode, const int numNodes, __global float *g_vertexBuffer,
+	const int positionOffset, const int positionStride, const __global float4* g_vertexPositions, 
+	const int normalOffset, const int normalStride, const __global float4* g_vertexNormals  )
+{
+	const int outputIndex = get_global_id(0);
+	if( outputIndex >= numNodes )
+		return;
+
+	// Source entries are offset by startNode; destination entries are not.
+	const float4 srcPosition = g_vertexPositions[outputIndex + startNode];
+	const float4 srcNormal = g_vertexNormals[outputIndex + startNode];
+	// Offsets and strides are counted in floats of g_vertexBuffer.
+	__global float *positionOut = g_vertexBuffer + (outputIndex * positionStride + positionOffset);
+	positionOut[0] = srcPosition.x;
+	positionOut[1] = srcPosition.y;
+	positionOut[2] = srcPosition.z;
+
+	__global float *normalOut = g_vertexBuffer + (outputIndex * normalStride + normalOffset);
+	normalOut[0] = srcNormal.x;
+	normalOut[1] = srcNormal.y;
+	normalOut[2] = srcNormal.z;
+}
+// Position-only variant: copies the xyz of numNodes vertex positions, starting at startNode, into g_vertexBuffer.
+__kernel void 
+OutputToVertexArrayWithoutNormalsKernel(
+	const int startNode, const int numNodes, __global float *g_vertexBuffer,
+	const int positionOffset, const int positionStride, const __global float4* g_vertexPositions )
+{
+	const int outputIndex = get_global_id(0);
+	if( outputIndex >= numNodes )
+		return;
+
+	const float4 srcPosition = g_vertexPositions[outputIndex + startNode];
+
+	// positionStride and positionOffset are counted in floats of g_vertexBuffer.
+	__global float *positionOut = g_vertexBuffer + (outputIndex * positionStride + positionOffset);
+	positionOut[0] = srcPosition.x;
+	positionOut[1] = srcPosition.y;
+	positionOut[2] = srcPosition.z;
+}
+
+);
\ No newline at end of file
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/PrepareLinks.cl b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/PrepareLinks.cl
new file mode 100644
index 000000000..542a11ec2
--- /dev/null
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/PrepareLinks.cl
@@ -0,0 +1,38 @@
+MSTRINGIFY(
+
+
+// For each link, caches the xyz difference of the previous positions of its two end nodes and the reciprocal of squared length times massLSC.
+__kernel void 
+PrepareLinksKernel( 
+	const int numLinks,
+	__global int2 * g_linksVertexIndices,
+	__global float * g_linksMassLSC,
+	__global float4 * g_nodesPreviousPosition,
+	__global float * g_linksLengthRatio,
+	__global float4 * g_linksCurrentLength GUID_ARG)
+{
+	const int linkIndex = get_global_id(0);
+
+	// Work items beyond the last link do nothing.
+	if( linkIndex >= numLinks )
+		return;
+
+	// x and y of the index pair name the two end nodes of the link.
+	const int2 endNodes = g_linksVertexIndices[linkIndex];
+	const float4 previousStart = g_nodesPreviousPosition[endNodes.x];
+	const float4 previousEnd = g_nodesPreviousPosition[endNodes.y];
+
+	const float linkMassLSC = g_linksMassLSC[linkIndex];
+
+	// Vector from the first node to the second; w is cleared so it adds nothing to the dot product.
+	float4 linkVector = previousEnd - previousStart;
+	linkVector.w = 0.f;
+
+	// The product is divided through without any check for zero.
+	const float scaledLengthSquared = dot(linkVector, linkVector)*linkMassLSC;
+
+	g_linksCurrentLength[linkIndex] = linkVector;
+	g_linksLengthRatio[linkIndex]   = 1.0f/scaledLengthSquared;		
+}
+
+);
\ No newline at end of file
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/SolveCollisionsAndUpdateVelocities.cl b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/SolveCollisionsAndUpdateVelocities.cl
new file mode 100644
index 000000000..92fb939de
--- /dev/null
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/SolveCollisionsAndUpdateVelocities.cl
@@ -0,0 +1,204 @@
+MSTRINGIFY(
+
+
+// Dot product over the x, y and z lanes only; the w lanes of both operands are ignored.
+float mydot3a(float4 a, float4 b)
+{
+   float xyProducts = a.x*b.x + a.y*b.y; return xyProducts + a.z*b.z;
+}
+
+// Range of collider entries for one cloth: the kernel visits indices from firstObject up to, but not including, endObject.
+typedef struct 
+{
+	int firstObject;
+	int endObject;
+} CollisionObjectIndices;
+// One collider as read from g_collisionObjectDetails. NOTE(review): presumably mirrors a host-side struct, so field order and padding should not change -- confirm.
+typedef struct 
+{
+	float4 shapeTransform[4]; // column major 4x4 matrix
+	float4 linearVelocity;
+	float4 angularVelocity;
+
+	int softBodyIdentifier;
+	int collisionShapeType;	// compared with CAPSULE_SHAPE_PROXYTYPE by the kernel
+	
+
+	// Shape information
+	// Compressed from the union
+	float radius;
+	float halfHeight;
+	int upAxis;	// 0, 1 or 2 selects x, y or z as the capsule axis
+		
+	float margin;
+	float friction;
+
+	int padding0;
+	
+} CollisionShapeDescription;
+
+// From btBroadphaseProxy.h
+__constant int CAPSULE_SHAPE_PROXYTYPE = 10;
+// Returns matrix * vector, where matrix[i] holds column i of the 4x4 matrix.
+// Each output lane is the dot product of one matrix row with the vector.
+float4 matrixVectorMul( float4 matrix[4], float4 vector )
+{
+	const float4 rowX = (float4)(matrix[0].x, matrix[1].x, matrix[2].x, matrix[3].x);
+	const float4 rowY = (float4)(matrix[0].y, matrix[1].y, matrix[2].y, matrix[3].y);
+	const float4 rowZ = (float4)(matrix[0].z, matrix[1].z, matrix[2].z, matrix[3].z);
+	const float4 rowW = (float4)(matrix[0].w, matrix[1].w, matrix[2].w, matrix[3].w);
+
+	const float outX = dot(rowX, vector);
+	const float outY = dot(rowY, vector);
+	const float outZ = dot(rowZ, vector);
+	const float outW = dot(rowW, vector);
+	return (float4)(outX, outY, outZ, outW);
+}
+// Per-vertex kernel: rebuilds the velocity from the position change, pushes the vertex out of the capsule colliders listed for its cloth, and writes back velocity, friction force and position.
+__kernel void 
+SolveCollisionsAndUpdateVelocitiesKernel( 
+	const int numNodes,	// vertex count; work items at or beyond it do nothing
+	const float isolverdt,	// scales position differences into velocities
+	__global int *g_vertexClothIdentifier,	// per vertex cloth index; negative means not a valid cloth
+	__global float4 *g_vertexPreviousPositions,
+	__global float * g_perClothFriction,
+	__global float * g_clothDampingFactor,
+	__global CollisionObjectIndices * g_perClothCollisionObjectIndices,
+	__global CollisionShapeDescription * g_collisionObjectDetails,
+	__global float4 * g_vertexForces,	// output: accumulated friction term
+	__global float4 *g_vertexVelocities,	// output
+	__global float4 *g_vertexPositions GUID_ARG)	// input and output
+{
+	int nodeID = get_global_id(0);
+	float4 forceOnVertex = (float4)(0.f, 0.f, 0.f, 0.f);
+	
+	if( get_global_id(0) < numNodes )
+	{	
+		int clothIdentifier = g_vertexClothIdentifier[nodeID];
+		
+		// Abort if this is not a valid cloth
+		if( clothIdentifier < 0 )
+			return;
+
+		// Current and previous positions are loaded with w forced to 1.
+		float4 position = (float4)(g_vertexPositions[nodeID].xyz, 1.f);
+		float4 previousPosition = (float4)(g_vertexPreviousPositions[nodeID].xyz, 1.f);
+			
+		float clothFriction = g_perClothFriction[clothIdentifier];
+		float dampingFactor = g_clothDampingFactor[clothIdentifier];
+		float velocityCoefficient = (1.f - dampingFactor);		
+		float4 difference = position - previousPosition;
+		float4 velocity = difference*velocityCoefficient*isolverdt;
+		
+		CollisionObjectIndices collisionObjectIndices = g_perClothCollisionObjectIndices[clothIdentifier];
+	
+		int numObjects = collisionObjectIndices.endObject - collisionObjectIndices.firstObject;
+		
+		if( numObjects > 0 )
+		{
+			// We have some possible collisions to deal with
+			for( int collision = collisionObjectIndices.firstObject; collision < collisionObjectIndices.endObject; ++collision )
+			{
+				CollisionShapeDescription shapeDescription = g_collisionObjectDetails[collision];
+				float colliderFriction = shapeDescription.friction;
+				// Only capsule colliders are handled; every other shape type is ignored.
+				if( shapeDescription.collisionShapeType == CAPSULE_SHAPE_PROXYTYPE )
+				{
+					// Colliding with a capsule
+
+					float capsuleHalfHeight = shapeDescription.halfHeight;
+					float capsuleRadius = shapeDescription.radius;
+					float capsuleMargin = shapeDescription.margin;
+					int capsuleupAxis = shapeDescription.upAxis;
+
+					// Four columns of worldTransform matrix
+					float4 worldTransform[4];
+					worldTransform[0] = shapeDescription.shapeTransform[0];
+					worldTransform[1] = shapeDescription.shapeTransform[1];
+					worldTransform[2] = shapeDescription.shapeTransform[2];
+					worldTransform[3] = shapeDescription.shapeTransform[3];
+
+					// Correctly define capsule centerline vector 
+					float4 c1 = (float4)(0.f, 0.f, 0.f, 1.f); 
+					float4 c2 = (float4)(0.f, 0.f, 0.f, 1.f);
+					c1.x = select( 0.f, -capsuleHalfHeight, capsuleupAxis == 0 );
+					c1.y = select( 0.f, -capsuleHalfHeight, capsuleupAxis == 1 );
+					c1.z = select( 0.f, -capsuleHalfHeight, capsuleupAxis == 2 );
+					c2.x = -c1.x;
+					c2.y = -c1.y;
+					c2.z = -c1.z;
+					// c1 and c2 sit at -halfHeight and +halfHeight along the axis selected by upAxis (0, 1 or 2).
+
+					float4 worldC1 = matrixVectorMul(worldTransform, c1);
+					float4 worldC2 = matrixVectorMul(worldTransform, c2);
+					float4 segment = (worldC2 - worldC1);
+
+					// compute distance of tangent to vertex along line segment in capsule
+					float distanceAlongSegment = -( mydot3a( (worldC1 - position), segment ) / mydot3a(segment, segment) );
+					// NOTE(review): position has w = 1 and length() and normalize() below use all four lanes, so they are exact only if worldC1 and worldC2 also have w = 1 -- confirm.
+					float4 closestPoint = (worldC1 + (float4)(segment * distanceAlongSegment));
+					float distanceFromLine = length(position - closestPoint);
+					float distanceFromC1 = length(worldC1 - position);
+					float distanceFromC2 = length(worldC2 - position);
+					// distanceAlongSegment is divided by the squared segment length: below 0 lies before worldC1, above 1 lies past worldC2.
+					// Final distance from collision, point to push from, direction to push in
+					// for impulse force
+					float dist;
+					float4 normalVector;
+					if( distanceAlongSegment < 0 )
+					{
+						dist = distanceFromC1;
+						normalVector = (float4)(normalize(position - worldC1).xyz, 0.f);
+					} else if( distanceAlongSegment > 1.f ) {
+						dist = distanceFromC2;
+						normalVector = (float4)(normalize(position - worldC2).xyz, 0.f);	
+					} else {
+						dist = distanceFromLine;
+						normalVector = (float4)(normalize(position - closestPoint).xyz, 0.f);
+					}
+					// NOTE(review): the collider origin below is read from the w lanes of columns 0..2, while matrixVectorMul adds column 3 for points with w = 1 -- confirm the intended layout.
+					float4 colliderLinearVelocity = shapeDescription.linearVelocity;
+					float4 colliderAngularVelocity = shapeDescription.angularVelocity;
+					float4 velocityOfSurfacePoint = colliderLinearVelocity + cross(colliderAngularVelocity, position - (float4)(worldTransform[0].w, worldTransform[1].w, worldTransform[2].w, 0.f));
+
+					float minDistance = capsuleRadius + capsuleMargin;
+					
+					// In case of no collision, this is the value of velocity
+					velocity = (position - previousPosition) * velocityCoefficient * isolverdt;
+					
+					
+					// Check for a collision
+					if( dist < minDistance )
+					{
+						// Project back to surface along normal
+						position = position + (float4)((minDistance - dist)*normalVector*0.9f);
+						velocity = (position - previousPosition) * velocityCoefficient * isolverdt;
+						float4 relativeVelocity = velocity - velocityOfSurfacePoint;
+						// p1 and p2 are unit directions perpendicular to the normal; the relative velocity projected on them is the sliding part.
+						float4 p1 = normalize(cross(normalVector, segment));
+						float4 p2 = normalize(cross(p1, normalVector));
+						// Full friction is sum of velocities in each direction of plane
+						float4 frictionVector = p1*mydot3a(relativeVelocity, p1) + p2*mydot3a(relativeVelocity, p2);
+
+						// Real friction is peak friction corrected by friction coefficients
+						frictionVector = frictionVector * (colliderFriction*clothFriction);
+
+						float approachSpeed = dot(relativeVelocity, normalVector);
+						// Friction is accumulated only when the vertex is not moving away from the collider along the normal.
+						if( approachSpeed <= 0.0f )
+							forceOnVertex -= frictionVector;
+					}
+				}
+			}
+		}
+		// Write back the results with their w lanes cleared.
+		g_vertexVelocities[nodeID] = (float4)(velocity.xyz, 0.f);	
+
+		// Update external force
+		g_vertexForces[nodeID] = (float4)(forceOnVertex.xyz, 0.f);
+
+		g_vertexPositions[nodeID] = (float4)(position.xyz, 0.f);
+	}
+}
+
+);
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/SolveCollisionsAndUpdateVelocitiesSIMDBatched.cl b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/SolveCollisionsAndUpdateVelocitiesSIMDBatched.cl
new file mode 100644
index 000000000..8720b72e0
--- /dev/null
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/SolveCollisionsAndUpdateVelocitiesSIMDBatched.cl
@@ -0,0 +1,242 @@
+MSTRINGIFY(
+
+//#pragma OPENCL EXTENSION cl_amd_printf:enable\n
+// Dot product restricted to the x, y and z lanes; the w lanes never contribute.
+float mydot3a(float4 a, float4 b) {
+   float xyProducts = a.x*b.x + a.y*b.y;
+   return xyProducts + a.z*b.z;
+}
+// Length of the xyz part of a; the w lane is replaced by zero before the length is taken.
+float mylength3(float4 a)
+{
+	float4 xyzOnly = (float4)(a.xyz, 0.f);
+	return length(xyzOnly);
+}
+// Unit vector of the xyz part of a; the w lane is replaced by zero first, so the result has w equal to zero.
+float4 mynormalize3(float4 a)
+{
+	float4 xyzOnly = (float4)(a.xyz, 0.f);
+	return normalize(xyzOnly);
+}
+// Range of collider entries for one cloth; the kernel uses endObject - firstObject as the collider count and loads entries starting at firstObject.
+typedef struct 
+{
+	int firstObject;
+	int endObject;
+} CollisionObjectIndices;
+// One collider as read from g_collisionObjectDetails. NOTE(review): presumably mirrors a host-side struct, so field order and padding should not change -- confirm.
+typedef struct 
+{
+	float4 shapeTransform[4]; // column major 4x4 matrix
+	float4 linearVelocity;
+	float4 angularVelocity;
+
+	int softBodyIdentifier;
+	int collisionShapeType;	// compared with CAPSULE_SHAPE_PROXYTYPE by the kernel
+	
+
+	// Shape information
+	// Compressed from the union
+	float radius;
+	float halfHeight;
+	int upAxis;	// 0, 1 or 2 selects x, y or z as the capsule axis
+		
+	float margin;
+	float friction;
+
+	int padding0;
+	
+} CollisionShapeDescription;
+
+// From btBroadphaseProxy.h
+__constant int CAPSULE_SHAPE_PROXYTYPE = 10;
+// Product of a 4x4 matrix, stored as four column vectors, with a 4-lane vector.
+// Rows are gathered from the columns and each one is dotted with the vector.
+float4 matrixVectorMul( float4 matrix[4], float4 vector )
+{
+	const float4 r0 = (float4)(matrix[0].x, matrix[1].x, matrix[2].x, matrix[3].x);
+	const float4 r1 = (float4)(matrix[0].y, matrix[1].y, matrix[2].y, matrix[3].y);
+	const float4 r2 = (float4)(matrix[0].z, matrix[1].z, matrix[2].z, matrix[3].z);
+	const float4 r3 = (float4)(matrix[0].w, matrix[1].w, matrix[2].w, matrix[3].w);
+	return (float4)(
+		dot(r0, vector),
+		dot(r1, vector),
+		dot(r2, vector),
+		dot(r3, vector)
+	);
+}
+// Per-vertex kernel: copies the colliders of the cloth into local memory, pushes the vertex out of capsule colliders, applies friction as a position change, and writes back velocity, force and position.
+__kernel void 
+SolveCollisionsAndUpdateVelocitiesKernel( 
+	const int numNodes,
+	const float isolverdt,	// scales position differences into velocities
+	__global int *g_vertexClothIdentifier,	// per vertex cloth index; negative means not a valid cloth
+	__global float4 *g_vertexPreviousPositions,
+	__global float * g_perClothFriction,
+	__global float * g_clothDampingFactor,
+	__global CollisionObjectIndices * g_perClothCollisionObjectIndices,
+	__global CollisionShapeDescription * g_collisionObjectDetails,
+	__global float4 * g_vertexForces,	// output
+	__global float4 *g_vertexVelocities,	// output
+	__global float4 *g_vertexPositions,	// input and output
+	__local CollisionShapeDescription *localCollisionShapes,	// work-group scratch copy of the colliders
+	__global float * g_vertexInverseMasses)
+{
+	int nodeID = get_global_id(0);
+	float4 forceOnVertex = (float4)(0.f, 0.f, 0.f, 0.f);
+	// NOTE(review): numNodes is never read; every work item indexes the per-vertex arrays, so the launch size presumably matches the padded vertex count -- confirm.
+	int clothIdentifier = g_vertexClothIdentifier[nodeID];
+
+	// Abort if this is not a valid cloth
+	if( clothIdentifier < 0 )
+		return;
+	
+	// Positions are loaded with w forced to 0.
+	float4 position = (float4)(g_vertexPositions[nodeID].xyz, 0.f);
+	float4 previousPosition = (float4)(g_vertexPreviousPositions[nodeID].xyz, 0.f);
+			
+	float clothFriction = g_perClothFriction[clothIdentifier];
+	float dampingFactor = g_clothDampingFactor[clothIdentifier];
+	float velocityCoefficient = (1.f - dampingFactor);		
+	float4 difference = position - previousPosition;
+	float4 velocity = difference*velocityCoefficient*isolverdt;			
+	float inverseMass = g_vertexInverseMasses[nodeID];
+		
+	CollisionObjectIndices collisionObjectIndices = g_perClothCollisionObjectIndices[clothIdentifier];
+	
+	int numObjects = collisionObjectIndices.endObject - collisionObjectIndices.firstObject;
+		
+	if( numObjects > 0 )
+	{
+		// We have some possible collisions to deal with
+		
+		// First load all of the collision objects into LDS
+		int numObjects = collisionObjectIndices.endObject - collisionObjectIndices.firstObject;
+		if( get_local_id(0) < numObjects )
+		{
+			localCollisionShapes[get_local_id(0)] = g_collisionObjectDetails[ collisionObjectIndices.firstObject + get_local_id(0) ];
+		}
+	}
+
+	// Safe as the vertices are padded so that not more than one soft body is in a group
+	barrier(CLK_LOCAL_MEM_FENCE);
+
+	// Annoyingly, even though I know the flow control is not varying, the compiler will not let me skip this
+	if( numObjects > 0 )
+	{
+		// Collider indices in this loop are relative to the local copy, starting at 0.
+		
+		// We have some possible collisions to deal with
+		for( int collision = 0; collision < numObjects; ++collision )
+		{
+			CollisionShapeDescription shapeDescription = localCollisionShapes[collision];
+			float colliderFriction = localCollisionShapes[collision].friction;
+		
+			if( localCollisionShapes[collision].collisionShapeType == CAPSULE_SHAPE_PROXYTYPE )
+			{
+				// Colliding with a capsule
+
+				float capsuleHalfHeight = localCollisionShapes[collision].halfHeight;
+				float capsuleRadius = localCollisionShapes[collision].radius;
+				float capsuleMargin = localCollisionShapes[collision].margin;
+				int capsuleupAxis = localCollisionShapes[collision].upAxis;
+				// Replace a non-positive half height with a small positive value.
+				if ( capsuleHalfHeight <= 0 )
+						capsuleHalfHeight = 0.0001f;
+				float4 worldTransform[4];
+				worldTransform[0] = localCollisionShapes[collision].shapeTransform[0];
+				worldTransform[1] = localCollisionShapes[collision].shapeTransform[1];
+				worldTransform[2] = localCollisionShapes[collision].shapeTransform[2];
+				worldTransform[3] = localCollisionShapes[collision].shapeTransform[3];
+
+				// Correctly define capsule centerline vector 
+				float4 c1 = (float4)(0.f, 0.f, 0.f, 1.f); 
+				float4 c2 = (float4)(0.f, 0.f, 0.f, 1.f);
+				c1.x = select( 0.f, -capsuleHalfHeight, capsuleupAxis == 0 );
+				c1.y = select( 0.f, -capsuleHalfHeight, capsuleupAxis == 1 );
+				c1.z = select( 0.f, -capsuleHalfHeight, capsuleupAxis == 2 );
+				c2.x = -c1.x;
+				c2.y = -c1.y;
+				c2.z = -c1.z;
+
+				float4 worldC1 = matrixVectorMul(worldTransform, c1);
+				float4 worldC2 = matrixVectorMul(worldTransform, c2);
+				float4 segment = (float4)((worldC2 - worldC1).xyz, 0.f);
+				// distanceAlongSegment is a signed length from worldC1 along the unit segment direction.
+				float4 segmentNormalized = mynormalize3(segment);
+				float distanceAlongSegment =mydot3a( (position - worldC1), segmentNormalized );
+
+				float4 closestPointOnSegment = (worldC1 + (float4)(segmentNormalized * distanceAlongSegment));
+				float distanceFromLine = mylength3(position - closestPointOnSegment);
+				float distanceFromC1 = mylength3(worldC1 - position);
+				float distanceFromC2 = mylength3(worldC2 - position);
+	
+				// Final distance from collision, point to push from, direction to push in
+				// for impulse force
+				float dist;
+				float4 normalVector;
+
+				if( distanceAlongSegment < 0 )
+				{
+					dist = distanceFromC1;
+					normalVector = (float4)(normalize(position - worldC1).xyz, 0.f);		
+				} else if( distanceAlongSegment > length(segment) ) {
+					dist = distanceFromC2;
+					normalVector = (float4)(normalize(position - worldC2).xyz, 0.f);	
+				} else {
+					dist = distanceFromLine;
+					normalVector = (float4)(normalize(position - closestPointOnSegment).xyz, 0.f);
+				}
+						
+				float minDistance = capsuleRadius + capsuleMargin;
+				float4 closestPointOnSurface = (float4)((position + (minDistance - dist) * normalVector).xyz, 0.f);
+										
+				float4 colliderLinearVelocity = shapeDescription.linearVelocity;
+				float4 colliderAngularVelocity = shapeDescription.angularVelocity;
+				float4 velocityOfSurfacePoint = colliderLinearVelocity + cross(colliderAngularVelocity, closestPointOnSurface - (float4)(worldTransform[0].w, worldTransform[1].w, worldTransform[2].w, 0.f));
+				// NOTE(review): the collider origin above is read from the w lanes of columns 0..2, while matrixVectorMul adds column 3 for points with w = 1 -- confirm the intended layout.
+					
+				// Check for a collision
+				if( dist < minDistance )
+				{
+					// Project back to surface along normal
+					position = closestPointOnSurface;
+					velocity = (position - previousPosition) * velocityCoefficient * isolverdt;
+					float4 relativeVelocity = velocity - velocityOfSurfacePoint;
+
+					float4 p1 = mynormalize3(cross(normalVector, segment));
+					float4 p2 = mynormalize3(cross(p1, normalVector));
+					
+					float4 tangentialVel = p1*mydot3a(relativeVelocity, p1) + p2*mydot3a(relativeVelocity, p2);
+					float frictionCoef = (colliderFriction * clothFriction);
+					if (frictionCoef>1.f)
+						frictionCoef = 1.f;
+					// NOTE(review): the test below passes for normal relative speeds of -0.001 and above, which includes separating motion -- confirm it matches the comment that follows.
+					//only apply friction if objects are not moving apart
+					float projVel = mydot3a(relativeVelocity,normalVector);
+					if ( projVel >= -0.001f)
+					{
+						if ( inverseMass > 0 )
+						{
+							//float4 myforceOnVertex = -tangentialVel * frictionCoef *  isolverdt * (1.0f / inverseMass);
+							position += (-tangentialVel * frictionCoef) / (isolverdt);
+						}
+					}						
+					
+					// Recompute the velocity from the position after the friction displacement
+					velocity = (position - previousPosition) * velocityCoefficient * isolverdt;
+
+				}
+			}
+		}
+	}
+	
+	g_vertexVelocities[nodeID] = (float4)(velocity.xyz, 0.f);	
+	// forceOnVertex is never changed after its zero initialisation in this kernel, so zeros are stored.
+	// Update external force
+	g_vertexForces[nodeID] = (float4)(forceOnVertex.xyz, 0.f);
+
+	g_vertexPositions[nodeID] = (float4)(position.xyz, 0.f);
+}
+
+);
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/SolvePositions.cl b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/SolvePositions.cl
new file mode 100644
index 000000000..e4a5341c6
--- /dev/null
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/SolvePositions.cl
@@ -0,0 +1,57 @@
+
+
+
+MSTRINGIFY(
+
+// Dot product of the x, y and z lanes of a and b; w is not used.
+float mydot3(float4 a, float4 b)
+{
+   float xyProducts = a.x*b.x + a.y*b.y; return xyProducts + a.z*b.z;
+}
+
+// Relaxes one link per work item: both end nodes are moved along the link by an amount weighted with their inverse masses.
+__kernel void 
+SolvePositionsFromLinksKernel( 
+	const int startLink,
+	const int numLinks,
+	const float kst,
+	const float ti,
+	__global int2 * g_linksVertexIndices,
+	__global float * g_linksMassLSC,
+	__global float * g_linksRestLengthSquared,
+	__global float * g_verticesInverseMass,
+	__global float4 * g_vertexPositions GUID_ARG)
+	
+{
+	// numLinks bounds the work item id; startLink is the offset of the first link handled by this launch.
+	// ti is accepted but never read.
+	if( get_global_id(0) >= numLinks )
+		return;
+
+	const int linkIndex = get_global_id(0) + startLink;
+	const float linkMassLSC = g_linksMassLSC[linkIndex];
+	const float restLengthSq = g_linksRestLengthSquared[linkIndex];
+
+	// Only links whose massLSC is greater than zero are processed.
+	if( !(linkMassLSC > 0.0f) )
+		return;
+
+	const int2 endNodes = g_linksVertexIndices[linkIndex];
+	const float4 startPosition = g_vertexPositions[endNodes.x];
+	const float4 endPosition = g_vertexPositions[endNodes.y];
+
+	const float startInvMass = g_verticesInverseMass[endNodes.x];
+	const float endInvMass = g_verticesInverseMass[endNodes.y];
+
+	// The correction factor is built from the squared lengths, so no square root is taken.
+	const float4 delta = endPosition - startPosition;
+	const float lengthSq = mydot3(delta, delta);
+	const float correction = ((restLengthSq - lengthSq)/(linkMassLSC*(restLengthSq+lengthSq)))*kst;
+
+	// The first node moves against delta and the second along it; the first node is stored first.
+	g_vertexPositions[endNodes.x] = startPosition - delta*(correction*startInvMass);
+	g_vertexPositions[endNodes.y] = endPosition + delta*(correction*endInvMass);
+
+}
+
+);
\ No newline at end of file
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/SolvePositionsSIMDBatched.cl b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/SolvePositionsSIMDBatched.cl
new file mode 100644
index 000000000..e99bbf23d
--- /dev/null
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/SolvePositionsSIMDBatched.cl
@@ -0,0 +1,130 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+MSTRINGIFY(
+// Dot product of the x, y and z components of two float4 values; w is ignored.
+float mydot3(float4 a, float4 b)
+{
+   return a.x*b.x + a.y*b.y + a.z*b.z;
+}
+// SIMD-batched link solve: each wavefront stages its vertices in local memory, relaxes its link batches there and writes the positions back.
+__kernel __attribute__((reqd_work_group_size(WAVEFRONT_BLOCK_MULTIPLIER*WAVEFRONT_SIZE, 1, 1)))
+void 
+SolvePositionsFromLinksKernel( 
+	const int startWaveInBatch,
+	const int numWaves,
+	const float kst,
+	const float ti, // not read by this kernel
+	__global int2 *g_wavefrontBatchCountsVertexCounts,
+	__global int *g_vertexAddressesPerWavefront,
+	__global int2 * g_linksVertexIndices,
+	__global float * g_linksMassLSC,
+	__global float * g_linksRestLengthSquared,
+	__global float * g_verticesInverseMass,
+	__global float4 * g_vertexPositions,
+	__local int2 *wavefrontBatchCountsVertexCounts, // not read by this kernel
+	__local float4 *vertexPositionSharedData,
+	__local float *vertexInverseMassSharedData)
+{
+	const int laneInWavefront = (get_global_id(0) & (WAVEFRONT_SIZE-1)); // NOTE(review): masking assumes WAVEFRONT_SIZE is a power of two - confirm
+	const int wavefront = startWaveInBatch + (get_global_id(0) / WAVEFRONT_SIZE);
+	const int firstWavefrontInBlock = startWaveInBatch + get_group_id(0) * WAVEFRONT_BLOCK_MULTIPLIER;
+	const int localWavefront = wavefront - firstWavefrontInBlock; // wavefront index within this work-group; selects the local-memory slice
+
+	// Mask out in case there's a stray "wavefront" at the end that's been forced in through the multiplier	
+	if( wavefront < (startWaveInBatch + numWaves) )
+	{	
+		// Load the batch counts for the wavefronts
+		// (.x is the number of link batches of this wavefront, .y the number of vertices it uses)
+		int2 batchesAndVerticesWithinWavefront = g_wavefrontBatchCountsVertexCounts[wavefront];
+		int batchesWithinWavefront = batchesAndVerticesWithinWavefront.x;
+		int verticesUsedByWave = batchesAndVerticesWithinWavefront.y;
+
+		// Load the vertices for the wavefronts: each lane copies every WAVEFRONT_SIZE-th vertex into local memory
+		for( int vertex = laneInWavefront; vertex < verticesUsedByWave; vertex+=WAVEFRONT_SIZE )
+		{
+			int vertexAddress = g_vertexAddressesPerWavefront[wavefront*MAX_NUM_VERTICES_PER_WAVE + vertex];
+
+			vertexPositionSharedData[localWavefront*MAX_NUM_VERTICES_PER_WAVE + vertex] = g_vertexPositions[vertexAddress];
+			vertexInverseMassSharedData[localWavefront*MAX_NUM_VERTICES_PER_WAVE + vertex] = g_verticesInverseMass[vertexAddress];
+		}
+		// NOTE(review): this barrier and the two in the loop sit inside the wavefront mask above, so work-items of a masked-out wavefront never reach them - confirm this is acceptable on the target devices.
+		barrier(CLK_LOCAL_MEM_FENCE);
+
+		// Loop through the batches performing the solve on each in LDS
+		int baseDataLocationForWave = WAVEFRONT_SIZE * wavefront * MAX_BATCHES_PER_WAVE;	
+
+		//for( int batch = 0; batch < batchesWithinWavefront; ++batch )
+		// NOTE(review): the do/while below runs at least once even if batchesWithinWavefront is 0, unlike the commented-out for loop.
+		int batch = 0;
+		do
+		{
+			int baseDataLocation = baseDataLocationForWave + WAVEFRONT_SIZE * batch;
+			int locationOfValue = baseDataLocation + laneInWavefront;
+			
+			// Link data is indexed as [wavefront][batch][lane] with WAVEFRONT_SIZE entries per batch.
+			// These loads should all be perfectly linear across the WF
+			int2 localVertexIndices = g_linksVertexIndices[locationOfValue];
+			float massLSC = g_linksMassLSC[locationOfValue];
+			float restLengthSquared = g_linksRestLengthSquared[locationOfValue];
+			
+			// LDS vertex addresses based on logical wavefront number in block and loaded index
+			int vertexAddress0 = MAX_NUM_VERTICES_PER_WAVE * localWavefront + localVertexIndices.x;
+			int vertexAddress1 = MAX_NUM_VERTICES_PER_WAVE * localWavefront + localVertexIndices.y;
+			
+			float4 position0 = vertexPositionSharedData[vertexAddress0];
+			float4 position1 = vertexPositionSharedData[vertexAddress1];
+
+			float inverseMass0 = vertexInverseMassSharedData[vertexAddress0];
+			float inverseMass1 = vertexInverseMassSharedData[vertexAddress1]; 
+
+			float4 del = position1 - position0;
+			float len = mydot3(del, del);
+			// Links with massLSC <= 0 keep k at 0, so the correction term below is scaled by zero.
+			float k = 0;
+			if( massLSC > 0.0f )
+			{		
+				k = ((restLengthSquared - len)/(massLSC*(restLengthSquared+len)))*kst;
+			}
+			
+			position0 = position0 - del*(k*inverseMass0);
+			position1 = position1 + del*(k*inverseMass1);
+			
+			// Ensure compiler does not re-order memory operations
+			barrier(CLK_LOCAL_MEM_FENCE);
+
+			vertexPositionSharedData[vertexAddress0] = position0;
+			vertexPositionSharedData[vertexAddress1] = position1;
+			
+			// Ensure compiler does not re-order memory operations
+			barrier(CLK_LOCAL_MEM_FENCE);
+				
+			
+			++batch;
+		} while( batch < batchesWithinWavefront );
+
+		// Update the global memory vertices for the wavefronts
+		for( int vertex = laneInWavefront; vertex < verticesUsedByWave; vertex+=WAVEFRONT_SIZE )
+		{
+			int vertexAddress = g_vertexAddressesPerWavefront[wavefront*MAX_NUM_VERTICES_PER_WAVE + vertex];
+			// Only xyz comes from local memory; w is written back as 0.
+			g_vertexPositions[vertexAddress] = (float4)(vertexPositionSharedData[localWavefront*MAX_NUM_VERTICES_PER_WAVE + vertex].xyz, 0.f);
+		}		
+		
+	}
+
+}
+
+);
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/UpdateConstants.cl b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/UpdateConstants.cl
new file mode 100644
index 000000000..1d925a31f
--- /dev/null
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/UpdateConstants.cl
@@ -0,0 +1,44 @@
+MSTRINGIFY(
+
+/*#define float3 float4
+
+float dot3(float3 a, float3 b)
+{
+   return a.x*b.x + a.y*b.y + a.z*b.z;
+}*/
+
+__kernel void 
+UpdateConstantsKernel( 
+	const int numLinks,
+	__global int2 * g_linksVertexIndices,
+	__global float4 * g_vertexPositions,
+	__global float * g_vertexInverseMasses,
+	__global float * g_linksMaterialLSC,
+	__global float * g_linksMassLSC,
+	__global float * g_linksRestLengthSquared,
+	__global float * g_linksRestLengths)
+{
+	// One work-item per link: derive the rest length and the mass/stiffness term
+	// of the link from the current positions of its two end vertices.
+	const int link = get_global_id(0);
+	if( link >= numLinks )
+	{
+		return;
+	}
+
+	const int2 ends = g_linksVertexIndices[link];
+	const float materialLSC = g_linksMaterialLSC[ link ];
+
+	const float3 separation = g_vertexPositions[ends.x].xyz - g_vertexPositions[ends.y].xyz;
+	const float invMassSum = g_vertexInverseMasses[ends.x] + g_vertexInverseMasses[ends.y];
+
+	// The stored squared rest length is the square of the sqrt result,
+	// not the raw dot product.
+	const float restLength = sqrt(dot(separation, separation));
+
+	g_linksRestLengths[link] = restLength;
+	g_linksMassLSC[link] = invMassSum/materialLSC;
+	g_linksRestLengthSquared[link] = restLength*restLength;
+}
+
+);
\ No newline at end of file
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/UpdateFixedVertexPositions.cl b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/UpdateFixedVertexPositions.cl
new file mode 100644
index 000000000..3b2516e7f
--- /dev/null
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/UpdateFixedVertexPositions.cl
@@ -0,0 +1,25 @@
+MSTRINGIFY(
+	
+__kernel void 
+UpdateFixedVertexPositions(
+	const uint numNodes,
+	__global int * g_anchorIndex,
+	__global float4 * g_vertexPositions,
+	__global float4 * g_anchorPositions GUID_ARG)
+{
+	// One work-item per vertex: a vertex whose anchor index is non-negative is
+	// overwritten with the position of that anchor; other vertices are untouched.
+	const unsigned int nodeID = get_global_id(0);
+	if( nodeID >= numNodes )
+	{
+		return;
+	}
+	// The current vertex position is never needed here, so it is not loaded.
+	const int anchorIndex = g_anchorIndex[nodeID];
+	if ( anchorIndex >= 0 )
+	{
+		g_vertexPositions[nodeID] = g_anchorPositions[anchorIndex];
+	}
+}
+
+);
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/UpdateNodes.cl b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/UpdateNodes.cl
new file mode 100644
index 000000000..aa7c778ab
--- /dev/null
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/UpdateNodes.cl
@@ -0,0 +1,39 @@
+MSTRINGIFY(
+
+
+__kernel void 
+updateVelocitiesFromPositionsWithVelocitiesKernel( 
+	int numNodes,
+	float isolverdt,
+	__global float4 * g_vertexPositions,
+	__global float4 * g_vertexPreviousPositions,
+	__global int * g_vertexClothIndices,
+	__global float *g_clothVelocityCorrectionCoefficients,
+	__global float * g_clothDampingFactor,
+	__global float4 * g_vertexVelocities,
+	__global float4 * g_vertexForces GUID_ARG)
+{
+	// One work-item per vertex; surplus work-items exit immediately.
+	const int vertex = get_global_id(0);
+	if( vertex >= numNodes )
+	{
+		return;
+	}
+
+	// Damping and velocity-correction factors are looked up through the cloth index of the vertex.
+	const int cloth = g_vertexClothIndices[vertex];
+	const float correction = g_clothVelocityCorrectionCoefficients[cloth];
+	const float keepFraction = (1.f - g_clothDampingFactor[cloth]);
+
+	// Add the position change, scaled by the correction factor and isolverdt, then damp.
+	const float4 moved = g_vertexPositions[vertex] - g_vertexPreviousPositions[vertex];
+	float4 newVelocity = g_vertexVelocities[vertex];
+	newVelocity += moved*correction*isolverdt;
+	newVelocity *= keepFraction;
+
+	// Store the damped velocity and zero the force entry of the vertex.
+	g_vertexVelocities[vertex] = newVelocity;
+	g_vertexForces[vertex] = (float4)(0.f, 0.f, 0.f, 0.f);
+}
+
+);
\ No newline at end of file
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/UpdateNormals.cl b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/UpdateNormals.cl
new file mode 100644
index 000000000..d277b683a
--- /dev/null
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/UpdateNormals.cl
@@ -0,0 +1,102 @@
+MSTRINGIFY(
+// Length of the xyz part of a float4; w is replaced by 0 so it cannot contribute.
+float length3(float4 a)
+{
+	const float4 xyzOnly = (float4)(a.x, a.y, a.z, 0.0f);
+	return length(xyzOnly);
+}
+// Normalises a float4 after replacing its w component by 0.
+float4 normalize3(float4 a)
+{
+	const float4 xyzOnly = (float4)(a.x, a.y, a.z, 0.0f);
+	return normalize(xyzOnly);
+}
+
+__kernel void 
+ResetNormalsAndAreasKernel(
+	const unsigned int numNodes,
+	__global float4 * g_vertexNormals,
+	__global float * g_vertexArea GUID_ARG)
+{
+	// One work-item per vertex: clear its normal and its area.
+	const size_t vertex = get_global_id(0);
+	if( vertex >= numNodes ) { return; }
+	g_vertexNormals[vertex] = (float4)(0.0f, 0.0f, 0.0f, 0.0f);
+	g_vertexArea[vertex]    = 0.0f;
+}
+
+
+__kernel void 
+UpdateSoftBodiesKernel(
+	const unsigned int startFace,
+	const unsigned int numFaces,
+	__global int4 * g_triangleVertexIndexSet,
+	__global float4 * g_vertexPositions,
+	__global float4 * g_vertexNormals,
+	__global float * g_vertexArea,
+	__global float4 * g_triangleNormals,
+	__global float * g_triangleArea GUID_ARG)
+{
+	// One work-item per face of the batch [startFace : startFace + numFaces);
+	// surplus work-items exit immediately.
+	if( get_global_id(0) >= numFaces )
+	{
+		return;
+	}
+	const int face = get_global_id(0) + startFace;
+
+	const int4 corners = g_triangleVertexIndexSet[ face ];
+	const int v0 = corners.x;
+	const int v1 = corners.y;
+	const int v2 = corners.z;
+
+	// Every per-vertex value is loaded before anything is stored, so repeated
+	// vertex indices within one face behave as a plain load/modify/store.
+	const float4 p0 = g_vertexPositions[v0];
+	const float4 p1 = g_vertexPositions[v1];
+	const float4 p2 = g_vertexPositions[v2];
+	const float4 normalSum0 = g_vertexNormals[v0];
+	const float4 normalSum1 = g_vertexNormals[v1];
+	const float4 normalSum2 = g_vertexNormals[v2];
+	const float areaSum0 = g_vertexArea[v0];
+	const float areaSum1 = g_vertexArea[v1];
+	const float areaSum2 = g_vertexArea[v2];
+
+	// Unnormalised face normal from two edges; its length is the value that is
+	// accumulated per vertex and stored per face as the area.
+	const float4 faceNormal = cross(p1 - p0, p2 - p0);
+	const float faceArea = length(faceNormal);
+
+	// NOTE(review): the per-vertex updates are non-atomic; presumably the host only
+	// batches faces that share no vertices - confirm against the caller.
+	g_triangleNormals[face] = normalize3(faceNormal);
+	g_vertexNormals[v0] = normalSum0 + faceNormal;
+	g_vertexNormals[v1] = normalSum1 + faceNormal;
+	g_vertexNormals[v2] = normalSum2 + faceNormal;
+	g_triangleArea[face] = faceArea;
+	g_vertexArea[v0] = areaSum0 + faceArea;
+	g_vertexArea[v1] = areaSum1 + faceArea;
+	g_vertexArea[v2] = areaSum2 + faceArea;
+}
+
+__kernel void 
+NormalizeNormalsAndAreasKernel( 
+	const unsigned int numNodes,
+	__global int * g_vertexTriangleCount,
+	__global float4 * g_vertexNormals,
+	__global float * g_vertexArea GUID_ARG)
+{
+	// One work-item per vertex: replace the stored normal by normalize3 of it and
+	// divide the stored area by the triangle count of the vertex.
+	const size_t vertex = get_global_id(0);
+	if( vertex >= numNodes )
+	{
+		return;
+	}
+	// NOTE(review): a triangle count of 0 divides by zero below - confirm callers never produce one.
+	const int numTriangles = g_vertexTriangleCount[vertex];
+	g_vertexNormals[vertex] = normalize3(g_vertexNormals[vertex]);
+	g_vertexArea[vertex] = g_vertexArea[vertex]/(float)(numTriangles);
+}
+
+);
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/UpdatePositions.cl b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/UpdatePositions.cl
new file mode 100644
index 000000000..a2610314a
--- /dev/null
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/UpdatePositions.cl
@@ -0,0 +1,34 @@
+MSTRINGIFY(
+
+__kernel void 
+updateVelocitiesFromPositionsWithoutVelocitiesKernel( 
+	const int numNodes,
+	const float isolverdt,
+	__global float4 * g_vertexPositions,
+	__global float4 * g_vertexPreviousPositions,
+	__global int * g_vertexClothIndices,
+	__global float * g_clothDampingFactor,
+	__global float4 * g_vertexVelocities,
+	__global float4 * g_vertexForces GUID_ARG)
+
+{
+	// One work-item per vertex; surplus work-items exit immediately.
+	const int vertex = get_global_id(0);
+	if( vertex >= numNodes )
+	{
+		return;
+	}
+
+	// The damping factor is looked up through the cloth index of the vertex.
+	const int cloth = g_vertexClothIndices[vertex];
+	const float keepFraction = (1.f - g_clothDampingFactor[cloth]);
+
+	// The velocity is rebuilt purely from the position change, so the stored
+	// velocity is not loaded first.
+	const float4 moved = g_vertexPositions[vertex] - g_vertexPreviousPositions[vertex];
+
+	g_vertexVelocities[vertex] = moved*keepFraction*isolverdt;
+	g_vertexForces[vertex] = (float4)(0.f, 0.f, 0.f, 0.f);
+}
+
+);
\ No newline at end of file
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/UpdatePositionsFromVelocities.cl b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/UpdatePositionsFromVelocities.cl
new file mode 100644
index 000000000..ec1f4878c
--- /dev/null
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/UpdatePositionsFromVelocities.cl
@@ -0,0 +1,28 @@
+
+MSTRINGIFY(
+
+
+
+
+__kernel void 
+UpdatePositionsFromVelocitiesKernel( 
+	const int numNodes,
+	const float solverSDT,
+	__global float4 * g_vertexVelocities,
+	__global float4 * g_vertexPreviousPositions,
+	__global float4 * g_vertexCurrentPosition GUID_ARG)
+{
+	// One work-item per vertex; surplus work-items exit immediately.
+	const int vertex = get_global_id(0);
+	if( vertex >= numNodes )
+	{
+		return;
+	}
+
+	// Advance from the previous position and store the result in both arrays.
+	const float4 advanced = g_vertexPreviousPositions[vertex] + g_vertexVelocities[vertex]*solverSDT;
+	g_vertexCurrentPosition[vertex]   = advanced;
+	g_vertexPreviousPositions[vertex] = advanced;
+}
+
+);
\ No newline at end of file
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/VSolveLinks.cl b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/VSolveLinks.cl
new file mode 100644
index 000000000..19224bdaa
--- /dev/null
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/OpenCLC10/VSolveLinks.cl
@@ -0,0 +1,45 @@
+MSTRINGIFY(
+
+__kernel void 
+VSolveLinksKernel( 
+	int startLink,
+	int numLinks,
+	float kst,
+	__global int2 * g_linksVertexIndices,
+	__global float * g_linksLengthRatio,
+	__global float4 * g_linksCurrentLength,
+	__global float * g_vertexInverseMass,
+	__global float4 * g_vertexVelocity GUID_ARG)
+{
+	// One work-item per link of the batch [startLink : startLink + numLinks).
+	if( get_global_id(0) >= numLinks )
+	{
+		return;
+	}
+	const int link = get_global_id(0) + startLink;
+
+	const int2 ends = g_linksVertexIndices[link];
+	const float ratio = g_linksLengthRatio[link];
+	const float3 linkVector = g_linksCurrentLength[link].xyz;
+
+	float3 velA = g_vertexVelocity[ends.x].xyz;
+	float3 velB = g_vertexVelocity[ends.y].xyz;
+	const float invMassA = g_vertexInverseMass[ends.x];
+	const float invMassB = g_vertexInverseMass[ends.y];
+
+	// Impulse magnitude from the velocity difference projected on the link vector.
+	const float3 relativeVelocity = velA - velB;
+	const float projected = dot(linkVector, relativeVelocity);
+	const float impulse = -projected*ratio*kst;
+
+	// Apply it with opposite signs to the two ends, weighted by inverse mass.
+	const float3 changeA = linkVector*(impulse*invMassA);
+	const float3 changeB = linkVector*(impulse*invMassB);
+	velA += changeA;
+	velB -= changeB;
+
+	g_vertexVelocity[ends.x] = (float4)(velA, 0.f);
+	g_vertexVelocity[ends.y] = (float4)(velB, 0.f);
+}
+
+);
\ No newline at end of file
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverBuffer_OpenCL.h b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverBuffer_OpenCL.h
new file mode 100644
index 000000000..f824f2813
--- /dev/null
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverBuffer_OpenCL.h
@@ -0,0 +1,209 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_SOFT_BODY_SOLVER_BUFFER_OPENCL_H
+#define BT_SOFT_BODY_SOLVER_BUFFER_OPENCL_H
+
+// OpenCL support
+
+#ifdef USE_MINICL
+	#include "MiniCL/cl.h"
+#else //USE_MINICL
+	#ifdef __APPLE__
+		#include <OpenCL/OpenCL.h>
+	#else
+		#include <CL/cl.h>
+	#endif //__APPLE__
+#endif//USE_MINICL
+
+#ifndef SAFE_RELEASE
+#define SAFE_RELEASE(p)      { if(p) { (p)->Release(); (p)=NULL; } }
+#endif
+// Mirrors a CPU-side btAlignedObjectArray in an OpenCL buffer object and tracks whether the device copy is current.
+template <typename ElementType> class btOpenCLBuffer
+{
+public:
+
+	cl_command_queue	m_cqCommandQue;	// queue used for all transfers
+	cl_context			m_clContext;	// context new buffers are created in
+	cl_mem				m_buffer;		// device buffer handle; 0 until created or adopted
+
+	// Host-side array mirrored by m_buffer; the pointer is stored as given and
+	// is never deleted by this class.
+	btAlignedObjectArray< ElementType > * m_CPUBuffer;
+	
+	int  m_gpuSize;			// m_CPUBuffer->size() recorded by the last successful createBuffer()
+	bool m_onGPU;			// device copy is considered current
+	bool m_readOnlyOnGPU;	// selects CL_MEM_READ_ONLY and disables read-back
+	bool m_allocated;		// set once moveToGPU() has created a device buffer
+
+	// Creates a device buffer sized for m_CPUBuffer (or adopts *preexistingBuffer); returns false if creation fails.
+	bool createBuffer( cl_mem* preexistingBuffer = 0)
+	{
+		if( preexistingBuffer )
+		{
+			// Adopt the caller's buffer; the destructor releases it like any other.
+			m_buffer = *preexistingBuffer;
+		} 
+		else {
+			cl_int err = CL_SUCCESS;
+			cl_mem_flags flags= m_readOnlyOnGPU ? CL_MEM_READ_ONLY : CL_MEM_READ_WRITE;
+			size_t size = m_CPUBuffer->size() * sizeof(ElementType);
+			// At a minimum the buffer must exist
+			if( size == 0 )
+				size = sizeof(ElementType);
+			cl_mem newBuffer = clCreateBuffer(m_clContext, flags, size, 0, &err);
+			if( err != CL_SUCCESS )
+			{
+				// "0 &&" is required: a bare string literal is always true and would never assert.
+				btAssert( 0 && "Buffer::Buffer(m_buffer)");
+				return false;
+			}
+			// Release the buffer that is being replaced so that a resize does not leak it.
+			if( m_buffer )
+				clReleaseMemObject(m_buffer);
+			m_buffer = newBuffer;
+		}
+		m_gpuSize = m_CPUBuffer->size();
+		return true;
+	}
+
+public:
+	btOpenCLBuffer( cl_command_queue	commandQue,cl_context ctx, btAlignedObjectArray< ElementType >* CPUBuffer, bool readOnly)
+		:m_cqCommandQue(commandQue),
+		m_clContext(ctx),
+		m_buffer(0),
+		m_CPUBuffer(CPUBuffer),
+		m_gpuSize(0),
+		m_onGPU(false),
+		m_readOnlyOnGPU(readOnly),
+		m_allocated(false)
+	{
+	}
+	// Releases the device buffer if one was ever created or adopted.
+	~btOpenCLBuffer()
+	{
+		if( m_buffer )
+			clReleaseMemObject(m_buffer);
+	}
+
+	// Uploads m_CPUBuffer when the device copy is stale, (re)creating the device buffer first if needed; false on any CL failure.
+	bool moveToGPU()
+	{
+		cl_int err;
+
+		// A change of the CPU-side element count invalidates the device copy.
+		if( (m_CPUBuffer->size() != m_gpuSize) )
+		{
+			m_onGPU = false;
+		}
+
+		if( !m_allocated && m_CPUBuffer->size() == 0  )
+		{
+			// If it isn't on the GPU and yet there is no data on the CPU side this may cause a problem with some kernels.
+			// We should create *something* on the device side
+			if (!createBuffer()) {
+				return false;
+			}
+			m_allocated = true;
+		}
+
+		if( !m_onGPU && m_CPUBuffer->size() > 0 )
+		{
+			if (!m_allocated || (m_CPUBuffer->size() != m_gpuSize)) {
+				if (!createBuffer()) {
+					return false;
+				}
+				m_allocated = true;
+			}
+			// Non-blocking write (CL_FALSE): the copy is only enqueued here.
+			size_t size = m_CPUBuffer->size() * sizeof(ElementType);
+			err = clEnqueueWriteBuffer(m_cqCommandQue,m_buffer,
+				CL_FALSE,
+				0,
+				size, 
+				&((*m_CPUBuffer)[0]),0,0,0);
+			if( err != CL_SUCCESS )
+			{
+				btAssert( 0 && "CommandQueue::enqueueWriteBuffer(m_buffer)" );
+				return false;
+			}
+			m_onGPU = true;
+		}
+
+		return true;
+
+	}
+	// Blocking read-back into m_CPUBuffer that also clears m_onGPU; false if the read fails.
+	bool moveFromGPU()
+	{
+
+		cl_int err;
+		// Nothing is read for an empty array, for data not marked as on the GPU, or for a read-only buffer.
+		if (m_CPUBuffer->size() > 0) {
+			if (m_onGPU && !m_readOnlyOnGPU) {
+				size_t size = m_CPUBuffer->size() * sizeof(ElementType);
+				err = clEnqueueReadBuffer(m_cqCommandQue,
+					m_buffer,
+					CL_TRUE,
+					0,
+					size,
+					&((*m_CPUBuffer)[0]),0,0,0);
+
+				if( err != CL_SUCCESS )
+				{
+					btAssert( 0 && "CommandQueue::enqueueReadBuffer(m_buffer)" );
+					return false;
+				}
+				m_onGPU = false;
+			}
+		}
+
+		return true;
+	}
+	// Same blocking read-back as moveFromGPU() but m_onGPU is left unchanged; false if the read fails.
+	bool copyFromGPU()
+	{
+
+		cl_int err;
+		size_t size = m_CPUBuffer->size() * sizeof(ElementType);
+
+		if (m_CPUBuffer->size() > 0) {
+			if (m_onGPU && !m_readOnlyOnGPU) {
+				err = clEnqueueReadBuffer(m_cqCommandQue,
+					m_buffer,
+					CL_TRUE,
+					0,size, 
+					&((*m_CPUBuffer)[0]),0,0,0);
+
+				if( err != CL_SUCCESS )
+				{
+					btAssert( 0 && "CommandQueue::enqueueReadBuffer(m_buffer)");
+					return false;
+				}
+			}
+		}
+
+		return true;
+	}
+	// Marks the device copy as stale so that the next moveToGPU() uploads again.
+	virtual void changedOnCPU()
+	{
+		m_onGPU = false;
+	}
+}; // class btOpenCLBuffer
+
+
+#endif // #ifndef BT_SOFT_BODY_SOLVER_BUFFER_OPENCL_H
\ No newline at end of file
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverLinkData_OpenCL.h b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverLinkData_OpenCL.h
new file mode 100644
index 000000000..6921f7da9
--- /dev/null
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverLinkData_OpenCL.h
@@ -0,0 +1,99 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "BulletMultiThreaded/GpuSoftBodySolvers/Shared/btSoftBodySolverData.h"
+#include "btSoftBodySolverBuffer_OpenCL.h"
+
+
+#ifndef BT_SOFT_BODY_SOLVER_LINK_DATA_OPENCL_H
+#define BT_SOFT_BODY_SOLVER_LINK_DATA_OPENCL_H
+
+// OpenCL flavour of btSoftBodyLinkData: adds btOpenCLBuffer members for the link attributes and batching information over the links.
+class btSoftBodyLinkDataOpenCL : public btSoftBodyLinkData
+{
+public:
+	bool				m_onGPU;
+
+	cl_command_queue	m_cqCommandQue;
+	// OpenCL buffer wrappers, one per link attribute.
+	// NOTE(review): presumably each wraps the matching CPU-side array of the base class - confirm in the constructor.
+	btOpenCLBuffer<LinkNodePair> m_clLinks;
+	btOpenCLBuffer<float>							      m_clLinkStrength;
+	btOpenCLBuffer<float>								  m_clLinksMassLSC;
+	btOpenCLBuffer<float>								  m_clLinksRestLengthSquared;
+	btOpenCLBuffer<Vectormath::Aos::Vector3>			  m_clLinksCLength;
+	btOpenCLBuffer<float>								  m_clLinksLengthRatio;
+	btOpenCLBuffer<float>								  m_clLinksRestLength;
+	btOpenCLBuffer<float>								  m_clLinksMaterialLinearStiffnessCoefficient;
+	/** Start index and length of one computation batch of links. */
+	struct BatchPair
+	{
+		int start;
+		int length;
+
+		BatchPair() :
+			start(0),
+			length(0)
+		{
+		}
+
+		BatchPair( int s, int l ) : 
+			start( s ),
+			length( l )
+		{
+		}
+	};
+
+	/**
+	 * Link addressing information for each cloth.
+	 * Allows link locations to be computed independently of data batching.
+	 */
+	btAlignedObjectArray< int >							m_linkAddresses;
+
+	/**
+	 * Start and length values for computation batches over link data.
+	 */
+	btAlignedObjectArray< BatchPair >		m_batchStartLengths;
+	/** NOTE(review): queue and ctx are presumably handed to the btOpenCLBuffer members - confirm in the implementation file. */
+	btSoftBodyLinkDataOpenCL(cl_command_queue queue, cl_context ctx);
+
+	virtual ~btSoftBodyLinkDataOpenCL();
+
+	/** Allocate enough space in all link-related arrays to fit numLinks links */
+	virtual void createLinks( int numLinks );
+	
+	/** Insert the link described into the correct data structures assuming space has already been allocated by a call to createLinks */
+	virtual void setLinkAt( 
+		const LinkDescription &link, 
+		int linkIndex );
+	/** The three accelerator functions below are only declared here; their behaviour is defined in the implementation file. */
+	virtual bool onAccelerator();
+
+	virtual bool moveToAccelerator();
+
+	virtual bool moveFromAccelerator();
+
+	/**
+	 * Generate (and later update) the batching for the entire link set.
+	 * This redoes a lot of work because it batches the entire set when each cloth is inserted.
+	 * In theory we could delay it until just before we need the cloth.
+	 * It's a one-off overhead, though, so that is a later optimisation.
+	 */
+	void generateBatches();
+};
+
+
+
+#endif // #ifndef BT_SOFT_BODY_SOLVER_LINK_DATA_OPENCL_H
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverLinkData_OpenCLSIMDAware.h b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverLinkData_OpenCLSIMDAware.h
new file mode 100644
index 000000000..b20e8055f
--- /dev/null
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverLinkData_OpenCLSIMDAware.h
@@ -0,0 +1,169 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "BulletMultiThreaded/GpuSoftBodySolvers/Shared/btSoftBodySolverData.h"
+#include "btSoftBodySolverBuffer_OpenCL.h"
+
+
+#ifndef BT_SOFT_BODY_SOLVER_LINK_DATA_OPENCL_SIMDAWARE_H
+#define BT_SOFT_BODY_SOLVER_LINK_DATA_OPENCL_SIMDAWARE_H
+
+// SIMD-aware OpenCL flavour of btSoftBodyLinkData: link data is ordered by wavefront, with per-wavefront vertex lists and wavefront-local vertex indices.
+class btSoftBodyLinkDataOpenCLSIMDAware : public btSoftBodyLinkData
+{
+public:
+	bool				m_onGPU;
+
+	cl_command_queue	m_cqCommandQue;
+	// Wavefront layout parameters; the three const members can only be set by the constructor.
+	const int m_wavefrontSize;
+	const int m_linksPerWorkItem;
+	const int m_maxLinksPerWavefront;
+	int m_maxBatchesWithinWave;
+	int m_maxVerticesWithinWave;
+	int m_numWavefronts;
+
+	int m_maxVertex;
+	/** Number of link batches and number of vertices recorded for one wavefront. */
+	struct NumBatchesVerticesPair
+	{
+		int numBatches;
+		int numVertices;
+	};
+
+	btAlignedObjectArray<int>							  m_linksPerWavefront;
+	btAlignedObjectArray<NumBatchesVerticesPair>		  m_numBatchesAndVerticesWithinWaves;
+	btOpenCLBuffer< NumBatchesVerticesPair >			  m_clNumBatchesAndVerticesWithinWaves;
+
+	// All arrays here will contain batches of m_maxLinksPerWavefront links
+	// ordered by wavefront.
+	// with either global vertex pairs or local vertex pairs
+	btAlignedObjectArray< int >							  m_wavefrontVerticesGlobalAddresses; // List of global vertices per wavefront
+	btOpenCLBuffer<int>									  m_clWavefrontVerticesGlobalAddresses;
+	btAlignedObjectArray< LinkNodePair >				  m_linkVerticesLocalAddresses; // Vertex pair for the link
+	btOpenCLBuffer<LinkNodePair>						  m_clLinkVerticesLocalAddresses;
+	btOpenCLBuffer<float>							      m_clLinkStrength;
+	btOpenCLBuffer<float>								  m_clLinksMassLSC;
+	btOpenCLBuffer<float>								  m_clLinksRestLengthSquared;
+	btOpenCLBuffer<float>								  m_clLinksRestLength;
+	btOpenCLBuffer<float>								  m_clLinksMaterialLinearStiffnessCoefficient;
+	/** Start index and length of one computation batch. */
+	struct BatchPair
+	{
+		int start;
+		int length;
+
+		BatchPair() :
+			start(0),
+			length(0)
+		{
+		}
+
+		BatchPair( int s, int l ) : 
+			start( s ),
+			length( l )
+		{
+		}
+	};
+
+	/**
+	 * Link addressing information for each cloth.
+	 * Allows link locations to be computed independently of data batching.
+	 */
+	btAlignedObjectArray< int >							m_linkAddresses;
+	
+	/**
+	 * Start and length values for computation batches over link data.
+	 */
+	btAlignedObjectArray< BatchPair >		m_wavefrontBatchStartLengths;
+
+	btSoftBodyLinkDataOpenCLSIMDAware(cl_command_queue queue, cl_context ctx);
+
+	virtual ~btSoftBodyLinkDataOpenCLSIMDAware();
+
+	/** Allocate enough space in all link-related arrays to fit numLinks links */
+	virtual void createLinks( int numLinks );
+	
+	/** Insert the link described into the correct data structures assuming space has already been allocated by a call to createLinks */
+	virtual void setLinkAt( 
+		const LinkDescription &link, 
+		int linkIndex );
+	/** The three accelerator functions below are only declared here; their behaviour is defined in the implementation file. */
+	virtual bool onAccelerator();
+
+	virtual bool moveToAccelerator();
+
+	virtual bool moveFromAccelerator();
+
+	/**
+	 * Generate (and later update) the batching for the entire link set.
+	 * This redoes a lot of work because it batches the entire set when each cloth is inserted.
+	 * In theory we could delay it until just before we need the cloth.
+	 * It's a one-off overhead, though, so that is a later optimisation.
+	 */
+	void generateBatches();
+	/** Returns m_maxVerticesWithinWave. */
+	int getMaxVerticesPerWavefront()
+	{
+		return m_maxVerticesWithinWave;
+	}
+	/** Returns m_wavefrontSize. */
+	int getWavefrontSize()
+	{
+		return m_wavefrontSize;
+	}
+	/** Returns m_linksPerWorkItem. */
+	int getLinksPerWorkItem()
+	{
+		return m_linksPerWorkItem;
+	}
+	/** Returns m_maxLinksPerWavefront. */
+	int getMaxLinksPerWavefront()
+	{
+		return m_maxLinksPerWavefront;
+	}
+	/** Returns m_maxBatchesWithinWave. */
+	int getMaxBatchesPerWavefront()
+	{
+		return m_maxBatchesWithinWave;
+	}
+	/** Returns m_numWavefronts. */
+	int getNumWavefronts()
+	{
+		return m_numWavefronts;
+	}
+	/** Returns a copy of the batch/vertex counts stored for the given wavefront; the index is not checked in this function. */
+	NumBatchesVerticesPair getNumBatchesAndVerticesWithinWavefront( int wavefront )
+	{
+		return m_numBatchesAndVerticesWithinWaves[wavefront];
+	}
+	/** Returns the entry of m_wavefrontVerticesGlobalAddresses at vertexIndex; the index is not checked in this function. */
+	int getVertexGlobalAddresses( int vertexIndex )
+	{
+		return m_wavefrontVerticesGlobalAddresses[vertexIndex];
+	}
+
+	/**
+	 * Get post-batching local addresses of the vertex pair for a link assuming all vertices used by a wavefront are loaded locally.
+	 */
+	LinkNodePair getVertexPairLocalAddresses( int linkIndex )
+	{
+		return m_linkVerticesLocalAddresses[linkIndex];
+	}
+};
+
+
+
+#endif // #ifndef BT_SOFT_BODY_SOLVER_LINK_DATA_OPENCL_SIMDAWARE_H
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverOutputCLtoGL.cpp b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverOutputCLtoGL.cpp
new file mode 100644
index 000000000..1000440bd
--- /dev/null
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverOutputCLtoGL.cpp
@@ -0,0 +1,126 @@
+#include "btSoftBodySolverOutputCLtoGL.h"
+#include <stdio.h> //@todo: remove the debugging printf at some stage
+#include "btSoftBodySolver_OpenCL.h"
+#include "BulletSoftBody/btSoftBodySolverVertexBuffer.h"
+#include "btSoftBodySolverVertexBuffer_OpenGL.h"
+#include "BulletSoftBody/btSoftBody.h"
+
+////OpenCL 1.0 kernels don't use float3
+#define MSTRINGIFY(A) #A
+// Source text of the vertex-output kernels. The included .cl file supplies the
+// initialiser and the terminating semicolon (NOTE(review): presumably through the
+// MSTRINGIFY macro defined above - confirm against the .cl file).
+// Declared const because it points at string-literal storage, in line with the
+// kernel source strings declared in btSoftBodySolver_OpenCL.cpp.
+static const char* OutputToVertexArrayCLString =
+#include "OpenCLC10/OutputToVertexArray.cl"
+
+	
+#define RELEASE_CL_KERNEL(kernelName) {if( kernelName ){ clReleaseKernel( kernelName ); kernelName = 0; }}
+
+static const size_t workGroupSize = 128;
+
+/**
+ * Write one soft body's simulated vertex data into an OpenGL interop vertex buffer.
+ *
+ * softBody     - body whose solver must be an OpenCL solver (CL_SOLVER or CL_SIMD_SOLVER; asserted).
+ * vertexBuffer - output descriptor; only OPENGL_BUFFER descriptors are handled, any other type asserts.
+ *
+ * The GL buffer is acquired for OpenCL, one of the two output kernels is enqueued
+ * (with or without normals, selected by vertexBuffer->hasNormals()), the buffer is
+ * released again, and the call then blocks in clFinish() on the command queue.
+ * OpenCL failures are reported through btAssert only.
+ */
+void btSoftBodySolverOutputCLtoGL::copySoftBodyToVertexBuffer( const btSoftBody * const softBody, btVertexBufferDescriptor *vertexBuffer )
+{
+
+	btSoftBodySolver *solver = softBody->getSoftBodySolver();
+	btAssert( solver->getSolverType() == btSoftBodySolver::CL_SOLVER || solver->getSolverType() == btSoftBodySolver::CL_SIMD_SOLVER );
+	btOpenCLSoftBodySolver *clSolver = static_cast< btOpenCLSoftBodySolver * >( solver );
+	// Builds the output kernels on first use.
+	checkInitialized();
+	btOpenCLAcceleratedSoftBodyInterface* currentCloth = clSolver->findSoftBodyInterface( softBody );
+	btSoftBodyVertexDataOpenCL &vertexData( clSolver->m_vertexData );
+
+	const int firstVertex = currentCloth->getFirstVertex();
+
+	if( vertexBuffer->getBufferType() == btVertexBufferDescriptor::OPENGL_BUFFER ) {
+
+		const btOpenGLInteropVertexBufferDescriptor *openGLVertexBuffer = static_cast< btOpenGLInteropVertexBufferDescriptor* >(vertexBuffer);
+		cl_int ciErrNum = CL_SUCCESS;
+
+		cl_mem clBuffer = openGLVertexBuffer->getBuffer();
+		cl_kernel outputKernel = outputToVertexArrayWithNormalsKernel;
+		if( !vertexBuffer->hasNormals() )
+			outputKernel = outputToVertexArrayWithoutNormalsKernel;
+
+		// The GL buffer must be acquired before OpenCL may write to it.
+		ciErrNum = clEnqueueAcquireGLObjects(m_cqCommandQue, 1, &clBuffer, 0, 0, NULL);
+		if( ciErrNum != CL_SUCCESS )
+		{
+			btAssert( 0 &&  "clEnqueueAcquireGLObjects(copySoftBodyToVertexBuffer)");
+		}
+
+		int numVertices = currentCloth->getNumVertices();
+
+		// CL_SUCCESS is 0, so OR-ing the results together leaves a non-zero value
+		// as soon as any single clSetKernelArg call fails.
+		cl_int argErrNum = CL_SUCCESS;
+		argErrNum |= clSetKernelArg(outputKernel, 0, sizeof(int), &firstVertex );
+		argErrNum |= clSetKernelArg(outputKernel, 1, sizeof(int), &numVertices );
+		argErrNum |= clSetKernelArg(outputKernel, 2, sizeof(cl_mem), (void*)&clBuffer );
+		if( vertexBuffer->hasVertexPositions() )
+		{
+			int vertexOffset = vertexBuffer->getVertexOffset();
+			int vertexStride = vertexBuffer->getVertexStride();
+			argErrNum |= clSetKernelArg(outputKernel, 3, sizeof(int), &vertexOffset );
+			argErrNum |= clSetKernelArg(outputKernel, 4, sizeof(int), &vertexStride );
+			argErrNum |= clSetKernelArg(outputKernel, 5, sizeof(cl_mem), (void*)&vertexData.m_clVertexPosition.m_buffer );
+
+		}
+		if( vertexBuffer->hasNormals() )
+		{
+			int normalOffset = vertexBuffer->getNormalOffset();
+			int normalStride = vertexBuffer->getNormalStride();
+			argErrNum |= clSetKernelArg(outputKernel, 6, sizeof(int), &normalOffset );
+			argErrNum |= clSetKernelArg(outputKernel, 7, sizeof(int), &normalStride );
+			argErrNum |= clSetKernelArg(outputKernel, 8, sizeof(cl_mem), (void*)&vertexData.m_clVertexNormal.m_buffer );
+
+		}
+		if( argErrNum != CL_SUCCESS )
+		{
+			btAssert( 0 &&  "clSetKernelArg(copySoftBodyToVertexBuffer)");
+		}
+
+		// Round the global size up to a whole number of work groups.
+		// NOTE(review): this is sized from the solver-wide vertex count rather than
+		// this cloth's numVertices - confirm the kernel bounds-checks its index.
+		size_t	numWorkItems = workGroupSize*((vertexData.getNumVertices() + (workGroupSize-1)) / workGroupSize);
+		ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue, outputKernel, 1, NULL, &numWorkItems, &workGroupSize,0 ,0 ,0);
+		if( ciErrNum != CL_SUCCESS ) 
+		{
+			btAssert( 0 &&  "enqueueNDRangeKernel(copySoftBodyToVertexBuffer)");
+		}
+
+		ciErrNum = clEnqueueReleaseGLObjects(m_cqCommandQue, 1, &clBuffer, 0, 0, 0);
+		if( ciErrNum != CL_SUCCESS )
+		{
+			btAssert( 0 &&  "clEnqueueReleaseGLObjects(copySoftBodyToVertexBuffer)");
+		}
+	} else {
+		// Same "0 && message" form as the other asserts in this function; avoids
+		// comparing a string literal against a bool.
+		btAssert( 0 &&  "Undefined output for this solver output" );
+	}
+
+	// clFinish in here may not be the best thing. It's possible that we should have a waitForFrameComplete function.
+	clFinish(m_cqCommandQue);
+
+} // btSoftBodySolverOutputCLtoGL::outputToVertexBuffers
+
+/**
+ * (Re)compile both vertex-output kernels from OutputToVertexArrayCLString.
+ *
+ * Any previously built kernels are released first, so every call rebuilds.
+ * Returns true, and sets m_shadersInitialized, only when both kernel handles
+ * are non-null afterwards.
+ * NOTE(review): this assumes compileCLKernelFromString yields a null handle on
+ * failure - confirm against CLFunctions.
+ */
+bool btSoftBodySolverOutputCLtoGL::buildShaders()
+{
+	// Ensure current kernels are released first; this also clears m_shadersInitialized,
+	// so there is no "already initialised" shortcut to take after this point.
+	releaseKernels();
+
+	outputToVertexArrayWithNormalsKernel = clFunctions.compileCLKernelFromString( OutputToVertexArrayCLString, "OutputToVertexArrayWithNormalsKernel" ,"","OpenCLC10/OutputToVertexArray.cl");
+	outputToVertexArrayWithoutNormalsKernel = clFunctions.compileCLKernelFromString( OutputToVertexArrayCLString, "OutputToVertexArrayWithoutNormalsKernel" ,"","OpenCLC10/OutputToVertexArray.cl");
+
+	// Report failure instead of unconditionally claiming success.
+	const bool returnVal = ( outputToVertexArrayWithNormalsKernel != 0 ) && ( outputToVertexArrayWithoutNormalsKernel != 0 );
+
+	if( returnVal )
+		m_shadersInitialized = true;
+
+	return returnVal;
+} // btSoftBodySolverOutputCLtoGL::buildShaders
+
+/**
+ * Release both output kernels and mark the shaders as not initialised.
+ * RELEASE_CL_KERNEL skips null handles and zeroes the handle after releasing it,
+ * so calling this repeatedly is harmless.
+ */
+void btSoftBodySolverOutputCLtoGL::releaseKernels()
+{
+	RELEASE_CL_KERNEL( outputToVertexArrayWithNormalsKernel );
+	RELEASE_CL_KERNEL( outputToVertexArrayWithoutNormalsKernel );
+
+	m_shadersInitialized = false;
+} // btSoftBodySolverOutputCLtoGL::releaseKernels
+
+/**
+ * Make sure the output kernels exist, building them on first use.
+ * Returns the value of m_shadersInitialized after the attempt.
+ */
+bool btSoftBodySolverOutputCLtoGL::checkInitialized()
+{
+	// Nothing to do once a build has succeeded.
+	if( m_shadersInitialized )
+		return true;
+
+	const bool built = buildShaders();
+	if( built )
+		m_shadersInitialized = true;
+
+	return m_shadersInitialized;
+}
\ No newline at end of file
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverOutputCLtoGL.h b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverOutputCLtoGL.h
new file mode 100644
index 000000000..ab3ea264c
--- /dev/null
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverOutputCLtoGL.h
@@ -0,0 +1,62 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_SOFT_BODY_SOLVER_OUTPUT_CL_TO_GL_H
+#define BT_SOFT_BODY_SOLVER_OUTPUT_CL_TO_GL_H
+
+#include "btSoftBodySolver_OpenCL.h"
+
+/** 
+ * Class to manage movement of data from a solver to a given target.
+ * This version is the CL to GL interop version.
+ *
+ * Both kernel handles start out null; they are compiled on demand through
+ * checkInitialized() and released by the destructor via releaseKernels().
+ */
+class btSoftBodySolverOutputCLtoGL : public btSoftBodySolverOutput
+{
+protected:
+	/** Command queue that all output work is enqueued on. */
+	cl_command_queue	m_cqCommandQue;
+	/** OpenCL context handed in at construction. */
+	cl_context			m_cxMainContext;
+	/** Helper object used to compile kernels from source strings. */
+	CLFunctions			clFunctions;
+	
+	/** Kernel used when the target vertex buffer carries normals. */
+	cl_kernel		outputToVertexArrayWithNormalsKernel;
+	/** Kernel used when the target vertex buffer has no normals. */
+	cl_kernel		outputToVertexArrayWithoutNormalsKernel;
+
+	/** True once buildShaders() has reported success; cleared by releaseKernels(). */
+	bool m_shadersInitialized;
+	
+	/** Build the kernels if that has not happened yet; returns m_shadersInitialized. */
+	virtual bool checkInitialized();	
+	/** Release any existing kernels and compile both output kernels. */
+	virtual bool buildShaders();
+	/** Release both kernels and clear m_shadersInitialized. */
+	void releaseKernels();
+public:
+	/**
+	 * Store the queue and context and construct the kernel-compilation helper from them.
+	 * No kernels are compiled here.
+	 */
+	btSoftBodySolverOutputCLtoGL(cl_command_queue cqCommandQue, cl_context cxMainContext) :
+		m_cqCommandQue( cqCommandQue ),
+		m_cxMainContext( cxMainContext ),
+		clFunctions(cqCommandQue, cxMainContext),
+		outputToVertexArrayWithNormalsKernel( 0 ),
+		outputToVertexArrayWithoutNormalsKernel( 0 ),
+		m_shadersInitialized( false )
+	{
+	}
+
+	/** Release the kernels; this class itself does not release the queue or the context. */
+	virtual ~btSoftBodySolverOutputCLtoGL()
+	{
+		releaseKernels();
+	}
+
+	/** Output current computed vertex data to the vertex buffers for all cloths in the solver. */
+	virtual void copySoftBodyToVertexBuffer( const btSoftBody * const softBody, btVertexBufferDescriptor *vertexBuffer );
+};
+
+
+
+#endif // #ifndef BT_SOFT_BODY_SOLVER_OUTPUT_CL_TO_GL_H
\ No newline at end of file
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverTriangleData_OpenCL.h b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverTriangleData_OpenCL.h
new file mode 100644
index 000000000..7e3767855
--- /dev/null
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverTriangleData_OpenCL.h
@@ -0,0 +1,84 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+#include "BulletMultiThreaded/GpuSoftBodySolvers/Shared/btSoftBodySolverData.h"
+#include "btSoftBodySolverBuffer_OpenCL.h"
+
+
+#ifndef BT_SOFT_BODY_SOLVER_TRIANGLE_DATA_OPENCL_H
+#define BT_SOFT_BODY_SOLVER_TRIANGLE_DATA_OPENCL_H
+
+
+/**
+ * OpenCL-side companion of btSoftBodyTriangleData: pairs the inherited triangle
+ * data with btOpenCLBuffer wrappers and adds batching information.
+ */
+class btSoftBodyTriangleDataOpenCL : public btSoftBodyTriangleData
+{
+public:
+	/** NOTE(review): presumably the flag reported by onAccelerator() - confirm in the implementation. */
+	bool				m_onGPU;
+	/** Command queue handed in at construction. */
+	cl_command_queue    m_queue;
+
+	/** OpenCL buffer wrappers for the triangle vertex indices, areas and normals. */
+	btOpenCLBuffer<btSoftBodyTriangleData::TriangleNodeSet>					m_clVertexIndices;
+	btOpenCLBuffer<float>								m_clArea;
+	btOpenCLBuffer<Vectormath::Aos::Vector3>			m_clNormal;
+
+	/**
+	 * Triangle addressing information for each cloth.
+	 * Allows triangle locations to be computed independently of data batching.
+	 */
+	btAlignedObjectArray< int >							m_triangleAddresses;
+
+	/**
+	 * Start and length values for computation batches over triangle data.
+	 * NOTE(review): presumably first = batch start and second = batch length - confirm in generateBatches().
+	 */
+	struct btSomePair
+	{
+		btSomePair() {}
+		btSomePair(int f,int s)
+			:first(f),second(s)
+		{
+		}
+		int first;
+		int second;
+	};
+	btAlignedObjectArray< btSomePair >		m_batchStartLengths;
+
+public:
+	btSoftBodyTriangleDataOpenCL( cl_command_queue queue, cl_context ctx );
+
+	virtual ~btSoftBodyTriangleDataOpenCL();
+
+	/** Allocate enough space in all triangle-related arrays to fit numTriangles triangles */
+	virtual void createTriangles( int numTriangles );
+	
+	/** Insert the triangle described into the correct data structures assuming space has already been allocated by a call to createTriangles */
+	virtual void setTriangleAt( const btSoftBodyTriangleData::TriangleDescription &triangle, int triangleIndex );
+
+	virtual bool onAccelerator();
+
+	virtual bool moveToAccelerator();
+
+	virtual bool moveFromAccelerator();
+
+	/**
+	 * Generate (and later update) the batching for the entire triangle set.
+	 * This redoes a lot of work because it batches the entire set when each cloth is inserted.
+	 * In theory we could delay it until just before we need the cloth.
+	 * It's a one-off overhead, though, so that is a later optimisation.
+	 */
+	void generateBatches();
+}; // class btSoftBodyTriangleDataOpenCL
+
+
+#endif // #ifndef BT_SOFT_BODY_SOLVER_TRIANGLE_DATA_OPENCL_H
+
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverVertexBuffer_OpenGL.h b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverVertexBuffer_OpenGL.h
new file mode 100644
index 000000000..7c223ecc1
--- /dev/null
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverVertexBuffer_OpenGL.h
@@ -0,0 +1,166 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_SOFT_BODY_SOLVER_VERTEX_BUFFER_OPENGL_H
+#define BT_SOFT_BODY_SOLVER_VERTEX_BUFFER_OPENGL_H 
+
+
+#include "BulletSoftBody/btSoftBodySolverVertexBuffer.h"
+#ifdef USE_MINICL
+	#include "MiniCL/cl.h"
+#else //USE_MINICL
+	#ifdef __APPLE__
+		#include <OpenCL/OpenCL.h>
+	#else
+		#include <CL/cl.h>
+		#include <CL/cl_gl.h>
+	#endif //__APPLE__
+#endif//USE_MINICL
+
+
+#ifdef _WIN32//for glut.h
+#include <windows.h>
+#endif
+
+//think different
+#if defined(__APPLE__) && !defined (VMDMESA)
+#include <OpenGL/OpenGL.h>
+#include <OpenGL/gl.h>
+#include <OpenGL/glu.h>
+#include <GLUT/glut.h>
+#else
+
+
+#ifdef _WINDOWS
+#include <windows.h>
+#include <GL/gl.h>
+#include <GL/glu.h>
+#else
+#include <GL/glut.h>
+#endif //_WINDOWS
+#endif //APPLE
+
+
+
+/**
+ * Vertex buffer descriptor that exposes an OpenGL VBO to OpenCL as an interop buffer.
+ *
+ * The constructors create a write-only cl_mem view of the VBO with
+ * clCreateFromGLBuffer; the destructor releases that view. In a USE_MINICL build
+ * no interop buffer is created (the constructors assert) and m_buffer stays null.
+ */
+class btOpenGLInteropVertexBufferDescriptor : public btVertexBufferDescriptor
+{
+protected:
+	/** OpenCL context */
+	cl_context			m_context;
+
+	/** OpenCL command queue */
+	cl_command_queue	m_commandQueue;
+	
+	/** OpenCL interop buffer; null when no buffer was created */
+	cl_mem m_buffer;
+
+	/** VBO in GL that is the basis of the interop buffer */
+	GLuint m_openGLVBO;
+
+
+public:
+	/**
+	 * context is the OpenCL context this interop buffer will work in.
+	 * queue is the command queue that kernels and data movement will be enqueued into.
+	 * openGLVBO is the OpenGL vertex buffer data will be copied into.
+	 * vertexOffset is the offset in floats to the first vertex.
+	 * vertexStride is the stride in floats between vertices.
+	 */
+	btOpenGLInteropVertexBufferDescriptor( cl_command_queue cqCommandQue, cl_context context, GLuint openGLVBO, int vertexOffset, int vertexStride ) :
+		// Initialise every member up front so the destructor never sees an
+		// indeterminate m_buffer, even in the USE_MINICL configuration.
+		m_context( context ),
+		m_commandQueue( cqCommandQue ),
+		m_buffer( 0 ),
+		m_openGLVBO( openGLVBO )
+	{
+#ifndef USE_MINICL
+		cl_int ciErrNum = CL_SUCCESS;
+		
+		m_vertexOffset = vertexOffset;
+		m_vertexStride = vertexStride;
+		
+		m_buffer = clCreateFromGLBuffer(m_context, CL_MEM_WRITE_ONLY, openGLVBO, &ciErrNum);
+		if( ciErrNum != CL_SUCCESS )
+		{
+			btAssert( 0 &&  "clCreateFromGLBuffer(btOpenGLInteropVertexBufferDescriptor)");
+		}
+
+		m_hasVertexPositions = true;
+#else
+		btAssert(0);//MiniCL shouldn't get here
+#endif
+	}
+
+	/**
+	 * context is the OpenCL context this interop buffer will work in.
+	 * queue is the command queue that kernels and data movement will be enqueued into.
+	 * openGLVBO is the OpenGL vertex buffer data will be copied into.
+	 * vertexOffset is the offset in floats to the first vertex.
+	 * vertexStride is the stride in floats between vertices.
+	 * normalOffset is the offset in floats to the first normal.
+	 * normalStride is the stride in floats between normals.
+	 */
+	btOpenGLInteropVertexBufferDescriptor( cl_command_queue cqCommandQue, cl_context context, GLuint openGLVBO, int vertexOffset, int vertexStride, int normalOffset, int normalStride ) :
+		// See the note on the other constructor: members are always initialised.
+		m_context( context ),
+		m_commandQueue( cqCommandQue ),
+		m_buffer( 0 ),
+		m_openGLVBO( openGLVBO )
+	{
+#ifndef USE_MINICL
+		cl_int ciErrNum = CL_SUCCESS;
+		
+		m_buffer = clCreateFromGLBuffer(m_context, CL_MEM_WRITE_ONLY, openGLVBO, &ciErrNum);
+		if( ciErrNum != CL_SUCCESS )
+		{
+			btAssert( 0 &&  "clCreateFromGLBuffer(btOpenGLInteropVertexBufferDescriptor)");
+		}
+
+		m_vertexOffset = vertexOffset;
+		m_vertexStride = vertexStride;
+		m_hasVertexPositions = true;
+
+		m_normalOffset = normalOffset;
+		m_normalStride = normalStride;
+		m_hasNormals = true;
+#else
+		btAssert(0);
+#endif //USE_MINICL
+		
+	}
+
+	/** Release the interop buffer if one was created. */
+	virtual ~btOpenGLInteropVertexBufferDescriptor()
+	{
+		if( m_buffer )
+		{
+			clReleaseMemObject( m_buffer );
+			m_buffer = 0;
+		}
+	}
+
+	/**
+	 * Return the type of the vertex buffer descriptor.
+	 */
+	virtual BufferTypes getBufferType() const
+	{
+		return OPENGL_BUFFER;
+	}
+
+	/** Return the OpenCL context supplied at construction. */
+	virtual cl_context getContext() const
+	{
+		return m_context;
+	}
+
+	/** Return the OpenCL view of the VBO (null if creation failed or was skipped). */
+	virtual cl_mem getBuffer() const
+	{
+		return m_buffer;
+	}	
+};
+
+#endif // #ifndef BT_SOFT_BODY_SOLVER_VERTEX_BUFFER_OPENGL_H
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverVertexData_OpenCL.h b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverVertexData_OpenCL.h
new file mode 100644
index 000000000..531c34279
--- /dev/null
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolverVertexData_OpenCL.h
@@ -0,0 +1,52 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "BulletMultiThreaded/GpuSoftBodySolvers/Shared/btSoftBodySolverData.h"
+#include "btSoftBodySolverBuffer_OpenCL.h"
+
+#ifndef BT_SOFT_BODY_SOLVER_VERTEX_DATA_OPENCL_H
+#define BT_SOFT_BODY_SOLVER_VERTEX_DATA_OPENCL_H
+
+
+/**
+ * OpenCL-side companion of btSoftBodyVertexData: one btOpenCLBuffer wrapper per
+ * inherited host-side vertex array, plus helpers to move or copy them as a group.
+ */
+class btSoftBodyVertexDataOpenCL : public btSoftBodyVertexData
+{
+protected:
+	/** Flag reported by onAccelerator(). */
+	bool		m_onGPU;
+	/** Command queue handed in at construction. */
+	cl_command_queue	m_queue;
+
+public:
+	// Each wrapper below is bound in the constructor to the host array with the
+	// matching name (for example m_clVertexPosition to m_vertexPosition).
+	btOpenCLBuffer<int>									m_clClothIdentifier;
+	btOpenCLBuffer<Vectormath::Aos::Point3>				m_clVertexPosition;
+	btOpenCLBuffer<Vectormath::Aos::Point3>				m_clVertexPreviousPosition;
+	btOpenCLBuffer<Vectormath::Aos::Vector3>				m_clVertexVelocity;
+	btOpenCLBuffer<Vectormath::Aos::Vector3>				m_clVertexForceAccumulator;
+	btOpenCLBuffer<Vectormath::Aos::Vector3>				m_clVertexNormal;
+	btOpenCLBuffer<float>									m_clVertexInverseMass;
+	btOpenCLBuffer<float>									m_clVertexArea;
+	btOpenCLBuffer<int>									m_clVertexTriangleCount;
+public:
+	btSoftBodyVertexDataOpenCL( cl_command_queue queue,  cl_context ctx);
+
+	virtual ~btSoftBodyVertexDataOpenCL();
+
+	/** Return the m_onGPU flag. */
+	virtual bool onAccelerator();
+
+	/** Call moveToGPU() on every buffer, stopping at the first failure; returns overall success. */
+	virtual bool moveToAccelerator();
+
+	/**
+	 * Bring vertex data back from the GPU.
+	 * bCopy == false: moveFromGPU() is called on every buffer.
+	 * bCopy == true: copyFromGPU() is used instead; with bCopyMinimum only the
+	 * position and normal buffers are copied, otherwise all of them.
+	 * Returns false as soon as one buffer operation fails.
+	 */
+	virtual bool moveFromAccelerator(bool bCopy = false, bool bCopyMinimum = true);
+};
+
+
+#endif // #ifndef BT_SOFT_BODY_SOLVER_VERTEX_DATA_OPENCL_H
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolver_OpenCL.cpp b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolver_OpenCL.cpp
new file mode 100644
index 000000000..e5f4ebb25
--- /dev/null
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolver_OpenCL.cpp
@@ -0,0 +1,1820 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+#include "BulletCollision/CollisionShapes/btTriangleIndexVertexArray.h"
+#include "vectormath/vmInclude.h"
+#include <stdio.h> //@todo: remove the debugging printf at some stage
+#include "btSoftBodySolver_OpenCL.h"
+#include "BulletSoftBody/btSoftBodySolverVertexBuffer.h"
+#include "BulletSoftBody/btSoftBody.h"
+#include "BulletSoftBody/btSoftBodyInternals.h"
+#include "BulletCollision/CollisionShapes/btCapsuleShape.h"
+#include "BulletCollision/CollisionShapes/btSphereShape.h"
+#include "LinearMath/btQuickprof.h"
+#include <limits.h>
+#include "BulletCollision/CollisionDispatch/btCollisionObjectWrapper.h"
+
+#define BT_SUPPRESS_OPENCL_ASSERTS
+
+#ifdef USE_MINICL
+	#include "MiniCL/cl.h"
+#else //USE_MINICL
+	#ifdef __APPLE__
+		#include <OpenCL/OpenCL.h>
+	#else
+		#include <CL/cl.h>
+	#endif //__APPLE__
+#endif//USE_MINICL
+
+#define BT_DEFAULT_WORKGROUPSIZE 64
+
+
+#define RELEASE_CL_KERNEL(kernelName) {if( kernelName ){ clReleaseKernel( kernelName ); kernelName = 0; }}
+
+
+//CL_VERSION_1_1 seems broken on NVidia SDK so just disable it
+
+////OpenCL 1.0 kernels don't use float3
+#define MSTRINGIFY(A) #A
+static const char* PrepareLinksCLString = 
+#include "OpenCLC10/PrepareLinks.cl"
+static const char* UpdatePositionsFromVelocitiesCLString = 
+#include "OpenCLC10/UpdatePositionsFromVelocities.cl"
+static const char* SolvePositionsCLString = 
+#include "OpenCLC10/SolvePositions.cl"
+static const char* UpdateNodesCLString = 
+#include "OpenCLC10/UpdateNodes.cl"
+static const char* UpdatePositionsCLString = 
+#include "OpenCLC10/UpdatePositions.cl"
+static const char* UpdateConstantsCLString = 
+#include "OpenCLC10/UpdateConstants.cl"
+static const char* IntegrateCLString = 
+#include "OpenCLC10/Integrate.cl"
+static const char* ApplyForcesCLString = 
+#include "OpenCLC10/ApplyForces.cl"
+static const char* UpdateFixedVertexPositionsCLString = 
+#include "OpenCLC10/UpdateFixedVertexPositions.cl"
+static const char* UpdateNormalsCLString = 
+#include "OpenCLC10/UpdateNormals.cl"
+static const char* VSolveLinksCLString = 
+#include "OpenCLC10/VSolveLinks.cl"
+static const char* SolveCollisionsAndUpdateVelocitiesCLString =
+#include "OpenCLC10/SolveCollisionsAndUpdateVelocities.cl"
+
+
+/**
+ * Bind each OpenCL buffer wrapper to the host-side vertex array it mirrors.
+ *
+ * queue - command queue stored in m_queue and passed to every buffer wrapper.
+ * ctx   - OpenCL context passed to every buffer wrapper.
+ *
+ * m_onGPU starts out false so that onAccelerator() never reads an indeterminate
+ * value before the first moveToAccelerator() call.
+ */
+btSoftBodyVertexDataOpenCL::btSoftBodyVertexDataOpenCL( cl_command_queue queue, cl_context ctx) :
+	m_onGPU(false),
+    m_queue(queue),
+	m_clClothIdentifier( queue, ctx, &m_clothIdentifier, false ),
+	m_clVertexPosition( queue, ctx, &m_vertexPosition, false ),
+	m_clVertexPreviousPosition( queue, ctx, &m_vertexPreviousPosition, false ),
+	m_clVertexVelocity( queue, ctx, &m_vertexVelocity, false ),
+	m_clVertexForceAccumulator( queue, ctx, &m_vertexForceAccumulator, false ),
+	m_clVertexNormal( queue, ctx, &m_vertexNormal, false ),
+	m_clVertexInverseMass( queue, ctx, &m_vertexInverseMass, false ),
+	m_clVertexArea( queue, ctx, &m_vertexArea, false ),
+	m_clVertexTriangleCount( queue, ctx, &m_vertexTriangleCount, false )
+{
+}
+
+/** Nothing to do explicitly; the member buffer wrappers are destroyed automatically. */
+btSoftBodyVertexDataOpenCL::~btSoftBodyVertexDataOpenCL()
+{
+
+}
+
+/** Return the m_onGPU flag maintained by moveToAccelerator() and moveFromAccelerator(). */
+bool btSoftBodyVertexDataOpenCL::onAccelerator()
+{
+	return m_onGPU;
+}
+
+/**
+ * Push every vertex buffer to the GPU.
+ * Buffers are processed in declaration order and processing stops at the first
+ * failure. m_onGPU is set only when all of them succeeded.
+ */
+bool btSoftBodyVertexDataOpenCL::moveToAccelerator()
+{
+	// && short-circuits, so nothing after a failed buffer is touched.
+	const bool allMoved =
+		m_clClothIdentifier.moveToGPU() &&
+		m_clVertexPosition.moveToGPU() &&
+		m_clVertexPreviousPosition.moveToGPU() &&
+		m_clVertexVelocity.moveToGPU() &&
+		m_clVertexForceAccumulator.moveToGPU() &&
+		m_clVertexNormal.moveToGPU() &&
+		m_clVertexInverseMass.moveToGPU() &&
+		m_clVertexArea.moveToGPU() &&
+		m_clVertexTriangleCount.moveToGPU();
+
+	if( !allMoved )
+		return false;
+
+	m_onGPU = true;
+	return true;
+}
+
+/**
+ * Bring vertex data back from the GPU.
+ *
+ * bCopy        - false: moveFromGPU() is called on every buffer;
+ *                true: copyFromGPU() is used instead.
+ * bCopyMinimum - only consulted when bCopy is true: copy just positions and
+ *                normals (true) or every buffer (false).
+ *
+ * Buffers are processed in order and processing stops at the first failure.
+ * Returns true when every attempted operation succeeded. After a successful
+ * move the data is flagged as no longer on the accelerator; after a successful
+ * copy the flag is set, exactly as before this fix.
+ */
+bool btSoftBodyVertexDataOpenCL::moveFromAccelerator(bool bCopy, bool bCopyMinimum)
+{
+	bool success = true;
+
+	if (!bCopy)
+	{
+		success = success && m_clClothIdentifier.moveFromGPU();
+		success = success && m_clVertexPosition.moveFromGPU();
+		success = success && m_clVertexPreviousPosition.moveFromGPU();
+		success = success && m_clVertexVelocity.moveFromGPU();
+		success = success && m_clVertexForceAccumulator.moveFromGPU();
+		success = success && m_clVertexNormal.moveFromGPU();
+		success = success && m_clVertexInverseMass.moveFromGPU();
+		success = success && m_clVertexArea.moveFromGPU();
+		success = success && m_clVertexTriangleCount.moveFromGPU();
+	}
+	else
+	{
+		if (bCopyMinimum)
+		{
+			success = success && m_clVertexPosition.copyFromGPU();
+			success = success && m_clVertexNormal.copyFromGPU();
+		}
+		else
+		{
+			success = success && m_clClothIdentifier.copyFromGPU();
+			success = success && m_clVertexPosition.copyFromGPU();
+			success = success && m_clVertexPreviousPosition.copyFromGPU();
+			success = success && m_clVertexVelocity.copyFromGPU();
+			success = success && m_clVertexForceAccumulator.copyFromGPU();
+			success = success && m_clVertexNormal.copyFromGPU();
+			success = success && m_clVertexInverseMass.copyFromGPU();
+			success = success && m_clVertexArea.copyFromGPU();
+			success = success && m_clVertexTriangleCount.copyFromGPU();
+		}
+	}
+
+	// A move (bCopy == false) takes the data off the accelerator, so the flag must
+	// be cleared, matching btSoftBodyLinkDataOpenCL::moveFromAccelerator(). The
+	// copy path keeps its previous behaviour of setting the flag.
+	if( success )
+		m_onGPU = bCopy;
+
+	return success;
+}
+
+/**
+ * Bind each OpenCL buffer wrapper to the host-side link array it mirrors.
+ *
+ * queue - command queue stored in m_cqCommandQue and passed to every buffer wrapper.
+ * ctx   - OpenCL context passed to every buffer wrapper.
+ */
+btSoftBodyLinkDataOpenCL::btSoftBodyLinkDataOpenCL(cl_command_queue queue,  cl_context ctx) 
+:m_cqCommandQue(queue),
+	m_clLinks( queue, ctx, &m_links, false ),
+	m_clLinkStrength( queue, ctx, &m_linkStrength, false ),
+	m_clLinksMassLSC( queue, ctx, &m_linksMassLSC, false ),
+	m_clLinksRestLengthSquared( queue, ctx, &m_linksRestLengthSquared, false ),
+	m_clLinksCLength( queue, ctx, &m_linksCLength, false ),
+	m_clLinksLengthRatio( queue, ctx, &m_linksLengthRatio, false ),
+	m_clLinksRestLength( queue, ctx, &m_linksRestLength, false ),
+	m_clLinksMaterialLinearStiffnessCoefficient( queue, ctx, &m_linksMaterialLinearStiffnessCoefficient, false )
+{
+	// onAccelerator() must not read an indeterminate flag before the first
+	// moveToAccelerator() call. Assigned in the body so this does not depend on
+	// where m_onGPU sits in the member declaration order.
+	m_onGPU = false;
+}
+
+/** Nothing to do explicitly; the member buffer wrappers are destroyed automatically. */
+btSoftBodyLinkDataOpenCL::~btSoftBodyLinkDataOpenCL()
+{
+}
+
+/** Convert a btVector3 into a Vectormath::Aos::Vector3 by copying its X, Y and Z components. */
+static Vectormath::Aos::Vector3 toVector3( const btVector3 &vec )
+{
+	return Vectormath::Aos::Vector3( vec.getX(), vec.getY(), vec.getZ() );
+}
+
+/**
+ * Allocate enough space in all link-related arrays to fit numLinks additional links.
+ * The base class grows its own arrays; m_linkAddresses is then resized to the
+ * link count before the call plus numLinks.
+ */
+void btSoftBodyLinkDataOpenCL::createLinks( int numLinks )
+{
+	// Captured before the base-class call, which may change m_links.size().
+	const int totalLinks = m_links.size() + numLinks;
+
+	btSoftBodyLinkData::createLinks( numLinks );
+
+	// Keep the address table the same length as the link arrays.
+	m_linkAddresses.resize( totalLinks );
+}
+
+/**
+ * Insert the link described into the correct data structures assuming space has already been allocated by a call to createLinks.
+ * The base class stores the link data; the address table entry for linkIndex is then set to linkIndex itself.
+ */
+void btSoftBodyLinkDataOpenCL::setLinkAt( 
+	const LinkDescription &link, 
+	int linkIndex )
+{
+	btSoftBodyLinkData::setLinkAt( link, linkIndex );
+
+	// Set the link index correctly for initialisation: each link initially maps to its own slot.
+	m_linkAddresses[linkIndex] = linkIndex;
+}
+
+/** Return the m_onGPU flag maintained by moveToAccelerator() and moveFromAccelerator(). */
+bool btSoftBodyLinkDataOpenCL::onAccelerator()
+{
+	return m_onGPU;
+}
+
+/**
+ * Push every link buffer to the GPU.
+ * Buffers are processed in order and processing stops at the first failure.
+ * m_onGPU is set only when all of them succeeded.
+ */
+bool btSoftBodyLinkDataOpenCL::moveToAccelerator()
+{
+	// && short-circuits, so nothing after a failed buffer is touched.
+	const bool allMoved =
+		m_clLinks.moveToGPU() &&
+		m_clLinkStrength.moveToGPU() &&
+		m_clLinksMassLSC.moveToGPU() &&
+		m_clLinksRestLengthSquared.moveToGPU() &&
+		m_clLinksCLength.moveToGPU() &&
+		m_clLinksLengthRatio.moveToGPU() &&
+		m_clLinksRestLength.moveToGPU() &&
+		m_clLinksMaterialLinearStiffnessCoefficient.moveToGPU();
+
+	if( !allMoved )
+		return false;
+
+	m_onGPU = true;
+	return true;
+}
+
+/**
+ * Pull every link buffer back from the GPU.
+ * Buffers are processed in order and processing stops at the first failure.
+ * m_onGPU is cleared only when all of them succeeded.
+ */
+bool btSoftBodyLinkDataOpenCL::moveFromAccelerator()
+{
+	// && short-circuits, so nothing after a failed buffer is touched.
+	const bool allMoved =
+		m_clLinks.moveFromGPU() &&
+		m_clLinkStrength.moveFromGPU() &&
+		m_clLinksMassLSC.moveFromGPU() &&
+		m_clLinksRestLengthSquared.moveFromGPU() &&
+		m_clLinksCLength.moveFromGPU() &&
+		m_clLinksLengthRatio.moveFromGPU() &&
+		m_clLinksRestLength.moveFromGPU() &&
+		m_clLinksMaterialLinearStiffnessCoefficient.moveFromGPU();
+
+	if( !allMoved )
+		return false;
+
+	m_onGPU = false;
+	return true;
+}
+
+/**
+ * Generate (and later update) the batching for the entire link set.
+ * This redoes a lot of work because it batches the entire set when each cloth is inserted.
+ * In theory we could delay it until just before we need the cloth.
+ * It's a one-off overhead, though, so that is a later optimisation.
+ */
+void btSoftBodyLinkDataOpenCL::generateBatches()
+{
+	int numLinks = getNumLinks();
+
+	// Do the graph colouring here temporarily
+	btAlignedObjectArray< int > batchValues;
+	batchValues.resize( numLinks, 0 );
+
+	// Find the maximum vertex value internally for now
+	int maxVertex = 0;
+	for( int linkIndex = 0; linkIndex < numLinks; ++linkIndex )
+	{
+		int vertex0 = getVertexPair(linkIndex).vertex0;
+		int vertex1 = getVertexPair(linkIndex).vertex1;
+		if( vertex0 > maxVertex )
+			maxVertex = vertex0;
+		if( vertex1 > maxVertex )
+			maxVertex = vertex1;
+	}
+	int numVertices = maxVertex + 1;
+
+	// Set of lists, one for each node, specifying which colours are connected
+	// to that node.
+	// No two edges into a node can share a colour.
+	btAlignedObjectArray< btAlignedObjectArray< int > > vertexConnectedColourLists;
+	vertexConnectedColourLists.resize(numVertices);
+
+	// Simple algorithm that chooses the lowest batch number
+	// that none of the links attached to either of the connected 
+	// nodes is in
+	for( int linkIndex = 0; linkIndex < numLinks; ++linkIndex )
+	{				
+		int linkLocation = m_linkAddresses[linkIndex];
+
+		int vertex0 = getVertexPair(linkLocation).vertex0;
+		int vertex1 = getVertexPair(linkLocation).vertex1;
+
+		// Get the two node colour lists
+		btAlignedObjectArray< int > &colourListVertex0( vertexConnectedColourLists[vertex0] );
+		btAlignedObjectArray< int > &colourListVertex1( vertexConnectedColourLists[vertex1] );
+
+		// Choose the minimum colour that is in neither list
+		int colour = 0;
+		while( colourListVertex0.findLinearSearch(colour) != colourListVertex0.size() || colourListVertex1.findLinearSearch(colour) != colourListVertex1.size()  )
+			++colour;
+		// i should now be the minimum colour in neither list
+		// Add to the two lists so that future edges don't share
+		// And store the colour against this edge
+
+		colourListVertex0.push_back(colour);
+		colourListVertex1.push_back(colour);
+		batchValues[linkIndex] = colour;
+	}
+
+	// Check the colour counts
+	btAlignedObjectArray< int > batchCounts;
+	for( int i = 0; i < numLinks; ++i )
+	{
+		int batch = batchValues[i];
+		if( batch >= batchCounts.size() )
+			batchCounts.push_back(1);
+		else
+			++(batchCounts[batch]);
+	}
+
+	m_batchStartLengths.resize(batchCounts.size());
+	if( m_batchStartLengths.size() > 0 )
+	{
+		m_batchStartLengths.resize(batchCounts.size());
+		m_batchStartLengths[0] = BatchPair(0, 0);
+
+		int sum = 0;
+		for( int batchIndex = 0; batchIndex < batchCounts.size(); ++batchIndex )
+		{
+			m_batchStartLengths[batchIndex].start = sum;
+			m_batchStartLengths[batchIndex].length = batchCounts[batchIndex];
+			sum += batchCounts[batchIndex];
+		}
+	}
+
+	/////////////////////////////
+	// Sort data based on batches
+
+	// Create source arrays by copying originals
+	btAlignedObjectArray<LinkNodePair>									m_links_Backup(m_links);
+	btAlignedObjectArray<float>											m_linkStrength_Backup(m_linkStrength);
+	btAlignedObjectArray<float>											m_linksMassLSC_Backup(m_linksMassLSC);
+	btAlignedObjectArray<float>											m_linksRestLengthSquared_Backup(m_linksRestLengthSquared);
+	btAlignedObjectArray<Vectormath::Aos::Vector3>						m_linksCLength_Backup(m_linksCLength);
+	btAlignedObjectArray<float>											m_linksLengthRatio_Backup(m_linksLengthRatio);
+	btAlignedObjectArray<float>											m_linksRestLength_Backup(m_linksRestLength);
+	btAlignedObjectArray<float>											m_linksMaterialLinearStiffnessCoefficient_Backup(m_linksMaterialLinearStiffnessCoefficient);
+
+
+	for( int batch = 0; batch < batchCounts.size(); ++batch )
+		batchCounts[batch] = 0;
+
+	// Do sort as single pass into destination arrays	
+	for( int linkIndex = 0; linkIndex < numLinks; ++linkIndex )
+	{
+		// To maintain locations run off the original link locations rather than the current position.
+		// It's not cache efficient, but as we run this rarely that should not matter.
+		// It's faster than searching the link location array for the current location and then updating it.
+		// The other alternative would be to unsort before resorting, but this is equivalent to doing that.
+		int linkLocation = m_linkAddresses[linkIndex];
+
+		// Obtain batch and calculate target location for the
+		// next element in that batch, incrementing the batch counter
+		// afterwards
+		int batch = batchValues[linkIndex];
+		int newLocation = m_batchStartLengths[batch].start + batchCounts[batch];
+
+		batchCounts[batch] = batchCounts[batch] + 1;
+		m_links[newLocation] = m_links_Backup[linkLocation];
+#if 1
+		m_linkStrength[newLocation] = m_linkStrength_Backup[linkLocation];
+		m_linksMassLSC[newLocation] = m_linksMassLSC_Backup[linkLocation];
+		m_linksRestLengthSquared[newLocation] = m_linksRestLengthSquared_Backup[linkLocation];
+		m_linksLengthRatio[newLocation] = m_linksLengthRatio_Backup[linkLocation];
+		m_linksRestLength[newLocation] = m_linksRestLength_Backup[linkLocation];
+		m_linksMaterialLinearStiffnessCoefficient[newLocation] = m_linksMaterialLinearStiffnessCoefficient_Backup[linkLocation];
+#endif
+		// Update the locations array to account for the moved entry
+		m_linkAddresses[linkIndex] = newLocation;
+	}
+
+
+} // void generateBatches()
+
+
+
+
+
+/**
+ * Construct the OpenCL-side triangle data store.
+ *
+ * Keeps the command queue and wraps the host-side vertex index, area and
+ * normal arrays in OpenCL buffer objects created for the given queue/context.
+ *
+ * @param queue  OpenCL command queue stored in m_queue and passed to each buffer wrapper.
+ * @param ctx    OpenCL context passed to each buffer wrapper.
+ */
+btSoftBodyTriangleDataOpenCL::btSoftBodyTriangleDataOpenCL( cl_command_queue queue , cl_context ctx) : 
+    m_queue( queue ),
+	m_clVertexIndices( queue, ctx, &m_vertexIndices, false ),
+	m_clArea( queue, ctx, &m_area, false ),
+	m_clNormal( queue, ctx, &m_normal, false )
+{
+	// Nothing has been moved to the accelerator yet. Set the flag explicitly so that
+	// onAccelerator() does not read an indeterminate value before the first move.
+	// Assigned in the body (not the initialiser list) to stay independent of the
+	// member declaration order.
+	m_onGPU = false;
+}
+
+/** Destructor. Performs no explicit cleanup of its own; members are destroyed implicitly. */
+btSoftBodyTriangleDataOpenCL::~btSoftBodyTriangleDataOpenCL()
+{
+}
+
+/**
+ * Add room for numTriangles further triangles.
+ *
+ * Delegates the allocation to btSoftBodyTriangleData::createTriangles and then
+ * resizes the triangle address table to the previous triangle count plus numTriangles.
+ */
+void btSoftBodyTriangleDataOpenCL::createTriangles( int numTriangles )
+{
+	// Work out the target size before the base class call changes the triangle count
+	const int addressTableSize = getNumTriangles() + numTriangles;
+
+	btSoftBodyTriangleData::createTriangles( numTriangles );
+
+	// Keep the triangle address table in step with the triangle arrays
+	m_triangleAddresses.resize( addressTableSize );
+}
+
+/**
+ * Store the described triangle at triangleIndex, assuming space has already been
+ * allocated by a call to createTriangles.
+ *
+ * The triangle's entry in the address table is set to its own index; generateBatches()
+ * later rewrites these entries when it reorders the triangle arrays.
+ */
+void btSoftBodyTriangleDataOpenCL::setTriangleAt( const btSoftBodyTriangleData::TriangleDescription &triangle, int triangleIndex )
+{
+	btSoftBodyTriangleData::setTriangleAt( triangle, triangleIndex );
+
+	// Newly inserted triangles start out unsorted: logical index == storage location
+	m_triangleAddresses[triangleIndex] = triangleIndex;
+}
+
+/** Return the m_onGPU flag, which moveToAccelerator() sets to true after a fully successful move. */
+bool btSoftBodyTriangleDataOpenCL::onAccelerator()
+{
+	return m_onGPU;
+}
+
+/**
+ * Move the vertex index, area and normal buffers to the GPU, in that order.
+ *
+ * Stops at the first buffer whose move fails. m_onGPU is set to true only
+ * when all three moves succeed.
+ *
+ * @return true if every buffer was moved, false otherwise.
+ */
+bool btSoftBodyTriangleDataOpenCL::moveToAccelerator()
+{
+	// Short-circuit evaluation: a failed move prevents the later ones from being attempted
+	const bool everythingMoved =
+		m_clVertexIndices.moveToGPU() &&
+		m_clArea.moveToGPU() &&
+		m_clNormal.moveToGPU();
+
+	if( !everythingMoved )
+		return false;
+
+	m_onGPU = true;
+	return true;
+}
+
+/**
+ * Move the vertex index, area and normal buffers back from the GPU, in that order.
+ *
+ * Stops at the first buffer whose move fails. When all three moves succeed the
+ * data is no longer on the accelerator, so m_onGPU is cleared.
+ *
+ * @return true if every buffer was moved back, false otherwise.
+ */
+bool btSoftBodyTriangleDataOpenCL::moveFromAccelerator()
+{
+	bool success = true;
+	success = success && m_clVertexIndices.moveFromGPU();
+	success = success && m_clArea.moveFromGPU();
+	success = success && m_clNormal.moveFromGPU();
+
+	// The data now lives on the host again; previously this flag was (incorrectly)
+	// set to true here, making onAccelerator() report the opposite of reality.
+	if( success )
+		m_onGPU = false;
+
+	return success;
+}
+
+/**
+ * Generate (and later update) the batching for the entire triangle set.
+ *
+ * Each triangle is given the lowest colour not already used by another triangle
+ * touching any of its three vertices, so triangles of one colour never share a
+ * vertex. Every colour becomes one batch: m_batchStartLengths receives the
+ * (start, length) range of each batch, the per-triangle arrays are reordered so
+ * each batch is contiguous, and m_triangleAddresses is updated with the new
+ * location of every triangle.
+ *
+ * This redoes a lot of work because it batches the entire set when each cloth is inserted.
+ * In theory we could delay it until just before we need the cloth.
+ * It's a one-off overhead, though, so that is a later optimisation.
+ */
+void btSoftBodyTriangleDataOpenCL::generateBatches()
+{
+	const int triangleCount = getNumTriangles();
+	if( triangleCount == 0 )
+		return;
+
+	// Colour (batch number) chosen for each triangle, indexed by logical triangle index
+	btAlignedObjectArray< int > triangleColours;
+	triangleColours.resize( triangleCount );
+
+	// Scan every triangle for the largest vertex index in use
+	int highestVertex = 0;
+	for( int slot = 0; slot < triangleCount; ++slot )
+	{
+		const btSoftBodyTriangleData::TriangleNodeSet &nodes = getVertexSet(slot);
+
+		if( nodes.vertex0 > highestVertex )
+			highestVertex = nodes.vertex0;
+		if( nodes.vertex1 > highestVertex )
+			highestVertex = nodes.vertex1;
+		if( nodes.vertex2 > highestVertex )
+			highestVertex = nodes.vertex2;
+	}
+	const int vertexCount = highestVertex + 1;
+
+	// One list per vertex recording the colours of the triangles already attached to it.
+	// No two triangles sharing a vertex may share a colour.
+	btAlignedObjectArray< btAlignedObjectArray< int > > coloursAtVertex;
+	coloursAtVertex.resize(vertexCount);
+
+	// Greedy colouring: walk the triangles in logical order, reading each one from its
+	// current storage location so the result is independent of any earlier sort.
+	for( int tri = 0; tri < triangleCount; ++tri )
+	{
+		const btSoftBodyTriangleData::TriangleNodeSet &nodes = getVertexSet( m_triangleAddresses[tri] );
+
+		btAlignedObjectArray< int > &used0 = coloursAtVertex[nodes.vertex0];
+		btAlignedObjectArray< int > &used1 = coloursAtVertex[nodes.vertex1];
+		btAlignedObjectArray< int > &used2 = coloursAtVertex[nodes.vertex2];
+
+		// findLinearSearch returns size() when the value is absent
+		int candidate = 0;
+		for( ;; )
+		{
+			const bool alreadyTaken =
+				used0.findLinearSearch(candidate) != used0.size() ||
+				used1.findLinearSearch(candidate) != used1.size() ||
+				used2.findLinearSearch(candidate) != used2.size();
+			if( !alreadyTaken )
+				break;
+			++candidate;
+		}
+
+		// candidate is now the smallest colour absent from all three lists.
+		// Record it on each vertex so later triangles avoid it, and against this triangle.
+		used0.push_back(candidate);
+		used1.push_back(candidate);
+		used2.push_back(candidate);
+
+		triangleColours[tri] = candidate;
+	}
+
+	// Count how many triangles landed in each batch
+	btAlignedObjectArray< int > perBatchCount;
+	for( int tri = 0; tri < triangleCount; ++tri )
+	{
+		const int colour = triangleColours[tri];
+		if( colour < perBatchCount.size() )
+			++(perBatchCount[colour]);
+		else
+			perBatchCount.push_back(1);
+	}
+
+	// Convert the counts into (start, length) ranges via a running sum
+	m_batchStartLengths.resize(perBatchCount.size());
+	m_batchStartLengths[0] = btSomePair(0,0);
+
+	int runningStart = 0;
+	for( int b = 0; b < perBatchCount.size(); ++b )
+	{
+		m_batchStartLengths[b].first = runningStart;
+		m_batchStartLengths[b].second = perBatchCount[b];
+		runningStart += perBatchCount[b];
+	}
+
+	/////////////////////////////
+	// Sort data based on batches
+
+	// Snapshot the current arrays; the originals become the sort destination
+	btAlignedObjectArray<btSoftBodyTriangleData::TriangleNodeSet>	vertexIndicesSource(m_vertexIndices);
+	btAlignedObjectArray<float>										areaSource(m_area);
+	btAlignedObjectArray<Vectormath::Aos::Vector3>					normalSource(m_normal);
+
+	// Reuse the counters as per-batch fill cursors
+	for( int b = 0; b < perBatchCount.size(); ++b )
+		perBatchCount[b] = 0;
+
+	// Single scatter pass into the destination arrays
+	for( int tri = 0; tri < triangleCount; ++tri )
+	{
+		// Read from the location recorded before this sort rather than the logical index.
+		// It's not cache efficient, but as we run this rarely that should not matter.
+		const int oldLocation = m_triangleAddresses[tri];
+
+		// Next free slot within this triangle's batch; advance the cursor afterwards
+		const int colour = triangleColours[tri];
+		const int destination = m_batchStartLengths[colour].first + perBatchCount[colour];
+		perBatchCount[colour] = perBatchCount[colour] + 1;
+
+		m_vertexIndices[destination] = vertexIndicesSource[oldLocation];
+		m_area[destination] = areaSource[oldLocation];
+		m_normal[destination] = normalSource[oldLocation];
+
+		// Remember where this triangle now lives
+		m_triangleAddresses[tri] = destination;
+	}
+} // btSoftBodyTriangleDataOpenCL::generateBatches
+
+
+
+
+
+
+
+/**
+ * Construct the OpenCL soft body solver.
+ *
+ * Builds the link, vertex and triangle data stores and the per-cloth / anchor
+ * OpenCL buffer wrappers for the given queue and context, selects the default
+ * CL function set as the current one, and zeroes every kernel handle so that
+ * releaseKernels() can tell which kernels have actually been created.
+ *
+ * @param queue                    OpenCL command queue used for all buffers and kernel launches.
+ * @param ctx                      OpenCL context used for all buffers.
+ * @param bUpdateAchchoredNodePos  Stored in m_bUpdateAnchoredNodePos.
+ */
+btOpenCLSoftBodySolver::btOpenCLSoftBodySolver(cl_command_queue queue, cl_context ctx, bool bUpdateAchchoredNodePos) :
+	m_linkData(queue, ctx),
+	m_vertexData(queue, ctx),
+	m_triangleData(queue, ctx),
+	m_defaultCLFunctions(queue, ctx),
+	m_currentCLFunctions(&m_defaultCLFunctions),
+	m_clPerClothAcceleration(queue, ctx, &m_perClothAcceleration, true ),
+	m_clPerClothWindVelocity(queue, ctx, &m_perClothWindVelocity, true ),
+	m_clPerClothDampingFactor(queue,ctx, &m_perClothDampingFactor, true ),
+	m_clPerClothVelocityCorrectionCoefficient(queue, ctx,&m_perClothVelocityCorrectionCoefficient, true ),
+	m_clPerClothLiftFactor(queue, ctx,&m_perClothLiftFactor, true ),
+	m_clPerClothDragFactor(queue, ctx,&m_perClothDragFactor, true ),
+	m_clPerClothMediumDensity(queue, ctx,&m_perClothMediumDensity, true ),
+	m_clPerClothCollisionObjects( queue, ctx, &m_perClothCollisionObjects, true ),
+	m_clCollisionObjectDetails( queue, ctx, &m_collisionObjectDetails, true ),
+	m_clPerClothFriction( queue, ctx, &m_perClothFriction, false ),
+	m_clAnchorPosition( queue, ctx, &m_anchorPosition, true ),
+	m_clAnchorIndex( queue, ctx, &m_anchorIndex, true),
+	m_cqCommandQue( queue ),
+	m_cxMainContext(ctx),
+	m_defaultWorkGroupSize(BT_DEFAULT_WORKGROUPSIZE),
+	m_bUpdateAnchoredNodePos(bUpdateAchchoredNodePos)
+{
+
+	// Initially we will clearly need to update solver constants
+	// For now this is global for the cloths linked with this solver - we should probably make this body specific 
+	// for performance in future once we understand more clearly when constants need to be updated
+	m_updateSolverConstants = true;
+
+	// No kernels have been compiled yet
+	m_shadersInitialized = false;
+
+	// Null every kernel handle up front
+	m_prepareLinksKernel = 0;
+	m_solvePositionsFromLinksKernel = 0;
+	m_updateConstantsKernel = 0;
+	m_integrateKernel = 0;
+	m_addVelocityKernel = 0;
+	m_updatePositionsFromVelocitiesKernel = 0;
+	m_updateVelocitiesFromPositionsWithoutVelocitiesKernel = 0;
+	m_updateVelocitiesFromPositionsWithVelocitiesKernel = 0;
+	m_vSolveLinksKernel = 0;
+	m_solveCollisionsAndUpdateVelocitiesKernel = 0;
+	m_resetNormalsAndAreasKernel = 0;
+	m_updateSoftBodiesKernel = 0;
+	m_normalizeNormalsAndAreasKernel = 0;
+	m_outputToVertexArrayKernel = 0;
+	m_applyForcesKernel = 0;
+	m_updateFixedVertexPositionsKernel = 0;
+}
+
+/** Destructor: releases the solver's OpenCL kernels via releaseKernels(). */
+btOpenCLSoftBodySolver::~btOpenCLSoftBodySolver()
+{
+	releaseKernels();
+}
+
+/**
+ * Release every OpenCL kernel handle held by the solver and mark the shaders
+ * as uninitialised.
+ *
+ * Covers every kernel handle the constructor initialises, including
+ * m_updateSoftBodiesKernel (launched by executeUpdateSoftBodies), which was
+ * previously omitted here and therefore never released.
+ */
+void btOpenCLSoftBodySolver::releaseKernels()
+{
+	RELEASE_CL_KERNEL( m_prepareLinksKernel );
+	RELEASE_CL_KERNEL( m_solvePositionsFromLinksKernel );
+	RELEASE_CL_KERNEL( m_updateConstantsKernel );
+	RELEASE_CL_KERNEL( m_integrateKernel );
+	RELEASE_CL_KERNEL( m_addVelocityKernel );
+	RELEASE_CL_KERNEL( m_updatePositionsFromVelocitiesKernel );
+	RELEASE_CL_KERNEL( m_updateVelocitiesFromPositionsWithoutVelocitiesKernel );
+	RELEASE_CL_KERNEL( m_updateVelocitiesFromPositionsWithVelocitiesKernel );
+	RELEASE_CL_KERNEL( m_vSolveLinksKernel );
+	RELEASE_CL_KERNEL( m_solveCollisionsAndUpdateVelocitiesKernel );
+	RELEASE_CL_KERNEL( m_resetNormalsAndAreasKernel );
+	RELEASE_CL_KERNEL( m_updateSoftBodiesKernel );
+	RELEASE_CL_KERNEL( m_normalizeNormalsAndAreasKernel );
+	RELEASE_CL_KERNEL( m_outputToVertexArrayKernel );
+	RELEASE_CL_KERNEL( m_applyForcesKernel );
+	RELEASE_CL_KERNEL( m_updateFixedVertexPositionsKernel );
+
+	m_shadersInitialized = false;
+}
+
+/**
+ * Copy simulated vertex positions and normals from the solver back into the
+ * nodes of each registered btSoftBody.
+ *
+ * @param bMove  Its negation is forwarded to m_vertexData.moveFromAccelerator().
+ */
+void btOpenCLSoftBodySolver::copyBackToSoftBodies(bool bMove)
+{
+	using Vectormath::Aos::Point3;
+
+	// Bring the vertex data back to the host before reading it
+	m_vertexData.moveFromAccelerator(!bMove);
+
+	// Each soft body owns a contiguous range of the solver's vertex arrays
+	for( int bodyIndex = 0; bodyIndex < m_softBodySet.size(); ++bodyIndex )
+	{
+		btOpenCLAcceleratedSoftBodyInterface *bodyInterface = m_softBodySet[ bodyIndex ];
+		btSoftBody *body = bodyInterface->getSoftBody();
+
+		const int rangeStart = bodyInterface->getFirstVertex();
+		const int rangeLength = bodyInterface->getNumVertices();
+
+		for( int local = 0; local < rangeLength; ++local )
+		{
+			const int solverIndex = rangeStart + local;
+			Point3 solvedPosition( m_vertexData.getVertexPositions()[solverIndex] );
+			Point3 solvedNormal(m_vertexData.getNormal(solverIndex));
+
+			btSoftBody::Node &node = body->m_nodes[local];
+
+			// Component-wise writes: only x, y and z of the node vectors are touched
+			node.m_x.setX( solvedPosition.getX() );
+			node.m_x.setY( solvedPosition.getY() );
+			node.m_x.setZ( solvedPosition.getZ() );
+
+			node.m_n.setX( solvedNormal.getX() );
+			node.m_n.setY( solvedNormal.getY() );
+			node.m_n.setZ( solvedNormal.getZ() );
+		}
+	}
+} // btOpenCLSoftBodySolver::copyBackToSoftBodies
+
+/**
+ * Rebuild the solver's internal data from a set of soft bodies.
+ *
+ * Runs only when forceUpdate is set or the number of soft bodies differs from
+ * the number currently registered. In that case all vertex, triangle, link,
+ * per-cloth and anchor data is discarded and reloaded from softBodies, the
+ * iteration counts are set to the maximum requested by any body, link and
+ * triangle batches are regenerated and the shaders are rebuilt.
+ *
+ * @param softBodies   Soft bodies to load into the solver.
+ * @param forceUpdate  Rebuild even if the number of soft bodies is unchanged.
+ */
+void btOpenCLSoftBodySolver::optimize( btAlignedObjectArray< btSoftBody * > &softBodies, bool forceUpdate )
+{
+	if( forceUpdate || m_softBodySet.size() != softBodies.size() )
+	{
+		// Have a change in the soft body set so update, reloading all the data
+		getVertexData().clear();
+		getTriangleData().clear();
+		getLinkData().clear();
+		// NOTE(review): the interfaces held here are allocated with new further down and
+		// are not deleted before the array is emptied - confirm ownership and free if unowned.
+		m_softBodySet.resize(0);
+		m_anchorIndex.clear();
+
+		// The per-cloth arrays and the anchor list are refilled with push_back below and
+		// are indexed by the new soft body / vertex numbering. Empty them first, otherwise
+		// a second call leaves stale entries at the front (so cloth index 0 would still
+		// refer to data from the previous set) and stale anchor entries could index
+		// outside the freshly sized m_anchorIndex array.
+		m_anchorNodeInfoArray.resize(0);
+		m_perClothAcceleration.resize(0);
+		m_perClothDampingFactor.resize(0);
+		m_perClothVelocityCorrectionCoefficient.resize(0);
+		m_perClothLiftFactor.resize(0);
+		m_perClothDragFactor.resize(0);
+		m_perClothMediumDensity.resize(0);
+		m_perClothFriction.resize(0);
+		m_perClothCollisionObjects.resize(0);
+
+		int maxPiterations = 0;
+		int maxViterations = 0;
+
+		for( int softBodyIndex = 0; softBodyIndex < softBodies.size(); ++softBodyIndex )
+		{
+			btSoftBody *softBody = softBodies[ softBodyIndex ];
+			using Vectormath::Aos::Matrix3;
+			using Vectormath::Aos::Point3;
+
+			// Create SoftBody that will store the information within the solver
+			btOpenCLAcceleratedSoftBodyInterface *newSoftBody = new btOpenCLAcceleratedSoftBodyInterface( softBody );
+			m_softBodySet.push_back( newSoftBody );
+
+			m_perClothAcceleration.push_back( toVector3(softBody->getWorldInfo()->m_gravity) );
+			m_perClothDampingFactor.push_back(softBody->m_cfg.kDP);
+			m_perClothVelocityCorrectionCoefficient.push_back( softBody->m_cfg.kVCF );
+			m_perClothLiftFactor.push_back( softBody->m_cfg.kLF );
+			m_perClothDragFactor.push_back( softBody->m_cfg.kDG );
+			m_perClothMediumDensity.push_back(softBody->getWorldInfo()->air_density);
+			// Simple init values. Actually we'll put 0 and -1 into them at the appropriate time
+			m_perClothFriction.push_back(softBody->m_cfg.kDF);
+			m_perClothCollisionObjects.push_back( CollisionObjectIndices(-1, -1) );
+
+			// Add space for new vertices and triangles in the default solver for now
+			// TODO: Include space here for tearing too later
+			int firstVertex = getVertexData().getNumVertices();
+			int numVertices = softBody->m_nodes.size();
+			int maxVertices = numVertices;
+			// Allocate space for new vertices in all the vertex arrays
+			getVertexData().createVertices( maxVertices, softBodyIndex );
+
+			int firstTriangle = getTriangleData().getNumTriangles();
+			int numTriangles = softBody->m_faces.size();
+			int maxTriangles = numTriangles;
+			getTriangleData().createTriangles( maxTriangles );
+
+			// Copy vertices from softbody into the solver
+			for( int vertex = 0; vertex < numVertices; ++vertex )
+			{
+				Point3 multPoint(softBody->m_nodes[vertex].m_x.getX(), softBody->m_nodes[vertex].m_x.getY(), softBody->m_nodes[vertex].m_x.getZ());
+				btSoftBodyVertexData::VertexDescription desc;
+
+				// TODO: Position in the softbody might be pre-transformed
+				// or we may need to adapt for the pose.
+				//desc.setPosition( cloth.getMeshTransform()*multPoint );
+				desc.setPosition( multPoint );
+
+				float vertexInverseMass = softBody->m_nodes[vertex].m_im;
+				desc.setInverseMass(vertexInverseMass);
+				getVertexData().setVertexAt( desc, firstVertex + vertex );
+
+				// -1 marks "not an anchor"; real anchor slots are filled in after the loop
+				m_anchorIndex.push_back(-1);
+			}
+
+			// Copy triangles similarly
+			// The face stores node pointers; subtracting the address of node 0 gives the
+			// index of the node within this soft body.
+			for( int triangle = 0; triangle < numTriangles; ++triangle )
+			{
+				// Note that large array storage is relative to the array not to the cloth
+				// So we need to add firstVertex to each value
+				int vertexIndex0 = (softBody->m_faces[triangle].m_n[0] - &(softBody->m_nodes[0]));
+				int vertexIndex1 = (softBody->m_faces[triangle].m_n[1] - &(softBody->m_nodes[0]));
+				int vertexIndex2 = (softBody->m_faces[triangle].m_n[2] - &(softBody->m_nodes[0]));
+				btSoftBodyTriangleData::TriangleDescription newTriangle(vertexIndex0 + firstVertex, vertexIndex1 + firstVertex, vertexIndex2 + firstVertex);
+				getTriangleData().setTriangleAt( newTriangle, firstTriangle + triangle );
+				
+				// Increase vertex triangle counts for this triangle		
+				getVertexData().getTriangleCount(newTriangle.getVertexSet().vertex0)++;
+				getVertexData().getTriangleCount(newTriangle.getVertexSet().vertex1)++;
+				getVertexData().getTriangleCount(newTriangle.getVertexSet().vertex2)++;
+			}
+
+			int firstLink = getLinkData().getNumLinks();
+			int numLinks = softBody->m_links.size();
+			
+			// Allocate space for the links
+			getLinkData().createLinks( numLinks );
+
+			// Add the links
+			for( int link = 0; link < numLinks; ++link )
+			{
+				int vertexIndex0 = softBody->m_links[link].m_n[0] - &(softBody->m_nodes[0]);
+				int vertexIndex1 = softBody->m_links[link].m_n[1] - &(softBody->m_nodes[0]);
+
+				btSoftBodyLinkData::LinkDescription newLink(vertexIndex0 + firstVertex, vertexIndex1 + firstVertex, softBody->m_links[link].m_material->m_kLST);
+				newLink.setLinkStrength(1.f);
+				getLinkData().setLinkAt(newLink, firstLink + link);
+			}
+			
+			newSoftBody->setFirstVertex( firstVertex );
+			newSoftBody->setFirstTriangle( firstTriangle );
+			newSoftBody->setNumVertices( numVertices );
+			newSoftBody->setMaxVertices( maxVertices );
+			newSoftBody->setNumTriangles( numTriangles );
+			newSoftBody->setMaxTriangles( maxTriangles );
+			newSoftBody->setFirstLink( firstLink );
+			newSoftBody->setNumLinks( numLinks );
+
+			// Find maximum piterations and viterations
+			int piterations = softBody->m_cfg.piterations;
+
+			if ( piterations > maxPiterations )
+				maxPiterations = piterations;
+
+			int viterations = softBody->m_cfg.viterations;
+
+			if ( viterations > maxViterations )
+				maxViterations = viterations;
+
+			// Nodes with zero inverse mass are recorded as anchor nodes
+			for( int vertex = 0; vertex < numVertices; ++vertex )
+			{
+				if ( softBody->m_nodes[vertex].m_im == 0 )
+				{
+					AnchorNodeInfoCL nodeInfo;
+					nodeInfo.clVertexIndex = firstVertex + vertex;
+					nodeInfo.pNode = &softBody->m_nodes[vertex];
+
+					m_anchorNodeInfoArray.push_back(nodeInfo);
+				}
+			}			
+
+			// Nodes referenced by the soft body's explicit anchors are recorded as well
+			if ( numVertices > 0 )
+			{
+				for ( int anchorIndex = 0; anchorIndex < softBody->m_anchors.size(); anchorIndex++ )
+				{
+					btSoftBody::Node* anchorNode = softBody->m_anchors[anchorIndex].m_node;
+					btSoftBody::Node* firstNode = &softBody->m_nodes[0];
+
+					AnchorNodeInfoCL nodeInfo;
+					nodeInfo.clVertexIndex = firstVertex + (int)(anchorNode - firstNode);
+					nodeInfo.pNode = anchorNode;
+
+					m_anchorNodeInfoArray.push_back(nodeInfo);
+				}
+			}			
+		}
+
+		
+		m_anchorPosition.clear();		
+		m_anchorPosition.resize(m_anchorNodeInfoArray.size());
+
+		// Point each anchored vertex at its anchor slot and pin it by zeroing its inverse mass
+		for ( int anchorNode = 0; anchorNode < m_anchorNodeInfoArray.size(); anchorNode++ )
+		{
+			const AnchorNodeInfoCL& anchorNodeInfo = m_anchorNodeInfoArray[anchorNode];
+			m_anchorIndex[anchorNodeInfo.clVertexIndex] = anchorNode;
+			getVertexData().getInverseMass(anchorNodeInfo.clVertexIndex) = 0.0f;
+		}
+		
+		// Every link was recreated above, so the derived link constants must be
+		// recomputed; without re-arming the flag, updateConstants() would do nothing
+		// on any rebuild after the first.
+		m_updateSolverConstants = true;
+		updateConstants(0.f);
+
+		// set position and velocity iterations
+		setNumberOfPositionIterations(maxPiterations);
+		setNumberOfVelocityIterations(maxViterations);
+
+		// set wind velocity
+		m_perClothWindVelocity.resize( m_softBodySet.size() );
+		for( int softBodyIndex = 0; softBodyIndex < m_softBodySet.size(); ++softBodyIndex )
+		{
+			btSoftBody *softBody = m_softBodySet[softBodyIndex]->getSoftBody();			
+			m_perClothWindVelocity[softBodyIndex] = toVector3(softBody->getWindVelocity());
+		}
+
+		m_clPerClothWindVelocity.changedOnCPU();
+
+		// generate batches
+		m_linkData.generateBatches();		
+		m_triangleData.generateBatches();
+
+		// Build the shaders to match the batching parameters
+		buildShaders();
+	}
+}
+
+
+/** Return a mutable reference to the solver's link data store. */
+btSoftBodyLinkData &btOpenCLSoftBodySolver::getLinkData()
+{
+	// TODO: Consider setting link data to "changed" here
+	return m_linkData;
+}
+
+/** Return a mutable reference to the solver's vertex data store. */
+btSoftBodyVertexData &btOpenCLSoftBodySolver::getVertexData()
+{
+	// TODO: Consider setting vertex data to "changed" here
+	return m_vertexData;
+}
+
+/** Return a mutable reference to the solver's triangle data store. */
+btSoftBodyTriangleData &btOpenCLSoftBodySolver::getTriangleData()
+{
+	// TODO: Consider setting triangle data to "changed" here
+	return m_triangleData;
+}
+
+/**
+ * Launch the reset-normals-and-areas kernel over numVertices vertices.
+ *
+ * Binds the vertex count plus the vertex normal and vertex area buffers, then
+ * enqueues the kernel with the vertex count rounded up to a whole number of
+ * work groups. Nothing is enqueued when that rounded size is zero.
+ * Return codes from clSetKernelArg are not checked.
+ */
+void btOpenCLSoftBodySolver::resetNormalsAndAreas( int numVertices )
+{
+	cl_int status;
+	status = clSetKernelArg(m_resetNormalsAndAreasKernel, 0, sizeof(numVertices), (void*)&numVertices);
+	status = clSetKernelArg(m_resetNormalsAndAreasKernel, 1, sizeof(cl_mem), (void*)&m_vertexData.m_clVertexNormal.m_buffer);
+	status = clSetKernelArg(m_resetNormalsAndAreasKernel, 2, sizeof(cl_mem), (void*)&m_vertexData.m_clVertexArea.m_buffer);
+
+	// Round the vertex count up to a multiple of the work group size
+	size_t globalSize = m_defaultWorkGroupSize*((numVertices + (m_defaultWorkGroupSize-1)) / m_defaultWorkGroupSize);
+	if( globalSize == 0 )
+		return;
+
+	status = clEnqueueNDRangeKernel(m_cqCommandQue, m_resetNormalsAndAreasKernel, 1, NULL, &globalSize, &m_defaultWorkGroupSize, 0,0,0 );
+	if( status != CL_SUCCESS )
+	{
+		btAssert( 0 && "enqueueNDRangeKernel(m_resetNormalsAndAreasKernel)" );
+	}
+}
+
+/**
+ * Launch the normalize-normals-and-areas kernel over numVertices vertices.
+ *
+ * Binds the vertex count plus the per-vertex triangle count, normal and area
+ * buffers, then enqueues the kernel with the vertex count rounded up to a
+ * whole number of work groups. Nothing is enqueued when that rounded size is
+ * zero. Return codes from clSetKernelArg are not checked.
+ */
+void btOpenCLSoftBodySolver::normalizeNormalsAndAreas( int numVertices )
+{
+	cl_int status;
+	status = clSetKernelArg(m_normalizeNormalsAndAreasKernel, 0, sizeof(int),(void*) &numVertices);
+	status = clSetKernelArg(m_normalizeNormalsAndAreasKernel, 1, sizeof(cl_mem), &m_vertexData.m_clVertexTriangleCount.m_buffer);
+	status = clSetKernelArg(m_normalizeNormalsAndAreasKernel, 2, sizeof(cl_mem), &m_vertexData.m_clVertexNormal.m_buffer);
+	status = clSetKernelArg(m_normalizeNormalsAndAreasKernel, 3, sizeof(cl_mem), &m_vertexData.m_clVertexArea.m_buffer);
+
+	// Round the vertex count up to a multiple of the work group size
+	size_t globalSize = m_defaultWorkGroupSize*((numVertices + (m_defaultWorkGroupSize-1)) / m_defaultWorkGroupSize);
+	if( globalSize == 0 )
+		return;
+
+	status = clEnqueueNDRangeKernel(m_cqCommandQue, m_normalizeNormalsAndAreasKernel, 1, NULL, &globalSize, &m_defaultWorkGroupSize, 0,0,0);
+	if( status != CL_SUCCESS )
+	{
+		btAssert( 0 && "enqueueNDRangeKernel(m_normalizeNormalsAndAreasKernel)");
+	}
+}
+
+/**
+ * Launch the update-soft-bodies kernel for one contiguous range of triangles.
+ *
+ * Binds the range and the triangle/vertex buffers, then enqueues the kernel
+ * with numTriangles rounded up to a whole number of work groups. As in the
+ * other launch helpers, nothing is enqueued when that rounded size is zero.
+ * Return codes from clSetKernelArg are not checked.
+ *
+ * @param firstTriangle  Index of the first triangle in the range.
+ * @param numTriangles   Number of triangles in the range.
+ */
+void btOpenCLSoftBodySolver::executeUpdateSoftBodies( int firstTriangle, int numTriangles )
+{
+	cl_int ciErrNum;
+	ciErrNum = clSetKernelArg(m_updateSoftBodiesKernel, 0, sizeof(int), (void*) &firstTriangle);
+	ciErrNum = clSetKernelArg(m_updateSoftBodiesKernel, 1, sizeof(int), &numTriangles);
+	ciErrNum = clSetKernelArg(m_updateSoftBodiesKernel, 2, sizeof(cl_mem), &m_triangleData.m_clVertexIndices.m_buffer);
+	ciErrNum = clSetKernelArg(m_updateSoftBodiesKernel, 3, sizeof(cl_mem), &m_vertexData.m_clVertexPosition.m_buffer);
+	ciErrNum = clSetKernelArg(m_updateSoftBodiesKernel, 4, sizeof(cl_mem), &m_vertexData.m_clVertexNormal.m_buffer);
+	ciErrNum = clSetKernelArg(m_updateSoftBodiesKernel, 5, sizeof(cl_mem), &m_vertexData.m_clVertexArea.m_buffer);
+	ciErrNum = clSetKernelArg(m_updateSoftBodiesKernel, 6, sizeof(cl_mem), &m_triangleData.m_clNormal.m_buffer);
+	ciErrNum = clSetKernelArg(m_updateSoftBodiesKernel, 7, sizeof(cl_mem), &m_triangleData.m_clArea.m_buffer);
+
+	// Round the triangle count up to a multiple of the work group size
+	size_t numWorkItems = m_defaultWorkGroupSize*((numTriangles + (m_defaultWorkGroupSize-1)) / m_defaultWorkGroupSize);
+
+	// A zero global work size is not a valid launch, so skip empty ranges
+	if (numWorkItems)
+	{
+		ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue, m_updateSoftBodiesKernel, 1, NULL, &numWorkItems, &m_defaultWorkGroupSize,0,0,0);
+		if( ciErrNum != CL_SUCCESS ) 
+		{
+			// Message previously named m_normalizeNormalsAndAreasKernel by mistake
+			btAssert( 0 &&  "enqueueNDRangeKernel(m_updateSoftBodiesKernel)");
+		}
+	}
+
+}
+
+/**
+ * Recompute normals and areas on the accelerator.
+ *
+ * Moves vertex and triangle data to the accelerator, resets the per-vertex
+ * normals and areas, runs the update kernel once per triangle batch, and
+ * finally normalises the per-vertex results.
+ */
+void btOpenCLSoftBodySolver::updateSoftBodies()
+{
+	using namespace Vectormath::Aos;
+
+	const int vertexCount = m_vertexData.getNumVertices();
+
+	// Ensure data is on accelerator
+	m_vertexData.moveToAccelerator();
+	m_triangleData.moveToAccelerator();
+
+	resetNormalsAndAreas( vertexCount );
+
+	// One launch per triangle batch so updates occur correctly
+	const int batchCount = m_triangleData.m_batchStartLengths.size();
+	for( int batch = 0; batch < batchCount; ++batch )
+	{
+		executeUpdateSoftBodies(
+			m_triangleData.m_batchStartLengths[batch].first,
+			m_triangleData.m_batchStartLengths[batch].second );
+	}
+
+	normalizeNormalsAndAreas( vertexCount );
+} // updateSoftBodies
+
+
+/**
+ * Return a scaled by dot(v, a).
+ *
+ * This equals the projection of v onto the axis a only when a has unit length;
+ * a is not normalised here.
+ */
+Vectormath::Aos::Vector3 btOpenCLSoftBodySolver::ProjectOnAxis( const Vectormath::Aos::Vector3 &v, const Vectormath::Aos::Vector3 &a )
+{
+	return a*Vectormath::Aos::dot(v, a);
+}
+
+/**
+ * Accumulate a force into vertexForce, clamping it against the vertex velocity.
+ *
+ * If force * solverdt * inverseMass is longer than vertexVelocity, the force is
+ * not added; instead the component of vertexVelocity along the force direction,
+ * divided by solverdt * inverseMass, is subtracted from vertexForce. Otherwise
+ * force is added unchanged.
+ *
+ * @param solverdt        Solver time step.
+ * @param force           Force to apply.
+ * @param vertexVelocity  Current velocity of the vertex.
+ * @param inverseMass     Inverse mass of the vertex.
+ * @param vertexForce     Force accumulator, updated in place.
+ */
+void btOpenCLSoftBodySolver::ApplyClampedForce( float solverdt, const Vectormath::Aos::Vector3 &force, const Vectormath::Aos::Vector3 &vertexVelocity, float inverseMass, Vectormath::Aos::Vector3 &vertexForce )
+{
+	const float stepOverMass = solverdt*inverseMass;
+
+	if( Vectormath::Aos::lengthSqr(force * stepOverMass) > Vectormath::Aos::lengthSqr(vertexVelocity) )
+	{
+		// Clamped case: cancel the velocity component along the force direction
+		vertexForce -= ProjectOnAxis( vertexVelocity, normalize( force ) )/stepOverMass;
+		return;
+	}
+
+	vertexForce += force;
+}
+
+/**
+ * Launch the fixed-vertex-position kernel over every vertex.
+ *
+ * Moves the vertex data and the anchor position/index arrays to the GPU, binds
+ * them, and enqueues the kernel with the vertex count rounded up to a whole
+ * number of work groups. Nothing is enqueued when that rounded size is zero.
+ * Return codes from clSetKernelArg are not checked.
+ */
+void btOpenCLSoftBodySolver::updateFixedVertexPositions()
+{
+	// Ensure data is on accelerator
+	m_vertexData.moveToAccelerator();
+	m_clAnchorPosition.moveToGPU();
+	m_clAnchorIndex.moveToGPU();
+
+	int vertexCount = m_vertexData.getNumVertices();
+
+	cl_int status;
+	status = clSetKernelArg(m_updateFixedVertexPositionsKernel, 0, sizeof(int), &vertexCount);
+	status = clSetKernelArg(m_updateFixedVertexPositionsKernel, 1, sizeof(cl_mem), &m_clAnchorIndex.m_buffer);
+	status = clSetKernelArg(m_updateFixedVertexPositionsKernel, 2, sizeof(cl_mem), &m_vertexData.m_clVertexPosition.m_buffer);
+	status = clSetKernelArg(m_updateFixedVertexPositionsKernel, 3, sizeof(cl_mem), &m_clAnchorPosition.m_buffer);
+
+	// Round the vertex count up to a multiple of the work group size
+	size_t globalSize = m_defaultWorkGroupSize*((vertexCount + (m_defaultWorkGroupSize-1)) / m_defaultWorkGroupSize);
+	if( globalSize == 0 )
+		return;
+
+	status = clEnqueueNDRangeKernel(m_cqCommandQue,m_updateFixedVertexPositionsKernel, 1, NULL, &globalSize, &m_defaultWorkGroupSize, 0,0,0);
+	if( status != CL_SUCCESS )
+	{
+		btAssert( 0 &&  "enqueueNDRangeKernel(m_updateFixedVertexPositionsKernel)");
+	}
+}
+
+/**
+ * Launch the apply-forces kernel over every vertex.
+ *
+ * Moves the vertex data and the per-cloth acceleration, lift, drag, medium
+ * density and wind velocity arrays to the GPU, binds the kernel arguments, and
+ * enqueues the kernel with the vertex count rounded up to a whole number of
+ * work groups. Nothing is enqueued when that rounded size is zero.
+ * Return codes from clSetKernelArg are not checked.
+ *
+ * @param solverdt  Solver time step, passed to the kernel as argument 1.
+ */
+void btOpenCLSoftBodySolver::applyForces( float solverdt )
+{
+	// Ensure data is on accelerator
+	m_vertexData.moveToAccelerator();
+	m_clPerClothAcceleration.moveToGPU();
+	m_clPerClothLiftFactor.moveToGPU();
+	m_clPerClothDragFactor.moveToGPU();
+	m_clPerClothMediumDensity.moveToGPU();
+	m_clPerClothWindVelocity.moveToGPU();
+
+	int vertexCount = m_vertexData.getNumVertices();
+	float epsilon = FLT_EPSILON;
+
+	cl_int status;
+
+	// Scalar arguments
+	status = clSetKernelArg(m_applyForcesKernel, 0, sizeof(int), &vertexCount);
+	status = clSetKernelArg(m_applyForcesKernel, 1, sizeof(float), &solverdt);
+	status = clSetKernelArg(m_applyForcesKernel, 2, sizeof(float), &epsilon);
+
+	// Per-vertex inputs
+	status = clSetKernelArg(m_applyForcesKernel, 3, sizeof(cl_mem), &m_vertexData.m_clClothIdentifier.m_buffer);
+	status = clSetKernelArg(m_applyForcesKernel, 4, sizeof(cl_mem), &m_vertexData.m_clVertexNormal.m_buffer);
+	status = clSetKernelArg(m_applyForcesKernel, 5, sizeof(cl_mem), &m_vertexData.m_clVertexArea.m_buffer);
+	status = clSetKernelArg(m_applyForcesKernel, 6, sizeof(cl_mem), &m_vertexData.m_clVertexInverseMass.m_buffer);
+
+	// Per-cloth parameters
+	status = clSetKernelArg(m_applyForcesKernel, 7, sizeof(cl_mem), &m_clPerClothLiftFactor.m_buffer);
+	status = clSetKernelArg(m_applyForcesKernel, 8, sizeof(cl_mem), &m_clPerClothDragFactor.m_buffer);
+	status = clSetKernelArg(m_applyForcesKernel, 9, sizeof(cl_mem), &m_clPerClothWindVelocity.m_buffer);
+	status = clSetKernelArg(m_applyForcesKernel, 10, sizeof(cl_mem), &m_clPerClothAcceleration.m_buffer);
+	status = clSetKernelArg(m_applyForcesKernel, 11, sizeof(cl_mem), &m_clPerClothMediumDensity.m_buffer);
+
+	// Remaining per-vertex buffers
+	status = clSetKernelArg(m_applyForcesKernel, 12, sizeof(cl_mem), &m_vertexData.m_clVertexForceAccumulator.m_buffer);
+	status = clSetKernelArg(m_applyForcesKernel, 13, sizeof(cl_mem), &m_vertexData.m_clVertexVelocity.m_buffer);
+
+	// Round the vertex count up to a multiple of the work group size
+	size_t globalSize = m_defaultWorkGroupSize*((vertexCount + (m_defaultWorkGroupSize-1)) / m_defaultWorkGroupSize);
+	if( globalSize == 0 )
+		return;
+
+	status = clEnqueueNDRangeKernel(m_cqCommandQue,m_applyForcesKernel, 1, NULL, &globalSize, &m_defaultWorkGroupSize, 0,0,0);
+	if( status != CL_SUCCESS )
+	{
+		btAssert( 0 &&  "enqueueNDRangeKernel(m_applyForcesKernel)");
+	}
+}
+
+/**
+ * Integrate motion on the solver.
+ *
+ * Moves the vertex data to the accelerator, binds the vertex count, the time
+ * step and the inverse mass, position, velocity, previous position and force
+ * accumulator buffers, then enqueues the integrate kernel with the vertex
+ * count rounded up to a whole number of work groups. Nothing is enqueued when
+ * that rounded size is zero. Return codes from clSetKernelArg are not checked.
+ *
+ * @param solverdt  Solver time step, passed to the kernel as argument 1.
+ */
+void btOpenCLSoftBodySolver::integrate( float solverdt )
+{
+	// Ensure data is on accelerator
+	m_vertexData.moveToAccelerator();
+
+	int vertexCount = m_vertexData.getNumVertices();
+
+	cl_int status;
+	status = clSetKernelArg(m_integrateKernel, 0, sizeof(int), &vertexCount);
+	status = clSetKernelArg(m_integrateKernel, 1, sizeof(float), &solverdt);
+	status = clSetKernelArg(m_integrateKernel, 2, sizeof(cl_mem), &m_vertexData.m_clVertexInverseMass.m_buffer);
+	status = clSetKernelArg(m_integrateKernel, 3, sizeof(cl_mem), &m_vertexData.m_clVertexPosition.m_buffer);
+	status = clSetKernelArg(m_integrateKernel, 4, sizeof(cl_mem), &m_vertexData.m_clVertexVelocity.m_buffer);
+	status = clSetKernelArg(m_integrateKernel, 5, sizeof(cl_mem), &m_vertexData.m_clVertexPreviousPosition.m_buffer);
+	status = clSetKernelArg(m_integrateKernel, 6, sizeof(cl_mem), &m_vertexData.m_clVertexForceAccumulator.m_buffer);
+
+	// Round the vertex count up to a multiple of the work group size
+	size_t globalSize = m_defaultWorkGroupSize*((vertexCount + (m_defaultWorkGroupSize-1)) / m_defaultWorkGroupSize);
+	if( globalSize == 0 )
+		return;
+
+	status = clEnqueueNDRangeKernel(m_cqCommandQue,m_integrateKernel, 1, NULL, &globalSize, &m_defaultWorkGroupSize,0,0,0);
+	if( status != CL_SUCCESS )
+	{
+		btAssert( 0 &&  "enqueueNDRangeKernel(m_integrateKernel)");
+	}
+}
+
+/**
+ * Return the length of the cross product of the two edges leaving vertex0.
+ * The value is returned as computed; no 0.5 factor is applied here.
+ */
+float btOpenCLSoftBodySolver::computeTriangleArea( 
+	const Vectormath::Aos::Point3 &vertex0,
+	const Vectormath::Aos::Point3 &vertex1,
+	const Vectormath::Aos::Point3 &vertex2 )
+{
+	Vectormath::Aos::Vector3 edge01 = vertex1 - vertex0;
+	Vectormath::Aos::Vector3 edge02 = vertex2 - vertex0;
+	Vectormath::Aos::Vector3 edgeCross = cross(edge01, edge02);
+	return length( edgeCross );
+}
+
+
+/**
+ * Hand every soft body in the solver the same fixed, very large bounding box
+ * (-1e30 .. 1e30 on each axis); no per-vertex bounds are computed here.
+ */
+void btOpenCLSoftBodySolver::updateBounds()
+{
+	for( int body = 0; body < m_softBodySet.size(); ++body )
+	{
+		btVector3 lower(-1e30,-1e30,-1e30);
+		btVector3 upper(1e30,1e30,1e30);
+		m_softBodySet[body]->updateBounds( lower, upper );
+	}
+} // btOpenCLSoftBodySolver::updateBounds
+
+
+/**
+ * Recompute the per-link constants (rest length, rest length squared, mass/stiffness term)
+ * when m_updateSolverConstants is set, then clear that flag.
+ *
+ * @param timeStep Not used by this implementation.
+ */
+void btOpenCLSoftBodySolver::updateConstants( float timeStep )
+{
+	using namespace Vectormath::Aos;
+
+	// Nothing to do unless a rebuild has been requested
+	if( !m_updateSolverConstants )
+		return;
+	m_updateSolverConstants = false;
+
+	// Will have to redo this if we change the structure (tear, maybe) or various other possible changes
+	const int linkCount = m_linkData.getNumLinks();
+	for( int link = 0; link < linkCount; ++link )
+	{
+		btSoftBodyLinkData::LinkNodePair &ends( m_linkData.getVertexPair(link) );
+
+		// Rest length is the current distance between the two end vertices
+		m_linkData.getRestLength(link) = length((m_vertexData.getPosition( ends.vertex0 ) - m_vertexData.getPosition( ends.vertex1 )));
+
+		// Summed inverse mass of both ends divided by the link's linear stiffness
+		float inverseMassA = m_vertexData.getInverseMass(ends.vertex0);
+		float inverseMassB = m_vertexData.getInverseMass(ends.vertex1);
+		float stiffness = m_linkData.getLinearStiffnessCoefficient(link);
+		m_linkData.getMassLSC(link) = (inverseMassA + inverseMassB)/stiffness;
+
+		float storedRestLength = m_linkData.getRestLength(link);
+		m_linkData.getRestLengthSquared(link) = storedRestLength*storedRestLength;
+	}
+}
+
+// Ordering predicate for quickSort: collision shape descriptions compare by
+// ascending softBodyIdentifier.
+class QuickSortCompare
+{
+public:
+	bool operator() ( const CollisionShapeDescription& lhs, const CollisionShapeDescription& rhs ) const
+	{
+		const bool lhsSortsFirst = lhs.softBodyIdentifier < rhs.softBodyIdentifier;
+		return lhsSortsFirst;
+	}
+};
+
+
+/**
+ * Sort the collision object details array and generate indexing into it for the per-cloth collision object array.
+ *
+ * After the sort, all objects belonging to one cloth are contiguous. Each cloth that
+ * has objects gets a half-open range [firstObject, endObject) into the sorted array;
+ * cloths that are not visited keep -1 in both fields.
+ */
+void btOpenCLSoftBodySolver::prepareCollisionConstraints()
+{
+	// Group the collision objects by owning cloth (ascending softBodyIdentifier)
+	m_collisionObjectDetails.quickSort( QuickSortCompare() );
+
+	if (!m_perClothCollisionObjects.size())
+		return;
+
+	// Generating indexing for perClothCollisionObjects
+	// First clear the previous values with the "no collision object for cloth" constant
+	for( int clothIndex = 0; clothIndex < m_perClothCollisionObjects.size(); ++clothIndex )
+	{
+		m_perClothCollisionObjects[clothIndex].firstObject = -1;
+		m_perClothCollisionObjects[clothIndex].endObject = -1;
+	}
+	int currentCloth = 0;
+	int startIndex = 0;
+	for( int collisionObject = 0; collisionObject < m_collisionObjectDetails.size(); ++collisionObject )
+	{
+		int nextCloth = m_collisionObjectDetails[collisionObject].softBodyIdentifier;
+		if( nextCloth != currentCloth )
+		{	
+			// Changed cloth in the array
+			// Set the end index and the range is what we need for currentCloth
+			m_perClothCollisionObjects[currentCloth].firstObject = startIndex;
+			m_perClothCollisionObjects[currentCloth].endObject = collisionObject;
+			currentCloth = nextCloth;
+			startIndex = collisionObject;
+		}
+	}
+
+	// And update last cloth	
+	m_perClothCollisionObjects[currentCloth].firstObject = startIndex;
+	m_perClothCollisionObjects[currentCloth].endObject =  m_collisionObjectDetails.size();
+	
+} // btOpenCLSoftBodySolver::prepareCollisionConstraints
+
+
+
+/**
+ * Run one constraint-solving pass on the device.
+ *
+ * Order of work: upload per-cloth parameters and link/vertex data, prepare links,
+ * velocity iterations over every link batch, rebuild the per-cloth collision ranges,
+ * derive velocities from positions, position iterations over every link batch, and
+ * finally resolve collisions.
+ *
+ * @param solverdt Time step; its reciprocal is what the velocity/collision kernels receive.
+ */
+void btOpenCLSoftBodySolver::solveConstraints( float solverdt )
+{
+	using Vectormath::Aos::Vector3;
+	using Vectormath::Aos::Point3;
+	using Vectormath::Aos::lengthSqr;
+	using Vectormath::Aos::dot;
+
+	// Fixed solver parameters forwarded to the link kernels
+	const float kst = 1.f;
+	const float ti = 0.f;
+	const float inverseDt = 1.f/solverdt;
+
+	m_clPerClothDampingFactor.moveToGPU();
+	m_clPerClothVelocityCorrectionCoefficient.moveToGPU();
+
+	// Ensure data is on accelerator
+	m_linkData.moveToAccelerator();
+	m_vertexData.moveToAccelerator();
+
+	prepareLinks();
+
+	// Velocity pass: each iteration walks the link batches in order
+	for( int velocityPass = 0; velocityPass < m_numberOfVelocityIterations; ++velocityPass )
+	{
+		for( int batch = 0; batch < m_linkData.m_batchStartLengths.size(); ++batch )
+		{
+			solveLinksForVelocity(
+				m_linkData.m_batchStartLengths[batch].start,
+				m_linkData.m_batchStartLengths[batch].length,
+				kst );
+		}
+	}
+
+	prepareCollisionConstraints();
+
+	// Compute new positions from velocity
+	// Also update the previous position so that our position computation is now based on the new position from the velocity solution
+	// rather than based directly on the original positions
+	if( m_numberOfVelocityIterations <= 0 )
+	{
+		updateVelocitiesFromPositionsWithoutVelocities( inverseDt );
+	} else {
+		updateVelocitiesFromPositionsWithVelocities( inverseDt );
+	}
+
+	// Position pass: same batch walk as the velocity pass
+	for( int positionPass = 0; positionPass < m_numberOfPositionIterations; ++positionPass )
+	{
+		for( int batch = 0; batch < m_linkData.m_batchStartLengths.size(); ++batch )
+		{
+			solveLinksForPosition(
+				m_linkData.m_batchStartLengths[batch].start,
+				m_linkData.m_batchStartLengths[batch].length,
+				kst, ti );
+		}
+	}
+
+	// At this point assume that the force array is blank - we will overwrite it
+	solveCollisionsAndUpdateVelocities( inverseDt );
+}
+
+
+//////////////////////////////////////
+// Kernel dispatches
+/**
+ * Bind the link and vertex buffers to m_prepareLinksKernel and dispatch it over all links.
+ *
+ * The global work size is the link count rounded up to a whole number of work groups.
+ * Nothing is enqueued when that size is zero: a zero global work size is rejected by
+ * clEnqueueNDRangeKernel, and integrate() / solveCollisionsAndUpdateVelocities() in this
+ * file already guard their dispatches the same way.
+ */
+void btOpenCLSoftBodySolver::prepareLinks()
+{
+	cl_int ciErrNum;
+	int numLinks = m_linkData.getNumLinks();
+	ciErrNum = clSetKernelArg(m_prepareLinksKernel,0, sizeof(int), &numLinks);
+	ciErrNum = clSetKernelArg(m_prepareLinksKernel,1, sizeof(cl_mem), &m_linkData.m_clLinks.m_buffer);
+	ciErrNum = clSetKernelArg(m_prepareLinksKernel,2, sizeof(cl_mem), &m_linkData.m_clLinksMassLSC.m_buffer);
+	ciErrNum = clSetKernelArg(m_prepareLinksKernel,3, sizeof(cl_mem), &m_vertexData.m_clVertexPreviousPosition.m_buffer);
+	ciErrNum = clSetKernelArg(m_prepareLinksKernel,4, sizeof(cl_mem), &m_linkData.m_clLinksLengthRatio.m_buffer);
+	ciErrNum = clSetKernelArg(m_prepareLinksKernel,5, sizeof(cl_mem), &m_linkData.m_clLinksCLength.m_buffer);
+
+	size_t	numWorkItems = m_defaultWorkGroupSize*((m_linkData.getNumLinks() + (m_defaultWorkGroupSize-1)) / m_defaultWorkGroupSize);
+	if (numWorkItems)
+	{
+		ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue,m_prepareLinksKernel, 1 , NULL, &numWorkItems, &m_defaultWorkGroupSize,0,0,0);
+		if( ciErrNum != CL_SUCCESS ) 
+		{
+			btAssert( 0 &&  "enqueueNDRangeKernel(m_prepareLinksKernel)");
+		}
+	}
+
+}
+
+/**
+ * Bind arguments for m_updatePositionsFromVelocitiesKernel and dispatch it over all vertices.
+ *
+ * @param solverdt Time step handed to the kernel as argument 1.
+ */
+void btOpenCLSoftBodySolver::updatePositionsFromVelocities( float solverdt )
+{
+	cl_kernel kernel = m_updatePositionsFromVelocitiesKernel;
+	int vertexCount = m_vertexData.getNumVertices();
+
+	cl_int status;
+	status = clSetKernelArg(kernel, 0, sizeof(int), &vertexCount);
+	status = clSetKernelArg(kernel, 1, sizeof(float), &solverdt);
+	status = clSetKernelArg(kernel, 2, sizeof(cl_mem), &m_vertexData.m_clVertexVelocity.m_buffer);
+	status = clSetKernelArg(kernel, 3, sizeof(cl_mem), &m_vertexData.m_clVertexPreviousPosition.m_buffer);
+	status = clSetKernelArg(kernel, 4, sizeof(cl_mem), &m_vertexData.m_clVertexPosition.m_buffer);
+
+	// Global size is the vertex count rounded up to a whole number of work groups
+	const size_t groupCount = (m_vertexData.getNumVertices() + (m_defaultWorkGroupSize-1)) / m_defaultWorkGroupSize;
+	size_t globalSize = m_defaultWorkGroupSize*groupCount;
+	status = clEnqueueNDRangeKernel(m_cqCommandQue, kernel, 1, NULL, &globalSize, &m_defaultWorkGroupSize, 0,0,0);
+	if( status != CL_SUCCESS )
+	{
+		btAssert( 0 &&  "enqueueNDRangeKernel(m_updatePositionsFromVelocitiesKernel)");
+	}
+}
+
+/**
+ * Dispatch m_solvePositionsFromLinksKernel for one batch of links.
+ *
+ * @param startLink Index of the first link in the batch (kernel argument 0).
+ * @param numLinks  Number of links in the batch (kernel argument 1); also sizes the dispatch.
+ * @param kst       Passed through to the kernel as argument 2.
+ * @param ti        Passed through to the kernel as argument 3.
+ */
+void btOpenCLSoftBodySolver::solveLinksForPosition( int startLink, int numLinks, float kst, float ti )
+{
+	cl_kernel kernel = m_solvePositionsFromLinksKernel;
+
+	cl_int status;
+	status = clSetKernelArg(kernel, 0, sizeof(int), &startLink);
+	status = clSetKernelArg(kernel, 1, sizeof(int), &numLinks);
+	status = clSetKernelArg(kernel, 2, sizeof(float), &kst);
+	status = clSetKernelArg(kernel, 3, sizeof(float), &ti);
+	status = clSetKernelArg(kernel, 4, sizeof(cl_mem), &m_linkData.m_clLinks.m_buffer);
+	status = clSetKernelArg(kernel, 5, sizeof(cl_mem), &m_linkData.m_clLinksMassLSC.m_buffer);
+	status = clSetKernelArg(kernel, 6, sizeof(cl_mem), &m_linkData.m_clLinksRestLengthSquared.m_buffer);
+	status = clSetKernelArg(kernel, 7, sizeof(cl_mem), &m_vertexData.m_clVertexInverseMass.m_buffer);
+	status = clSetKernelArg(kernel, 8, sizeof(cl_mem), &m_vertexData.m_clVertexPosition.m_buffer);
+
+	// Global size is the batch length rounded up to a whole number of work groups
+	const size_t groupCount = (numLinks + (m_defaultWorkGroupSize-1)) / m_defaultWorkGroupSize;
+	size_t globalSize = m_defaultWorkGroupSize*groupCount;
+	status = clEnqueueNDRangeKernel(m_cqCommandQue, kernel, 1, NULL, &globalSize, &m_defaultWorkGroupSize, 0,0,0);
+	if( status != CL_SUCCESS )
+	{
+		btAssert( 0 &&  "enqueueNDRangeKernel(m_solvePositionsFromLinksKernel)");
+	}
+} // solveLinksForPosition
+
+
+/**
+ * Dispatch m_vSolveLinksKernel for one batch of links.
+ *
+ * @param startLink Index of the first link in the batch (kernel argument 0).
+ * @param numLinks  Number of links in the batch (kernel argument 1); also sizes the dispatch.
+ * @param kst       Passed through to the kernel as argument 2.
+ */
+void btOpenCLSoftBodySolver::solveLinksForVelocity( int startLink, int numLinks, float kst )
+{
+	cl_kernel kernel = m_vSolveLinksKernel;
+
+	cl_int status;
+	status = clSetKernelArg(kernel, 0, sizeof(int), &startLink);
+	status = clSetKernelArg(kernel, 1, sizeof(int), &numLinks);
+	status = clSetKernelArg(kernel, 2, sizeof(float), &kst);
+	status = clSetKernelArg(kernel, 3, sizeof(cl_mem), &m_linkData.m_clLinks.m_buffer);
+	status = clSetKernelArg(kernel, 4, sizeof(cl_mem), &m_linkData.m_clLinksLengthRatio.m_buffer);
+	status = clSetKernelArg(kernel, 5, sizeof(cl_mem), &m_linkData.m_clLinksCLength.m_buffer);
+	status = clSetKernelArg(kernel, 6, sizeof(cl_mem), &m_vertexData.m_clVertexInverseMass.m_buffer);
+	status = clSetKernelArg(kernel, 7, sizeof(cl_mem), &m_vertexData.m_clVertexVelocity.m_buffer);
+
+	// Global size is the batch length rounded up to a whole number of work groups
+	const size_t groupCount = (numLinks + (m_defaultWorkGroupSize-1)) / m_defaultWorkGroupSize;
+	size_t globalSize = m_defaultWorkGroupSize*groupCount;
+	status = clEnqueueNDRangeKernel(m_cqCommandQue, kernel, 1, NULL, &globalSize, &m_defaultWorkGroupSize, 0,0,0);
+	if( status != CL_SUCCESS )
+	{
+		btAssert( 0 &&  "enqueueNDRangeKernel(m_vSolveLinksKernel)");
+	}
+}
+
+/**
+ * Bind arguments for m_updateVelocitiesFromPositionsWithVelocitiesKernel and dispatch it
+ * over all vertices.
+ *
+ * Nothing is enqueued when there are no vertices: a zero global work size is rejected by
+ * clEnqueueNDRangeKernel, and integrate() / solveCollisionsAndUpdateVelocities() in this
+ * file already guard their dispatches the same way.
+ *
+ * @param isolverdt Value handed to the kernel as argument 1 (solveConstraints passes 1/solverdt).
+ */
+void btOpenCLSoftBodySolver::updateVelocitiesFromPositionsWithVelocities( float isolverdt )
+{
+	cl_int ciErrNum;
+	int numVerts = m_vertexData.getNumVertices();
+	ciErrNum = clSetKernelArg(m_updateVelocitiesFromPositionsWithVelocitiesKernel,0, sizeof(int), &numVerts);
+	ciErrNum = clSetKernelArg(m_updateVelocitiesFromPositionsWithVelocitiesKernel, 1, sizeof(float), &isolverdt);
+	ciErrNum = clSetKernelArg(m_updateVelocitiesFromPositionsWithVelocitiesKernel, 2, sizeof(cl_mem), &m_vertexData.m_clVertexPosition.m_buffer);
+	ciErrNum = clSetKernelArg(m_updateVelocitiesFromPositionsWithVelocitiesKernel, 3, sizeof(cl_mem), &m_vertexData.m_clVertexPreviousPosition.m_buffer);
+	ciErrNum = clSetKernelArg(m_updateVelocitiesFromPositionsWithVelocitiesKernel, 4, sizeof(cl_mem), &m_vertexData.m_clClothIdentifier.m_buffer);
+	ciErrNum = clSetKernelArg(m_updateVelocitiesFromPositionsWithVelocitiesKernel, 5, sizeof(cl_mem), &m_clPerClothVelocityCorrectionCoefficient.m_buffer);
+	ciErrNum = clSetKernelArg(m_updateVelocitiesFromPositionsWithVelocitiesKernel, 6, sizeof(cl_mem), &m_clPerClothDampingFactor.m_buffer);
+	ciErrNum = clSetKernelArg(m_updateVelocitiesFromPositionsWithVelocitiesKernel, 7, sizeof(cl_mem), &m_vertexData.m_clVertexVelocity.m_buffer);
+	ciErrNum = clSetKernelArg(m_updateVelocitiesFromPositionsWithVelocitiesKernel, 8, sizeof(cl_mem), &m_vertexData.m_clVertexForceAccumulator.m_buffer);
+
+	// Vertex count rounded up to a whole number of work groups
+	size_t	numWorkItems = m_defaultWorkGroupSize*((m_vertexData.getNumVertices() + (m_defaultWorkGroupSize-1)) / m_defaultWorkGroupSize);
+	if (numWorkItems)
+	{
+		ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue,m_updateVelocitiesFromPositionsWithVelocitiesKernel, 1, NULL, &numWorkItems, &m_defaultWorkGroupSize,0,0,0);
+		if( ciErrNum != CL_SUCCESS ) 
+		{
+			btAssert( 0 &&  "enqueueNDRangeKernel(m_updateVelocitiesFromPositionsWithVelocitiesKernel)");
+		}
+	}
+
+} // updateVelocitiesFromPositionsWithVelocities
+
+/**
+ * Bind arguments for m_updateVelocitiesFromPositionsWithoutVelocitiesKernel and dispatch it
+ * over all vertices.
+ *
+ * Nothing is enqueued when there are no vertices: a zero global work size is rejected by
+ * clEnqueueNDRangeKernel, and integrate() / solveCollisionsAndUpdateVelocities() in this
+ * file already guard their dispatches the same way.
+ *
+ * @param isolverdt Value handed to the kernel as argument 1 (solveConstraints passes 1/solverdt).
+ */
+void btOpenCLSoftBodySolver::updateVelocitiesFromPositionsWithoutVelocities( float isolverdt )
+{
+	cl_int ciErrNum;
+	int numVerts = m_vertexData.getNumVertices();
+	ciErrNum = clSetKernelArg(m_updateVelocitiesFromPositionsWithoutVelocitiesKernel, 0, sizeof(int), &numVerts);
+	ciErrNum = clSetKernelArg(m_updateVelocitiesFromPositionsWithoutVelocitiesKernel, 1, sizeof(float), &isolverdt);
+	ciErrNum = clSetKernelArg(m_updateVelocitiesFromPositionsWithoutVelocitiesKernel, 2, sizeof(cl_mem),&m_vertexData.m_clVertexPosition.m_buffer);
+	ciErrNum = clSetKernelArg(m_updateVelocitiesFromPositionsWithoutVelocitiesKernel, 3, sizeof(cl_mem),&m_vertexData.m_clVertexPreviousPosition.m_buffer);
+	ciErrNum = clSetKernelArg(m_updateVelocitiesFromPositionsWithoutVelocitiesKernel, 4, sizeof(cl_mem),&m_vertexData.m_clClothIdentifier.m_buffer);
+	ciErrNum = clSetKernelArg(m_updateVelocitiesFromPositionsWithoutVelocitiesKernel, 5, sizeof(cl_mem),&m_clPerClothDampingFactor.m_buffer);
+	ciErrNum = clSetKernelArg(m_updateVelocitiesFromPositionsWithoutVelocitiesKernel, 6, sizeof(cl_mem),&m_vertexData.m_clVertexVelocity.m_buffer);
+	ciErrNum = clSetKernelArg(m_updateVelocitiesFromPositionsWithoutVelocitiesKernel, 7, sizeof(cl_mem),&m_vertexData.m_clVertexForceAccumulator.m_buffer);
+
+	// Vertex count rounded up to a whole number of work groups
+	size_t	numWorkItems = m_defaultWorkGroupSize*((m_vertexData.getNumVertices() + (m_defaultWorkGroupSize-1)) / m_defaultWorkGroupSize);
+	if (numWorkItems)
+	{
+		ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue,m_updateVelocitiesFromPositionsWithoutVelocitiesKernel, 1, NULL, &numWorkItems, &m_defaultWorkGroupSize,0,0,0);
+		if( ciErrNum != CL_SUCCESS ) 
+		{
+			btAssert( 0 &&  "enqueueNDRangeKernel(m_updateVelocitiesFromPositionsWithoutVelocitiesKernel)");
+		}
+	}
+
+} // updateVelocitiesFromPositionsWithoutVelocities
+
+
+
+/**
+ * Upload the per-cloth collision parameters, bind them to
+ * m_solveCollisionsAndUpdateVelocitiesKernel and dispatch it over all vertices.
+ *
+ * Nothing is enqueued when there are no vertices.
+ *
+ * @param isolverdt Value handed to the kernel as float argument 1
+ *                  (solveConstraints passes 1/solverdt).
+ */
+void btOpenCLSoftBodySolver::solveCollisionsAndUpdateVelocities( float isolverdt )
+{
+	// Copy kernel parameters to GPU
+	m_vertexData.moveToAccelerator();
+	m_clPerClothFriction.moveToGPU();
+	m_clPerClothDampingFactor.moveToGPU();
+	m_clPerClothCollisionObjects.moveToGPU();
+	m_clCollisionObjectDetails.moveToGPU();
+
+	cl_int ciErrNum;
+	int numVerts = m_vertexData.getNumVertices();
+	ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 0, sizeof(int), &numVerts);
+	// isolverdt is a float, so its size must be given as sizeof(float)
+	ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 1, sizeof(float), &isolverdt);
+	ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 2, sizeof(cl_mem),&m_vertexData.m_clClothIdentifier.m_buffer);
+	ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 3, sizeof(cl_mem),&m_vertexData.m_clVertexPreviousPosition.m_buffer);
+	ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 4, sizeof(cl_mem),&m_clPerClothFriction.m_buffer);
+	ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 5, sizeof(cl_mem),&m_clPerClothDampingFactor.m_buffer);
+	ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 6, sizeof(cl_mem),&m_clPerClothCollisionObjects.m_buffer);
+	ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 7, sizeof(cl_mem),&m_clCollisionObjectDetails.m_buffer);
+	ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 8, sizeof(cl_mem),&m_vertexData.m_clVertexForceAccumulator.m_buffer);
+	ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 9, sizeof(cl_mem),&m_vertexData.m_clVertexVelocity.m_buffer);
+	ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 10, sizeof(cl_mem),&m_vertexData.m_clVertexPosition.m_buffer);
+
+	// Vertex count rounded up to a whole number of work groups
+	size_t	numWorkItems = m_defaultWorkGroupSize*((m_vertexData.getNumVertices() + (m_defaultWorkGroupSize-1)) / m_defaultWorkGroupSize);
+	if (numWorkItems)
+	{
+		ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue,m_solveCollisionsAndUpdateVelocitiesKernel, 1, NULL, &numWorkItems, &m_defaultWorkGroupSize,0,0,0);
+		if( ciErrNum != CL_SUCCESS ) 
+		{
+			btAssert( 0 &&  "enqueueNDRangeKernel(m_solveCollisionsAndUpdateVelocitiesKernel)");
+		}
+	}
+
+} // btOpenCLSoftBodySolver::solveCollisionsAndUpdateVelocities
+
+
+
+// End kernel dispatches
+/////////////////////////////////////
+
+
+/**
+ * Copy one soft body's vertex positions and normals from the OpenCL solver into a
+ * CPU-side vertex buffer.
+ *
+ * Only buffers of type CPU_BUFFER are handled; any other buffer type is left untouched.
+ * Positions and normals are each written as three consecutive floats, advancing by the
+ * descriptor's stride (counted in floats) per vertex.
+ *
+ * @param softBody     Soft body whose vertices are exported; its solver must be a CL solver.
+ * @param vertexBuffer Destination descriptor.
+ */
+void btSoftBodySolverOutputCLtoCPU::copySoftBodyToVertexBuffer( const btSoftBody * const softBody, btVertexBufferDescriptor *vertexBuffer )
+{
+	btSoftBodySolver *solver = softBody->getSoftBodySolver();
+	btAssert( solver->getSolverType() == btSoftBodySolver::CL_SOLVER || solver->getSolverType() == btSoftBodySolver::CL_SIMD_SOLVER );
+	btOpenCLSoftBodySolver *clSolver = static_cast< btOpenCLSoftBodySolver * >( solver );
+
+	btOpenCLAcceleratedSoftBodyInterface* cloth = clSolver->findSoftBodyInterface( softBody );
+	btSoftBodyVertexDataOpenCL &vertexData( clSolver->m_vertexData );
+
+	// Half-open range of this cloth's vertices inside the solver's shared vertex arrays
+	const int firstVertex = cloth->getFirstVertex();
+	const int endVertex = firstVertex + cloth->getNumVertices();
+
+	if( vertexBuffer->getBufferType() != btVertexBufferDescriptor::CPU_BUFFER )
+		return;
+
+	const btCPUVertexBufferDescriptor *cpuVertexBuffer = static_cast< btCPUVertexBufferDescriptor* >(vertexBuffer);
+	float *basePointer = cpuVertexBuffer->getBasePointer();
+
+	// Bring the latest device results back to host memory before reading them
+	vertexData.m_clVertexPosition.copyFromGPU();
+	vertexData.m_clVertexNormal.copyFromGPU();
+
+	if( vertexBuffer->hasVertexPositions() )
+	{
+		const int positionOffset = cpuVertexBuffer->getVertexOffset();
+		const int positionStride = cpuVertexBuffer->getVertexStride();
+		float *out = basePointer + positionOffset;
+
+		for( int v = firstVertex; v < endVertex; ++v, out += positionStride )
+		{
+			Vectormath::Aos::Point3 position = vertexData.getPosition(v);
+			out[0] = position.getX();
+			out[1] = position.getY();
+			out[2] = position.getZ();
+		}
+	}
+
+	if( vertexBuffer->hasNormals() )
+	{
+		const int normalOffset = cpuVertexBuffer->getNormalOffset();
+		const int normalStride = cpuVertexBuffer->getNormalStride();
+		float *out = basePointer + normalOffset;
+
+		for( int v = firstVertex; v < endVertex; ++v, out += normalStride )
+		{
+			Vectormath::Aos::Vector3 normal = vertexData.getNormal(v);
+			out[0] = normal.getX();
+			out[1] = normal.getY();
+			out[2] = normal.getZ();
+		}
+	}
+} // btSoftBodySolverOutputCLtoCPU::copySoftBodyToVertexBuffer
+
+
+
+/**
+ * Build an OpenCL program from source text and create one kernel from it.
+ *
+ * On any failure the build log (for a build failure) or a description of the
+ * clCreateKernel error is printed, m_kernelCompilationFailures is incremented and
+ * 0 is returned. Temporary host allocations and the intermediate program object are
+ * released on every path; a successfully created kernel keeps its program alive.
+ *
+ * @param kernelSource             Null-terminated OpenCL C source.
+ * @param kernelName               Name of the __kernel function to create.
+ * @param additionalMacros         Extra build options appended after the default flags.
+ * @param orgSrcFileNameForCaching Not used by this implementation.
+ * @return The created kernel, or 0 on failure.
+ */
+cl_kernel CLFunctions::compileCLKernelFromString( const char* kernelSource, const char* kernelName, const char* additionalMacros ,const char* orgSrcFileNameForCaching)
+{
+	printf("compiling kernelName: %s ",kernelName);
+	cl_kernel kernel=0;
+	cl_int ciErrNum;
+	size_t program_length = strlen(kernelSource);
+
+	cl_program m_cpProgram = clCreateProgramWithSource(m_cxMainContext, 1, (const char**)&kernelSource, &program_length, &ciErrNum);
+//	oclCHECKERROR(ciErrNum, CL_SUCCESS);
+		
+    // Build the program with 'mad' Optimization option
+
+	
+#ifdef MAC
+	const char* flags = "-cl-mad-enable -DMAC -DGUID_ARG";
+#else
+	//const char* flags = "-DGUID_ARG= -fno-alias";
+	const char* flags = "-DGUID_ARG= ";
+#endif
+
+	char* compileFlags = new char[strlen(additionalMacros) + strlen(flags) + 5];
+	sprintf(compileFlags, "%s %s", flags, additionalMacros);
+    ciErrNum = clBuildProgram(m_cpProgram, 0, NULL, compileFlags, NULL, NULL);
+	// No completion callback was supplied, so the build has finished by the time
+	// clBuildProgram returns and the option string is no longer needed.
+	delete [] compileFlags;
+
+    if (ciErrNum != CL_SUCCESS)
+    {
+		// CL_PROGRAM_DEVICES reports its size in BYTES; convert to a device count
+		size_t deviceBytes = 0;
+		clGetProgramInfo( m_cpProgram, CL_PROGRAM_DEVICES, 0, 0, &deviceBytes );
+		const size_t numDevices = deviceBytes / sizeof(cl_device_id);
+		if( numDevices > 0 )
+		{
+			cl_device_id *devices = new cl_device_id[numDevices];
+			if( clGetProgramInfo( m_cpProgram, CL_PROGRAM_DEVICES, deviceBytes, devices, NULL ) == CL_SUCCESS )
+			{
+				// Print the build log of every device the program was built for
+				for( size_t i = 0; i < numDevices; ++i )
+				{
+					size_t ret_val_size = 0;
+					clGetProgramBuildInfo(m_cpProgram, devices[i], CL_PROGRAM_BUILD_LOG, 0, NULL, &ret_val_size);
+					char *build_log = new char[ret_val_size+1];
+					build_log[0] = '\0';
+					clGetProgramBuildInfo(m_cpProgram, devices[i], CL_PROGRAM_BUILD_LOG, ret_val_size, build_log, NULL);
+
+					// To be careful, terminate with \0:
+					// there's no information in the reference whether the string is 0 terminated or not
+					build_log[ret_val_size] = '\0';
+
+					printf("Error in clBuildProgram, Line %u in file %s, Log: \n%s\n !!!\n\n", __LINE__, __FILE__, build_log);
+					delete[] build_log;
+				}
+			}
+			delete [] devices;
+		}
+		if( m_cpProgram )
+			clReleaseProgram( m_cpProgram );
+#ifndef BT_SUPPRESS_OPENCL_ASSERTS
+		btAssert(0);
+#endif //BT_SUPPRESS_OPENCL_ASSERTS
+		m_kernelCompilationFailures++;
+		return 0;
+    }
+	
+	
+    // Create the kernel
+    kernel = clCreateKernel(m_cpProgram, kernelName, &ciErrNum);
+
+	// The kernel (if any) holds its own reference to the program, so our reference
+	// can be dropped now regardless of whether kernel creation succeeded.
+	clReleaseProgram( m_cpProgram );
+
+    if (ciErrNum != CL_SUCCESS)
+    {
+		const char* msg = "";
+        switch(ciErrNum)
+        {
+        case CL_INVALID_PROGRAM:
+            msg = "Program is not a valid program object.";
+            break;
+        case CL_INVALID_PROGRAM_EXECUTABLE:
+            msg = "There is no successfully built executable for program.";
+            break;
+        case CL_INVALID_KERNEL_NAME:
+            msg = "kernel_name is not found in program.";
+            break;
+        case CL_INVALID_KERNEL_DEFINITION:
+            msg = "the function definition for __kernel function given by kernel_name such as the number of arguments, the argument types are not the same for all devices for which the program executable has been built.";
+            break;
+        case CL_INVALID_VALUE:
+            msg = "kernel_name is NULL.";
+            break;
+        case CL_OUT_OF_HOST_MEMORY:
+            msg = "Failure to allocate resources required by the OpenCL implementation on the host.";
+            break;
+		default:
+			{
+			}
+        }
+
+        printf("Error in clCreateKernel for kernel '%s', error is \"%s\", Line %u in file %s !!!\n\n", kernelName, msg, __LINE__, __FILE__);
+
+#ifndef BT_SUPPRESS_OPENCL_ASSERTS
+		btAssert(0);
+#endif //BT_SUPPRESS_OPENCL_ASSERTS
+		m_kernelCompilationFailures++;
+		return 0;
+    }
+
+	printf("ready. \n");
+	if (!kernel)
+		m_kernelCompilationFailures++;
+	return kernel;
+
+}
+
+/**
+ * Per-frame prediction step: reset the collision-object list, refresh anchored node
+ * positions when enabled, then apply forces, integrate and update bounds.
+ *
+ * @param timeStep Frame time step; it is multiplied by getTimeScale() before being
+ *                 passed to applyForces() and integrate().
+ */
+void btOpenCLSoftBodySolver::predictMotion( float timeStep )
+{
+	// Start the frame with an empty collision shape list and flag the matching
+	// device buffers as changed on the CPU so they are refreshed.
+	m_clCollisionObjectDetails.changedOnCPU();
+	m_clPerClothCollisionObjects.changedOnCPU();
+	m_collisionObjectDetails.clear();
+
+	if ( m_bUpdateAnchoredNodePos )
+	{
+		// In OpenCL cloth solver, if softbody node has zero inverse mass(infinite mass) or anchor attached, 
+		// we need to update the node position in case the node or anchor is animated externally.
+		// If there is no such node, we can eliminate the unnecessary CPU-to-GPU data transferring. 
+		using Vectormath::Aos::Point3;
+
+		const int anchorCount = m_anchorNodeInfoArray.size();
+		for ( int anchor = 0; anchor < anchorCount; ++anchor )
+		{
+			btSoftBody::Node* node = m_anchorNodeInfoArray[anchor].pNode;
+			m_anchorPosition[anchor] = Point3((float)node->m_x.getX(), (float)node->m_x.getY(), (float)node->m_x.getZ());
+		}
+
+		if ( anchorCount > 0 )
+			m_clAnchorPosition.changedOnCPU();
+
+		updateFixedVertexPositions();
+	}
+
+	{
+		BT_PROFILE("applyForces");
+		// Apply forces that we know about to the cloths
+		applyForces(  timeStep * getTimeScale() );
+	}
+
+	{
+		BT_PROFILE("integrate");
+		// Integrate motion for all soft bodies dealt with by the solver
+		integrate( timeStep * getTimeScale() );
+	}
+
+	{
+		BT_PROFILE("updateBounds");
+		updateBounds();
+	}
+	// End prediction work for solvers
+}
+
+// Convert a btTransform into a Vectormath Transform3: columns 0-2 come from the
+// basis columns, column 3 from the origin.
+static Vectormath::Aos::Transform3 toTransform3( const btTransform &transform )
+{
+	Vectormath::Aos::Transform3 result;
+	for( int column = 0; column < 3; ++column )
+	{
+		result.setCol(column, toVector3(transform.getBasis().getColumn(column)));
+	}
+	result.setCol(3, toVector3(transform.getOrigin()));
+	return result;
+}
+
+// Store the given bounds on the wrapped soft body, grown on every axis by the
+// collision shape's margin.
+void btOpenCLAcceleratedSoftBodyInterface::updateBounds( const btVector3 &lowerBound, const btVector3 &upperBound )
+{
+	const float margin = (float)getSoftBody()->getCollisionShape()->getMargin();
+	const btVector3 marginOffset( margin, margin, margin );
+
+	m_softBody->m_bounds[0] = lowerBound - marginOffset;
+	m_softBody->m_bounds[1] = upperBound + marginOffset;
+}  // btOpenCLAcceleratedSoftBodyInterface::updateBounds
+
+// Soft body vs. soft body collision: intentionally a no-op in this solver.
+// Both arguments are ignored.
+void btOpenCLSoftBodySolver::processCollision( btSoftBody*, btSoftBody* )
+{
+
+}
+
+// Record a collision object against a soft body so the collision kernel can use it.
+// Only capsule shapes are recorded; any other shape type is skipped (with a message
+// in _DEBUG builds). A negative soft body index triggers an assert.
+// NOTE(review): the collision object is cast to btRigidBody without a type check -
+// confirm callers only pass rigid bodies.
+void btOpenCLSoftBodySolver::processCollision( btSoftBody *softBody, const btCollisionObjectWrapper* collisionObject )
+{
+	const int clothIndex = findSoftBodyIndex( softBody );
+	if( clothIndex < 0 )
+	{
+		btAssert(0 && "Unknown soft body");
+		return;
+	}
+
+	const btCollisionShape *shape = collisionObject->getCollisionShape();
+	float friction = collisionObject->getCollisionObject()->getFriction();
+	const int shapeType = shape->getShapeType();
+
+	if( shapeType != CAPSULE_SHAPE_PROXYTYPE )
+	{
+#ifdef _DEBUG
+		printf("Unsupported collision shape type\n");
+#endif
+		//btAssert(0 && "Unsupported collision shape type\n");
+		return;
+	}
+
+	// Add to the list of expected collision objects
+	CollisionShapeDescription description;
+	description.softBodyIdentifier = clothIndex;
+	description.collisionShapeType = shapeType;
+	// TODO: May need to transpose this matrix either here or in the kernel
+	description.shapeTransform = toTransform3(collisionObject->getWorldTransform());
+
+	const btCapsuleShape *capsule = static_cast<const btCapsuleShape*>( shape );
+	description.radius = capsule->getRadius();
+	description.halfHeight = capsule->getHalfHeight();
+	description.margin = capsule->getMargin();
+	description.upAxis = capsule->getUpAxis();
+	description.friction = friction;
+
+	const btRigidBody* rigidBody = static_cast< const btRigidBody* >( collisionObject->getCollisionObject() );
+	description.linearVelocity = toVector3(rigidBody->getLinearVelocity());
+	description.angularVelocity = toVector3(rigidBody->getAngularVelocity());
+
+	m_collisionObjectDetails.push_back( description );
+} // btOpenCLSoftBodySolver::processCollision
+
+
+
+
+
+// Linear search of m_softBodySet for the interface wrapping the given soft body.
+// Returns 0 when no interface wraps it.
+btOpenCLAcceleratedSoftBodyInterface* btOpenCLSoftBodySolver::findSoftBodyInterface( const btSoftBody* const softBody )
+{
+	btOpenCLAcceleratedSoftBodyInterface* match = 0;
+	for( int i = 0; i < m_softBodySet.size(); ++i )
+	{
+		btOpenCLAcceleratedSoftBodyInterface* candidate = m_softBodySet[i];
+		if( candidate->getSoftBody() == softBody )
+		{
+			match = candidate;
+			break;
+		}
+	}
+	return match;
+}
+
+
+/**
+ * Linear search of m_softBodySet for the given soft body.
+ *
+ * @return The index of the interface wrapping softBody, or -1 when the soft body is
+ *         not managed by this solver. processCollision() tests the result with
+ *         ">= 0", so the not-found value must be negative; returning a positive
+ *         value would attribute the collision to an unrelated cloth.
+ */
+int btOpenCLSoftBodySolver::findSoftBodyIndex( const btSoftBody* const softBody )
+{
+	for( int softBodyIndex = 0; softBodyIndex < m_softBodySet.size(); ++softBodyIndex )
+	{
+		btOpenCLAcceleratedSoftBodyInterface* softBodyInterface = m_softBodySet[softBodyIndex];
+		if( softBodyInterface->getSoftBody() == softBody )
+			return softBodyIndex;
+	}
+	return -1;
+}
+
+// Build the kernels on first use and report whether they are available.
+bool btOpenCLSoftBodySolver::checkInitialized()
+{
+	// buildShaders() is only attempted while the kernels are not yet initialised
+	if( !m_shadersInitialized && buildShaders() )
+	{
+		m_shadersInitialized = true;
+	}
+	return m_shadersInitialized;
+}
+
+/**
+ * Release any existing kernels and compile every kernel the solver uses.
+ *
+ * m_shadersInitialized is set only when the compile helper reports zero failures.
+ *
+ * @return The resulting value of m_shadersInitialized.
+ */
+bool btOpenCLSoftBodySolver::buildShaders()
+{
+	// Already built: nothing to do
+	if( m_shadersInitialized )
+		return true;
+
+	const char* extraMacros="";
+
+	// Ensure current kernels are released first
+	releaseKernels();
+
+	m_currentCLFunctions->clearKernelCompilationFailures();
+
+	m_prepareLinksKernel = m_currentCLFunctions->compileCLKernelFromString(
+		PrepareLinksCLString, "PrepareLinksKernel", extraMacros, "OpenCLC10/PrepareLinks.cl" );
+	m_updatePositionsFromVelocitiesKernel = m_currentCLFunctions->compileCLKernelFromString(
+		UpdatePositionsFromVelocitiesCLString, "UpdatePositionsFromVelocitiesKernel", extraMacros, "OpenCLC10/UpdatePositionsFromVelocities.cl" );
+	m_solvePositionsFromLinksKernel = m_currentCLFunctions->compileCLKernelFromString(
+		SolvePositionsCLString, "SolvePositionsFromLinksKernel", extraMacros, "OpenCLC10/SolvePositions.cl" );
+	m_vSolveLinksKernel = m_currentCLFunctions->compileCLKernelFromString(
+		VSolveLinksCLString, "VSolveLinksKernel", extraMacros, "OpenCLC10/VSolveLinks.cl" );
+	m_updateVelocitiesFromPositionsWithVelocitiesKernel = m_currentCLFunctions->compileCLKernelFromString(
+		UpdateNodesCLString, "updateVelocitiesFromPositionsWithVelocitiesKernel", extraMacros, "OpenCLC10/UpdateNodes.cl" );
+	m_updateVelocitiesFromPositionsWithoutVelocitiesKernel = m_currentCLFunctions->compileCLKernelFromString(
+		UpdatePositionsCLString, "updateVelocitiesFromPositionsWithoutVelocitiesKernel", extraMacros, "OpenCLC10/UpdatePositions.cl" );
+	m_solveCollisionsAndUpdateVelocitiesKernel = m_currentCLFunctions->compileCLKernelFromString(
+		SolveCollisionsAndUpdateVelocitiesCLString, "SolveCollisionsAndUpdateVelocitiesKernel", extraMacros, "OpenCLC10/SolveCollisionsAndUpdateVelocities.cl" );
+	m_integrateKernel = m_currentCLFunctions->compileCLKernelFromString(
+		IntegrateCLString, "IntegrateKernel", extraMacros, "OpenCLC10/Integrate.cl" );
+	m_applyForcesKernel = m_currentCLFunctions->compileCLKernelFromString(
+		ApplyForcesCLString, "ApplyForcesKernel", extraMacros, "OpenCLC10/ApplyForces.cl" );
+	m_updateFixedVertexPositionsKernel = m_currentCLFunctions->compileCLKernelFromString(
+		UpdateFixedVertexPositionsCLString, "UpdateFixedVertexPositions", extraMacros, "OpenCLC10/UpdateFixedVertexPositions.cl" );
+
+	// The next three kernels all come from the same source string
+	// TODO: Rename to UpdateSoftBodies
+	m_resetNormalsAndAreasKernel = m_currentCLFunctions->compileCLKernelFromString(
+		UpdateNormalsCLString, "ResetNormalsAndAreasKernel", extraMacros, "OpenCLC10/UpdateNormals.cl" );
+	m_normalizeNormalsAndAreasKernel = m_currentCLFunctions->compileCLKernelFromString(
+		UpdateNormalsCLString, "NormalizeNormalsAndAreasKernel", extraMacros, "OpenCLC10/UpdateNormals.cl" );
+	m_updateSoftBodiesKernel = m_currentCLFunctions->compileCLKernelFromString(
+		UpdateNormalsCLString, "UpdateSoftBodiesKernel", extraMacros, "OpenCLC10/UpdateNormals.cl" );
+
+	const int failureCount = m_currentCLFunctions->getKernelCompilationFailures();
+	if( failureCount == 0 )
+	{
+		m_shadersInitialized = true;
+	}
+
+	return m_shadersInitialized;
+}
+
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolver_OpenCL.h b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolver_OpenCL.h
new file mode 100644
index 000000000..6de58c4f1
--- /dev/null
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolver_OpenCL.h
@@ -0,0 +1,527 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_SOFT_BODY_SOLVER_OPENCL_H
+#define BT_SOFT_BODY_SOLVER_OPENCL_H
+
+#include "stddef.h" //for size_t
+#include "vectormath/vmInclude.h"
+
+#include "BulletSoftBody/btSoftBodySolvers.h"
+#include "BulletSoftBody/btSoftBody.h"
+#include "btSoftBodySolverBuffer_OpenCL.h"
+#include "btSoftBodySolverLinkData_OpenCL.h"
+#include "btSoftBodySolverVertexData_OpenCL.h"
+#include "btSoftBodySolverTriangleData_OpenCL.h"
+
+class CLFunctions
+{
+protected:
+	cl_command_queue	m_cqCommandQue;
+	cl_context			m_cxMainContext;
+	/** Count of kernel compilation failures; zeroed at construction and by clearKernelCompilationFailures(). */
+	int	m_kernelCompilationFailures;
+
+public:
+	CLFunctions(cl_command_queue cqCommandQue, cl_context cxMainContext) :
+		m_cqCommandQue( cqCommandQue ),
+		m_cxMainContext( cxMainContext ),
+		m_kernelCompilationFailures(0)
+	{
+	}
+
+	/** Virtual because the class has a virtual member and is passed around through CLFunctions pointers. */
+	virtual ~CLFunctions() {}
+
+	int getKernelCompilationFailures() const
+	{
+		return m_kernelCompilationFailures;
+	}
+
+	/** Compile a compute shader kernel from a string and return the appropriate cl_kernel object. */
+	virtual cl_kernel compileCLKernelFromString( const char* kernelSource, const char* kernelName, const char* additionalMacros, const char* srcFileNameForCaching);
+
+	void	clearKernelCompilationFailures()
+	{
+		m_kernelCompilationFailures=0;
+	}
+};
+
+/**
+ * Entry in the collision shape array.
+ * Specifies the shape type, the transform matrix and the necessary details of the collisionShape.
+ */
+struct CollisionShapeDescription
+{
+	Vectormath::Aos::Transform3 shapeTransform;
+	Vectormath::Aos::Vector3 linearVelocity;
+	Vectormath::Aos::Vector3 angularVelocity;
+
+	int softBodyIdentifier;
+	int collisionShapeType;
+	// Both needed for capsule
+	float radius;
+	float halfHeight;
+	int upAxis;
+	float margin;
+	float friction;
+
+	/** Zero every scalar field so no indeterminate value is ever read or copied into a device buffer. */
+	CollisionShapeDescription() :
+		softBodyIdentifier(0), collisionShapeType(0),
+		radius(0), halfHeight(0), upAxis(0),
+		margin(0), friction(0)
+	{
+		// The Vectormath members are left to their own default constructors.
+	}
+};
+
+/**
+	 * SoftBody class to maintain information about a soft body instance
+	 * within a solver.
+	 * This data addresses the main solver arrays.
+	 */
+class btOpenCLAcceleratedSoftBodyInterface
+{
+protected:
+	/** Current number of vertices that are part of this cloth */
+	int m_numVertices;
+	/** Maximum number of vertices allocated to be part of this cloth */
+	int m_maxVertices;
+	/** Current number of triangles that are part of this cloth */
+	int m_numTriangles;
+	/** Maximum number of triangles allocated to be part of this cloth */
+	int m_maxTriangles;
+	/** Index of first vertex in the world allocated to this cloth */
+	int m_firstVertex;
+	/** Index of first triangle in the world allocated to this cloth */
+	int m_firstTriangle;
+	/** Index of first link in the world allocated to this cloth */
+	int m_firstLink;
+	/** Maximum number of links allocated to this cloth */
+	int m_maxLinks;
+	/** Current number of links allocated to this cloth */
+	int m_numLinks;
+
+	/** The actual soft body this data represents */
+	btSoftBody *m_softBody;
+
+public:
+	/** Bind to softBody with every count and offset zeroed; the values are supplied later through the setters. */
+	btOpenCLAcceleratedSoftBodyInterface( btSoftBody *softBody ) :
+	  m_softBody( softBody )
+	{
+		m_numVertices = 0;
+		m_maxVertices = 0;
+		m_numTriangles = 0;
+		m_maxTriangles = 0;
+		m_firstVertex = 0;
+		m_firstTriangle = 0;
+		m_firstLink = 0;
+		m_maxLinks = 0;
+		m_numLinks = 0;
+	}
+	int getNumVertices() const
+	{
+		return m_numVertices;
+	}
+
+	int getNumTriangles() const
+	{
+		return m_numTriangles;
+	}
+
+	int getMaxVertices() const
+	{
+		return m_maxVertices;
+	}
+
+	int getMaxTriangles() const
+	{
+		return m_maxTriangles;
+	}
+
+	int getFirstVertex() const
+	{
+		return m_firstVertex;
+	}
+
+	int getFirstTriangle() const
+	{
+		return m_firstTriangle;
+	}
+
+	/**
+	 * Update the bounds in the btSoftBody object
+	 */
+	void updateBounds( const btVector3 &lowerBound, const btVector3 &upperBound );
+
+	// TODO: All of these set functions will have to do checks and
+	// update the world because restructuring of the arrays will be necessary
+	// Reasonable use of "friend"?
+	void setNumVertices( int numVertices )
+	{
+		m_numVertices = numVertices;
+	}
+
+	void setNumTriangles( int numTriangles )
+	{
+		m_numTriangles = numTriangles;
+	}
+
+	void setMaxVertices( int maxVertices )
+	{
+		m_maxVertices = maxVertices;
+	}
+
+	void setMaxTriangles( int maxTriangles )
+	{
+		m_maxTriangles = maxTriangles;
+	}
+
+	void setFirstVertex( int firstVertex )
+	{
+		m_firstVertex = firstVertex;
+	}
+
+	void setFirstTriangle( int firstTriangle )
+	{
+		m_firstTriangle = firstTriangle;
+	}
+
+	void setMaxLinks( int maxLinks )
+	{
+		m_maxLinks = maxLinks;
+	}
+
+	void setNumLinks( int numLinks )
+	{
+		m_numLinks = numLinks;
+	}
+
+	void setFirstLink( int firstLink )
+	{
+		m_firstLink = firstLink;
+	}
+
+	int getMaxLinks() const
+	{
+		return m_maxLinks;
+	}
+
+	int getNumLinks() const
+	{
+		return m_numLinks;
+	}
+
+	int getFirstLink() const
+	{
+		return m_firstLink;
+	}
+
+	/** Const accessor: the pointer itself is returned unchanged, the soft body it points to stays mutable. */
+	btSoftBody* getSoftBody() const
+	{
+		return m_softBody;
+	}
+};
+
+
+
+class btOpenCLSoftBodySolver : public btSoftBodySolver
+{
+public:
+
+	/** Three unsigned components followed by one padding word; both constructors zero the padding. */
+	struct UIntVector3
+	{
+		UIntVector3()
+		{
+			x = 0;
+			y = 0;
+			z = 0;
+			_padding = 0;
+		}
+
+		UIntVector3( unsigned int x_, unsigned int y_, unsigned int z_ )
+		{
+			x = x_;
+			y = y_;
+			z = z_;
+			_padding = 0;
+		}
+
+		unsigned int x;
+		unsigned int y;
+		unsigned int z;
+		unsigned int _padding;
+	};
+	/** Two indices stored as passed in. NOTE(review): named endObject, yet m_perClothCollisionObjects below is documented as holding a count — confirm which. */
+	struct CollisionObjectIndices
+	{
+		CollisionObjectIndices( int f, int e )
+		{
+			firstObject = f;
+			endObject = e;
+		}
+
+		int firstObject;
+		int endObject;
+	};
+	/** Link, vertex and triangle data held by value; these are public members. */
+	btSoftBodyLinkDataOpenCL m_linkData;
+	btSoftBodyVertexDataOpenCL m_vertexData;
+	btSoftBodyTriangleDataOpenCL m_triangleData;
+
+protected:
+	/** Built-in kernel compiler, plus a pointer to the compiler currently in use (see setCLFunctions). */
+	CLFunctions m_defaultCLFunctions;
+	CLFunctions* m_currentCLFunctions;
+
+	/** Variable to define whether we need to update solver constants on the next iteration */
+	bool m_updateSolverConstants;
+	/** Tracks whether the CL kernels have been built successfully. */
+	bool m_shadersInitialized;
+
+	/** 
+	 * Cloths owned by this solver.
+	 * Only our cloths are in this array.
+	 */
+	btAlignedObjectArray< btOpenCLAcceleratedSoftBodyInterface * > m_softBodySet;
+
+	/** Acceleration value to be applied to all non-static vertices in the solver. 
+	 * Index n is cloth n, array sized by number of cloths in the world not the solver. 
+	 */
+	btAlignedObjectArray< Vectormath::Aos::Vector3 >	m_perClothAcceleration;
+	btOpenCLBuffer<Vectormath::Aos::Vector3>			m_clPerClothAcceleration;
+
+	/** Wind velocity to be applied normal to all non-static vertices in the solver. 
+	 * Index n is cloth n, array sized by number of cloths in the world not the solver. 
+	 */
+	btAlignedObjectArray< Vectormath::Aos::Vector3 >	m_perClothWindVelocity;
+	btOpenCLBuffer<Vectormath::Aos::Vector3>			m_clPerClothWindVelocity;
+
+	/** Velocity damping factor */
+	btAlignedObjectArray< float >						m_perClothDampingFactor;
+	btOpenCLBuffer<float>								m_clPerClothDampingFactor;
+
+	/** Velocity correction coefficient */
+	btAlignedObjectArray< float >						m_perClothVelocityCorrectionCoefficient;
+	btOpenCLBuffer<float>								m_clPerClothVelocityCorrectionCoefficient;
+
+	/** Lift parameter for wind effect on cloth. */
+	btAlignedObjectArray< float >						m_perClothLiftFactor;
+	btOpenCLBuffer<float>								m_clPerClothLiftFactor;
+
+	/** Drag parameter for wind effect on cloth. */
+	btAlignedObjectArray< float >						m_perClothDragFactor;
+	btOpenCLBuffer<float>								m_clPerClothDragFactor;
+
+	/** Density of the medium in which each cloth sits */
+	btAlignedObjectArray< float >						m_perClothMediumDensity;
+	btOpenCLBuffer<float>								m_clPerClothMediumDensity;
+
+	/** 
+	 * Collision shape details: pair of index of first collision shape for the cloth and number of collision objects.
+	 */
+	btAlignedObjectArray< CollisionObjectIndices >		m_perClothCollisionObjects;
+	btOpenCLBuffer<CollisionObjectIndices>				m_clPerClothCollisionObjects;
+
+	/** 
+	 * Collision shapes being passed across to the cloths in this solver.
+	 */
+	btAlignedObjectArray< CollisionShapeDescription >	m_collisionObjectDetails;
+	btOpenCLBuffer< CollisionShapeDescription >			m_clCollisionObjectDetails;
+
+
+
+	/** 
+	 * Friction coefficient for each cloth
+	 */
+	btAlignedObjectArray< float >	m_perClothFriction;
+	btOpenCLBuffer< float >			m_clPerClothFriction;
+
+	// anchor node info
+	struct AnchorNodeInfoCL
+	{
+		int clVertexIndex;
+		btSoftBody::Node* pNode;
+	};
+
+	btAlignedObjectArray<AnchorNodeInfoCL> m_anchorNodeInfoArray;
+	btAlignedObjectArray<Vectormath::Aos::Point3> m_anchorPosition;
+	btOpenCLBuffer<Vectormath::Aos::Point3>		  m_clAnchorPosition;
+	btAlignedObjectArray<int> m_anchorIndex;
+	btOpenCLBuffer<int>		  m_clAnchorIndex;
+
+	bool m_bUpdateAnchoredNodePos;
+	/** Handles to the compiled CL kernels. */
+	cl_kernel		m_prepareLinksKernel;
+	cl_kernel		m_solvePositionsFromLinksKernel;
+	cl_kernel		m_updateConstantsKernel;
+	cl_kernel		m_integrateKernel;
+	cl_kernel		m_addVelocityKernel;
+	cl_kernel		m_updatePositionsFromVelocitiesKernel;
+	cl_kernel		m_updateVelocitiesFromPositionsWithoutVelocitiesKernel;
+	cl_kernel		m_updateVelocitiesFromPositionsWithVelocitiesKernel;
+	cl_kernel		m_vSolveLinksKernel;
+	cl_kernel		m_solveCollisionsAndUpdateVelocitiesKernel;
+	cl_kernel		m_resetNormalsAndAreasKernel;
+	cl_kernel		m_normalizeNormalsAndAreasKernel;
+	cl_kernel		m_updateSoftBodiesKernel;
+
+	cl_kernel		m_outputToVertexArrayKernel;
+	cl_kernel		m_applyForcesKernel;
+	cl_kernel       m_updateFixedVertexPositionsKernel;	
+
+	cl_command_queue	m_cqCommandQue;
+	cl_context			m_cxMainContext;
+
+	size_t				m_defaultWorkGroupSize;
+
+	/** Compile the CL kernels. NOTE(review): presumably returns whether every kernel built — confirm in the .cpp. */
+	virtual bool buildShaders();
+
+	void resetNormalsAndAreas( int numVertices );
+
+	void normalizeNormalsAndAreas( int numVertices );
+
+	void executeUpdateSoftBodies( int firstTriangle, int numTriangles );
+
+	void prepareCollisionConstraints();
+
+	Vectormath::Aos::Vector3 ProjectOnAxis( const Vectormath::Aos::Vector3 &v, const Vectormath::Aos::Vector3 &a );
+
+	void ApplyClampedForce( float solverdt, const Vectormath::Aos::Vector3 &force, const Vectormath::Aos::Vector3 &vertexVelocity, float inverseMass, Vectormath::Aos::Vector3 &vertexForce );
+
+
+	int findSoftBodyIndex( const btSoftBody* const softBody );
+
+	virtual void applyForces( float solverdt );
+
+	void updateFixedVertexPositions();
+
+	/**
+	 * Integrate motion on the solver.
+	 */
+	virtual void integrate( float solverdt );
+
+	virtual void updateConstants( float timeStep );
+
+	float computeTriangleArea( 
+		const Vectormath::Aos::Point3 &vertex0,
+		const Vectormath::Aos::Point3 &vertex1,
+		const Vectormath::Aos::Point3 &vertex2 );
+
+
+	//////////////////////////////////////
+	// Kernel dispatches
+	void prepareLinks();
+
+	void solveLinksForVelocity( int startLink, int numLinks, float kst );
+
+	void updatePositionsFromVelocities( float solverdt );
+
+	virtual void solveLinksForPosition( int startLink, int numLinks, float kst, float ti );
+
+	void updateVelocitiesFromPositionsWithVelocities( float isolverdt );
+
+	void updateVelocitiesFromPositionsWithoutVelocities( float isolverdt );
+	virtual void solveCollisionsAndUpdateVelocities( float isolverdt );
+
+	// End kernel dispatches
+	/////////////////////////////////////
+
+	void updateBounds();
+
+	void releaseKernels();
+
+public:
+	btOpenCLSoftBodySolver(cl_command_queue queue,cl_context	ctx, bool bUpdateAchchoredNodePos = false);
+
+	virtual ~btOpenCLSoftBodySolver();
+
+
+
+	btOpenCLAcceleratedSoftBodyInterface *findSoftBodyInterface( const btSoftBody* const softBody );
+
+	virtual btSoftBodyLinkData &getLinkData();
+
+	virtual btSoftBodyVertexData &getVertexData();
+
+	virtual btSoftBodyTriangleData &getTriangleData();
+
+	virtual SolverTypes getSolverType() const
+	{
+		return CL_SOLVER;
+	}
+
+
+	virtual bool checkInitialized();
+
+	virtual void updateSoftBodies( );
+
+	virtual void optimize( btAlignedObjectArray< btSoftBody * > &softBodies , bool forceUpdate=false);
+
+	virtual void copyBackToSoftBodies(bool bMove = true);
+
+	virtual void solveConstraints( float solverdt );
+
+	virtual void predictMotion( float solverdt );
+
+	virtual void processCollision( btSoftBody *, const btCollisionObjectWrapper* );
+
+	virtual void processCollision( btSoftBody*, btSoftBody* );
+
+	virtual void	setDefaultWorkgroupSize(size_t workGroupSize)
+	{
+		m_defaultWorkGroupSize = workGroupSize;
+	}
+	virtual size_t	getDefaultWorkGroupSize() const
+	{
+		return m_defaultWorkGroupSize;
+	}
+	/** Select the kernel compiler to use; passing a null pointer restores the built-in default. */
+	void	setCLFunctions(CLFunctions* funcs)
+	{
+		if (funcs)
+			m_currentCLFunctions = funcs;
+		else
+			m_currentCLFunctions  = &m_defaultCLFunctions;
+	}
+
+}; // btOpenCLSoftBodySolver
+
+
+/** 
+ * Class to manage movement of data from a solver to a given target.
+ * This version is the CL to CPU version.
+ */
+class btSoftBodySolverOutputCLtoCPU : public btSoftBodySolverOutput
+{
+public:
+	/** Nothing to set up: this class declares no data members of its own. */
+	btSoftBodySolverOutputCLtoCPU() {}
+
+	/**
+	 * Output the solver's current vertex data for softBody into vertexBuffer.
+	 * Only declared here; the definition is not part of this header.
+	 */
+	virtual void copySoftBodyToVertexBuffer( const btSoftBody * const softBody, btVertexBufferDescriptor *vertexBuffer );
+};
+
+
+
+#endif // #ifndef BT_SOFT_BODY_SOLVER_OPENCL_H
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolver_OpenCLSIMDAware.cpp b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolver_OpenCLSIMDAware.cpp
new file mode 100644
index 000000000..0380a6dd5
--- /dev/null
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolver_OpenCLSIMDAware.cpp
@@ -0,0 +1,1101 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+#include "BulletCollision/CollisionShapes/btTriangleIndexVertexArray.h"
+#include "vectormath/vmInclude.h"
+#include <stdio.h> //@todo: remove the debugging printf at some stage
+#include "btSoftBodySolver_OpenCLSIMDAware.h"
+#include "BulletSoftBody/btSoftBodySolverVertexBuffer.h"
+#include "BulletSoftBody/btSoftBody.h"
+#include "BulletCollision/CollisionShapes/btCapsuleShape.h"
+#include <limits.h>
+
+#define WAVEFRONT_SIZE 32
+#define WAVEFRONT_BLOCK_MULTIPLIER 2
+#define GROUP_SIZE (WAVEFRONT_SIZE*WAVEFRONT_BLOCK_MULTIPLIER)
+#define LINKS_PER_SIMD_LANE 16
+// GROUP_SIZE is WAVEFRONT_BLOCK_MULTIPLIER wavefronts' worth of work items; LINKS_PER_SIMD_LANE seeds m_linksPerWorkItem.
+static const size_t workGroupSize = GROUP_SIZE;
+// workGroupSize is the unit the work-item count is rounded up to in solveLinksForPosition().
+
+//CL_VERSION_1_1 seems broken on NVidia SDK so just disable it
+// Kernel sources: each .cl file is #included as the initialiser of a string; presumably every file wraps its text in MSTRINGIFY(...) — TODO confirm.
+////OpenCL 1.0 kernels don't use float3
+#define MSTRINGIFY(A) #A
+static const char* UpdatePositionsFromVelocitiesCLString = 
+#include "OpenCLC10/UpdatePositionsFromVelocities.cl"
+static const char* SolvePositionsCLString = 
+#include "OpenCLC10/SolvePositionsSIMDBatched.cl"
+static const char* UpdateNodesCLString = 
+#include "OpenCLC10/UpdateNodes.cl"
+static const char* UpdatePositionsCLString = 
+#include "OpenCLC10/UpdatePositions.cl"
+static const char* UpdateConstantsCLString = 
+#include "OpenCLC10/UpdateConstants.cl"
+static const char* IntegrateCLString = 
+#include "OpenCLC10/Integrate.cl"
+static const char* ApplyForcesCLString = 
+#include "OpenCLC10/ApplyForces.cl"
+static const char* UpdateFixedVertexPositionsCLString = 
+#include "OpenCLC10/UpdateFixedVertexPositions.cl"
+static const char* UpdateNormalsCLString = 
+#include "OpenCLC10/UpdateNormals.cl"
+static const char* VSolveLinksCLString = 
+#include "OpenCLC10/VSolveLinks.cl"
+static const char* SolveCollisionsAndUpdateVelocitiesCLString =
+#include "OpenCLC10/SolveCollisionsAndUpdateVelocitiesSIMDBatched.cl"
+static const char* OutputToVertexArrayCLString =
+#include "OpenCLC10/OutputToVertexArray.cl"
+
+
+
+btSoftBodyLinkDataOpenCLSIMDAware::btSoftBodyLinkDataOpenCLSIMDAware(cl_command_queue queue,  cl_context ctx) :
+	m_cqCommandQue(queue),
+	m_wavefrontSize( WAVEFRONT_SIZE ),
+	m_linksPerWorkItem( LINKS_PER_SIMD_LANE ),
+	m_maxBatchesWithinWave( 0 ),
+	m_maxLinksPerWavefront( m_wavefrontSize * m_linksPerWorkItem ), // assumes both operands are declared earlier in the class, hence already initialised — TODO confirm
+	m_numWavefronts( 0 ),
+	m_maxVertex( 0 ),
+	m_clNumBatchesAndVerticesWithinWaves( queue, ctx, &m_numBatchesAndVerticesWithinWaves, true ), // these three pass true as the last argument — presumably a read-only-on-device flag; confirm against btOpenCLBuffer
+	m_clWavefrontVerticesGlobalAddresses( queue, ctx, &m_wavefrontVerticesGlobalAddresses, true ),
+	m_clLinkVerticesLocalAddresses( queue, ctx, &m_linkVerticesLocalAddresses, true ),
+	m_clLinkStrength( queue, ctx, &m_linkStrength, false ),
+	m_clLinksMassLSC( queue, ctx, &m_linksMassLSC, false ),
+	m_clLinksRestLengthSquared( queue, ctx, &m_linksRestLengthSquared, false ),
+	m_clLinksRestLength( queue, ctx, &m_linksRestLength, false ),
+	m_clLinksMaterialLinearStiffnessCoefficient( queue, ctx, &m_linksMaterialLinearStiffnessCoefficient, false )
+{ // Everything is set up by the initialiser list: each device buffer is tied to the host array whose address it receives.
+}
+// Destructor: the body is empty, so no explicit cleanup is performed here.
+btSoftBodyLinkDataOpenCLSIMDAware::~btSoftBodyLinkDataOpenCLSIMDAware()
+{
+}
+
+static Vectormath::Aos::Vector3 toVector3( const btVector3 &vec )
+{
+	// Component-wise conversion from Bullet's vector type to the Vectormath one.
+	return Vectormath::Aos::Vector3( vec.getX(), vec.getY(), vec.getZ() );
+}
+
+/** Make room for numLinks additional links in every link-related array, the address table included. */
+void btSoftBodyLinkDataOpenCLSIMDAware::createLinks( int numLinks )
+{
+	// Work out the final size up front, before the base class touches m_links.
+	const int totalLinks = m_links.size() + numLinks;
+
+	btSoftBodyLinkData::createLinks( numLinks );
+
+	// Resize the link address table to the same total.
+	m_linkAddresses.resize( totalLinks );
+}
+
+/** Store link at linkIndex (space must already exist from createLinks) and refresh the bookkeeping kept alongside it. */
+void btSoftBodyLinkDataOpenCLSIMDAware::setLinkAt( 
+	const LinkDescription &link, 
+	int linkIndex )
+{
+	btSoftBodyLinkData::setLinkAt( link, linkIndex );
+
+	// Track the largest vertex index referenced by any link stored so far.
+	if( m_maxVertex < link.getVertex0() )
+		m_maxVertex = link.getVertex0();
+	if( m_maxVertex < link.getVertex1() )
+		m_maxVertex = link.getVertex1();
+	// Initial mapping: every link's address is simply its own index.
+	m_linkAddresses[linkIndex] = linkIndex;
+}
+
+bool btSoftBodyLinkDataOpenCLSIMDAware::onAccelerator()
+{
+	return m_onGPU; // set by a successful moveToAccelerator(), cleared by a successful moveFromAccelerator()
+}
+
+bool btSoftBodyLinkDataOpenCLSIMDAware::moveToAccelerator()
+{
+	// Move every link buffer to the device in turn; && short-circuits, so the
+	// first failure skips the remaining transfers just as a running flag would.
+	const bool allMoved =
+		m_clNumBatchesAndVerticesWithinWaves.moveToGPU() &&
+		m_clWavefrontVerticesGlobalAddresses.moveToGPU() &&
+		m_clLinkVerticesLocalAddresses.moveToGPU() &&
+		m_clLinkStrength.moveToGPU() &&
+		m_clLinksMassLSC.moveToGPU() &&
+		m_clLinksRestLengthSquared.moveToGPU() &&
+		m_clLinksRestLength.moveToGPU() &&
+		m_clLinksMaterialLinearStiffnessCoefficient.moveToGPU();
+	// Only raise the residency flag when every transfer succeeded.
+	if( allMoved )
+		m_onGPU = true;
+	return allMoved;
+}
+
+bool btSoftBodyLinkDataOpenCLSIMDAware::moveFromAccelerator()
+{
+	// Bring every link buffer back from the device; a failure skips the remaining transfers.
+	bool success = m_clNumBatchesAndVerticesWithinWaves.moveFromGPU();
+	success = success && m_clWavefrontVerticesGlobalAddresses.moveFromGPU();
+	success = success && m_clLinkVerticesLocalAddresses.moveFromGPU();
+	success = success && m_clLinkStrength.moveFromGPU();
+	success = success && m_clLinksMassLSC.moveFromGPU();
+	success = success && m_clLinksRestLengthSquared.moveFromGPU();
+	success = success && m_clLinksRestLength.moveFromGPU();
+	success = success && m_clLinksMaterialLinearStiffnessCoefficient.moveFromGPU();
+	// Only clear the residency flag when every transfer succeeded.
+	if( success ) {
+		m_onGPU = false;
+	}
+
+	return success;
+}
+
+
+
+
+
+
+
+
+btOpenCLSoftBodySolverSIMDAware::btOpenCLSoftBodySolverSIMDAware(cl_command_queue queue, cl_context ctx, bool bUpdateAchchoredNodePos) :
+	btOpenCLSoftBodySolver( queue, ctx, bUpdateAchchoredNodePos ),
+	m_linkData(queue, ctx)
+{
+	// Start with the shaders marked as not yet initialized.
+	m_shadersInitialized = false;
+	// Initially the solver constants clearly need computing. The flag is global to
+	// every cloth linked with this solver for now; making it body specific may pay
+	// off once it is better understood when constants really have to be updated.
+	m_updateSolverConstants = true;
+}
+
+btOpenCLSoftBodySolverSIMDAware::~btOpenCLSoftBodySolverSIMDAware()
+{
+	releaseKernels(); // NOTE(review): confirm the base-class destructor does not release the same kernels a second time
+}
+
+void btOpenCLSoftBodySolverSIMDAware::optimize( btAlignedObjectArray< btSoftBody * > &softBodies ,bool forceUpdate)
+{
+	// Rebuild every solver-side array from softBodies; skipped unless forced or the number of bodies changed.
+	if( forceUpdate || m_softBodySet.size() != softBodies.size() )
+	{
+		// Have a change in the soft body set so update, reloading all the data
+		getVertexData().clear();
+		getTriangleData().clear();
+		getLinkData().clear();
+		m_softBodySet.resize(0); // NOTE(review): the interfaces allocated with new below are dropped here without delete — confirm ownership
+		m_anchorIndex.clear();
+		// Anchor records are rebuilt from scratch below; stale ones could index past the fresh m_anchorIndex.
+		m_anchorNodeInfoArray.clear();
+		// NOTE(review): the m_perCloth* arrays filled with push_back below are not cleared here — confirm they are reset before a second rebuild.
+		int maxPiterations = 0;
+		int maxViterations = 0;
+
+		for( int softBodyIndex = 0; softBodyIndex < softBodies.size(); ++softBodyIndex )
+		{
+			btSoftBody *softBody = softBodies[ softBodyIndex ];
+			using Vectormath::Aos::Point3;
+
+			// Create SoftBody that will store the information within the solver
+			btOpenCLAcceleratedSoftBodyInterface* newSoftBody = new btOpenCLAcceleratedSoftBodyInterface( softBody );
+			m_softBodySet.push_back( newSoftBody );
+
+			m_perClothAcceleration.push_back( toVector3(softBody->getWorldInfo()->m_gravity) );
+			m_perClothDampingFactor.push_back(softBody->m_cfg.kDP);
+			m_perClothVelocityCorrectionCoefficient.push_back( softBody->m_cfg.kVCF );
+			m_perClothLiftFactor.push_back( softBody->m_cfg.kLF );
+			m_perClothDragFactor.push_back( softBody->m_cfg.kDG );
+			m_perClothMediumDensity.push_back(softBody->getWorldInfo()->air_density);
+			// Simple init values. Actually we'll put 0 and -1 into them at the appropriate time
+			m_perClothFriction.push_back(softBody->m_cfg.kDF);
+			m_perClothCollisionObjects.push_back( CollisionObjectIndices(-1, -1) );
+
+			// Add space for new vertices and triangles in the default solver for now
+			// TODO: Include space here for tearing too later
+			int firstVertex = getVertexData().getNumVertices();
+			int numVertices = softBody->m_nodes.size();
+			// Round maxVertices to a multiple of the workgroup size so we know we're safe to run over in a given group
+			// maxVertices can be increased to allow tearing, but should be used sparingly because these extra verts will always be processed
+			int maxVertices = GROUP_SIZE*((numVertices+GROUP_SIZE)/GROUP_SIZE);
+			// Allocate space for new vertices in all the vertex arrays
+			getVertexData().createVertices( numVertices, softBodyIndex, maxVertices );
+
+			int firstTriangle = getTriangleData().getNumTriangles();
+			int numTriangles = softBody->m_faces.size();
+			int maxTriangles = numTriangles;
+			getTriangleData().createTriangles( maxTriangles );
+
+			// Copy vertices from softbody into the solver
+			for( int vertex = 0; vertex < numVertices; ++vertex )
+			{
+				Point3 multPoint(softBody->m_nodes[vertex].m_x.getX(), softBody->m_nodes[vertex].m_x.getY(), softBody->m_nodes[vertex].m_x.getZ());
+				btSoftBodyVertexData::VertexDescription desc;
+
+				// TODO: Position in the softbody might be pre-transformed
+				// or we may need to adapt for the pose.
+				//desc.setPosition( cloth.getMeshTransform()*multPoint );
+				desc.setPosition( multPoint );
+
+				float vertexInverseMass = softBody->m_nodes[vertex].m_im;
+				desc.setInverseMass(vertexInverseMass);
+				getVertexData().setVertexAt( desc, firstVertex + vertex );
+
+				m_anchorIndex.push_back(-1);
+			}
+			for( int vertex = numVertices; vertex < maxVertices; ++vertex )
+			{
+				m_anchorIndex.push_back(-1);
+			}
+
+			// Copy triangles similarly
+			// We're assuming here that vertex indices are based on the firstVertex rather than the entire scene
+			for( int triangle = 0; triangle < numTriangles; ++triangle )
+			{
+				// Note that large array storage is relative to the array not to the cloth
+				// So we need to add firstVertex to each value
+				int vertexIndex0 = (softBody->m_faces[triangle].m_n[0] - &(softBody->m_nodes[0]));
+				int vertexIndex1 = (softBody->m_faces[triangle].m_n[1] - &(softBody->m_nodes[0]));
+				int vertexIndex2 = (softBody->m_faces[triangle].m_n[2] - &(softBody->m_nodes[0]));
+				btSoftBodyTriangleData::TriangleDescription newTriangle(vertexIndex0 + firstVertex, vertexIndex1 + firstVertex, vertexIndex2 + firstVertex);
+				getTriangleData().setTriangleAt( newTriangle, firstTriangle + triangle );
+
+				// Increase vertex triangle counts for this triangle		
+				getVertexData().getTriangleCount(newTriangle.getVertexSet().vertex0)++;
+				getVertexData().getTriangleCount(newTriangle.getVertexSet().vertex1)++;
+				getVertexData().getTriangleCount(newTriangle.getVertexSet().vertex2)++;
+			}
+
+			int firstLink = getLinkData().getNumLinks();
+			int numLinks = softBody->m_links.size();
+			int maxLinks = numLinks;
+
+			// Allocate space for the links
+			getLinkData().createLinks( numLinks );
+
+			// Add the links
+			for( int link = 0; link < numLinks; ++link )
+			{
+				int vertexIndex0 = softBody->m_links[link].m_n[0] - &(softBody->m_nodes[0]);
+				int vertexIndex1 = softBody->m_links[link].m_n[1] - &(softBody->m_nodes[0]);
+
+				btSoftBodyLinkData::LinkDescription newLink(vertexIndex0 + firstVertex, vertexIndex1 + firstVertex, softBody->m_links[link].m_material->m_kLST);
+				newLink.setLinkStrength(1.f);
+				getLinkData().setLinkAt(newLink, firstLink + link);
+			}
+
+			newSoftBody->setFirstVertex( firstVertex );
+			newSoftBody->setFirstTriangle( firstTriangle );
+			newSoftBody->setNumVertices( numVertices );
+			newSoftBody->setMaxVertices( maxVertices );
+			newSoftBody->setNumTriangles( numTriangles );
+			newSoftBody->setMaxTriangles( maxTriangles );
+			newSoftBody->setFirstLink( firstLink );
+			newSoftBody->setNumLinks( numLinks );
+
+			// Find maximum piterations and viterations
+			int piterations = softBody->m_cfg.piterations;
+
+            if ( piterations > maxPiterations )
+                  maxPiterations = piterations;
+
+            int viterations = softBody->m_cfg.viterations;
+			if ( viterations > maxViterations )
+                  maxViterations = viterations;
+
+			// zero mass
+			for( int vertex = 0; vertex < numVertices; ++vertex )
+			{
+				if ( softBody->m_nodes[vertex].m_im == 0 )
+				{
+					AnchorNodeInfoCL nodeInfo;
+					nodeInfo.clVertexIndex = firstVertex + vertex;
+					nodeInfo.pNode = &softBody->m_nodes[vertex];
+
+					m_anchorNodeInfoArray.push_back(nodeInfo);
+				}
+			}			
+
+			// anchor position
+			if ( numVertices > 0 )
+			{
+				for ( int anchorIndex = 0; anchorIndex < softBody->m_anchors.size(); anchorIndex++ )
+				{
+					btSoftBody::Node* anchorNode = softBody->m_anchors[anchorIndex].m_node;
+					btSoftBody::Node* firstNode = &softBody->m_nodes[0];
+
+					AnchorNodeInfoCL nodeInfo;
+					nodeInfo.clVertexIndex = firstVertex + (int)(anchorNode - firstNode);
+					nodeInfo.pNode = anchorNode;
+
+					m_anchorNodeInfoArray.push_back(nodeInfo);
+				}
+			}			
+		}
+
+		m_anchorPosition.clear();		
+		m_anchorPosition.resize(m_anchorNodeInfoArray.size());
+
+		for ( int anchorNode = 0; anchorNode < m_anchorNodeInfoArray.size(); anchorNode++ )
+		{
+			const AnchorNodeInfoCL& anchorNodeInfo = m_anchorNodeInfoArray[anchorNode];
+			m_anchorIndex[anchorNodeInfo.clVertexIndex] = anchorNode;
+			getVertexData().getInverseMass(anchorNodeInfo.clVertexIndex) = 0.0f;
+		}
+
+		updateConstants(0.f);
+
+		// set position and velocity iterations
+		setNumberOfPositionIterations(maxPiterations);
+		setNumberOfVelocityIterations(maxViterations);
+
+		// set wind velocity
+		m_perClothWindVelocity.resize( m_softBodySet.size() );
+		for( int softBodyIndex = 0; softBodyIndex < m_softBodySet.size(); ++softBodyIndex )
+		{
+			btSoftBody *softBody = m_softBodySet[softBodyIndex]->getSoftBody();			
+			m_perClothWindVelocity[softBodyIndex] = toVector3(softBody->getWindVelocity());
+		}
+
+		m_clPerClothWindVelocity.changedOnCPU();
+
+		// generate batches
+		m_linkData.generateBatches();		
+		m_triangleData.generateBatches();
+
+		// Build the shaders to match the batching parameters
+		buildShaders();
+	}
+}
+
+
+btSoftBodyLinkData &btOpenCLSoftBodySolverSIMDAware::getLinkData()
+{
+	// TODO: Consider setting link data to "changed" here
+	return m_linkData; // the m_linkData this class constructs from (queue, ctx), returned as a btSoftBodyLinkData reference
+}
+
+
+
+
+void btOpenCLSoftBodySolverSIMDAware::updateConstants( float timeStep )
+{
+	using namespace Vectormath::Aos;
+
+	// Guard clause: the per-link constants are only rebuilt while the dirty flag is raised.
+	if( !m_updateSolverConstants )
+		return;
+	m_updateSolverConstants = false;
+
+	// Initialise the link constants. This will have to be redone if the
+	// structure changes (tearing, maybe) or after various other possible changes.
+	// timeStep is not read by this implementation.
+	const int linkCount = m_linkData.getNumLinks();
+	for( int link = 0; link < linkCount; ++link )
+	{
+		btSoftBodyLinkData::LinkNodePair &ends( m_linkData.getVertexPair(link) );
+
+		// Rest length: distance between the two end vertices as currently stored.
+		const float restLength = length((m_vertexData.getPosition( ends.vertex0 ) - m_vertexData.getPosition( ends.vertex1 )));
+		m_linkData.getRestLength(link) = restLength;
+		m_linkData.getRestLengthSquared(link) = restLength*restLength;
+
+		// Sum of both ends' inverse masses divided by the link's linear stiffness coefficient.
+		const float inverseMass0 = m_vertexData.getInverseMass(ends.vertex0);
+		const float inverseMass1 = m_vertexData.getInverseMass(ends.vertex1);
+		const float stiffness = m_linkData.getLinearStiffnessCoefficient(link);
+		m_linkData.getMassLSC(link) = (inverseMass0 + inverseMass1)/stiffness;
+	}
+}
+
+
+
+void btOpenCLSoftBodySolverSIMDAware::solveConstraints( float solverdt )
+{
+	// One constraint pass over the data held by this solver:
+	//   1. move the per-cloth coefficients and the link/vertex data to the accelerator,
+	//   2. prepare the collision constraints,
+	//   3. run m_numberOfPositionIterations sweeps over all wavefront batches,
+	//   4. solve collisions and update the velocities.
+	// solverdt must be non-zero because its reciprocal is passed to step 4.
+
+	// Passed through unchanged to every solveLinksForPosition() call below.
+	const float kst = 1.f;
+	const float ti = 0.f;
+
+
+	// Per-cloth damping and velocity-correction coefficients.
+	m_clPerClothDampingFactor.moveToGPU();
+	m_clPerClothVelocityCorrectionCoefficient.moveToGPU();
+
+
+	// Ensure data is on accelerator
+	m_linkData.moveToAccelerator();
+	m_vertexData.moveToAccelerator();
+
+
+	// prepareLinks() was already disabled at this point; the call is kept for reference.
+	//prepareLinks();	
+
+	prepareCollisionConstraints();
+
+	// Solve drift
+	for( int iteration = 0; iteration < m_numberOfPositionIterations ; ++iteration )
+	{
+		// Each entry supplies the first wavefront and the number of wavefronts for one call.
+		for( int i = 0; i < m_linkData.m_wavefrontBatchStartLengths.size(); ++i )
+		{
+			int startWave = m_linkData.m_wavefrontBatchStartLengths[i].start;
+			int numWaves = m_linkData.m_wavefrontBatchStartLengths[i].length;
+			solveLinksForPosition( startWave, numWaves, kst, ti );
+		}
+	} // for( int iteration = 0; iteration < m_numberOfPositionIterations ; ++iteration )
+
+
+	// At this point assume that the force array is blank - we will overwrite it
+	solveCollisionsAndUpdateVelocities( 1.f/solverdt );
+}
+
+
+//////////////////////////////////////
+// Kernel dispatches
+
+
+void btOpenCLSoftBodySolverSIMDAware::solveLinksForPosition( int startWave, int numWaves, float kst, float ti )
+{
+	// Binds the arguments of m_solvePositionsFromLinksKernel and enqueues it with WAVEFRONT_SIZE
+	// work items per wavefront (rounded up to whole work groups) for numWaves wavefronts from startWave.
+	cl_int ciErrNum;
+	ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,0, sizeof(int), &startWave);
+	ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,1, sizeof(int), &numWaves);
+	ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,2, sizeof(float), &kst);
+	ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,3, sizeof(float), &ti);
+	ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,4, sizeof(cl_mem), &m_linkData.m_clNumBatchesAndVerticesWithinWaves.m_buffer);
+	ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,5, sizeof(cl_mem), &m_linkData.m_clWavefrontVerticesGlobalAddresses.m_buffer);
+	ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,6, sizeof(cl_mem), &m_linkData.m_clLinkVerticesLocalAddresses.m_buffer);
+	ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,7, sizeof(cl_mem), &m_linkData.m_clLinksMassLSC.m_buffer);
+	ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,8, sizeof(cl_mem), &m_linkData.m_clLinksRestLengthSquared.m_buffer);
+	ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,9, sizeof(cl_mem), &m_vertexData.m_clVertexInverseMass.m_buffer);
+	ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,10, sizeof(cl_mem), &m_vertexData.m_clVertexPosition.m_buffer);
+	// Arguments 11-13 pass only a byte size with a null pointer, which is how local memory is requested
+	ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,11, WAVEFRONT_BLOCK_MULTIPLIER*sizeof(cl_int2), 0);
+	ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,12, m_linkData.getMaxVerticesPerWavefront()*WAVEFRONT_BLOCK_MULTIPLIER*sizeof(cl_float4), 0);
+	ciErrNum = clSetKernelArg(m_solvePositionsFromLinksKernel,13, m_linkData.getMaxVerticesPerWavefront()*WAVEFRONT_BLOCK_MULTIPLIER*sizeof(cl_float), 0);
+
+	size_t	numWorkItems = workGroupSize*((numWaves*WAVEFRONT_SIZE + (workGroupSize-1)) / workGroupSize);
+	// A zero-sized range is an enqueue error, so skip it (same guard as solveCollisionsAndUpdateVelocities)
+	if( numWorkItems )
+	{
+		ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue,m_solvePositionsFromLinksKernel,1,NULL,&numWorkItems,&workGroupSize,0,0,0);
+		if( ciErrNum!= CL_SUCCESS ) 
+		{
+			btAssert( 0 &&  "enqueueNDRangeKernel(m_solvePositionsFromLinksKernel)");
+		}
+	}
+} // solveLinksForPosition
+
+void btOpenCLSoftBodySolverSIMDAware::solveCollisionsAndUpdateVelocities( float isolverdt )
+{
+	// Binds the arguments of m_solveCollisionsAndUpdateVelocitiesKernel and enqueues it with one
+	// work item per vertex. isolverdt is kernel argument 1; solveConstraints passes 1/solverdt.
+	// Copy kernel parameters to GPU
+	m_vertexData.moveToAccelerator();
+	m_clPerClothFriction.moveToGPU();
+	m_clPerClothDampingFactor.moveToGPU();
+	m_clPerClothCollisionObjects.moveToGPU();
+	m_clCollisionObjectDetails.moveToGPU();
+	cl_int ciErrNum;
+	int numVerts = m_vertexData.getNumVertices();
+	ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 0, sizeof(int), &numVerts);
+	// isolverdt is a float, so the argument size must be sizeof(float)
+	ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 1, sizeof(float), &isolverdt);
+	ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 2, sizeof(cl_mem),&m_vertexData.m_clClothIdentifier.m_buffer);
+	ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 3, sizeof(cl_mem),&m_vertexData.m_clVertexPreviousPosition.m_buffer);
+	ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 4, sizeof(cl_mem),&m_clPerClothFriction.m_buffer);
+	ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 5, sizeof(cl_mem),&m_clPerClothDampingFactor.m_buffer);
+	ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 6, sizeof(cl_mem),&m_clPerClothCollisionObjects.m_buffer);
+	ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 7, sizeof(cl_mem),&m_clCollisionObjectDetails.m_buffer);
+	ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 8, sizeof(cl_mem),&m_vertexData.m_clVertexForceAccumulator.m_buffer);
+	ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 9, sizeof(cl_mem),&m_vertexData.m_clVertexVelocity.m_buffer);
+	ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 10, sizeof(cl_mem),&m_vertexData.m_clVertexPosition.m_buffer);
+	ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 11, sizeof(CollisionShapeDescription)*16,0);
+	ciErrNum = clSetKernelArg(m_solveCollisionsAndUpdateVelocitiesKernel, 12, sizeof(cl_mem),&m_vertexData.m_clVertexInverseMass.m_buffer);
+	// Round the vertex count up to a whole number of work groups
+	size_t	numWorkItems = workGroupSize*((numVerts + (workGroupSize-1)) / workGroupSize);
+	// Nothing is enqueued when there are no vertices
+	if (numWorkItems)
+	{
+		ciErrNum = clEnqueueNDRangeKernel(m_cqCommandQue,m_solveCollisionsAndUpdateVelocitiesKernel, 1, NULL, &numWorkItems, &workGroupSize,0,0,0);
+		if( ciErrNum != CL_SUCCESS ) 
+		{
+			btAssert( 0 &&  "enqueueNDRangeKernel(m_solveCollisionsAndUpdateVelocitiesKernel)");
+		}
+	}
+} // btOpenCLSoftBodySolverSIMDAware::solveCollisionsAndUpdateVelocities
+
+// End kernel dispatches
+/////////////////////////////////////
+
+
+
+bool btOpenCLSoftBodySolverSIMDAware::buildShaders()
+{
+	// Releases the current kernels and, unless m_shadersInitialized is still set afterwards,
+	// compiles every kernel of this solver. Returns true when no compilation failure was recorded.
+	releaseKernels();
+	if( m_shadersInitialized )
+	{
+		return true;
+	}
+
+	m_currentCLFunctions->clearKernelCompilationFailures();
+
+	// Build options: none for the generic kernels, wavefront geometry for the SIMD-batched link solver
+	const char* noExtraOptions="";
+	char *simdOptions = new char[256];
+	sprintf( simdOptions,
+		"-DMAX_NUM_VERTICES_PER_WAVE=%d -DMAX_BATCHES_PER_WAVE=%d -DWAVEFRONT_SIZE=%d -DWAVEFRONT_BLOCK_MULTIPLIER=%d -DBLOCK_SIZE=%d", 
+		m_linkData.getMaxVerticesPerWavefront(),
+		m_linkData.getMaxBatchesPerWavefront(),
+		m_linkData.getWavefrontSize(),
+		WAVEFRONT_BLOCK_MULTIPLIER,
+		WAVEFRONT_BLOCK_MULTIPLIER*m_linkData.getWavefrontSize());
+
+	m_updatePositionsFromVelocitiesKernel = m_currentCLFunctions->compileCLKernelFromString( UpdatePositionsFromVelocitiesCLString, "UpdatePositionsFromVelocitiesKernel", noExtraOptions,"OpenCLC10/UpdatePositionsFromVelocities.cl");
+	m_solvePositionsFromLinksKernel = m_currentCLFunctions->compileCLKernelFromString( SolvePositionsCLString, "SolvePositionsFromLinksKernel", simdOptions ,"OpenCLC10/SolvePositionsSIMDBatched.cl");
+	m_updateVelocitiesFromPositionsWithVelocitiesKernel = m_currentCLFunctions->compileCLKernelFromString( UpdateNodesCLString, "updateVelocitiesFromPositionsWithVelocitiesKernel", noExtraOptions ,"OpenCLC10/UpdateNodes.cl");
+	m_updateVelocitiesFromPositionsWithoutVelocitiesKernel = m_currentCLFunctions->compileCLKernelFromString( UpdatePositionsCLString, "updateVelocitiesFromPositionsWithoutVelocitiesKernel", noExtraOptions,"OpenCLC10/UpdatePositions.cl");
+	m_integrateKernel = m_currentCLFunctions->compileCLKernelFromString( IntegrateCLString, "IntegrateKernel", noExtraOptions ,"OpenCLC10/Integrate.cl");
+	m_applyForcesKernel = m_currentCLFunctions->compileCLKernelFromString( ApplyForcesCLString, "ApplyForcesKernel", noExtraOptions,"OpenCLC10/ApplyForces.cl" );
+	m_updateFixedVertexPositionsKernel = m_currentCLFunctions->compileCLKernelFromString( UpdateFixedVertexPositionsCLString, "UpdateFixedVertexPositions" ,noExtraOptions,"OpenCLC10/UpdateFixedVertexPositions.cl");
+	m_solveCollisionsAndUpdateVelocitiesKernel = m_currentCLFunctions->compileCLKernelFromString( SolveCollisionsAndUpdateVelocitiesCLString, "SolveCollisionsAndUpdateVelocitiesKernel", noExtraOptions ,"OpenCLC10/SolveCollisionsAndUpdateVelocitiesSIMDBatched.cl");
+
+	// TODO: Rename to UpdateSoftBodies
+	m_resetNormalsAndAreasKernel = m_currentCLFunctions->compileCLKernelFromString( UpdateNormalsCLString, "ResetNormalsAndAreasKernel", noExtraOptions ,"OpenCLC10/UpdateNormals.cl");
+	m_normalizeNormalsAndAreasKernel = m_currentCLFunctions->compileCLKernelFromString( UpdateNormalsCLString, "NormalizeNormalsAndAreasKernel", noExtraOptions ,"OpenCLC10/UpdateNormals.cl");
+	m_updateSoftBodiesKernel = m_currentCLFunctions->compileCLKernelFromString( UpdateNormalsCLString, "UpdateSoftBodiesKernel", noExtraOptions ,"OpenCLC10/UpdateNormals.cl");
+
+	delete [] simdOptions;
+
+	// The flag is only raised when every compile above succeeded
+	if( m_currentCLFunctions->getKernelCompilationFailures() != 0 )
+		return m_shadersInitialized;
+	m_shadersInitialized = true;
+	return true;
+}
+
+
+
+
+static Vectormath::Aos::Transform3 toTransform3( const btTransform &transform )
+{
+	// Columns 0-2 are copied from the basis matrix, column 3 from the origin.
+	Vectormath::Aos::Transform3 result;
+	for( int column = 0; column < 3; ++column )
+		result.setCol(column, toVector3(transform.getBasis().getColumn(column)));
+	result.setCol(3, toVector3(transform.getOrigin()));
+	return result;
+}
+
+
+static void generateBatchesOfWavefronts( btAlignedObjectArray < btAlignedObjectArray <int> > &linksForWavefronts, btSoftBodyLinkData &linkData, int numVertices, btAlignedObjectArray < btAlignedObjectArray <int> > &wavefrontBatches )
+{
+	// Greedily groups wavefronts into batches so that no two wavefronts in the
+	// same batch touch a common vertex. Each wavefront is appended to the first
+	// existing batch it does not conflict with; when every existing batch
+	// conflicts, a new batch is appended for it.
+	//
+	// linksForWavefronts  in:  the link indices of each wavefront
+	// linkData            in:  supplies the vertex pair of each link
+	// numVertices         in:  each per-batch vertex map gets numVertices+1
+	//                          entries, so vertex indices up to numVertices are
+	//                          valid (generateBatches passes m_maxVertex)
+	// wavefrontBatches    out: per batch, the indices of the wavefronts in it,
+	//                          in increasing wavefront order
+
+	// A per-batch map of truth values stating whether a given vertex is in that batch
+	// This allows us to significantly optimize the batching
+	btAlignedObjectArray <btAlignedObjectArray<bool> > mapOfVerticesInBatches;
+
+	for( int waveIndex = 0; waveIndex < linksForWavefronts.size(); ++waveIndex )
+	{
+		btAlignedObjectArray <int> &wavefront( linksForWavefronts[waveIndex] );
+
+		// Find the first batch that shares no vertex with this wave
+		int batch = 0;
+		for( ; batch < wavefrontBatches.size(); ++batch )
+		{
+			bool foundSharedVertex = false;
+			// One conflict is enough to rule the batch out, so stop at the first
+			for( int link = 0; link < wavefront.size() && !foundSharedVertex; ++link )
+			{
+				btSoftBodyLinkData::LinkNodePair vertices = linkData.getVertexPair( wavefront[link] );
+				foundSharedVertex = (mapOfVerticesInBatches[batch])[vertices.vertex0] || (mapOfVerticesInBatches[batch])[vertices.vertex1];
+			}
+			if( !foundSharedVertex )
+				break;
+		}
+
+		if( batch == wavefrontBatches.size() )
+		{
+			// Every existing batch conflicts: open a new batch and its vertex map
+			wavefrontBatches.resize( batch + 1 );
+			mapOfVerticesInBatches.resize( batch + 1 );
+			// Resize maps with total number of vertices
+			mapOfVerticesInBatches[batch].resize( numVertices+1, false );
+		}
+
+		// batch now names the batch this wave belongs to
+		wavefrontBatches[batch].push_back( waveIndex );
+
+		// Insert vertices into this batch too
+		for( int link = 0; link < wavefront.size(); ++link )
+		{
+			btSoftBodyLinkData::LinkNodePair vertices = linkData.getVertexPair( wavefront[link] );
+			(mapOfVerticesInBatches[batch])[vertices.vertex0] = true;
+			(mapOfVerticesInBatches[batch])[vertices.vertex1] = true;
+		}
+	}
+
+	// mapOfVerticesInBatches is a local and is released on return
+}
+
+// Erases the element at indexToRemove while keeping the remaining elements in their original order
+template< typename T > static void removeFromVector( btAlignedObjectArray< T > &vectorToUpdate, int indexToRemove )
+{
+	const int oldSize = vectorToUpdate.size();
+	// Shift every later element down by one slot
+	for( int dst = indexToRemove; dst + 1 < oldSize; ++dst )
+		vectorToUpdate[dst] = vectorToUpdate[dst+1];
+
+	// Drop the now-duplicated last slot; an empty vector is left untouched
+	if( oldSize > 0 ) vectorToUpdate.resize( oldSize - 1 );
+}
+
+/**
+ * Grows vectorToUpdate by one, moves the elements from index onwards up one slot and stores element at index.
+ */
+template< typename T > static void insertAtIndex( btAlignedObjectArray< T > &vectorToUpdate, int index, T element )
+{
+	const int oldSize = vectorToUpdate.size();
+	vectorToUpdate.resize( oldSize + 1 );
+	// Walk from the back so that no element is overwritten before it has been moved
+	for( int slot = oldSize; slot > index; --slot )
+		vectorToUpdate[slot] = vectorToUpdate[slot-1];
+	vectorToUpdate[index] = element;
+}
+
+/** 
+ * Treats vectorToUpdate as an ordered set: element is inserted at its sorted
+ * position unless an equal element is already stored there.
+ */
+template< typename T > static void insertUniqueAndOrderedIntoVector( btAlignedObjectArray<T> &vectorToUpdate, T element )
+{
+	// Skip past every entry that orders before element
+	int position = 0;
+	for( ; position < vectorToUpdate.size(); ++position )
+		if( !(vectorToUpdate[position] < element) ) break;
+	// An equal entry at that position means there is nothing to insert
+	if( position < vectorToUpdate.size() && !(vectorToUpdate[position] != element) ) return;
+	insertAtIndex( vectorToUpdate, position, element );
+}
+
+static void generateLinksPerVertex( int numVertices, btSoftBodyLinkData &linkData, btAlignedObjectArray< int > &listOfLinksPerVertex, btAlignedObjectArray <int> &numLinksPerVertex, int &maxLinks )
+{
+	// Builds a fixed-stride table that maps each vertex to the links ending on it.
+	//
+	// numVertices           number of vertices covered by the table
+	// linkData              source of the links and their vertex pairs
+	// listOfLinksPerVertex  out: resized to maxLinks*numVertices; the links of
+	//                       vertex v are stored from index v*maxLinks
+	// numLinksPerVertex     in/out: incremented once per link end (the caller in
+	//                       this file passes numVertices zeroed entries)
+	// maxLinks              out: largest per-vertex count found
+	const int linkCount = linkData.getNumLinks();
+
+	// Pass 1: count the links ending on every vertex
+	for( int l = 0; l < linkCount; ++l )
+	{
+		btSoftBodyLinkData::LinkNodePair ends( linkData.getVertexPair(l) );
+		++numLinksPerVertex[ends.vertex0];
+		++numLinksPerVertex[ends.vertex1];
+	}
+
+	// The largest count becomes the row stride of the table
+	int stride = 0;
+	for( int v = 0; v < numVertices; ++v )
+		stride = btMax(numLinksPerVertex[v], stride);
+	maxLinks = stride;
+
+	// Next free column in each vertex's row
+	btAlignedObjectArray< int > nextColumn;
+	nextColumn.resize( numVertices, 0 );
+	listOfLinksPerVertex.resize( stride * numVertices );
+
+	// Pass 2: record each link in the rows of both of its end vertices
+	for( int l = 0; l < linkCount; ++l )
+	{
+		btSoftBodyLinkData::LinkNodePair ends( linkData.getVertexPair(l) );
+		const int endVertices[2] = { ends.vertex0, ends.vertex1 };
+		for( int e = 0; e < 2; ++e )
+		{
+			const int v = endVertices[e];
+			listOfLinksPerVertex[v * stride + nextColumn[v]] = l;
+			nextColumn[v] = nextColumn[v] + 1;
+		}
+	}
+}
+
+static void computeBatchingIntoWavefronts( 
+	btSoftBodyLinkData &linkData, 
+	int wavefrontSize, 
+	int linksPerWorkItem, 
+	int maxLinksPerWavefront, 
+	btAlignedObjectArray < btAlignedObjectArray <int> > &linksForWavefronts, 
+	btAlignedObjectArray< btAlignedObjectArray < btAlignedObjectArray <int> > > &batchesWithinWaves, /* wave, batch, links in batch */
+	btAlignedObjectArray< btAlignedObjectArray< int > > &verticesForWavefronts /* wavefront, vertex */
+	)
+{
+	// Assigns every link to a wavefront by walking outward from vertex 0 through a FIFO queue of
+	// vertices, then splits the links of each wavefront into batches that share no vertex.
+	// Attempt generation of larger batches of links.
+	btAlignedObjectArray< bool > processedLink;
+	processedLink.resize( linkData.getNumLinks() );
+	btAlignedObjectArray< int > listOfLinksPerVertex;
+	int maxLinksPerVertex = 0;
+
+	// Count num vertices
+	int numVertices = 0;
+	for( int linkIndex = 0; linkIndex < linkData.getNumLinks(); ++linkIndex )
+	{
+		btSoftBodyLinkData::LinkNodePair nodes( linkData.getVertexPair(linkIndex) );
+		numVertices = btMax( numVertices, nodes.vertex0 + 1 );
+		numVertices = btMax( numVertices, nodes.vertex1 + 1 );
+	}
+
+	// Need list of links per vertex
+	// Compute valence of each vertex
+	btAlignedObjectArray <int> numLinksPerVertex;
+	numLinksPerVertex.resize(0);
+	numLinksPerVertex.resize( numVertices, 0 );
+
+	generateLinksPerVertex( numVertices, linkData, listOfLinksPerVertex, numLinksPerVertex, maxLinksPerVertex );
+
+	if (!numVertices)
+		return;
+
+	// (A leftover debug loop that indexed numLinksPerVertex[0..9] whatever numVertices was,
+	// and computed nothing, has been dropped from this spot.)
+	//
+	// Results, all indexed by wavefront w:
+	//   linksForWavefronts[w]     the links assigned to w (at most maxLinksPerWavefront)
+	//   verticesForWavefronts[w]  ordered, duplicate-free list of the vertices of those links
+	//   batchesWithinWaves[w][b]  links of w sharing no vertex, at most wavefrontSize per batch
+
+
+	// At this point we know what links we have for each vertex so we can start batching
+	
+	// We want a vertex to start with, let's go with 0
+	int currentVertex = 0;
+	int linksProcessed = 0;
+
+	btAlignedObjectArray <int> verticesToProcess;
+
+	while( linksProcessed < linkData.getNumLinks() )
+	{
+		// Next wavefront
+		int nextWavefront = linksForWavefronts.size();
+		linksForWavefronts.resize( nextWavefront + 1 );
+		btAlignedObjectArray <int> &linksForWavefront(linksForWavefronts[nextWavefront]);
+		verticesForWavefronts.resize( nextWavefront + 1 );
+		btAlignedObjectArray<int> &vertexSet( verticesForWavefronts[nextWavefront] );
+
+		linksForWavefront.resize(0);
+
+		// Loop to find enough links to fill the wavefront
+		// Stopping if we either run out of links, or fill it
+		while( linksProcessed < linkData.getNumLinks() && linksForWavefront.size() < maxLinksPerWavefront )
+		{
+			// Go through the links for the current vertex
+			for( int link = 0; link < numLinksPerVertex[currentVertex] && linksForWavefront.size() < maxLinksPerWavefront; ++link )
+			{
+				int linkAddress = currentVertex * maxLinksPerVertex + link;
+				int linkIndex = listOfLinksPerVertex[linkAddress];
+				
+				// If we have not already processed this link, add it to the wavefront
+				// Claim it as another processed link
+				// Add the vertex at the far end to the list of vertices to process.
+				if( !processedLink[linkIndex] )
+				{
+					linksForWavefront.push_back( linkIndex );
+					linksProcessed++;
+					processedLink[linkIndex] = true;
+					int v0 = linkData.getVertexPair(linkIndex).vertex0;
+					int v1 = linkData.getVertexPair(linkIndex).vertex1;
+					if( v0 == currentVertex )
+						verticesToProcess.push_back( v1 );
+					else
+						verticesToProcess.push_back( v0 );
+				}
+			}
+			if( verticesToProcess.size() > 0 )
+			{
+				// Get the element on the front of the queue and remove it
+				currentVertex = verticesToProcess[0];
+				removeFromVector( verticesToProcess, 0 );
+			} else {		
+				// If we've not yet processed all the links, find the first unprocessed one
+				// and select one of its vertices as the current vertex
+				if( linksProcessed < linkData.getNumLinks() )
+				{
+					int searchLink = 0;
+					while( processedLink[searchLink] )
+						searchLink++;
+					currentVertex = linkData.getVertexPair(searchLink).vertex0;
+				}	
+			}
+		}
+
+		// We have either finished or filled a wavefront
+		for( int link = 0; link < linksForWavefront.size(); ++link )
+		{
+			int v0 = linkData.getVertexPair( linksForWavefront[link] ).vertex0;
+			int v1 = linkData.getVertexPair( linksForWavefront[link] ).vertex1;
+			insertUniqueAndOrderedIntoVector( vertexSet, v0 );
+			insertUniqueAndOrderedIntoVector( vertexSet, v1 );
+		}
+		// Iterate over links mapped to the wave and batch those
+		// We can run a batch on each cycle trivially
+		
+		batchesWithinWaves.resize( batchesWithinWaves.size() + 1 );
+		btAlignedObjectArray < btAlignedObjectArray <int> > &batchesWithinWave( batchesWithinWaves[batchesWithinWaves.size()-1] );
+		
+
+		for( int link = 0; link < linksForWavefront.size(); ++link )
+		{
+			int linkIndex = linksForWavefront[link];
+			btSoftBodyLinkData::LinkNodePair vertices = linkData.getVertexPair( linkIndex );
+			
+			int batch = 0;
+			bool placed = false;
+			while( batch < batchesWithinWave.size() && !placed )
+			{
+				bool foundSharedVertex = false;
+				if( batchesWithinWave[batch].size() >= wavefrontSize )
+				{
+					// If we have already filled this batch, move on to another
+					foundSharedVertex = true;
+				} else {
+					for( int link2 = 0; link2 < batchesWithinWave[batch].size(); ++link2 )
+					{
+						btSoftBodyLinkData::LinkNodePair vertices2 = linkData.getVertexPair( (batchesWithinWave[batch])[link2] );
+
+						if( vertices.vertex0 == vertices2.vertex0 ||
+							vertices.vertex1 == vertices2.vertex0 ||
+							vertices.vertex0 == vertices2.vertex1 ||
+							vertices.vertex1 == vertices2.vertex1 )
+						{
+							foundSharedVertex = true;
+							break;
+						}
+					}
+				}
+				if( !foundSharedVertex )
+				{
+					batchesWithinWave[batch].push_back( linkIndex );
+					placed = true;
+				} else {
+					++batch;
+				}
+			}
+			if( batch == batchesWithinWave.size() && !placed )
+			{
+				batchesWithinWave.resize( batch + 1 );
+				batchesWithinWave[batch].push_back( linkIndex );
+			}
+		}
+		
+	}
+
+}
+// Groups the links into wavefronts and batches of wavefronts, then rewrites every per-link array in that order, padded to m_wavefrontSize entries per batch.
+void btSoftBodyLinkDataOpenCLSIMDAware::generateBatches()
+{
+	btAlignedObjectArray < btAlignedObjectArray <int> > linksForWavefronts;
+	btAlignedObjectArray < btAlignedObjectArray <int> > wavefrontBatches;
+	btAlignedObjectArray< btAlignedObjectArray < btAlignedObjectArray <int> > > batchesWithinWaves;
+	btAlignedObjectArray< btAlignedObjectArray< int > > verticesForWavefronts; // wavefronts, vertices in wavefront as an ordered set
+
+	// Group the links into wavefronts
+	computeBatchingIntoWavefronts( *this, m_wavefrontSize, m_linksPerWorkItem, m_maxLinksPerWavefront, linksForWavefronts, batchesWithinWaves, verticesForWavefronts );
+
+
+	// Batch the wavefronts
+	generateBatchesOfWavefronts( linksForWavefronts, *this, m_maxVertex, wavefrontBatches );
+
+	m_numWavefronts = linksForWavefronts.size();
+
+	// At this point we have a description of which links we need to process in each wavefront
+
+	// First correctly fill the batch ranges vector
+	int numBatches = wavefrontBatches.size();
+	m_wavefrontBatchStartLengths.resize(0);
+	int prefixSum = 0;
+	for( int batchIndex = 0; batchIndex < numBatches; ++batchIndex )
+	{
+		int wavesInBatch = wavefrontBatches[batchIndex].size();
+		int nextPrefixSum = prefixSum + wavesInBatch;
+		m_wavefrontBatchStartLengths.push_back( BatchPair( prefixSum, nextPrefixSum - prefixSum ) );
+
+		prefixSum += wavesInBatch;
+	}
+	
+	// Also find max number of batches within a wave
+	m_maxBatchesWithinWave = 0;
+	m_maxVerticesWithinWave = 0;
+	m_numBatchesAndVerticesWithinWaves.resize( m_numWavefronts );
+	for( int waveIndex = 0; waveIndex < m_numWavefronts; ++waveIndex )
+	{
+		// See if the number of batches in this wave is greater than the current maximum
+		int batchesInCurrentWave = batchesWithinWaves[waveIndex].size();
+		int verticesInCurrentWave = verticesForWavefronts[waveIndex].size();
+		m_maxBatchesWithinWave = btMax( batchesInCurrentWave, m_maxBatchesWithinWave );
+		m_maxVerticesWithinWave = btMax( verticesInCurrentWave, m_maxVerticesWithinWave );
+	}
+	
+	// Add padding values both for alignment and as dud addresses within LDS to compute junk rather than branch around
+	m_maxVerticesWithinWave = 16*((m_maxVerticesWithinWave/16)+2);
+
+	// Now we know the maximum number of vertices per-wave we can resize the global vertices array
+	m_wavefrontVerticesGlobalAddresses.resize( m_maxVerticesWithinWave * m_numWavefronts );
+
+	// Grab backup copies of all the link data arrays for the sorting process
+	btAlignedObjectArray<btSoftBodyLinkData::LinkNodePair>				m_links_Backup(m_links);
+	btAlignedObjectArray<float>											m_linkStrength_Backup(m_linkStrength);
+	btAlignedObjectArray<float>											m_linksMassLSC_Backup(m_linksMassLSC);
+	btAlignedObjectArray<float>											m_linksRestLengthSquared_Backup(m_linksRestLengthSquared);
+	//btAlignedObjectArray<Vectormath::Aos::Vector3>						m_linksCLength_Backup(m_linksCLength);
+	//btAlignedObjectArray<float>											m_linksLengthRatio_Backup(m_linksLengthRatio);
+	btAlignedObjectArray<float>											m_linksRestLength_Backup(m_linksRestLength);
+	btAlignedObjectArray<float>											m_linksMaterialLinearStiffnessCoefficient_Backup(m_linksMaterialLinearStiffnessCoefficient);
+
+	// Resize to a wavefront sized batch per batch per wave so we get perfectly coherent memory accesses.
+	m_links.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts );
+	m_linkVerticesLocalAddresses.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts );
+	m_linkStrength.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts );
+	m_linksMassLSC.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts );
+	m_linksRestLengthSquared.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts );
+	m_linksRestLength.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts );
+	m_linksMaterialLinearStiffnessCoefficient.resize( m_maxBatchesWithinWave * m_wavefrontSize * m_numWavefronts );	
+		
+	// Then re-order links into wavefront blocks
+
+	// Total number of wavefronts moved. This will decide the ordering of sorted wavefronts.
+	int wavefrontCount = 0;
+
+	// Iterate over batches of wavefronts, then wavefronts in the batch
+	for( int batchIndex = 0; batchIndex < numBatches; ++batchIndex )
+	{
+		btAlignedObjectArray <int> &batch( wavefrontBatches[batchIndex] );
+		int wavefrontsInBatch = batch.size();
+
+		
+		for( int wavefrontIndex = 0; wavefrontIndex < wavefrontsInBatch; ++wavefrontIndex )
+		{	
+
+			int originalWavefrontIndex = batch[wavefrontIndex];
+			btAlignedObjectArray< int > &wavefrontVertices( verticesForWavefronts[originalWavefrontIndex] );
+			int verticesUsedByWavefront = wavefrontVertices.size();
+
+			// Copy the set of vertices into the correctly structured array for use on the device
+			// Fill the non-vertices with -1s
+			// so we can mask out those reads
+			for( int vertex = 0; vertex < verticesUsedByWavefront; ++vertex )
+			{
+				m_wavefrontVerticesGlobalAddresses[m_maxVerticesWithinWave * wavefrontCount + vertex] = wavefrontVertices[vertex];
+			}
+			for( int vertex = verticesUsedByWavefront; vertex < m_maxVerticesWithinWave; ++vertex )
+			{
+				m_wavefrontVerticesGlobalAddresses[m_maxVerticesWithinWave * wavefrontCount + vertex] = -1;
+			}
+
+			// Obtain the set of batches within the current wavefront
+			btAlignedObjectArray < btAlignedObjectArray <int> > &batchesWithinWavefront( batchesWithinWaves[originalWavefrontIndex] );
+			// Set the size of the batches for use in the solver, correctly ordered
+			NumBatchesVerticesPair batchesAndVertices;
+			batchesAndVertices.numBatches = batchesWithinWavefront.size();
+			batchesAndVertices.numVertices = verticesUsedByWavefront;
+			m_numBatchesAndVerticesWithinWaves[wavefrontCount] = batchesAndVertices;
+			
+
+			// Now iterate over batches within the wavefront to structure the links correctly
+			for( int wavefrontBatch = 0; wavefrontBatch < batchesWithinWavefront.size(); ++wavefrontBatch )
+			{
+				btAlignedObjectArray <int> &linksInBatch( batchesWithinWavefront[wavefrontBatch] );
+				int wavefrontBatchSize = linksInBatch.size();
+				// Each wavefront owns m_maxBatchesWithinWave blocks of m_wavefrontSize link slots; this is the first slot of the current batch's block
+				int batchAddressInTarget = m_maxBatchesWithinWave * m_wavefrontSize * wavefrontCount + m_wavefrontSize * wavefrontBatch;
+
+				for( int linkIndex = 0; linkIndex < wavefrontBatchSize; ++linkIndex )
+				{
+					int originalLinkAddress = linksInBatch[linkIndex];
+					// Reorder simple arrays trivially
+					m_links[batchAddressInTarget + linkIndex] = m_links_Backup[originalLinkAddress];
+					m_linkStrength[batchAddressInTarget + linkIndex] = m_linkStrength_Backup[originalLinkAddress];
+					m_linksMassLSC[batchAddressInTarget + linkIndex] = m_linksMassLSC_Backup[originalLinkAddress];
+					m_linksRestLengthSquared[batchAddressInTarget + linkIndex] = m_linksRestLengthSquared_Backup[originalLinkAddress];
+					m_linksRestLength[batchAddressInTarget + linkIndex] = m_linksRestLength_Backup[originalLinkAddress];
+					m_linksMaterialLinearStiffnessCoefficient[batchAddressInTarget + linkIndex] = m_linksMaterialLinearStiffnessCoefficient_Backup[originalLinkAddress];
+
+					// The local address is more complicated. We need to work out where a given vertex will end up
+					// by searching the set of vertices for this link and using the index as the local address
+					btSoftBodyLinkData::LinkNodePair localPair;
+					btSoftBodyLinkData::LinkNodePair globalPair = m_links[batchAddressInTarget + linkIndex];
+					localPair.vertex0 = wavefrontVertices.findLinearSearch( globalPair.vertex0 );
+					localPair.vertex1 = wavefrontVertices.findLinearSearch( globalPair.vertex1 );
+					m_linkVerticesLocalAddresses[batchAddressInTarget + linkIndex] = localPair;
+				}
+				for( int linkIndex = wavefrontBatchSize; linkIndex < m_wavefrontSize; ++linkIndex )
+				{
+					// Put 0s into these arrays for padding for cleanliness
+					m_links[batchAddressInTarget + linkIndex] = btSoftBodyLinkData::LinkNodePair(0, 0);
+					m_linkStrength[batchAddressInTarget + linkIndex] = 0.f;
+					m_linksMassLSC[batchAddressInTarget + linkIndex] = 0.f;
+					m_linksRestLengthSquared[batchAddressInTarget + linkIndex] = 0.f;
+					m_linksRestLength[batchAddressInTarget + linkIndex] = 0.f;
+					m_linksMaterialLinearStiffnessCoefficient[batchAddressInTarget + linkIndex] = 0.f;
+
+
+					// For local addresses of junk data choose a set of addresses just above the range of valid ones 
+					// and cycling through % 16 so that we don't have bank conflicts between all dud addresses
+					// The valid addresses will do scatter and gather in the valid range, the junk ones should happily work
+					// off the end of that range so we need no control
+					btSoftBodyLinkData::LinkNodePair localPair;
+					localPair.vertex0 = verticesUsedByWavefront + (linkIndex % 16);
+					localPair.vertex1 = verticesUsedByWavefront + (linkIndex % 16);
+					m_linkVerticesLocalAddresses[batchAddressInTarget + linkIndex] = localPair;
+				}
+
+			}
+
+			
+			wavefrontCount++;
+		}
+
+	
+	}
+
+} // void btSoftBodyLinkDataOpenCLSIMDAware::generateBatches()
+
+
+
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolver_OpenCLSIMDAware.h b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolver_OpenCLSIMDAware.h
new file mode 100644
index 000000000..8cd838ad7
--- /dev/null
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/OpenCL/btSoftBodySolver_OpenCLSIMDAware.h
@@ -0,0 +1,81 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_SOFT_BODY_SOLVER_OPENCL_SIMDAWARE_H
+#define BT_SOFT_BODY_SOLVER_OPENCL_SIMDAWARE_H
+
+#include "stddef.h" //for size_t
+#include "vectormath/vmInclude.h"
+
+#include "btSoftBodySolver_OpenCL.h"
+#include "btSoftBodySolverBuffer_OpenCL.h"
+#include "btSoftBodySolverLinkData_OpenCLSIMDAware.h"
+#include "btSoftBodySolverVertexData_OpenCL.h"
+#include "btSoftBodySolverTriangleData_OpenCL.h"
+
+
+
+
+
+class btOpenCLSoftBodySolverSIMDAware : public btOpenCLSoftBodySolver
+{
+protected:
+	// Link storage of this solver; its arrays are regrouped per wavefront and
+	// batch by btSoftBodyLinkDataOpenCLSIMDAware::generateBatches().
+	btSoftBodyLinkDataOpenCLSIMDAware m_linkData;
+
+	// Releases and recompiles the solver's OpenCL kernels. Returns true when
+	// no kernel compilation failure was recorded.
+
+	virtual bool buildShaders();
+
+
+	void updateConstants( float timeStep );
+
+	float computeTriangleArea( 
+		const Vectormath::Aos::Point3 &vertex0,
+		const Vectormath::Aos::Point3 &vertex1,
+		const Vectormath::Aos::Point3 &vertex2 );
+
+	//////////////////////////////////////
+	// Kernel dispatches
+	// Enqueues the link solver for numWaves wavefronts beginning at wavefront startWave.
+	void solveLinksForPosition( int startWave, int numWaves, float kst, float ti );
+	// Enqueues the collision/velocity kernel; solveConstraints passes 1/solverdt as isolverdt.
+	void solveCollisionsAndUpdateVelocities( float isolverdt );
+	// End kernel dispatches
+	/////////////////////////////////////
+
+public:
+	btOpenCLSoftBodySolverSIMDAware(cl_command_queue queue,cl_context	ctx, bool bUpdateAchchoredNodePos = false);
+
+	virtual ~btOpenCLSoftBodySolverSIMDAware();
+
+	// Identifies this solver as the SIMD-aware OpenCL variant.
+	virtual SolverTypes getSolverType() const
+	{
+		return CL_SIMD_SOLVER;
+	}
+
+
+	virtual btSoftBodyLinkData &getLinkData();
+
+
+	virtual void optimize( btAlignedObjectArray< btSoftBody * > &softBodies , bool forceUpdate=false);
+	// Runs one constraint-solving step; solverdt must be non-zero because its reciprocal is taken.
+	virtual void solveConstraints( float solverdt );
+
+}; // btOpenCLSoftBodySolverSIMDAware
+
+#endif // #ifndef BT_SOFT_BODY_SOLVER_OPENCL_SIMDAWARE_H
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/Shared/btSoftBodySolverData.h b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/Shared/btSoftBodySolverData.h
new file mode 100644
index 000000000..ab6721fbb
--- /dev/null
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/GpuSoftBodySolvers/Shared/btSoftBodySolverData.h
@@ -0,0 +1,748 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_SOFT_BODY_SOLVER_DATA_H
+#define BT_SOFT_BODY_SOLVER_DATA_H
+
+#include "BulletCollision/CollisionShapes/btTriangleIndexVertexArray.h"
+#include "vectormath/vmInclude.h"
+
+
+class btSoftBodyLinkData
+{
+public:
+	/**
+	 * Class representing a link as a pair of indices into the vertex array.
+	 */
+	class LinkNodePair
+	{
+	public:
+		int vertex0;
+		int vertex1;
+
+		LinkNodePair()
+		{
+			vertex0 = 0;
+			vertex1 = 0;
+		}
+
+		LinkNodePair( int v0, int v1 )
+		{
+			vertex0 = v0;
+			vertex1 = v1;
+		}
+	};
+
+	/**
+	 * Class describing a link for input into the system.
+	 */
+	class LinkDescription
+	{
+	protected:
+		int m_vertex0;
+		int m_vertex1;
+		float m_linkLinearStiffness;
+		float m_linkStrength;
+
+	public:
+
+		LinkDescription()
+		{
+			m_vertex0 = 0;
+			m_vertex1 = 0;
+			m_linkLinearStiffness = 1.0;
+			m_linkStrength = 1.0;
+		}
+
+		LinkDescription( int newVertex0, int newVertex1, float linkLinearStiffness )
+		{
+			m_vertex0 = newVertex0;
+			m_vertex1 = newVertex1;
+			m_linkLinearStiffness = linkLinearStiffness;
+			m_linkStrength = 1.0;
+		}
+
+		LinkNodePair getVertexPair() const
+		{
+			LinkNodePair nodes;
+			nodes.vertex0 = m_vertex0;
+			nodes.vertex1 = m_vertex1;
+			return nodes;
+		}
+
+		void setVertex0( int vertex )
+		{
+			m_vertex0 = vertex;
+		}
+
+		void setVertex1( int vertex )
+		{
+			m_vertex1 = vertex;
+		}
+
+		void setLinkLinearStiffness( float linearStiffness )
+		{
+			m_linkLinearStiffness = linearStiffness;
+		}
+
+		void setLinkStrength( float strength )
+		{
+			m_linkStrength = strength;
+		}
+
+		int getVertex0() const
+		{
+			return m_vertex0;
+		}
+
+		int getVertex1() const
+		{
+			return m_vertex1;
+		}
+
+		float getLinkStrength() const
+		{
+			return m_linkStrength;
+		}
+
+		float getLinkLinearStiffness() const
+		{
+			return m_linkLinearStiffness;
+		}
+	};
+
+
+protected:
+	// NOTE:
+	// Vertex reference data is stored relative to global array, not relative to individual cloth.
+	// Values must be correct if being passed into single-cloth VBOs or when migrating from one solver
+	// to another.
+
+	btAlignedObjectArray< LinkNodePair > m_links; // Vertex pair for the link
+	btAlignedObjectArray< float >								m_linkStrength; // Strength of each link
+	// (inverseMassA + inverseMassB)/ linear stiffness coefficient
+	btAlignedObjectArray< float >								m_linksMassLSC; 
+	btAlignedObjectArray< float >								m_linksRestLengthSquared; 
+	// Current vector length of link
+	btAlignedObjectArray< Vectormath::Aos::Vector3 >			m_linksCLength;
+	// 1/(current length * current length * massLSC)
+	btAlignedObjectArray< float >								m_linksLengthRatio; 
+	btAlignedObjectArray< float >								m_linksRestLength;
+	btAlignedObjectArray< float >								m_linksMaterialLinearStiffnessCoefficient;
+
+public:
+	btSoftBodyLinkData()
+	{
+	}
+
+	virtual ~btSoftBodyLinkData()
+	{
+	}
+
+	virtual void clear() // Empties every per-link array so that all of them stay the same length.
+	{
+		m_links.resize(0);
+		m_linkStrength.resize(0);
+		m_linksMassLSC.resize(0);
+		m_linksRestLengthSquared.resize(0);
+		m_linksCLength.resize(0); // sized by createLinks() like the others, so it has to be emptied with them
+		m_linksLengthRatio.resize(0);
+		m_linksRestLength.resize(0);
+		m_linksMaterialLinearStiffnessCoefficient.resize(0);
+	}
+	int getNumLinks()
+	{
+		return m_links.size();
+	}
+
+	/** Allocate enough space in all link-related arrays to fit numLinks links */
+	virtual void createLinks( int numLinks )
+	{
+		int previousSize = m_links.size();
+		int newSize = previousSize + numLinks;
+
+		// Resize all the arrays that store link data
+		m_links.resize( newSize );
+		m_linkStrength.resize( newSize );
+		m_linksMassLSC.resize( newSize );
+		m_linksRestLengthSquared.resize( newSize );
+		m_linksCLength.resize( newSize );
+		m_linksLengthRatio.resize( newSize );
+		m_linksRestLength.resize( newSize );
+		m_linksMaterialLinearStiffnessCoefficient.resize( newSize );
+	}
+	
+	/** Insert the link described into the correct data structures assuming space has already been allocated by a call to createLinks */
+	virtual void setLinkAt( const LinkDescription &link, int linkIndex )
+	{
+		m_links[linkIndex] = link.getVertexPair();
+		m_linkStrength[linkIndex] = link.getLinkStrength();
+		m_linksMassLSC[linkIndex] = 0.f;
+		m_linksRestLengthSquared[linkIndex] = 0.f;
+		m_linksCLength[linkIndex] = Vectormath::Aos::Vector3(0.f, 0.f, 0.f);
+		m_linksLengthRatio[linkIndex] = 0.f;
+		m_linksRestLength[linkIndex] = 0.f;
+		m_linksMaterialLinearStiffnessCoefficient[linkIndex] = link.getLinkLinearStiffness();
+	}
+
+
+	/**
+	 * Return true if data is on the accelerator.
+	 * The CPU version of this class will return true here because
+	 * the CPU is the same as the accelerator.
+	 */
+	virtual bool onAccelerator()
+	{
+		return true;
+	}
+	
+	/**
+	 * Move data from host memory to the accelerator.
+	 * The CPU version will always return that it has moved it.
+	 */
+	virtual bool moveToAccelerator()
+	{
+		return true;
+	}
+
+	/**
+	 * Move data to host memory from the accelerator.
+	 * The CPU version will always return that it has moved it.
+	 */
+	virtual bool moveFromAccelerator()
+	{
+		return true;
+	}
+
+
+
+	/**
+	 * Return reference to the vertex index pair for link linkIndex as stored on the host.
+	 */
+	LinkNodePair &getVertexPair( int linkIndex )
+	{
+		return m_links[linkIndex];
+	}
+
+	/** 
+	 * Return reference to strength of link linkIndex as stored on the host.
+	 */
+	float &getStrength( int linkIndex )
+	{
+		return m_linkStrength[linkIndex];
+	}
+
+	/**
+	 * Return a reference to the strength of the link corrected for link sorting.
+	 * This is important if we are using data on an accelerator which has the data sorted in some fashion.
+	 */
+	virtual float &getStrengthCorrected( int linkIndex )
+	{
+		return getStrength( linkIndex );
+	}
+
+	/**
+	 * Return reference to the rest length of link linkIndex as stored on the host.
+	 */
+	float &getRestLength( int linkIndex )
+	{
+		return m_linksRestLength[linkIndex];
+	}
+
+	/**
+	 * Return reference to linear stiffness coefficient for link linkIndex as stored on the host.
+	 */
+	float &getLinearStiffnessCoefficient( int linkIndex )
+	{
+		return m_linksMaterialLinearStiffnessCoefficient[linkIndex];
+	}
+
+	/**
+	 * Return reference to the MassLSC value for link linkIndex as stored on the host.
+	 */
+	float &getMassLSC( int linkIndex )
+	{
+		return m_linksMassLSC[linkIndex];
+	}
+
+	/**
+	 * Return reference to rest length squared for link linkIndex as stored on the host.
+	 */
+	float &getRestLengthSquared( int linkIndex )
+	{
+		return m_linksRestLengthSquared[linkIndex];
+	}
+
+	/**
+	 * Return reference to current length of link linkIndex as stored on the host.
+	 */
+	Vectormath::Aos::Vector3 &getCurrentLength( int linkIndex )
+	{
+		return m_linksCLength[linkIndex];
+	}
+
+	 /**
+	  * Return a reference to the link length ratio for link linkIndex as stored on the host.
+	  */
+	 float &getLinkLengthRatio( int linkIndex )
+	 {
+		 return m_linksLengthRatio[linkIndex];
+	 }
+};
+
+
+
+/**
+ * Wrapper for vertex data information.
+ * By wrapping it like this we stand a good chance of being able to optimise for storage format easily.
+ * It should also help us make sure all the data structures remain consistent.
+ */
+class btSoftBodyVertexData
+{
+public:
+	/**
+	 * Class describing a vertex for input into the system.
+	 */
+	class VertexDescription
+	{
+	private:
+		Vectormath::Aos::Point3 m_position;
+		/** Inverse mass. If this is 0f then the mass was 0 because that simplifies calculations. */
+		float m_inverseMass;
+
+	public:
+		VertexDescription()
+		{	
+			m_position = Vectormath::Aos::Point3( 0.f, 0.f, 0.f );
+			m_inverseMass = 0.f;
+		}
+
+		VertexDescription( const Vectormath::Aos::Point3 &position, float mass )
+		{
+			m_position = position;
+			if( mass > 0.f )
+				m_inverseMass = 1.0f/mass;
+			else
+				m_inverseMass = 0.f;
+		}
+
+		void setPosition( const Vectormath::Aos::Point3 &position )
+		{
+			m_position = position;
+		}
+
+		void setInverseMass( float inverseMass )
+		{
+			m_inverseMass = inverseMass;
+		}
+
+		void setMass( float mass )
+		{
+			if( mass > 0.f )
+				m_inverseMass = 1.0f/mass;
+			else
+				m_inverseMass = 0.f;
+		}
+
+		Vectormath::Aos::Point3 getPosition() const
+		{
+			return m_position;
+		}
+
+		float getInverseMass() const
+		{
+			return m_inverseMass;
+		}
+
+		float getMass() const
+		{
+			if( m_inverseMass == 0.f )
+				return 0.f;
+			else
+				return 1.0f/m_inverseMass;
+		}
+	};
+protected:
+
+	// identifier for the individual cloth
+	// For the CPU we don't really need this as we can grab the cloths and iterate over only their vertices
+	// For a parallel accelerator knowing on a per-vertex basis which cloth we're part of will help for obtaining
+	// per-cloth data
+	// For sorting etc it might also be helpful to be able to use in-array data such as this.
+	btAlignedObjectArray< int >							m_clothIdentifier;
+	btAlignedObjectArray< Vectormath::Aos::Point3 >		m_vertexPosition;			// vertex positions
+	btAlignedObjectArray< Vectormath::Aos::Point3 >		m_vertexPreviousPosition;	// previous vertex positions
+	btAlignedObjectArray< Vectormath::Aos::Vector3 >	m_vertexVelocity;			// Velocity
+	btAlignedObjectArray< Vectormath::Aos::Vector3 >	m_vertexForceAccumulator;	// Force accumulator
+	btAlignedObjectArray< Vectormath::Aos::Vector3 >	m_vertexNormal;				// Normals
+	btAlignedObjectArray< float >						m_vertexInverseMass;		// Inverse mass
+	btAlignedObjectArray< float >						m_vertexArea;				// Area controlled by the vertex
+	btAlignedObjectArray< int >							m_vertexTriangleCount;		// Number of triangles touching this vertex
+
+public:
+	btSoftBodyVertexData()
+	{
+	}
+
+	virtual ~btSoftBodyVertexData()
+	{
+	}
+
+	virtual void clear()
+	{
+		m_clothIdentifier.resize(0);
+		m_vertexPosition.resize(0);
+		m_vertexPreviousPosition.resize(0);
+		m_vertexVelocity.resize(0);
+		m_vertexForceAccumulator.resize(0);
+		m_vertexNormal.resize(0);
+		m_vertexInverseMass.resize(0);
+		m_vertexArea.resize(0);
+		m_vertexTriangleCount.resize(0);
+	}
+
+	int getNumVertices()
+	{
+		return m_vertexPosition.size();
+	}
+
+	int getClothIdentifier( int vertexIndex )
+	{
+		return m_clothIdentifier[vertexIndex];
+	}
+
+	void setVertexAt( const VertexDescription &vertex, int vertexIndex )
+	{
+		m_vertexPosition[vertexIndex] = vertex.getPosition();
+		m_vertexPreviousPosition[vertexIndex] = vertex.getPosition();
+		m_vertexVelocity[vertexIndex] = Vectormath::Aos::Vector3(0.f, 0.f, 0.f);
+		m_vertexForceAccumulator[vertexIndex] = Vectormath::Aos::Vector3(0.f, 0.f, 0.f);
+		m_vertexNormal[vertexIndex] = Vectormath::Aos::Vector3(0.f, 0.f, 0.f);
+		m_vertexInverseMass[vertexIndex] = vertex.getInverseMass();
+		m_vertexArea[vertexIndex] = 0.f;
+		m_vertexTriangleCount[vertexIndex] = 0;
+	}
+
+	/** 
+	 * Create numVertices new vertices for cloth clothIdentifier 
+	 * maxVertices allows a buffer zone of extra vertices for alignment or tearing reasons.
+	 */
+	void createVertices( int numVertices, int clothIdentifier, int maxVertices = 0 )
+	{
+		int previousSize = m_vertexPosition.size();
+		if( maxVertices == 0 )
+			maxVertices = numVertices;
+		int newSize = previousSize + maxVertices;
+
+		// Resize all the arrays that store vertex data
+		m_clothIdentifier.resize( newSize );
+		m_vertexPosition.resize( newSize );
+		m_vertexPreviousPosition.resize( newSize );
+		m_vertexVelocity.resize( newSize );
+		m_vertexForceAccumulator.resize( newSize );
+		m_vertexNormal.resize( newSize );
+		m_vertexInverseMass.resize( newSize );
+		m_vertexArea.resize( newSize );
+		m_vertexTriangleCount.resize( newSize );
+
+		for( int vertexIndex = previousSize; vertexIndex < newSize; ++vertexIndex )
+			m_clothIdentifier[vertexIndex] = clothIdentifier;
+		for( int vertexIndex = (previousSize + numVertices); vertexIndex < newSize; ++vertexIndex )
+			m_clothIdentifier[vertexIndex] = -1;
+	}
+
+	// Get and set methods in header so they can be inlined
+
+	/**
+	 * Return a reference to the position of vertex vertexIndex as stored on the host.
+	 */
+	Vectormath::Aos::Point3 &getPosition( int vertexIndex )
+	{
+		return m_vertexPosition[vertexIndex];
+	}
+
+	Vectormath::Aos::Point3 getPosition( int vertexIndex ) const
+	{
+		return m_vertexPosition[vertexIndex];
+	}
+
+	/**
+	 * Return a reference to the previous position of vertex vertexIndex as stored on the host.
+	 */
+	Vectormath::Aos::Point3 &getPreviousPosition( int vertexIndex )
+	{
+		return m_vertexPreviousPosition[vertexIndex];
+	}
+
+	/**
+	 * Return a reference to the velocity of vertex vertexIndex as stored on the host.
+	 */
+	Vectormath::Aos::Vector3 &getVelocity( int vertexIndex )
+	{
+		return m_vertexVelocity[vertexIndex];
+	}
+
+	/**
+	 * Return a reference to the force accumulator of vertex vertexIndex as stored on the host.
+	 */
+	Vectormath::Aos::Vector3 &getForceAccumulator( int vertexIndex )
+	{
+		return m_vertexForceAccumulator[vertexIndex];
+	}
+
+	/**
+	 * Return a reference to the normal of vertex vertexIndex as stored on the host.
+	 */
+	Vectormath::Aos::Vector3 &getNormal( int vertexIndex )
+	{
+		return m_vertexNormal[vertexIndex];
+	}
+
+	Vectormath::Aos::Vector3 getNormal( int vertexIndex ) const
+	{
+		return m_vertexNormal[vertexIndex];
+	}
+
+	/**
+	 * Return a reference to the inverse mass of vertex vertexIndex as stored on the host.
+	 */
+	float &getInverseMass( int vertexIndex )
+	{
+		return m_vertexInverseMass[vertexIndex];
+	}
+
+	/**
+	 * Get access to the area controlled by this vertex.
+	 */
+	float &getArea( int vertexIndex )
+	{
+		return m_vertexArea[vertexIndex];
+	}
+
+	/**
+	 * Get access to the array of how many triangles touch each vertex.
+	 */
+	int &getTriangleCount( int vertexIndex )
+	{
+		return m_vertexTriangleCount[vertexIndex];
+	}
+
+
+
+	/**
+	 * Return true if data is on the accelerator.
+	 * The CPU version of this class will return true here because
+	 * the CPU is the same as the accelerator.
+	 */
+	virtual bool onAccelerator()
+	{
+		return true;
+	}
+	
+	/**
+	 * Move data from host memory to the accelerator.
+	 * The CPU version will always return that it has moved it.
+	 */
+	virtual bool moveToAccelerator()
+	{
+		return true;
+	}
+
+	/**
+	 * Move data to host memory from the accelerator if bCopy is false.
+	 * If bCopy is true, copy data to host memory from the accelerator so that data 
+	 * won't be moved to accelerator when moveToAccelerator() is called next time. 
+	 * If bCopyMinimum is true, only vertex position and normal are copied.
+	 * bCopyMinimum will be meaningful only if bCopy is true.
+	 * The CPU version will always return that it has moved it.
+	 */
+	virtual bool moveFromAccelerator(bool bCopy = false, bool bCopyMinimum = true)
+	{
+		return true;
+	}
+
+	btAlignedObjectArray< Vectormath::Aos::Point3 >	&getVertexPositions()
+	{
+		return m_vertexPosition;
+	}
+};
+
+
+class btSoftBodyTriangleData
+{
+public:
+	/**
+	 * Class representing a triangle as a set of three indices into the
+	 * vertex array.
+	 */
+	class TriangleNodeSet
+	{
+	public:
+		int vertex0;
+		int vertex1;
+		int vertex2;
+		int _padding; // not a vertex index; both constructors set it to -1
+
+		/** Default: all three vertex indices are 0. */
+		TriangleNodeSet( )
+			: vertex0( 0 ), vertex1( 0 ), vertex2( 0 ), _padding( -1 )
+		{
+		}
+
+		/**
+		 * Build a node set from three vertex indices.
+		 * _padding is initialised here as well, so copying the object never reads an indeterminate int.
+		 */
+		TriangleNodeSet( int newVertex0, int newVertex1, int newVertex2 )
+			: vertex0( newVertex0 ), vertex1( newVertex1 ), vertex2( newVertex2 ), _padding( -1 )
+		{
+		}
+	};
+
+	class TriangleDescription
+	{
+	protected:
+		int m_vertex0;
+		int m_vertex1;
+		int m_vertex2;
+
+	public:
+		TriangleDescription()
+		{
+			m_vertex0 = 0;
+			m_vertex1 = 0;
+			m_vertex2 = 0;
+		}
+
+		TriangleDescription( int newVertex0, int newVertex1, int newVertex2 )
+		{
+			m_vertex0 = newVertex0;
+			m_vertex1 = newVertex1;
+			m_vertex2 = newVertex2;
+		}
+
+		TriangleNodeSet getVertexSet() const
+		{
+			btSoftBodyTriangleData::TriangleNodeSet nodes;
+			nodes.vertex0 = m_vertex0;
+			nodes.vertex1 = m_vertex1;
+			nodes.vertex2 = m_vertex2;
+			return nodes;
+		}
+	};
+
+protected:
+	// NOTE:
+	// Vertex reference data is stored relative to global array, not relative to individual cloth.
+	// Values must be correct if being passed into single-cloth VBOs or when migrating from one solver
+	// to another.
+	btAlignedObjectArray< TriangleNodeSet > m_vertexIndices;
+	btAlignedObjectArray< float > m_area;
+	btAlignedObjectArray< Vectormath::Aos::Vector3 > m_normal;
+
+public:
+	btSoftBodyTriangleData()
+	{
+	}
+
+	virtual ~btSoftBodyTriangleData()
+	{
+
+	}
+
+	virtual void clear()
+	{
+		m_vertexIndices.resize(0);
+		m_area.resize(0);
+		m_normal.resize(0);
+	}
+
+	int getNumTriangles()
+	{
+		return m_vertexIndices.size();
+	}
+
+	virtual void setTriangleAt( const TriangleDescription &triangle, int triangleIndex )
+	{
+		m_vertexIndices[triangleIndex] = triangle.getVertexSet();
+	}
+
+	virtual void createTriangles( int numTriangles )		
+	{
+		int previousSize = m_vertexIndices.size();
+		int newSize = previousSize + numTriangles;
+
+		// Resize all the arrays that store triangle data
+		m_vertexIndices.resize( newSize );
+		m_area.resize( newSize );
+		m_normal.resize( newSize );
+	}
+
+	/**
+	 * Return the vertex index set for triangle triangleIndex as stored on the host.
+	 */
+	const TriangleNodeSet &getVertexSet( int triangleIndex )
+	{
+		return m_vertexIndices[triangleIndex];
+	}
+
+	/**
+	 * Get access to the triangle area.
+	 */
+	float &getTriangleArea( int triangleIndex )
+	{
+		return m_area[triangleIndex];
+	}
+
+	/**
+	 * Get access to the normal vector for this triangle.
+	 */
+	Vectormath::Aos::Vector3 &getNormal( int triangleIndex )
+	{
+		return m_normal[triangleIndex];
+	}
+
+	/**
+	 * Return true if data is on the accelerator.
+	 * The CPU version of this class will return true here because
+	 * the CPU is the same as the accelerator.
+	 */
+	virtual bool onAccelerator()
+	{
+		return true;
+	}
+	
+	/**
+	 * Move data from host memory to the accelerator.
+	 * The CPU version will always return that it has moved it.
+	 */
+	virtual bool moveToAccelerator()
+	{
+		return true;
+	}
+
+	/**
+	 * Move data to host memory from the accelerator.
+	 * The CPU version will always return that it has moved it.
+	 */
+	virtual bool moveFromAccelerator()
+	{
+		return true;
+	}
+};
+
+
+#endif // #ifndef BT_SOFT_BODY_SOLVER_DATA_H
+
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/HeapManager.h b/Engine/lib/bullet/src/BulletMultiThreaded/HeapManager.h
new file mode 100644
index 000000000..b2da4ef55
--- /dev/null
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/HeapManager.h
@@ -0,0 +1,117 @@
+/*
+   Copyright (C) 2009 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+
+*/
+
+#ifndef BT_HEAP_MANAGER_H__
+#define BT_HEAP_MANAGER_H__
+
+#ifdef __SPU__
+	#define HEAP_STACK_SIZE 32
+#else
+	#define HEAP_STACK_SIZE 64
+#endif
+
+#define MIN_ALLOC_SIZE 16
+
+
+class HeapManager
+{
+private:
+	ATTRIBUTE_ALIGNED16(unsigned char *mHeap);
+	ATTRIBUTE_ALIGNED16(unsigned int mHeapBytes);
+	ATTRIBUTE_ALIGNED16(unsigned char *mPoolStack[HEAP_STACK_SIZE]);
+	ATTRIBUTE_ALIGNED16(unsigned int mCurStack);
+	
+public:
+	enum {ALIGN16,ALIGN128};
+
+	HeapManager(unsigned char *buf,int bytes)
+	{
+		mHeap = buf;
+		mHeapBytes = bytes;
+		clear();
+	}
+	
+	~HeapManager()
+	{
+	}
+	
+	int getAllocated()
+	{
+		return (int)(mPoolStack[mCurStack]-mHeap);
+	}
+	
+	int getRest()
+	{
+		return mHeapBytes-getAllocated();
+	}
+
+	/**
+	 * Carve an aligned block off the top of the heap stack and return its address.
+	 * @param bytes     requested size; 0 is treated as MIN_ALLOC_SIZE, and the size is
+	 *                  rounded up to a multiple of the alignment.
+	 * @param alignment ALIGN128 for 128-byte alignment; any other value gives 16 bytes.
+	 * Running out of heap space or of HEAP_STACK_SIZE stack entries is only caught by
+	 * btAssert. Blocks are handed back in reverse order through deallocate().
+	 */
+	void *allocate(size_t bytes,int alignment = ALIGN16)
+	{
+		if(bytes == 0) bytes = MIN_ALLOC_SIZE;
+		btAssert(mCurStack < (HEAP_STACK_SIZE-1));
+
+		// All address arithmetic is done in size_t (assumed to be pointer-sized here),
+		// which removes the need for separate 32-bit and 64-bit code paths.
+		const size_t alignMask = (alignment == ALIGN128) ? (size_t)127 : (size_t)15;
+		const size_t heapStart = (size_t)mHeap;
+
+		// Round the start address up as well as the size, so the returned pointer is
+		// aligned even when the buffer handed to the constructor was not. For a buffer
+		// that is already aligned this leaves the address unchanged.
+		const size_t p = ((size_t)mPoolStack[mCurStack] + alignMask) & ~alignMask;
+		bytes = (bytes + alignMask) & ~alignMask;
+
+		// Two-step bounds check: written as one unsigned subtraction it would wrap
+		// around, and pass, once p has moved beyond the end of the heap.
+		btAssert((p - heapStart) <= mHeapBytes);
+		btAssert(bytes <= (mHeapBytes - (p - heapStart)));
+
+		// The new stack top is the first byte after this block.
+		mPoolStack[++mCurStack] = (unsigned char *)(p + bytes);
+		return (void*)p;
+	}
+
+	void deallocate(void *p) // Releases the most recent allocation; p itself is not inspected.
+	{
+		(void) p;
+		btAssert(mCurStack > 0); // an unmatched call would wrap the unsigned stack index
+		mCurStack--;
+	}
+	void clear()
+	{
+		mPoolStack[0] = mHeap;
+		mCurStack = 0;
+	}
+
+//	void printStack()
+//	{
+//		for(unsigned int i=0;i<=mCurStack;i++) {
+//			PRINTF("memStack %2d 0x%x\n",i,(uint32_t)mPoolStack[i]);
+//		}
+//	}
+
+};
+
+#endif //BT_HEAP_MANAGER_H__
+
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/Jamfile b/Engine/lib/bullet/src/BulletMultiThreaded/Jamfile
deleted file mode 100644
index 9f0c8d732..000000000
--- a/Engine/lib/bullet/src/BulletMultiThreaded/Jamfile
+++ /dev/null
@@ -1,14 +0,0 @@
-SubDir TOP src BulletMultiThreaded ;
-
-#IncludeDir src/BulletMultiThreaded ;
-
-Library bulletmultithreaded : [ Wildcard . : *.h *.cpp ] [ Wildcard MiniCLTask : *.h *.cpp ] [ Wildcard SpuNarrowPhaseCollisionTask : *.h *.cpp  ]  : noinstall ;
-CFlags bulletmultithreaded : [ FIncludes $(TOP)/src/BulletMultiThreaded ] [ FIncludes $(TOP)/src/BulletMultiThreaded/vectormath/scalar/cpp ] ;
-LibDepends bulletmultithreaded :  ;
-
-   MsvcIncDirs bulletmultithreaded : 
-	"../../src/BulletMultiThreaded"  
-	"../../src/BulletMultiThreaded/vectormath/scalar/cpp"
-	;
-
-InstallHeader [ Wildcard *.h ] : bulletmultithreaded ;
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/Makefile.original b/Engine/lib/bullet/src/BulletMultiThreaded/Makefile.original
deleted file mode 100644
index d0a8318b6..000000000
--- a/Engine/lib/bullet/src/BulletMultiThreaded/Makefile.original
+++ /dev/null
@@ -1,187 +0,0 @@
-__ARCH_BITS__ := 32
-
-# define macros
-NARROWPHASEDIR=./SpuNarrowPhaseCollisionTask
-SPU_TASKFILE=$(NARROWPHASEDIR)/SpuGatheringCollisionTask
-
-IBM_CELLSDK_VERSION := $(shell if [ -d /opt/cell ]; then echo "3.0"; fi)
-
-ifeq ("$(IBM_CELLSDK_VERSION)","3.0")
-        CELL_TOP ?= /opt/cell/sdk
-	CELL_SYSROOT := /opt/cell/sysroot
-else
-        CELL_TOP ?= /opt/ibm/cell-sdk/prototype
-	CELL_SYSROOT := $(CELL_TOP)/sysroot
-endif
-
-
-USE_CCACHE=ccache
-RM=rm -f 
-OUTDIR=./out
-DEBUGFLAG=-DNDEBUG
-LIBOUTDIR=../../lib/ibmsdk
-COLLISIONDIR=../../src/BulletCollision
-MATHDIR=../../src/LinearMath
-ARCHITECTUREFLAG=-m$(__ARCH_BITS__)
-ifeq "$(__ARCH_BITS__)" "64"
-  SPU_DEFFLAGS= -DUSE_LIBSPE2 -D__SPU__ -DUSE_ADDR64
-else
-  SPU_DEFFLAGS= -DUSE_LIBSPE2 -D__SPU__
-endif
-
-SPU_DEFFLAGS+=-DUSE_PE_BOX_BOX
-
-SPU_GCC=$(USE_CCACHE) /usr/bin/spu-gcc
-SPU_INCLUDEDIR= -Ivectormath/scalar/cpp -I. -I$(CELL_SYSROOT)/usr/spu/include -I../../src -I$(NARROWPHASEDIR)
-#SPU_CFLAGS= $(DEBUGFLAG) -W -Wall -Winline -Os -c -include spu_intrinsics.h -include stdbool.h
-SPU_CFLAGS= $(DEBUGFLAG) -W -Wall -Winline -O3 -mbranch-hints -fomit-frame-pointer -ftree-vectorize -finline-functions -ftree-vect-loop-version -ftree-loop-optimize -ffast-math -fno-rtti -fno-exceptions -c -include spu_intrinsics.h -include stdbool.h
-
-SPU_LFLAGS= -Wl,-N
-SPU_LIBRARIES=-lstdc++
-SPU_EMBED=/usr/bin/ppu-embedspu
-SPU_AR=/usr/bin/ar
-SYMBOLNAME=spu_program
-
-ifeq "$(__ARCH_BITS__)" "64"
-  PPU_DEFFLAGS= -DUSE_LIBSPE2 -DUSE_ADDR64
-  PPU_GCC=$(USE_CCACHE) /usr/bin/ppu-gcc
-else
-  PPU_DEFFLAGS= -DUSE_LIBSPE2
-  PPU_GCC=$(USE_CCACHE) /usr/bin/ppu32-gcc
-endif
-
-PPU_CFLAGS= $(ARCHITECTUREFLAG) $(DEBUGFLAG) -W -Wall -Winline -O3 -c -mabi=altivec -maltivec -include altivec.h -include stdbool.h
-PPU_INCLUDEDIR= -I. -I$(CELL_SYSROOT)/usr/include -I../../src -I$(NARROWPHASEDIR)
-PPU_LFLAGS= $(ARCHITECTUREFLAG) -Wl,-m,elf$(__ARCH_BITS__)ppc
-PPU_LIBRARIES= -lstdc++ -lsupc++ -lgcc -lgcov -lspe2 -lpthread -L../../lib/ibmsdk -lbulletcollision -lbulletdynamics -lbulletmath -L$(CELL_SYSROOT)/usr/lib$(__ARCH_BITS__) -R$(CELL_SYSROOT)/usr/lib
-PPU_AR=/usr/bin/ar
-
-MakeOut :
-#	rm -f -R $(OUTDIR) ; mkdir $(OUTDIR)
-	@echo "usage: make spu, make ppu, make all, or make clean"
-# SPU
-SpuTaskFile : MakeOut
-	$(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/SpuTaskFile.o $(SPU_TASKFILE).cpp
-
-boxBoxDistance : MakeOut
-	$(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $(NARROWPHASEDIR)/$@.cpp
-
-SpuFakeDma : MakeOut
-	$(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $@.cpp
-
-SpuContactManifoldCollisionAlgorithm_spu : MakeOut
-	$(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o SpuContactManifoldCollisionAlgorithm.cpp
-
-SpuCollisionShapes : MakeOut
-	$(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $(NARROWPHASEDIR)/$@.cpp
-
-SpuContactResult : MakeOut
-	$(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $(NARROWPHASEDIR)/$@.cpp
-
-#SpuGatheringCollisionTask : MakeOut
-#	$(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $(NARROWPHASEDIR)/$@.cpp
-
-SpuGjkPairDetector: MakeOut
-	$(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $(NARROWPHASEDIR)/$@.cpp
-
-SpuMinkowskiPenetrationDepthSolver : MakeOut
-	$(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $(NARROWPHASEDIR)/$@.cpp
-
-SpuVoronoiSimplexSolver : MakeOut
-	$(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $(NARROWPHASEDIR)/$@.cpp
-
-#SpuLibspe2Support_spu : MakeOut
-#	$(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o SpuLibspe2Support.cpp
-
-## SPU-Bullet
-btPersistentManifold : MakeOut
-	$(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $(COLLISIONDIR)/NarrowPhaseCollision/$@.cpp
-
-btOptimizedBvh : MakeOut
-	$(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $(COLLISIONDIR)/CollisionShapes/$@.cpp
-
-btCollisionObject : MakeOut
-	$(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $(COLLISIONDIR)/CollisionDispatch/$@.cpp
-
-btTriangleCallback : MakeOut
-	$(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $(COLLISIONDIR)/CollisionShapes/$@.cpp
-
-btTriangleIndexVertexArray : MakeOut
-	$(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $(COLLISIONDIR)/CollisionShapes/$@.cpp
-
-btStridingMeshInterface : MakeOut
-	$(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $(COLLISIONDIR)/CollisionShapes/$@.cpp
-
-btAlignedAllocator : MakeOut
-	$(SPU_GCC) $(SPU_DEFFLAGS) $(SPU_CFLAGS) $(SPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $(MATHDIR)/$@.cpp
-
-
-# PPU
-SpuGatheringCollisionDispatcher : MakeOut
-	$(PPU_GCC) $(PPU_DEFFLAGS) $(PPU_CFLAGS) $(PPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $@.cpp
-
-SequentialThreadSupport: MakeOut
-	$(PPU_GCC) $(PPU_DEFFLAGS) $(PPU_CFLAGS) $(PPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $@.cpp
-
-SpuLibspe2Support: MakeOut
-	$(PPU_GCC) $(PPU_DEFFLAGS) $(PPU_CFLAGS) $(PPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $@.cpp
-
-btThreadSupportInterface: MakeOut
-	$(PPU_GCC) $(PPU_DEFFLAGS) $(PPU_CFLAGS) $(PPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $@.cpp
-
-SpuCollisionTaskProcess : MakeOut
-	$(PPU_GCC) $(PPU_DEFFLAGS) $(PPU_CFLAGS) $(PPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $@.cpp
-
-SpuContactManifoldCollisionAlgorithm : MakeOut
-	$(PPU_GCC) $(PPU_DEFFLAGS) $(PPU_CFLAGS) $(PPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $@.cpp
-	
-SpuSampleTaskProcess : MakeOut
-	$(PPU_GCC) $(PPU_DEFFLAGS) $(PPU_CFLAGS) $(PPU_INCLUDEDIR) -o $(OUTDIR)/$@.o $@.cpp
-	
-
-
-spu : boxBoxDistance SpuFakeDma SpuContactManifoldCollisionAlgorithm_spu SpuContactResult SpuTaskFile \
-      SpuGjkPairDetector SpuMinkowskiPenetrationDepthSolver SpuVoronoiSimplexSolver SpuCollisionShapes \
-      btPersistentManifold btOptimizedBvh btCollisionObject btTriangleCallback btTriangleIndexVertexArray \
-      btStridingMeshInterface btAlignedAllocator
-	$(SPU_GCC) -o $(OUTDIR)/spuCollision.elf \
-                            $(OUTDIR)/SpuTaskFile.o \
-                            $(OUTDIR)/SpuFakeDma.o \
-			    $(OUTDIR)/boxBoxDistance.o \
-                            $(OUTDIR)/SpuContactManifoldCollisionAlgorithm_spu.o \
-                            $(OUTDIR)/SpuContactResult.o \
-                            $(OUTDIR)/SpuCollisionShapes.o \
-                            $(OUTDIR)/SpuGjkPairDetector.o \
-                            $(OUTDIR)/SpuMinkowskiPenetrationDepthSolver.o \
-                            $(OUTDIR)/SpuVoronoiSimplexSolver.o \
-                            $(OUTDIR)/btPersistentManifold.o \
-                            $(OUTDIR)/btTriangleCallback.o \
-                            $(OUTDIR)/btTriangleIndexVertexArray.o \
-                            $(OUTDIR)/btStridingMeshInterface.o \
-                            $(OUTDIR)/btAlignedAllocator.o \
-                            $(SPU_LFLAGS) $(SPU_LIBRARIES)
-
-spu-embed : spu
-	$(SPU_EMBED) $(ARCHITECTUREFLAG) $(SYMBOLNAME) $(OUTDIR)/spuCollision.elf $(OUTDIR)/$@.o
-	$(SPU_AR) -qcs $(LIBOUTDIR)/libspu.a $(OUTDIR)/$@.o
-
-
-
-ppu : SpuGatheringCollisionDispatcher SpuCollisionTaskProcess btThreadSupportInterface \
-      SpuLibspe2Support SpuContactManifoldCollisionAlgorithm SpuSampleTaskProcess
-	$(PPU_AR) -qcs $(LIBOUTDIR)/bulletmultithreaded.a \
-                                                          $(OUTDIR)/SpuCollisionTaskProcess.o \
-                                                          $(OUTDIR)/SpuSampleTaskProcess.o \
-                                                          $(OUTDIR)/SpuGatheringCollisionDispatcher.o \
-                                                          $(OUTDIR)/SpuLibspe2Support.o \
-                                                          $(OUTDIR)/btThreadSupportInterface.o \
-							  $(OUTDIR)/SpuContactManifoldCollisionAlgorithm.o
-
-all : spu-embed ppu 
-
-clean:
-	$(RM) $(OUTDIR)/* ; $(RM) $(LIBOUTDIR)/libspu.a ; $(RM) $(LIBOUTDIR)/bulletmultithreaded.a
-
-
-
-
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/MiniCLTaskScheduler.cpp b/Engine/lib/bullet/src/BulletMultiThreaded/MiniCLTaskScheduler.cpp
deleted file mode 100644
index 8fe95664a..000000000
--- a/Engine/lib/bullet/src/BulletMultiThreaded/MiniCLTaskScheduler.cpp
+++ /dev/null
@@ -1,227 +0,0 @@
-/*
-Bullet Continuous Collision Detection and Physics Library
-Copyright (c) 2003-2007 Erwin Coumans  http://bulletphysics.com
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose, 
-including commercial applications, and to alter it and redistribute it freely, 
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-*/
-
-//#define __CELLOS_LV2__ 1
-
-#define USE_SAMPLE_PROCESS 1
-#ifdef USE_SAMPLE_PROCESS
-
-
-#include "MiniCLTaskScheduler.h"
-#include <stdio.h>
-
-#ifdef __SPU__
-
-
-
-void	SampleThreadFunc(void* userPtr,void* lsMemory)
-{
-	//do nothing
-	printf("hello world\n");
-}
-
-
-void*	SamplelsMemoryFunc()
-{
-	//don't create local store memory, just return 0
-	return 0;
-}
-
-
-#else
-
-
-#include "btThreadSupportInterface.h"
-
-//#	include "SPUAssert.h"
-#include <string.h>
-
-
-
-extern "C" {
-	extern char SPU_SAMPLE_ELF_SYMBOL[];
-}
-
-
-
-
-
-MiniCLTaskScheduler::MiniCLTaskScheduler(btThreadSupportInterface*	threadInterface,  int maxNumOutstandingTasks)
-:m_threadInterface(threadInterface),
-m_maxNumOutstandingTasks(maxNumOutstandingTasks)
-{
-
-	m_taskBusy.resize(m_maxNumOutstandingTasks);
-	m_spuSampleTaskDesc.resize(m_maxNumOutstandingTasks);
-
-	for (int i = 0; i < m_maxNumOutstandingTasks; i++)
-	{
-		m_taskBusy[i] = false;
-	}
-	m_numBusyTasks = 0;
-	m_currentTask = 0;
-
-	m_initialized = false;
-
-	m_threadInterface->startSPU();
-
-
-}
-
-MiniCLTaskScheduler::~MiniCLTaskScheduler()
-{
-	m_threadInterface->stopSPU();
-	
-}
-
-
-
-void	MiniCLTaskScheduler::initialize()
-{
-#ifdef DEBUG_SPU_TASK_SCHEDULING
-	printf("MiniCLTaskScheduler::initialize()\n");
-#endif //DEBUG_SPU_TASK_SCHEDULING
-	
-	for (int i = 0; i < m_maxNumOutstandingTasks; i++)
-	{
-		m_taskBusy[i] = false;
-	}
-	m_numBusyTasks = 0;
-	m_currentTask = 0;
-	m_initialized = true;
-
-}
-
-
-void MiniCLTaskScheduler::issueTask(int firstWorkUnit, int lastWorkUnit,int kernelProgramId,char* argData,int* argSizes)
-{
-
-#ifdef DEBUG_SPU_TASK_SCHEDULING
-	printf("MiniCLTaskScheduler::issueTask (m_currentTask= %d\)n", m_currentTask);
-#endif //DEBUG_SPU_TASK_SCHEDULING
-
-	m_taskBusy[m_currentTask] = true;
-	m_numBusyTasks++;
-
-	MiniCLTaskDesc& taskDesc = m_spuSampleTaskDesc[m_currentTask];
-	{
-		// send task description in event message
-		taskDesc.m_firstWorkUnit = firstWorkUnit;
-		taskDesc.m_lastWorkUnit = lastWorkUnit;
-		taskDesc.m_kernelProgramId = kernelProgramId;
-		//some bookkeeping to recognize finished tasks
-		taskDesc.m_taskId = m_currentTask;
-		
-		for (int i=0;i<MINI_CL_MAX_ARG;i++)
-		{
-			taskDesc.m_argSizes[i] = argSizes[i];
-			if (taskDesc.m_argSizes[i])
-			{
-				memcpy(&taskDesc.m_argData[i],&argData[MINICL_MAX_ARGLENGTH*i],taskDesc.m_argSizes[i]);
-			}
-		}
-	}
-
-
-	m_threadInterface->sendRequest(1, (ppu_address_t) &taskDesc, m_currentTask);
-
-	// if all tasks busy, wait for spu event to clear the task.
-	
-	if (m_numBusyTasks >= m_maxNumOutstandingTasks)
-	{
-		unsigned int taskId;
-		unsigned int outputSize;
-
-		for (int i=0;i<m_maxNumOutstandingTasks;i++)
-	  {
-		  if (m_taskBusy[i])
-		  {
-			  taskId = i;
-			  break;
-		  }
-	  }
-		m_threadInterface->waitForResponse(&taskId, &outputSize);
-
-		//printf("PPU: after issue, received event: %u %d\n", taskId, outputSize);
-
-		postProcess(taskId, outputSize);
-
-		m_taskBusy[taskId] = false;
-
-		m_numBusyTasks--;
-	}
-
-	// find new task buffer
-	for (int i = 0; i < m_maxNumOutstandingTasks; i++)
-	{
-		if (!m_taskBusy[i])
-		{
-			m_currentTask = i;
-			break;
-		}
-	}
-}
-
-
-///Optional PPU-size post processing for each task
-void MiniCLTaskScheduler::postProcess(int taskId, int outputSize)
-{
-
-}
-
-
-void MiniCLTaskScheduler::flush()
-{
-#ifdef DEBUG_SPU_TASK_SCHEDULING
-	printf("\nSpuCollisionTaskProcess::flush()\n");
-#endif //DEBUG_SPU_TASK_SCHEDULING
-	
-
-	// all tasks are issued, wait for all tasks to be complete
-	while(m_numBusyTasks > 0)
-	{
-// Consolidating SPU code
-	  unsigned int taskId;
-	  unsigned int outputSize;
-	  
-	  for (int i=0;i<m_maxNumOutstandingTasks;i++)
-	  {
-		  if (m_taskBusy[i])
-		  {
-			  taskId = i;
-			  break;
-		  }
-	  }
-	  {
-			
-		  m_threadInterface->waitForResponse(&taskId, &outputSize);
-	  }
-
-		//printf("PPU: flushing, received event: %u %d\n", taskId, outputSize);
-
-		postProcess(taskId, outputSize);
-
-		m_taskBusy[taskId] = false;
-
-		m_numBusyTasks--;
-	}
-
-
-}
-
-#endif
-
-
-#endif //USE_SAMPLE_PROCESS
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/PlatformDefinitions.h b/Engine/lib/bullet/src/BulletMultiThreaded/PlatformDefinitions.h
index bda2d3b06..142103a09 100644
--- a/Engine/lib/bullet/src/BulletMultiThreaded/PlatformDefinitions.h
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/PlatformDefinitions.h
@@ -1,9 +1,26 @@
-#ifndef TYPE_DEFINITIONS_H
-#define TYPE_DEFINITIONS_H
+#ifndef BT_TYPE_DEFINITIONS_H
+#define BT_TYPE_DEFINITIONS_H
 
 ///This file provides some platform/compiler checks for common definitions
+#include "LinearMath/btScalar.h"
+#include "LinearMath/btMinMax.h"
 
-#ifdef WIN32
+#ifdef PFX_USE_FREE_VECTORMATH
+#include "physics_effects/base_level/base/pfx_vectormath_include.win32.h"
+typedef Vectormath::Aos::Vector3    vmVector3;
+typedef Vectormath::Aos::Quat       vmQuat;
+typedef Vectormath::Aos::Matrix3    vmMatrix3;
+typedef Vectormath::Aos::Transform3 vmTransform3;
+typedef Vectormath::Aos::Point3     vmPoint3;
+#else
+#include "vectormath/vmInclude.h"
+#endif//PFX_USE_FREE_VECTORMATH
+
+
+
+
+
+#ifdef _WIN32
 
 typedef union
 {
@@ -19,7 +36,11 @@ typedef union
 
 		typedef unsigned char     uint8_t;
 #ifndef __PHYSICS_COMMON_H__
+#ifndef PFX_USE_FREE_VECTORMATH
+#ifndef __BT_SKIP_UINT64_H
 		typedef unsigned long int uint64_t;
+#endif //__BT_SKIP_UINT64_H
+#endif //PFX_USE_FREE_VECTORMATH
 		typedef unsigned int      uint32_t;
 #endif //__PHYSICS_COMMON_H__
 		typedef unsigned short    uint16_t;
@@ -52,31 +73,27 @@ typedef union
 #include <stdio.h>		
 #define spu_printf printf	
 #define DWORD unsigned int
-		
 			typedef union
 			{
 			  unsigned long long ull;
 			  unsigned int ui[2];
 			  void *p;
 			} addr64;
-		
-		
-#else
-
-#include <stdio.h>		
-#define spu_printf printf	
-
 #endif // USE_LIBSPE2
-	
+
 #endif	//__CELLOS_LV2__
 	
 #endif
 
+#ifdef __SPU__
+#include <stdio.h>		
+#define printf spu_printf
+#endif
 
 /* Included here because we need uint*_t typedefs */
 #include "PpuAddressSpace.h"
 
-#endif //TYPE_DEFINITIONS_H
+#endif //BT_TYPE_DEFINITIONS_H
 
 
 
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/PosixThreadSupport.cpp b/Engine/lib/bullet/src/BulletMultiThreaded/PosixThreadSupport.cpp
index 241780940..81c0cf86d 100644
--- a/Engine/lib/bullet/src/BulletMultiThreaded/PosixThreadSupport.cpp
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/PosixThreadSupport.cpp
@@ -48,7 +48,7 @@ PosixThreadSupport::~PosixThreadSupport()
 #endif
 
 // this semaphore will signal, if and how many threads are finished with their work
-static sem_t* mainSemaphore;
+static sem_t* mainSemaphore=0;
 
 static sem_t* createSem(const char* baseName)
 {
@@ -58,9 +58,10 @@ static sem_t* createSem(const char* baseName)
         char name[32];
         snprintf(name, 32, "/%s-%d-%4.4d", baseName, getpid(), semCount++); 
         sem_t* tempSem = sem_open(name, O_CREAT, 0600, 0);
+
         if (tempSem != reinterpret_cast<sem_t *>(SEM_FAILED))
         {
-        	//printf("Created \"%s\" Semaphore %x\n", name, tempSem);
+//        printf("Created \"%s\" Semaphore %p\n", name, tempSem);
         }
         else
 	{
@@ -172,7 +173,7 @@ void PosixThreadSupport::waitForResponse(unsigned int *puiArgument0, unsigned in
 	// get at least one thread which has finished
         size_t last = -1;
         
-        for(size_t t=0; t < m_activeSpuStatus.size(); ++t) {
+        for(size_t t=0; t < size_t(m_activeSpuStatus.size()); ++t) {
             if(2 == m_activeSpuStatus[t].m_status) {
                 last = t;
                 break;
@@ -199,7 +200,8 @@ void PosixThreadSupport::startThreads(ThreadConstructionInfo& threadConstruction
 	m_activeSpuStatus.resize(threadConstructionInfo.m_numThreads);
         
 	mainSemaphore = createSem("main");                
-        
+	//checkPThreadFunction(sem_wait(mainSemaphore));
+   
 	for (int i=0;i < threadConstructionInfo.m_numThreads;i++)
 	{
 		printf("starting thread %d\n",i);
@@ -233,17 +235,175 @@ void PosixThreadSupport::startSPU()
 ///tell the task scheduler we are done with the SPU tasks
 void PosixThreadSupport::stopSPU()
 {
-	for(size_t t=0; t < m_activeSpuStatus.size(); ++t) {
+	for(size_t t=0; t < size_t(m_activeSpuStatus.size()); ++t) 
+	{
             btSpuStatus&	spuStatus = m_activeSpuStatus[t];
-            printf("%s: Thread %i used: %ld\n", __FUNCTION__, t, spuStatus.threadUsed);
-        
-            destroySem(spuStatus.startSemaphore);
-            checkPThreadFunction(pthread_cancel(spuStatus.thread));
-        }
-        destroySem(mainSemaphore);
+            printf("%s: Thread %i used: %ld\n", __FUNCTION__, int(t), spuStatus.threadUsed);
 
+	spuStatus.m_userPtr = 0;       
+ 	checkPThreadFunction(sem_post(spuStatus.startSemaphore));
+	checkPThreadFunction(sem_wait(mainSemaphore));
+
+	printf("destroy semaphore\n"); 
+            destroySem(spuStatus.startSemaphore);
+            printf("semaphore destroyed\n");
+		checkPThreadFunction(pthread_join(spuStatus.thread,0));
+
+        }
+	printf("destroy main semaphore\n");
+        destroySem(mainSemaphore);
+	printf("main semaphore destroyed\n");
 	m_activeSpuStatus.clear();
 }
 
+///Critical section for the pthreads backend: a pthread mutex plus 32 shared unsigned int parameters.
+class PosixCriticalSection : public btCriticalSection 
+{
+	pthread_mutex_t m_mutex;
+public:
+	PosixCriticalSection() 
+	{
+		pthread_mutex_init(&m_mutex, NULL);
+		for (int i=0;i<32;i++) mCommonBuff[i] = 0;	//defined values for reads that precede any write
+	}
+	virtual ~PosixCriticalSection() 
+	{
+		pthread_mutex_destroy(&m_mutex);
+	}
+	ATTRIBUTE_ALIGNED16(unsigned int mCommonBuff[32]);
+	///Returns shared parameter i; i must be in [0,32).
+	virtual unsigned int getSharedParam(int i)
+	{
+		btAssert(i>=0&&i<32);
+		return mCommonBuff[i];
+	}
+	virtual void setSharedParam(int i,unsigned int p)
+	{
+		btAssert(i>=0&&i<32);
+		mCommonBuff[i] = p;
+	}
+	virtual void lock()
+	{
+		pthread_mutex_lock(&m_mutex);
+	}
+	virtual void unlock()
+	{
+		pthread_mutex_unlock(&m_mutex);
+	}
+
+
+#if defined(_POSIX_BARRIERS) && (_POSIX_BARRIERS - 20012L) >= 0
+/* OK to use barriers on this platform */
+class PosixBarrier : public btBarrier 
+{
+	pthread_barrier_t m_barr;
+	int m_numThreads;	//stays 0 until setMaxCount() has successfully initialized m_barr
+public:
+	PosixBarrier()
+	:m_numThreads(0)	{	}
+	virtual ~PosixBarrier()	{
+		if (m_numThreads>0) pthread_barrier_destroy(&m_barr);	//never destroy an uninitialized barrier
+	}
+	///Blocks until getMaxCount() threads have called sync(); exits the process if the wait fails.
+	virtual void sync()
+	{
+		int rc = pthread_barrier_wait(&m_barr);
+		if(rc != 0 && rc != PTHREAD_BARRIER_SERIAL_THREAD)
+		{
+			printf("Could not wait on barrier\n");
+			exit(-1);
+		}
+	}
+	///(Re)initializes the barrier for numThreads participants, releasing any previous barrier first.
+	virtual void setMaxCount(int numThreads)
+	{
+		if (m_numThreads>0) pthread_barrier_destroy(&m_barr);
+		int result = pthread_barrier_init(&m_barr, NULL, numThreads);
+		m_numThreads = (result==0) ? numThreads : 0;
+		btAssert(result==0);
+	}
+	virtual int  getMaxCount()
+	{	return m_numThreads;	}
+};
+#else
+/* POSIX barriers are not available on this platform - emulate one with a mutex and a condition variable */
+class PosixBarrier : public btBarrier 
+{
+	pthread_mutex_t m_mutex;
+	pthread_cond_t m_cond;
+	//m_generation is bumped on every release so waiters can tell a real release from a spurious wakeup
+	int m_numThreads;
+	int	m_called;
+	unsigned int m_generation;
+public:
+	PosixBarrier()
+	:m_numThreads(0),m_called(0),m_generation(0)
+	{
+	}
+	virtual ~PosixBarrier() 
+	{
+		if (m_numThreads>0)
+		{
+			pthread_mutex_destroy(&m_mutex);
+			pthread_cond_destroy(&m_cond);
+		}
+	}
+	///Blocks until getMaxCount() threads have called sync(), then releases them all.
+	virtual void sync()
+	{		
+		pthread_mutex_lock(&m_mutex);
+		const unsigned int generation = m_generation;
+		if (++m_called == m_numThreads) {
+			m_called = 0;
+			m_generation++;
+			pthread_cond_broadcast(&m_cond);
+		} else {
+			while (generation == m_generation) pthread_cond_wait(&m_cond,&m_mutex);
+		}
+		pthread_mutex_unlock(&m_mutex);
+	}
+	virtual void setMaxCount(int numThreads)
+	{
+		if (m_numThreads>0)
+		{
+			pthread_mutex_destroy(&m_mutex);
+			pthread_cond_destroy(&m_cond);
+		}
+		m_called = 0;
+		pthread_mutex_init(&m_mutex,NULL);
+		pthread_cond_init(&m_cond,NULL);
+		m_numThreads = numThreads;
+	}
+	virtual int  getMaxCount()
+	{
+		return m_numThreads;
+	}
+};
+
+#endif//_POSIX_BARRIERS
+
+
+///Allocates a PosixBarrier whose max count is getNumTasks(); free it with deleteBarrier().
+btBarrier* PosixThreadSupport::createBarrier()
+{
+	PosixBarrier* barrier = new PosixBarrier();
+	barrier->setMaxCount(getNumTasks());
+	return barrier;
+}
+///Allocates a new PosixCriticalSection; free it with deleteCriticalSection().
+btCriticalSection* PosixThreadSupport::createCriticalSection()
+{
+	return new PosixCriticalSection();
+}
+///Deletes the given barrier.
+void	PosixThreadSupport::deleteBarrier(btBarrier* barrier)
+{
+	delete barrier;
+}
+///Deletes the given critical section.
+void PosixThreadSupport::deleteCriticalSection(btCriticalSection* cs)
+{
+	delete cs;
+}
 #endif // USE_PTHREADS
 
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/PosixThreadSupport.h b/Engine/lib/bullet/src/BulletMultiThreaded/PosixThreadSupport.h
index 7cc49115b..bf7578f51 100644
--- a/Engine/lib/bullet/src/BulletMultiThreaded/PosixThreadSupport.h
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/PosixThreadSupport.h
@@ -13,16 +13,22 @@ subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
+#ifndef BT_POSIX_THREAD_SUPPORT_H
+#define BT_POSIX_THREAD_SUPPORT_H
+
 
 #include "LinearMath/btScalar.h"
 #include "PlatformDefinitions.h"
 
-#ifdef USE_PTHREADS  //platform specific defines are defined in PlatformDefinitions.h
+#ifdef USE_PTHREADS //platform specific defines are defined in PlatformDefinitions.h
+
+#ifndef _XOPEN_SOURCE
+#define _XOPEN_SOURCE 600 //for definition of pthread_barrier_t, see http://pages.cs.wisc.edu/~travitch/pthreads_primer.html
+#endif //_XOPEN_SOURCE
 #include <pthread.h>
 #include <semaphore.h>
 
-#ifndef POSIX_THREAD_SUPPORT_H
-#define POSIX_THREAD_SUPPORT_H
+
 
 #include "LinearMath/btAlignedObjectArray.h"
 
@@ -68,7 +74,7 @@ public:
 
 	struct	ThreadConstructionInfo
 	{
-		ThreadConstructionInfo(char* uniqueName,
+		ThreadConstructionInfo(const char* uniqueName,
 									PosixThreadFunc userThreadFunc,
 									PosixlsMemorySetupFunc	lsMemoryFunc,
 									int numThreads=1,
@@ -83,7 +89,7 @@ public:
 
 		}
 
-		char*					m_uniqueName;
+		const char*					m_uniqueName;
 		PosixThreadFunc			m_userThreadFunc;
 		PosixlsMemorySetupFunc	m_lsMemoryFunc;
 		int						m_numThreads;
@@ -117,8 +123,25 @@ public:
 	{
 		return m_activeSpuStatus.size();
 	}
+
+	virtual btBarrier* createBarrier();
+
+	virtual btCriticalSection* createCriticalSection();
+
+	virtual void deleteBarrier(btBarrier* barrier);
+
+	virtual void deleteCriticalSection(btCriticalSection* criticalSection);
+	
+	///Returns the m_lsMemory pointer of the m_activeSpuStatus entry at index taskId (index not validated here).
+	virtual void*	getThreadLocalMemory(int taskId)
+	{
+		return m_activeSpuStatus[taskId].m_lsMemory;
+	}
+
 };
 
-#endif // POSIX_THREAD_SUPPORT_H
-
 #endif // USE_PTHREADS
+
+#endif // BT_POSIX_THREAD_SUPPORT_H
+
+
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/PpuAddressSpace.h b/Engine/lib/bullet/src/BulletMultiThreaded/PpuAddressSpace.h
index 93c83648c..6f2282745 100644
--- a/Engine/lib/bullet/src/BulletMultiThreaded/PpuAddressSpace.h
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/PpuAddressSpace.h
@@ -1,18 +1,37 @@
-#ifndef __PPU_ADDRESS_SPACE_H
-#define __PPU_ADDRESS_SPACE_H
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2010 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
 
 
-#ifdef WIN32
+#ifndef BT_PPU_ADDRESS_SPACE_H
+#define BT_PPU_ADDRESS_SPACE_H
+
+
+#ifdef _WIN32
 //stop those casting warnings until we have a better solution for ppu_address_t / void* / uint64 conversions
 #pragma warning (disable: 4311)
 #pragma warning (disable: 4312)
-#endif //WIN32
+#endif //_WIN32
 
-#ifdef USE_ADDR64
-typedef uint64_t ppu_address_t;
+
+#if defined(_WIN64)
+	typedef unsigned __int64 ppu_address_t;
+#elif defined(__LP64__) || defined(__x86_64__)
+	typedef uint64_t ppu_address_t;
 #else
-typedef uint32_t ppu_address_t;
-#endif
+	typedef uint32_t ppu_address_t;
+#endif //defined(_WIN64)
 
-#endif
+#endif //BT_PPU_ADDRESS_SPACE_H
 
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/SequentialThreadSupport.cpp b/Engine/lib/bullet/src/BulletMultiThreaded/SequentialThreadSupport.cpp
index 4e9c822bb..199927721 100644
--- a/Engine/lib/bullet/src/BulletMultiThreaded/SequentialThreadSupport.cpp
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/SequentialThreadSupport.cpp
@@ -91,3 +91,91 @@ void SequentialThreadSupport::setNumTasks(int numTasks)
 {
 	printf("SequentialThreadSupport::setNumTasks(%d) is not implemented and has no effect\n",numTasks);
 }
+
+
+
+
+///No-op barrier handed out by SequentialThreadSupport::createBarrier().
+class btDummyBarrier : public btBarrier
+{
+public:
+	btDummyBarrier() {}
+
+	virtual ~btDummyBarrier() {}
+
+	///Returns immediately without waiting.
+	void sync() {}
+	///The requested count is ignored.
+	virtual void setMaxCount(int n)
+	{
+	}
+	///Always reports a count of 1.
+	virtual int  getMaxCount()
+	{
+		return 1;
+	}
+};
+
+///Critical section handed out by SequentialThreadSupport; lock()/unlock() only toggle a flag.
+class btDummyCriticalSection : public btCriticalSection
+{
+public:
+	btDummyCriticalSection() {}
+
+	virtual ~btDummyCriticalSection() {}
+	///Reads shared parameter i (0..30); mCommonBuff[0] is used by lock()/unlock(), so parameters start at slot 1.
+	unsigned int getSharedParam(int i)
+	{
+		btAssert(i>=0&&i<31);
+		const int slot = i+1;
+		return mCommonBuff[slot];
+	}
+
+	///Writes shared parameter i (0..30).
+	void setSharedParam(int i,unsigned int p)
+	{
+		btAssert(i>=0&&i<31);
+		const int slot = i+1;
+		mCommonBuff[slot] = p;
+	}
+
+	///Sets the flag in mCommonBuff[0]; performs no waiting.
+	void lock()
+	{
+		mCommonBuff[0] = 1;
+	}
+
+	///Clears the flag in mCommonBuff[0].
+	void unlock()
+	{
+		mCommonBuff[0] = 0;
+	}
+};
+
+
+
+///Allocates a btDummyBarrier (sync() is a no-op); free it with deleteBarrier().
+btBarrier*	SequentialThreadSupport::createBarrier()
+{
+	return new btDummyBarrier();
+}
+///Allocates a btDummyCriticalSection; free it with deleteCriticalSection().
+btCriticalSection* SequentialThreadSupport::createCriticalSection()
+{
+	return new btDummyCriticalSection();
+	
+}
+///Deletes the given barrier.
+void SequentialThreadSupport::deleteBarrier(btBarrier* barrier)
+{
+    delete barrier;
+}
+///Deletes the given critical section.
+void SequentialThreadSupport::deleteCriticalSection(btCriticalSection* criticalSection)
+{
+    delete criticalSection;
+}
+
+
+
+
+
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/SequentialThreadSupport.h b/Engine/lib/bullet/src/BulletMultiThreaded/SequentialThreadSupport.h
index 4256ebd2a..a188ef219 100644
--- a/Engine/lib/bullet/src/BulletMultiThreaded/SequentialThreadSupport.h
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/SequentialThreadSupport.h
@@ -17,8 +17,8 @@ subject to the following restrictions:
 #include "PlatformDefinitions.h"
 
 
-#ifndef SEQUENTIAL_THREAD_SUPPORT_H
-#define SEQUENTIAL_THREAD_SUPPORT_H
+#ifndef BT_SEQUENTIAL_THREAD_SUPPORT_H
+#define BT_SEQUENTIAL_THREAD_SUPPORT_H
 
 #include "LinearMath/btAlignedObjectArray.h"
 
@@ -51,7 +51,7 @@ private:
 public:
 	struct	SequentialThreadConstructionInfo
 	{
-		SequentialThreadConstructionInfo (char* uniqueName,
+		SequentialThreadConstructionInfo (const char* uniqueName,
 									SequentialThreadFunc userThreadFunc,
 									SequentiallsMemorySetupFunc	lsMemoryFunc
 									)
@@ -62,7 +62,7 @@ public:
 
 		}
 
-		char*						m_uniqueName;
+		const char*						m_uniqueName;
 		SequentialThreadFunc		m_userThreadFunc;
 		SequentiallsMemorySetupFunc	m_lsMemoryFunc;
 	};
@@ -85,8 +85,16 @@ public:
 	{
 		return 1;
 	}
+	virtual btBarrier*	createBarrier();
+
+	virtual btCriticalSection* createCriticalSection();
+	
+    virtual void deleteBarrier(btBarrier* barrier);
+    
+    virtual void deleteCriticalSection(btCriticalSection* criticalSection);
+
 
 };
 
-#endif //SEQUENTIAL_THREAD_SUPPORT_H
+#endif //BT_SEQUENTIAL_THREAD_SUPPORT_H
 
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/SpuCollisionObjectWrapper.h b/Engine/lib/bullet/src/BulletMultiThreaded/SpuCollisionObjectWrapper.h
index 36ea49209..f90da2775 100644
--- a/Engine/lib/bullet/src/BulletMultiThreaded/SpuCollisionObjectWrapper.h
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/SpuCollisionObjectWrapper.h
@@ -13,8 +13,8 @@ subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-#ifndef SPU_COLLISION_OBJECT_WRAPPER_H
-#define SPU_COLLISION_OBJECT_WRAPPER_H
+#ifndef BT_SPU_COLLISION_OBJECT_WRAPPER_H
+#define BT_SPU_COLLISION_OBJECT_WRAPPER_H
 
 #include "PlatformDefinitions.h"
 #include "BulletCollision/CollisionDispatch/btCollisionObject.h"
@@ -37,4 +37,4 @@ public:
 };
 
 
-#endif //SPU_COLLISION_OBJECT_WRAPPER_H
+#endif //BT_SPU_COLLISION_OBJECT_WRAPPER_H
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/SpuCollisionTaskProcess.cpp b/Engine/lib/bullet/src/BulletMultiThreaded/SpuCollisionTaskProcess.cpp
index dab42cc71..f606d1363 100644
--- a/Engine/lib/bullet/src/BulletMultiThreaded/SpuCollisionTaskProcess.cpp
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/SpuCollisionTaskProcess.cpp
@@ -26,7 +26,7 @@ subject to the following restrictions:
 
 void	SpuCollisionTaskProcess::setNumTasks(int maxNumTasks)
 {
-	if (m_maxNumOutstandingTasks != maxNumTasks)
+	if (int(m_maxNumOutstandingTasks) != maxNumTasks)
 	{
 		m_maxNumOutstandingTasks = maxNumTasks;
 		m_taskBusy.resize(m_maxNumOutstandingTasks);
@@ -44,7 +44,6 @@ void	SpuCollisionTaskProcess::setNumTasks(int maxNumTasks)
 		}
 		
 		m_workUnitTaskBuffers = (unsigned char *)btAlignedAlloc(MIDPHASE_WORKUNIT_TASK_SIZE*m_maxNumOutstandingTasks, 128);
-					m_workUnitTaskBuffers = (unsigned char *)btAlignedAlloc(MIDPHASE_WORKUNIT_TASK_SIZE*6, 128);
 	}
 	
 }
@@ -69,7 +68,7 @@ m_maxNumOutstandingTasks(0)
 	m_threadInterface->startSPU();
 
 	//printf("sizeof vec_float4: %d\n", sizeof(vec_float4));
-	printf("sizeof SpuGatherAndProcessWorkUnitInput: %d\n", sizeof(SpuGatherAndProcessWorkUnitInput));
+	printf("sizeof SpuGatherAndProcessWorkUnitInput: %d\n", int(sizeof(SpuGatherAndProcessWorkUnitInput)));
 
 }
 
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/SpuCollisionTaskProcess.h b/Engine/lib/bullet/src/BulletMultiThreaded/SpuCollisionTaskProcess.h
index a9ffde1a5..23b5b05a1 100644
--- a/Engine/lib/bullet/src/BulletMultiThreaded/SpuCollisionTaskProcess.h
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/SpuCollisionTaskProcess.h
@@ -13,12 +13,12 @@ subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-#ifndef SPU_COLLISION_TASK_PROCESS_H
-#define SPU_COLLISION_TASK_PROCESS_H
+#ifndef BT_SPU_COLLISION_TASK_PROCESS_H
+#define BT_SPU_COLLISION_TASK_PROCESS_H
 
 #include <assert.h>
 
-#include <LinearMath/btScalar.h>
+#include "LinearMath/btScalar.h"
 
 #include "PlatformDefinitions.h"
 #include "LinearMath/btAlignedObjectArray.h"
@@ -35,7 +35,7 @@ subject to the following restrictions:
 #include "BulletCollision/CollisionShapes/btCollisionShape.h"
 #include "BulletCollision/CollisionShapes/btConvexShape.h"
 
-#include <LinearMath/btAlignedAllocator.h>
+#include "LinearMath/btAlignedAllocator.h"
 
 #include <stdio.h>
 
@@ -159,5 +159,5 @@ public:
 #define MIDPHASE_NUM_WORKUNITS_PER_TASK (MIDPHASE_NUM_WORKUNITS_PER_PAGE*MIDPHASE_NUM_WORKUNIT_PAGES)
 
 
-#endif // SPU_COLLISION_TASK_PROCESS_H
+#endif // BT_SPU_COLLISION_TASK_PROCESS_H
 
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/SpuContactManifoldCollisionAlgorithm.cpp b/Engine/lib/bullet/src/BulletMultiThreaded/SpuContactManifoldCollisionAlgorithm.cpp
index 286b63191..62cf4f0f5 100644
--- a/Engine/lib/bullet/src/BulletMultiThreaded/SpuContactManifoldCollisionAlgorithm.cpp
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/SpuContactManifoldCollisionAlgorithm.cpp
@@ -22,7 +22,7 @@ subject to the following restrictions:
 
 
 
-void SpuContactManifoldCollisionAlgorithm::processCollision (btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut)
+void SpuContactManifoldCollisionAlgorithm::processCollision (const btCollisionObjectWrapper* body0Wrap,const btCollisionObjectWrapper* body1Wrap,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut)
 {
 	btAssert(0);
 }
@@ -34,7 +34,7 @@ btScalar SpuContactManifoldCollisionAlgorithm::calculateTimeOfImpact(btCollision
 }
 
 #ifndef __SPU__
-SpuContactManifoldCollisionAlgorithm::SpuContactManifoldCollisionAlgorithm(const btCollisionAlgorithmConstructionInfo& ci,btCollisionObject* body0,btCollisionObject* body1)
+SpuContactManifoldCollisionAlgorithm::SpuContactManifoldCollisionAlgorithm(const btCollisionAlgorithmConstructionInfo& ci,const btCollisionObject* body0,const btCollisionObject* body1)
 :btCollisionAlgorithm(ci)
 #ifdef USE_SEPDISTANCE_UTIL
 ,m_sepDistance(body0->getCollisionShape()->getAngularMotionDisc(),body1->getCollisionShape()->getAngularMotionDisc())
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/SpuContactManifoldCollisionAlgorithm.h b/Engine/lib/bullet/src/BulletMultiThreaded/SpuContactManifoldCollisionAlgorithm.h
index 151cb2c79..14b0a9454 100644
--- a/Engine/lib/bullet/src/BulletMultiThreaded/SpuContactManifoldCollisionAlgorithm.h
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/SpuContactManifoldCollisionAlgorithm.h
@@ -13,14 +13,15 @@ subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-#ifndef SPU_CONTACTMANIFOLD_COLLISION_ALGORITHM_H
-#define SPU_CONTACTMANIFOLD_COLLISION_ALGORITHM_H
+#ifndef BT_SPU_CONTACTMANIFOLD_COLLISION_ALGORITHM_H
+#define BT_SPU_CONTACTMANIFOLD_COLLISION_ALGORITHM_H
 
 #include "BulletCollision/BroadphaseCollision/btCollisionAlgorithm.h"
 #include "BulletCollision/BroadphaseCollision/btBroadphaseProxy.h"
 #include "BulletCollision/CollisionDispatch/btCollisionCreateFunc.h"
 #include "BulletCollision/BroadphaseCollision/btDispatcher.h"
 #include "LinearMath/btTransformUtil.h"
+#include "BulletCollision/CollisionDispatch/btCollisionObjectWrapper.h"
 
 class btPersistentManifold;
 
@@ -37,20 +38,20 @@ ATTRIBUTE_ALIGNED16(class) SpuContactManifoldCollisionAlgorithm : public btColli
 	float	m_collisionMargin0;
 	float	m_collisionMargin1;
 
-	btCollisionObject*	m_collisionObject0;
-	btCollisionObject*	m_collisionObject1;
+	const btCollisionObject*	m_collisionObject0;
+	const btCollisionObject*	m_collisionObject1;
 	
 	
 
 	
 public:
 	
-	virtual void processCollision (btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
+	virtual void processCollision (const btCollisionObjectWrapper* body0Wrap,const btCollisionObjectWrapper* body1Wrap,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
 
 	virtual btScalar calculateTimeOfImpact(btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
 
 	
-	SpuContactManifoldCollisionAlgorithm(const btCollisionAlgorithmConstructionInfo& ci,btCollisionObject* body0,btCollisionObject* body1);
+	SpuContactManifoldCollisionAlgorithm(const btCollisionAlgorithmConstructionInfo& ci,const btCollisionObject* body0,const btCollisionObject* body1);
 #ifdef USE_SEPDISTANCE_UTIL
 	btConvexSeparatingDistanceUtil	m_sepDistance;
 #endif //USE_SEPDISTANCE_UTIL
@@ -68,12 +69,12 @@ public:
 		return m_manifoldPtr;
 	}
 
-	btCollisionObject*	getCollisionObject0()
+	const btCollisionObject*	getCollisionObject0()
 	{
 		return m_collisionObject0;
 	}
 	
-	btCollisionObject*	getCollisionObject1()
+	const btCollisionObject*	getCollisionObject1()
 	{
 		return m_collisionObject1;
 	}
@@ -108,13 +109,13 @@ public:
 
 	struct CreateFunc :public 	btCollisionAlgorithmCreateFunc
 	{
-		virtual	btCollisionAlgorithm* CreateCollisionAlgorithm(btCollisionAlgorithmConstructionInfo& ci, btCollisionObject* body0,btCollisionObject* body1)
-		{
+        virtual	btCollisionAlgorithm* CreateCollisionAlgorithm(btCollisionAlgorithmConstructionInfo& ci, const btCollisionObjectWrapper* body0Wrap,const btCollisionObjectWrapper* body1Wrap)
+ 		{
 			void* mem = ci.m_dispatcher1->allocateCollisionAlgorithm(sizeof(SpuContactManifoldCollisionAlgorithm));
-			return new(mem) SpuContactManifoldCollisionAlgorithm(ci,body0,body1);
+			return new(mem) SpuContactManifoldCollisionAlgorithm(ci,body0Wrap->getCollisionObject(),body1Wrap->getCollisionObject());
 		}
 	};
 
 };
 
-#endif //SPU_CONTACTMANIFOLD_COLLISION_ALGORITHM_H
+#endif //BT_SPU_CONTACTMANIFOLD_COLLISION_ALGORITHM_H
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/SpuDoubleBuffer.h b/Engine/lib/bullet/src/BulletMultiThreaded/SpuDoubleBuffer.h
index 801c86080..558d61526 100644
--- a/Engine/lib/bullet/src/BulletMultiThreaded/SpuDoubleBuffer.h
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/SpuDoubleBuffer.h
@@ -1,8 +1,24 @@
-#ifndef DOUBLE_BUFFER_H
-#define DOUBLE_BUFFER_H
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2007 Erwin Coumans  http://bulletphysics.com
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+#ifndef BT_DOUBLE_BUFFER_H
+#define BT_DOUBLE_BUFFER_H
 
 #include "SpuFakeDma.h"
-#include <LinearMath/btScalar.h>
+#include "LinearMath/btScalar.h"
 
 
 ///DoubleBuffer
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/SpuFakeDma.cpp b/Engine/lib/bullet/src/BulletMultiThreaded/SpuFakeDma.cpp
index 62cef3961..b776a120a 100644
--- a/Engine/lib/bullet/src/BulletMultiThreaded/SpuFakeDma.cpp
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/SpuFakeDma.cpp
@@ -30,7 +30,7 @@ void*	cellDmaLargeGetReadOnly(void *ls, uint64_t ea, uint32_t size, uint32_t tag
 	cellDmaLargeGet(ls,ea,size,tag,tid,rid);
 	return ls;
 #else
-	return (void*)(uint32_t)ea;
+	return (void*)(ppu_address_t)ea;
 #endif
 }
 
@@ -40,7 +40,7 @@ void*	cellDmaSmallGetReadOnly(void *ls, uint64_t ea, uint32_t size, uint32_t tag
 	mfc_get(ls,ea,size,tag,0,0);
 	return ls;
 #else
-	return (void*)(uint32_t)ea;
+	return (void*)(ppu_address_t)ea;
 #endif
 }
 
@@ -53,7 +53,7 @@ void*	cellDmaGetReadOnly(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uin
 	cellDmaGet(ls,ea,size,tag,tid,rid);
 	return ls;
 #else
-	return (void*)(uint32_t)ea;
+	return (void*)(ppu_address_t)ea;
 #endif
 }
 
@@ -174,6 +174,9 @@ int	cellDmaGet(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid,
 {
 	char* mainMem = (char*)ea;
 	char* localStore = (char*)ls;
+
+//	printf("mainMem=%x, localStore=%x",mainMem,localStore);
+
 #ifdef USE_MEMCPY
 	memcpy(localStore,mainMem,size);
 #else
@@ -182,6 +185,7 @@ int	cellDmaGet(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid,
 		localStore[i] = mainMem[i];
 	}	
 #endif //#ifdef USE_MEMCPY
+//	printf(" finished\n");
 	return 0;
 }
 
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/SpuFakeDma.h b/Engine/lib/bullet/src/BulletMultiThreaded/SpuFakeDma.h
index f5e49b7be..40e203936 100644
--- a/Engine/lib/bullet/src/BulletMultiThreaded/SpuFakeDma.h
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/SpuFakeDma.h
@@ -13,8 +13,8 @@ subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-#ifndef FAKE_DMA_H
-#define FAKE_DMA_H
+#ifndef BT_FAKE_DMA_H
+#define BT_FAKE_DMA_H
 
 
 #include "PlatformDefinitions.h"
@@ -132,4 +132,4 @@ void*	cellDmaGetReadOnly(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uin
 void*	cellDmaSmallGetReadOnly(void *ls, uint64_t ea, uint32_t size, uint32_t tag, uint32_t tid, uint32_t rid);
 
 
-#endif //FAKE_DMA_H
+#endif //BT_FAKE_DMA_H
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/SpuGatheringCollisionDispatcher.cpp b/Engine/lib/bullet/src/BulletMultiThreaded/SpuGatheringCollisionDispatcher.cpp
index f81f75190..b9e88a07f 100644
--- a/Engine/lib/bullet/src/BulletMultiThreaded/SpuGatheringCollisionDispatcher.cpp
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/SpuGatheringCollisionDispatcher.cpp
@@ -23,6 +23,8 @@ subject to the following restrictions:
 #include "BulletCollision/CollisionDispatch/btCollisionObject.h"
 #include "BulletCollision/CollisionShapes/btCollisionShape.h"
 #include "LinearMath/btQuickprof.h"
+#include "BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuCollisionShapes.h"
+#include "BulletCollision/CollisionDispatch/btCollisionObjectWrapper.h"
 
 
 
@@ -48,6 +50,7 @@ bool	SpuGatheringCollisionDispatcher::supportsDispatchPairOnSpu(int proxyType0,i
 //		(proxyType0 == CONE_SHAPE_PROXYTYPE) ||
 		(proxyType0 == TRIANGLE_MESH_SHAPE_PROXYTYPE) ||
 		(proxyType0 == CONVEX_HULL_SHAPE_PROXYTYPE)||
+		(proxyType0 == STATIC_PLANE_PROXYTYPE)||
 		(proxyType0 == COMPOUND_SHAPE_PROXYTYPE)
 		);
 
@@ -60,9 +63,11 @@ bool	SpuGatheringCollisionDispatcher::supportsDispatchPairOnSpu(int proxyType0,i
 //		(proxyType1 == CONE_SHAPE_PROXYTYPE) ||
 		(proxyType1 == TRIANGLE_MESH_SHAPE_PROXYTYPE) ||
 		(proxyType1 == CONVEX_HULL_SHAPE_PROXYTYPE) ||
+		(proxyType1 == STATIC_PLANE_PROXYTYPE) ||
 		(proxyType1 == COMPOUND_SHAPE_PROXYTYPE)
 		);
 
+	
 	return supported0 && supported1;
 }
 
@@ -124,8 +129,33 @@ public:
 				{
 					int	proxyType0 = colObj0->getCollisionShape()->getShapeType();
 					int	proxyType1 = colObj1->getCollisionShape()->getShapeType();
-					if (m_dispatcher->supportsDispatchPairOnSpu(proxyType0,proxyType1))
+					bool supportsSpuDispatch = m_dispatcher->supportsDispatchPairOnSpu(proxyType0,proxyType1) 
+						&& ((colObj0->getCollisionFlags() & btCollisionObject::CF_DISABLE_SPU_COLLISION_PROCESSING) == 0)
+						&& ((colObj1->getCollisionFlags() & btCollisionObject::CF_DISABLE_SPU_COLLISION_PROCESSING) == 0);
+
+					if (proxyType0 == COMPOUND_SHAPE_PROXYTYPE)
 					{
+						btCompoundShape* compound = (btCompoundShape*)colObj0->getCollisionShape();
+						if (compound->getNumChildShapes()>MAX_SPU_COMPOUND_SUBSHAPES)
+						{
+							//printf("PPU fallback, compound->getNumChildShapes(%d)>%d\n",compound->getNumChildShapes(),MAX_SPU_COMPOUND_SUBSHAPES);
+							supportsSpuDispatch = false;
+						}
+					}
+
+					if (proxyType1 == COMPOUND_SHAPE_PROXYTYPE)
+					{
+						btCompoundShape* compound = (btCompoundShape*)colObj1->getCollisionShape();
+						if (compound->getNumChildShapes()>MAX_SPU_COMPOUND_SUBSHAPES)
+						{
+							//printf("PPU fallback, compound->getNumChildShapes(%d)>%d\n",compound->getNumChildShapes(),MAX_SPU_COMPOUND_SUBSHAPES);
+							supportsSpuDispatch = false;
+						}
+					}
+
+					if (supportsSpuDispatch)
+					{
+
 						int so = sizeof(SpuContactManifoldCollisionAlgorithm);
 #ifdef ALLOCATE_SEPARATELY
 						void* mem = btAlignedAlloc(so,16);//m_dispatcher->allocateCollisionAlgorithm(so);
@@ -136,7 +166,10 @@ public:
 						collisionPair.m_internalTmpValue =  2;
 					} else
 					{
-						collisionPair.m_algorithm = m_dispatcher->findAlgorithm(colObj0,colObj1);
+						btCollisionObjectWrapper ob0(0,colObj0->getCollisionShape(),colObj0,colObj0->getWorldTransform());
+						btCollisionObjectWrapper ob1(0,colObj1->getCollisionShape(),colObj1,colObj1->getWorldTransform());
+
+						collisionPair.m_algorithm = m_dispatcher->findAlgorithm(&ob0,&ob1);
 						collisionPair.m_internalTmpValue = 3;
 					}
 				} 
@@ -175,48 +208,60 @@ void	SpuGatheringCollisionDispatcher::dispatchAllCollisionPairs(btOverlappingPai
 
 		//send one big batch
 		int numTotalPairs = pairCache->getNumOverlappingPairs();
-		btBroadphasePair* pairPtr = pairCache->getOverlappingPairArrayPtr();
-		int i;
+		if (numTotalPairs)
 		{
-			BT_PROFILE("addWorkToTask");
-			for (i=0;i<numTotalPairs;)
+			btBroadphasePair* pairPtr = pairCache->getOverlappingPairArrayPtr();
+			int i;
 			{
-				//Performance Hint: tweak this number during benchmarking
-				static const int pairRange = SPU_BATCHSIZE_BROADPHASE_PAIRS;
-				int endIndex = (i+pairRange) < numTotalPairs ? i+pairRange : numTotalPairs;
-				m_spuCollisionTaskProcess->addWorkToTask(pairPtr,i,endIndex);
-				i = endIndex;
-			}
-		}
-
-		{
-			BT_PROFILE("PPU fallback");
-			//handle PPU fallback pairs
-			for (i=0;i<numTotalPairs;i++)
-			{
-				btBroadphasePair& collisionPair = pairPtr[i];
-				if (collisionPair.m_internalTmpValue == 3)
+				int pairRange =	SPU_BATCHSIZE_BROADPHASE_PAIRS;
+				if (numTotalPairs < (m_spuCollisionTaskProcess->getNumTasks()*SPU_BATCHSIZE_BROADPHASE_PAIRS))
 				{
-					if (collisionPair.m_algorithm)
+					pairRange = (numTotalPairs/m_spuCollisionTaskProcess->getNumTasks())+1;
+				}
+	
+				BT_PROFILE("addWorkToTask");
+				for (i=0;i<numTotalPairs;)
+				{
+					//Performance Hint: tweak this number during benchmarking
+					
+					int endIndex = (i+pairRange) < numTotalPairs ? i+pairRange : numTotalPairs;
+					m_spuCollisionTaskProcess->addWorkToTask(pairPtr,i,endIndex);
+					i = endIndex;
+				}
+			}
+			{
+				BT_PROFILE("PPU fallback");
+				//handle PPU fallback pairs
+				for (i=0;i<numTotalPairs;i++)
+				{
+					btBroadphasePair& collisionPair = pairPtr[i];
+					if (collisionPair.m_internalTmpValue == 3)
 					{
-						btCollisionObject* colObj0 = (btCollisionObject*)collisionPair.m_pProxy0->m_clientObject;
-						btCollisionObject* colObj1 = (btCollisionObject*)collisionPair.m_pProxy1->m_clientObject;
-
-						if (dispatcher->needsCollision(colObj0,colObj1))
+						if (collisionPair.m_algorithm)
 						{
-							btManifoldResult contactPointResult(colObj0,colObj1);
-							
-							if (dispatchInfo.m_dispatchFunc == 		btDispatcherInfo::DISPATCH_DISCRETE)
+							btCollisionObject* colObj0 = (btCollisionObject*)collisionPair.m_pProxy0->m_clientObject;
+							btCollisionObject* colObj1 = (btCollisionObject*)collisionPair.m_pProxy1->m_clientObject;
+	
+							if (dispatcher->needsCollision(colObj0,colObj1))
 							{
-								//discrete collision detection query
-								collisionPair.m_algorithm->processCollision(colObj0,colObj1,dispatchInfo,&contactPointResult);
-							} else
-							{
-								//continuous collision detection query, time of impact (toi)
-								btScalar toi = collisionPair.m_algorithm->calculateTimeOfImpact(colObj0,colObj1,dispatchInfo,&contactPointResult);
-								if (dispatchInfo.m_timeOfImpact > toi)
-									dispatchInfo.m_timeOfImpact = toi;
+							//discrete collision detection query
+								btCollisionObjectWrapper ob0(0,colObj0->getCollisionShape(),colObj0,colObj0->getWorldTransform());
+								btCollisionObjectWrapper ob1(0,colObj1->getCollisionShape(),colObj1,colObj1->getWorldTransform());
 
+								btManifoldResult contactPointResult(&ob0,&ob1);
+								
+								if (dispatchInfo.m_dispatchFunc == 		btDispatcherInfo::DISPATCH_DISCRETE)
+								{
+									
+									collisionPair.m_algorithm->processCollision(&ob0,&ob1,dispatchInfo,&contactPointResult);
+								} else
+								{
+									//continuous collision detection query, time of impact (toi)
+									btScalar toi = collisionPair.m_algorithm->calculateTimeOfImpact(colObj0,colObj1,dispatchInfo,&contactPointResult);
+									if (dispatchInfo.m_timeOfImpact > toi)
+										dispatchInfo.m_timeOfImpact = toi;
+	
+								}
 							}
 						}
 					}
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/SpuGatheringCollisionDispatcher.h b/Engine/lib/bullet/src/BulletMultiThreaded/SpuGatheringCollisionDispatcher.h
index 120751a1e..f8bc7da65 100644
--- a/Engine/lib/bullet/src/BulletMultiThreaded/SpuGatheringCollisionDispatcher.h
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/SpuGatheringCollisionDispatcher.h
@@ -12,8 +12,8 @@ subject to the following restrictions:
 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
 3. This notice may not be removed or altered from any source distribution.
 */
-#ifndef SPU_GATHERING_COLLISION__DISPATCHER_H
-#define SPU_GATHERING_COLLISION__DISPATCHER_H
+#ifndef BT_SPU_GATHERING_COLLISION__DISPATCHER_H
+#define BT_SPU_GATHERING_COLLISION__DISPATCHER_H
 
 #include "BulletCollision/CollisionDispatch/btCollisionDispatcher.h"
 
@@ -23,9 +23,10 @@ subject to the following restrictions:
 ///Too big value might render some SPUs are idle, while a few other SPUs are doing all work.
 //#define SPU_BATCHSIZE_BROADPHASE_PAIRS 8
 //#define SPU_BATCHSIZE_BROADPHASE_PAIRS 16
-#define SPU_BATCHSIZE_BROADPHASE_PAIRS 64
-//#define SPU_BATCHSIZE_BROADPHASE_PAIRS 128
+//#define SPU_BATCHSIZE_BROADPHASE_PAIRS 64
+#define SPU_BATCHSIZE_BROADPHASE_PAIRS 128
 //#define SPU_BATCHSIZE_BROADPHASE_PAIRS 256
+//#define SPU_BATCHSIZE_BROADPHASE_PAIRS 512
 //#define SPU_BATCHSIZE_BROADPHASE_PAIRS 1024
 
 
@@ -66,4 +67,6 @@ public:
 
 
 
-#endif //SPU_GATHERING_COLLISION__DISPATCHER_H
+#endif //BT_SPU_GATHERING_COLLISION__DISPATCHER_H
+
+
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/SpuLibspe2Support.h b/Engine/lib/bullet/src/BulletMultiThreaded/SpuLibspe2Support.h
index a6d6baca4..37a5e79f0 100644
--- a/Engine/lib/bullet/src/BulletMultiThreaded/SpuLibspe2Support.h
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/SpuLibspe2Support.h
@@ -14,8 +14,8 @@ subject to the following restrictions:
 */
 
 
-#ifndef SPU_LIBSPE2_SUPPORT_H
-#define SPU_LIBSPE2_SUPPORT_H
+#ifndef BT_SPU_LIBSPE2_SUPPORT_H
+#define BT_SPU_LIBSPE2_SUPPORT_H
 
 #include <LinearMath/btScalar.h> //for uint32_t etc.
 
@@ -173,7 +173,7 @@ private:
 
 #endif //USE_LIBSPE2
 
-#endif //SPU_LIBSPE2_SUPPORT_H
+#endif //BT_SPU_LIBSPE2_SUPPORT_H
 
 
 
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/Box.h b/Engine/lib/bullet/src/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/Box.h
index c5b68743c..e51796119 100644
--- a/Engine/lib/bullet/src/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/Box.h
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/Box.h
@@ -24,11 +24,11 @@ subject to the following restrictions:
 
 #include <math.h>
 
-//#include "BulletMultiThreaded/vectormath/scalar/cpp/vectormath_aos.h"
-#include <vectormath_aos.h>
+
+#include "../PlatformDefinitions.h"
+
 
 
-using namespace Vectormath::Aos;
 
 enum FeatureType { F, E, V };
 
@@ -39,21 +39,21 @@ enum FeatureType { F, E, V };
 class Box
 {
 public:
-	Vector3 half;
+	vmVector3 mHalf;
 
 	inline Box()
 	{}
-	inline Box(PE_REF(Vector3) half_);
+	inline Box(PE_REF(vmVector3) half_);
 	inline Box(float hx, float hy, float hz);
 
-	inline void Set(PE_REF(Vector3) half_);
+	inline void Set(PE_REF(vmVector3) half_);
 	inline void Set(float hx, float hy, float hz);
 
-	inline Vector3 GetAABB(const Matrix3& rotation) const;
+	inline vmVector3 GetAABB(const vmMatrix3& rotation) const;
 };
 
 inline
-Box::Box(PE_REF(Vector3) half_)
+Box::Box(PE_REF(vmVector3) half_)
 {
 	Set(half_);
 }
@@ -66,23 +66,23 @@ Box::Box(float hx, float hy, float hz)
 
 inline
 void
-Box::Set(PE_REF(Vector3) half_)
+Box::Set(PE_REF(vmVector3) half_)
 {
-	half = half_;
+	mHalf = half_;
 }
 
 inline
 void
 Box::Set(float hx, float hy, float hz)
 {
-	half = Vector3(hx, hy, hz);
+	mHalf = vmVector3(hx, hy, hz);
 }
 
 inline
-Vector3
-Box::GetAABB(const Matrix3& rotation) const
+vmVector3
+Box::GetAABB(const vmMatrix3& rotation) const
 {
-	return absPerElem(rotation) * half;
+	return absPerElem(rotation) * mHalf;
 }
 
 //-------------------------------------------------------------------------------------------------
@@ -95,7 +95,7 @@ class BoxPoint
 public:
 	BoxPoint() : localPoint(0.0f) {}
 
-	Point3      localPoint;
+	vmPoint3      localPoint;
 	FeatureType featureType;
 	int         featureIdx;
 
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuCollisionShapes.cpp b/Engine/lib/bullet/src/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuCollisionShapes.cpp
index b3c8f3d2f..8d755b223 100644
--- a/Engine/lib/bullet/src/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuCollisionShapes.cpp
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuCollisionShapes.cpp
@@ -44,7 +44,7 @@ void computeAabb (btVector3& aabbMin, btVector3& aabbMax, btConvexInternalShape*
 		const btTransform& t = xform;
 		btMatrix3x3 abs_b = t.getBasis().absolute();  
 		btVector3 center = t.getOrigin();
-		btVector3 extent = btVector3(abs_b[0].dot(halfExtents),abs_b[1].dot(halfExtents),abs_b[2].dot(halfExtents));
+        btVector3 extent = halfExtents.dot3( abs_b[0], abs_b[1], abs_b[2] );
 		
 		aabbMin = center - extent;
 		aabbMax = center + extent;
@@ -67,7 +67,7 @@ void computeAabb (btVector3& aabbMin, btVector3& aabbMax, btConvexInternalShape*
 		const btTransform& t = xform;
 		btMatrix3x3 abs_b = t.getBasis().absolute();  
 		btVector3 center = t.getOrigin();
-		btVector3 extent = btVector3(abs_b[0].dot(halfExtents),abs_b[1].dot(halfExtents),abs_b[2].dot(halfExtents));
+        btVector3 extent = halfExtents.dot3( abs_b[0], abs_b[1], abs_b[2] );
 		
 		aabbMin = center - extent;
 		aabbMax = center + extent;
@@ -198,6 +198,12 @@ int		getShapeTypeSize(int shapeType)
 			btAssert(shapeSize < MAX_SHAPE_SIZE);
 			return shapeSize;
 		}
+	case STATIC_PLANE_PROXYTYPE:
+		{
+			int shapeSize = sizeof(btStaticPlaneShape);
+			btAssert(shapeSize < MAX_SHAPE_SIZE);
+			return shapeSize;
+		}
 
 	default:
 		btAssert(0);
@@ -225,6 +231,7 @@ void dmaCollisionShape (void* collisionShapeLocation, ppu_address_t collisionSha
 {
 	register int dmaSize = getShapeTypeSize(shapeType);
 	cellDmaGet(collisionShapeLocation, collisionShapePtr  , dmaSize, DMA_TAG(dmaTag), 0, 0);
+	//cellDmaGetReadOnly(collisionShapeLocation, collisionShapePtr  , dmaSize, DMA_TAG(dmaTag), 0, 0);
 	//cellDmaWaitTagStatusAll(DMA_MASK(dmaTag));
 }
 
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuCollisionShapes.h b/Engine/lib/bullet/src/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuCollisionShapes.h
index bc2159260..aa8a29104 100644
--- a/Engine/lib/bullet/src/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuCollisionShapes.h
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuCollisionShapes.h
@@ -20,6 +20,7 @@ subject to the following restrictions:
 #include "BulletCollision/BroadphaseCollision/btBroadphaseProxy.h"
 #include "BulletCollision/CollisionShapes/btConvexInternalShape.h"
 #include "BulletCollision/CollisionShapes/btCylinderShape.h"
+#include "BulletCollision/CollisionShapes/btStaticPlaneShape.h"
 
 #include "BulletCollision/CollisionShapes/btOptimizedBvh.h"
 #include "BulletCollision/CollisionShapes/btTriangleIndexVertexArray.h"
@@ -32,7 +33,9 @@ subject to the following restrictions:
 #include "BulletCollision/CollisionShapes/btConvexHullShape.h"
 #include "BulletCollision/CollisionShapes/btCompoundShape.h"
 
-#define MAX_NUM_SPU_CONVEX_POINTS 128
+#define MAX_NUM_SPU_CONVEX_POINTS 128 //@fallback to PPU if a btConvexHullShape has more than MAX_NUM_SPU_CONVEX_POINTS points
+#define MAX_SPU_COMPOUND_SUBSHAPES 16 //@fallback on PPU if compound has more than MAX_SPU_COMPOUND_SUBSHAPES child shapes
+#define MAX_SHAPE_SIZE 256 //@todo: assert on this
 
 ATTRIBUTE_ALIGNED16(struct)	SpuConvexPolyhedronVertexData
 {
@@ -43,7 +46,7 @@ ATTRIBUTE_ALIGNED16(struct)	SpuConvexPolyhedronVertexData
 	ATTRIBUTE_ALIGNED16(btVector3 g_convexPointBuffer[MAX_NUM_SPU_CONVEX_POINTS]);
 };
 
-#define MAX_SHAPE_SIZE 256
+
 
 ATTRIBUTE_ALIGNED16(struct) CollisionShape_LocalStoreMemory
 {
@@ -53,7 +56,7 @@ ATTRIBUTE_ALIGNED16(struct) CollisionShape_LocalStoreMemory
 ATTRIBUTE_ALIGNED16(struct) CompoundShape_LocalStoreMemory
 {
 	// Compound data
-#define MAX_SPU_COMPOUND_SUBSHAPES 16
+
 	ATTRIBUTE_ALIGNED16(btCompoundShapeChild gSubshapes[MAX_SPU_COMPOUND_SUBSHAPES]);
 	ATTRIBUTE_ALIGNED16(char gSubshapeShape[MAX_SPU_COMPOUND_SUBSHAPES][MAX_SHAPE_SIZE]);
 };
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuContactResult.cpp b/Engine/lib/bullet/src/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuContactResult.cpp
index 5ddd1b85f..8584e74c1 100644
--- a/Engine/lib/bullet/src/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuContactResult.cpp
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuContactResult.cpp
@@ -17,6 +17,12 @@ subject to the following restrictions:
 
 //#define DEBUG_SPU_COLLISION_DETECTION 1
 
+#ifdef DEBUG_SPU_COLLISION_DETECTION
+#ifndef __SPU__
+#include <stdio.h>
+#define spu_printf printf
+#endif
+#endif //DEBUG_SPU_COLLISION_DETECTION
 
 SpuContactResult::SpuContactResult()
 {
@@ -99,50 +105,50 @@ bool ManifoldResultAddContactPoint(const btVector3& normalOnBInWorld,
 	if (depth > manifoldPtr->getContactBreakingThreshold())
 		return false;
 
-	//provide inverses or just calculate?
-	btTransform transAInv = transA.inverse();//m_body0->m_cachedInvertedWorldTransform;
-	btTransform transBInv= transB.inverse();//m_body1->m_cachedInvertedWorldTransform;
+	//if (depth > manifoldPtr->getContactProcessingThreshold())
+	//	return false;
+
+
 
 	btVector3 pointA;
 	btVector3 localA;
 	btVector3 localB;
 	btVector3 normal;
 
+
 	if (isSwapped)
 	{
 		normal = normalOnBInWorld * -1;
 		pointA = pointInWorld + normal * depth;
-		localA = transAInv(pointA );
-		localB = transBInv(pointInWorld);
-		/*localA = transBInv(pointA );
-		localB = transAInv(pointInWorld);*/
+		localA = transA.invXform(pointA );
+		localB = transB.invXform(pointInWorld);
 	}
 	else
 	{
 		normal = normalOnBInWorld;
 		pointA = pointInWorld + normal * depth;
-		localA = transAInv(pointA );
-		localB = transBInv(pointInWorld);
+		localA = transA.invXform(pointA );
+		localB = transB.invXform(pointInWorld);
 	}
 
 	btManifoldPoint newPt(localA,localB,normal,depth);
+	newPt.m_positionWorldOnA = pointA;
+	newPt.m_positionWorldOnB = pointInWorld;
+
+	newPt.m_combinedFriction = combinedFriction;
+	newPt.m_combinedRestitution = combinedRestitution;
+
 
 	int insertIndex = manifoldPtr->getCacheEntry(newPt);
 	if (insertIndex >= 0)
 	{
-//		manifoldPtr->replaceContactPoint(newPt,insertIndex);
-//		return true;
-
-#ifdef DEBUG_SPU_COLLISION_DETECTION
-		spu_printf("SPU: same contact detected, nothing done\n");
-#endif //DEBUG_SPU_COLLISION_DETECTION
-		// This is not needed, just use the old info! saves a DMA transfer as well
+		// we need to replace the current contact point, otherwise small errors will accumulate (spheres start rolling etc)
+		manifoldPtr->replaceContactPoint(newPt,insertIndex);
+		return true;
+		
 	} else
 	{
 
-		newPt.m_combinedFriction = combinedFriction;
-		newPt.m_combinedRestitution = combinedRestitution;
-
 		/*
 		///@todo: SPU callbacks, either immediate (local on the SPU), or deferred
 		//User can override friction and/or restitution
@@ -155,6 +161,7 @@ bool ManifoldResultAddContactPoint(const btVector3& normalOnBInWorld,
 			(*gContactAddedCallback)(newPt,m_body0,m_partId0,m_index0,m_body1,m_partId1,m_index1);
 		}
 		*/
+
 		manifoldPtr->addManifoldPoint(newPt);
 		return true;
 
@@ -181,7 +188,12 @@ void SpuContactResult::writeDoubleBufferedManifold(btPersistentManifold* lsManif
 
 void SpuContactResult::addContactPoint(const btVector3& normalOnBInWorld,const btVector3& pointInWorld,btScalar depth)
 {
-	//spu_printf("*** SpuContactResult::addContactPoint: depth = %f\n",depth);
+#ifdef DEBUG_SPU_COLLISION_DETECTION
+	spu_printf("*** SpuContactResult::addContactPoint: depth = %f\n",depth);
+	spu_printf("*** normal = %f,%f,%f\n",normalOnBInWorld.getX(),normalOnBInWorld.getY(),normalOnBInWorld.getZ());
+	spu_printf("*** position = %f,%f,%f\n",pointInWorld.getX(),pointInWorld.getY(),pointInWorld.getZ());
+#endif //DEBUG_SPU_COLLISION_DETECTION
+	
 
 #ifdef DEBUG_SPU_COLLISION_DETECTION
  //   int sman = sizeof(rage::phManifold);
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuContactResult.h b/Engine/lib/bullet/src/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuContactResult.h
index e7be71ab1..394f56dcb 100644
--- a/Engine/lib/bullet/src/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuContactResult.h
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuContactResult.h
@@ -17,7 +17,7 @@ subject to the following restrictions:
 #define SPU_CONTACT_RESULT2_H
 
 
-#ifndef WIN32
+#ifndef _WIN32
 #include <stdint.h>
 #endif
 
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuConvexPenetrationDepthSolver.h b/Engine/lib/bullet/src/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuConvexPenetrationDepthSolver.h
index 5e4cc2881..449f19288 100644
--- a/Engine/lib/bullet/src/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuConvexPenetrationDepthSolver.h
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuConvexPenetrationDepthSolver.h
@@ -24,7 +24,7 @@ class btStackAlloc;
 class btIDebugDraw;
 #include "BulletCollision/NarrowphaseCollision/btConvexPenetrationDepthSolver.h"
 
-#include <LinearMath/btTransform.h>
+#include "LinearMath/btTransform.h"
 
 
 ///ConvexPenetrationDepthSolver provides an interface for penetration depth calculation.
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.cpp b/Engine/lib/bullet/src/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.cpp
index 754fd5d90..46e4d98c1 100644
--- a/Engine/lib/bullet/src/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.cpp
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.cpp
@@ -52,7 +52,7 @@ subject to the following restrictions:
 #ifdef __SPU__
 ///Software caching from the IBM Cell SDK, it reduces 25% SPU time for our test cases
 #ifndef USE_LIBSPE2
-#define USE_SOFTWARE_CACHE 1
+//#define USE_SOFTWARE_CACHE 1
 #endif
 #endif //__SPU__
 
@@ -190,10 +190,27 @@ void* createCollisionLocalStoreMemory()
 {
 	return &gLocalStoreMemory;
 }
+void deleteCollisionLocalStoreMemory()
+{	// Intentionally empty: in this build branch createCollisionLocalStoreMemory() returns &gLocalStoreMemory and allocates nothing, so there is nothing to free.
+}
 #else
+
+btAlignedObjectArray<CollisionTask_LocalStoreMemory*> sLocalStorePointers; // every local store handed out by createCollisionLocalStoreMemory(); released in deleteCollisionLocalStoreMemory()
+
 void* createCollisionLocalStoreMemory()
 {
-        return new CollisionTask_LocalStoreMemory;
+    CollisionTask_LocalStoreMemory* localStore = new CollisionTask_LocalStoreMemory; // allocate a fresh local store for the caller
+    sLocalStorePointers.push_back(localStore); // remember it so deleteCollisionLocalStoreMemory() can delete it later
+    return localStore;
+}
+
+void deleteCollisionLocalStoreMemory()
+{
+    // Delete every local store recorded by createCollisionLocalStoreMemory(), then drop the stale pointers.
+    int idx = 0;
+    while (idx < sLocalStorePointers.size())
+        delete sLocalStorePointers[idx++];
+    sLocalStorePointers.clear();
 }
 
 #endif
@@ -372,6 +389,151 @@ public:
 };
 
 
+/// Tests the convex shape in m_spuCollisionShapes[0] against the static plane in m_spuCollisionShapes[1] and reports at most one contact point to spuContacts.
+void btConvexPlaneCollideSingleContact (SpuCollisionPairInput* wuInput,CollisionTask_LocalStoreMemory* lsMemPtr,SpuContactResult&  spuContacts)
+{
+	// Shape 0 is treated as the convex shape and shape 1 as the static plane; the casts below are unchecked.
+	btConvexShape* convexShape = (btConvexShape*) wuInput->m_spuCollisionShapes[0];
+	btStaticPlaneShape* planeShape = (btStaticPlaneShape*) wuInput->m_spuCollisionShapes[1];
+
+    bool hasCollision = false;
+	const btVector3& planeNormal = planeShape->getPlaneNormal();
+	const btScalar& planeConstant = planeShape->getPlaneConstant();
+	// convexInPlaneTrans maps points from the convex shape's local frame into the plane's local frame;
+	// planeInConvex is the opposite mapping (plane-local to convex-local).
+	btTransform convexWorldTransform = wuInput->m_worldTransform0;
+	btTransform convexInPlaneTrans;
+	convexInPlaneTrans= wuInput->m_worldTransform1.inverse() * convexWorldTransform;
+	btTransform planeInConvex;
+	planeInConvex= convexWorldTransform.inverse() * wuInput->m_worldTransform1;
+	// Query the convex shape's support vertex along the negated plane normal, expressed in the convex shape's frame.
+	//btVector3 vtx = convexShape->localGetSupportVertexWithoutMarginNonVirtual(planeInConvex.getBasis()*-planeNormal);
+	btVector3 vtx = convexShape->localGetSupportVertexNonVirtual(planeInConvex.getBasis()*-planeNormal);
+	// Signed distance of that vertex to the plane, evaluated in the plane's local frame.
+	btVector3 vtxInPlane = convexInPlaneTrans(vtx);
+	btScalar distance = (planeNormal.dot(vtxInPlane) - planeConstant);
+	// Project the vertex onto the plane, then bring the projected point into world space with transform 1.
+	btVector3 vtxInPlaneProjected = vtxInPlane - distance*planeNormal;
+	btVector3 vtxInPlaneWorld = wuInput->m_worldTransform1 * vtxInPlaneProjected;
+	// A contact is reported only while the distance is below the manifold's contact breaking threshold.
+	hasCollision = distance < lsMemPtr->getContactManifoldPtr()->getContactBreakingThreshold();
+	//resultOut->setPersistentManifold(m_manifoldPtr);
+	if (hasCollision)
+	{
+		/// report a contact. internally this will be kept persistent, and contact reduction is done
+		btVector3 normalOnSurfaceB =wuInput->m_worldTransform1.getBasis() * planeNormal;
+		btVector3 pOnB = vtxInPlaneWorld;
+		spuContacts.addContactPoint(normalOnSurfaceB,pOnB,distance);
+	}
+}
+/// Prepares local-store data for a convex-versus-static-plane pair, configures spuContacts, and then runs btConvexPlaneCollideSingleContact().
+void	ProcessConvexPlaneSpuCollision(SpuCollisionPairInput* wuInput, CollisionTask_LocalStoreMemory* lsMemPtr, SpuContactResult& spuContacts)
+{
+
+		register	int dmaSize = 0;
+		register ppu_address_t	dmaPpuAddress2;
+		btPersistentManifold* manifold = (btPersistentManifold*)wuInput->m_persistentManifoldPtr;
+
+		///DMA in the vertices for convex shapes
+		ATTRIBUTE_ALIGNED16(char convexHullShape0[sizeof(btConvexHullShape)]);
+		ATTRIBUTE_ALIGNED16(char convexHullShape1[sizeof(btConvexHullShape)]);
+		// Start fetching the btConvexHullShape object of each convex-hull shape into the stack buffers above (DMA tag 1).
+		if ( btLikely( wuInput->m_shapeType0== CONVEX_HULL_SHAPE_PROXYTYPE ) )
+		{
+			//	spu_printf("SPU: DMA btConvexHullShape\n");
+			
+			dmaSize = sizeof(btConvexHullShape);
+			dmaPpuAddress2 = wuInput->m_collisionShapes[0];
+
+			cellDmaGet(&convexHullShape0, dmaPpuAddress2  , dmaSize, DMA_TAG(1), 0, 0);
+			//cellDmaWaitTagStatusAll(DMA_MASK(1));
+		}
+
+		if ( btLikely( wuInput->m_shapeType1 == CONVEX_HULL_SHAPE_PROXYTYPE ) )
+		{
+			//	spu_printf("SPU: DMA btConvexHullShape\n");
+			dmaSize = sizeof(btConvexHullShape);
+			dmaPpuAddress2 = wuInput->m_collisionShapes[1];
+			cellDmaGet(&convexHullShape1, dmaPpuAddress2  , dmaSize, DMA_TAG(1), 0, 0);
+			//cellDmaWaitTagStatusAll(DMA_MASK(1));
+		}
+		// Wait for DMA tag 1, then pass each fetched hull object to dmaConvexVertexData() and record the local shape pointer.
+		if ( btLikely( wuInput->m_shapeType0 == CONVEX_HULL_SHAPE_PROXYTYPE ) )
+		{		
+			cellDmaWaitTagStatusAll(DMA_MASK(1));
+			dmaConvexVertexData (&lsMemPtr->convexVertexData[0], (btConvexHullShape*)&convexHullShape0);
+			lsMemPtr->convexVertexData[0].gSpuConvexShapePtr = wuInput->m_spuCollisionShapes[0];
+		}
+
+			
+		if ( btLikely( wuInput->m_shapeType1 == CONVEX_HULL_SHAPE_PROXYTYPE ) )
+		{
+			cellDmaWaitTagStatusAll(DMA_MASK(1));
+			dmaConvexVertexData (&lsMemPtr->convexVertexData[1], (btConvexHullShape*)&convexHullShape1);
+			lsMemPtr->convexVertexData[1].gSpuConvexShapePtr = wuInput->m_spuCollisionShapes[1];
+		}
+
+		// For convex hulls, substitute a stack-local btConvexPointCloudShape that reads its points from the local-store point buffer.
+		btConvexPointCloudShape cpc0,cpc1;
+
+		if ( btLikely( wuInput->m_shapeType0 == CONVEX_HULL_SHAPE_PROXYTYPE ) )
+		{
+			cellDmaWaitTagStatusAll(DMA_MASK(2));
+			lsMemPtr->convexVertexData[0].gConvexPoints = &lsMemPtr->convexVertexData[0].g_convexPointBuffer[0];
+			btConvexHullShape* ch = (btConvexHullShape*)wuInput->m_spuCollisionShapes[0];
+			const btVector3& localScaling = ch->getLocalScalingNV();
+			cpc0.setPoints(lsMemPtr->convexVertexData[0].gConvexPoints,lsMemPtr->convexVertexData[0].gNumConvexPoints,false,localScaling);
+			wuInput->m_spuCollisionShapes[0] = &cpc0;
+		}
+
+		if ( btLikely( wuInput->m_shapeType1 == CONVEX_HULL_SHAPE_PROXYTYPE ) )
+		{
+			cellDmaWaitTagStatusAll(DMA_MASK(2));		
+			lsMemPtr->convexVertexData[1].gConvexPoints = &lsMemPtr->convexVertexData[1].g_convexPointBuffer[0];
+			btConvexHullShape* ch = (btConvexHullShape*)wuInput->m_spuCollisionShapes[1];
+			const btVector3& localScaling = ch->getLocalScalingNV();
+			cpc1.setPoints(lsMemPtr->convexVertexData[1].gConvexPoints,lsMemPtr->convexVertexData[1].gNumConvexPoints,false,localScaling);
+			wuInput->m_spuCollisionShapes[1] = &cpc1;
+			// NOTE(review): cpc0/cpc1 are locals of this function, so the pointers stored in wuInput must not be used after it returns.
+		}
+
+		// NOTE(review): marginA, marginB, sumMargin and cpInput are filled in below but never read later in this function - confirm they can be removed.
+//		const btConvexShape* shape0Ptr = (const btConvexShape*)wuInput->m_spuCollisionShapes[0];
+//		const btConvexShape* shape1Ptr = (const btConvexShape*)wuInput->m_spuCollisionShapes[1];
+//		int shapeType0 = wuInput->m_shapeType0;
+//		int shapeType1 = wuInput->m_shapeType1;
+		float marginA = wuInput->m_collisionMargin0;
+		float marginB = wuInput->m_collisionMargin1;
+
+		SpuClosestPointInput	cpInput;
+		cpInput.m_convexVertexData[0] = &lsMemPtr->convexVertexData[0];
+		cpInput.m_convexVertexData[1] = &lsMemPtr->convexVertexData[1];
+		cpInput.m_transformA = wuInput->m_worldTransform0;
+		cpInput.m_transformB = wuInput->m_worldTransform1;
+		float sumMargin = (marginA+marginB+lsMemPtr->getContactManifoldPtr()->getContactBreakingThreshold());
+		cpInput.m_maximumDistanceSquared = sumMargin * sumMargin;
+
+		ppu_address_t manifoldAddress = (ppu_address_t)manifold;
+
+		btPersistentManifold* spuManifold=lsMemPtr->getContactManifoldPtr();
+		//spuContacts.setContactInfo(spuManifold,manifoldAddress,wuInput->m_worldTransform0,wuInput->m_worldTransform1,wuInput->m_isSwapped);
+		spuContacts.setContactInfo(spuManifold,manifoldAddress,lsMemPtr->getColObj0()->getWorldTransform(),
+			lsMemPtr->getColObj1()->getWorldTransform(),
+			lsMemPtr->getColObj0()->getRestitution(),lsMemPtr->getColObj1()->getRestitution(),
+			lsMemPtr->getColObj0()->getFriction(),lsMemPtr->getColObj1()->getFriction(),
+			wuInput->m_isSwapped);
+
+		// With the contact info configured, run the actual convex/plane test.
+		btConvexPlaneCollideSingleContact(wuInput,lsMemPtr,spuContacts);
+
+
+		
+	
+}
+
+
+
+
 ////////////////////////
 /// Convex versus Concave triangle mesh collision detection (handles concave triangle mesh versus sphere, box, cylinder, triangle, cone, convex polyhedron etc)
 ///////////////////
@@ -476,8 +638,9 @@ void	ProcessConvexConcaveSpuCollision(SpuCollisionPairInput* wuInput, CollisionT
 }
 
 
-int stats[11]={0,0,0,0,0,0,0,0,0,0,0};
-int degenerateStats[11]={0,0,0,0,0,0,0,0,0,0,0};
+#define MAX_DEGENERATE_STATS 15
+int stats[MAX_DEGENERATE_STATS]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
+int degenerateStats[MAX_DEGENERATE_STATS]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
 
 
 ////////////////////////
@@ -613,8 +776,10 @@ void	ProcessSpuConvexConvexCollision(SpuCollisionPairInput* wuInput, CollisionTa
 		{
 			btGjkPairDetector gjk(shape0Ptr,shape1Ptr,shapeType0,shapeType1,marginA,marginB,&simplexSolver,penetrationSolver);//&vsSolver,penetrationSolver);
 			gjk.getClosestPoints(cpInput,spuContacts,0);//,debugDraw);
-			
+
+			btAssert(gjk.m_lastUsedMethod <MAX_DEGENERATE_STATS);
 			stats[gjk.m_lastUsedMethod]++;
+			btAssert(gjk.m_degenerateSimplex <MAX_DEGENERATE_STATS);
 			degenerateStats[gjk.m_degenerateSimplex]++;
 
 #ifdef USE_SEPDISTANCE_UTIL			
@@ -719,16 +884,21 @@ void	handleCollisionPair(SpuCollisionPairInput& collisionPairInput, CollisionTas
 		cellDmaWaitTagStatusAll(DMA_MASK(1));
 
 		int childShapeCount0 = spuCompoundShape0->getNumChildShapes();
+		btAssert(childShapeCount0 <= MAX_SPU_COMPOUND_SUBSHAPES); // exactly MAX_SPU_COMPOUND_SUBSHAPES children still fit gSubshapes and are still dispatched to the SPU
 		int childShapeCount1 = spuCompoundShape1->getNumChildShapes();
+		btAssert(childShapeCount1 <= MAX_SPU_COMPOUND_SUBSHAPES); // exactly MAX_SPU_COMPOUND_SUBSHAPES children still fit gSubshapes and are still dispatched to the SPU
 
 		// Start the N^2
 		for (int i = 0; i < childShapeCount0; ++i)
 		{
 			btCompoundShapeChild& childShape0 = lsMem.compoundShapeData[0].gSubshapes[i];
+			btAssert(!btBroadphaseProxy::isCompound(childShape0.m_childShapeType));
 
 			for (int j = 0; j < childShapeCount1; ++j)
 			{
 				btCompoundShapeChild& childShape1 = lsMem.compoundShapeData[1].gSubshapes[j];
+				btAssert(!btBroadphaseProxy::isCompound(childShape1.m_childShapeType));
+
 
 				/* Create a new collision pair input struct using the two child shapes */
 				SpuCollisionPairInput cinput (collisionPairInput);
@@ -741,9 +911,10 @@ void	handleCollisionPair(SpuCollisionPairInput& collisionPairInput, CollisionTas
 				cinput.m_shapeType1 = childShape1.m_childShapeType;
 				cinput.m_collisionMargin1 = childShape1.m_childMargin;
 				/* Recursively call handleCollisionPair () with new collision pair input */
+				
 				handleCollisionPair(cinput, lsMem, spuContacts,			
 					(ppu_address_t)childShape0.m_childShape, lsMem.compoundShapeData[0].gSubshapeShape[i], 
-					(ppu_address_t)childShape1.m_childShape, lsMem.compoundShapeData[1].gSubshapeShape[j], false); // bug fix: changed index to j.
+					(ppu_address_t)childShape1.m_childShape, lsMem.compoundShapeData[1].gSubshapeShape[j], false);
 			}
 		}
 	}
@@ -761,11 +932,12 @@ void	handleCollisionPair(SpuCollisionPairInput& collisionPairInput, CollisionTas
 		cellDmaWaitTagStatusAll(DMA_MASK(1));
 
 		int childShapeCount = spuCompoundShape->getNumChildShapes();
+		btAssert(childShapeCount< MAX_SPU_COMPOUND_SUBSHAPES);
 
 		for (int i = 0; i < childShapeCount; ++i)
 		{
 			btCompoundShapeChild& childShape = lsMem.compoundShapeData[0].gSubshapes[i];
-
+			btAssert(!btBroadphaseProxy::isCompound(childShape.m_childShapeType));
 			// Dma the child shape
 			dmaCollisionShape (&lsMem.compoundShapeData[0].gSubshapeShape[i], (ppu_address_t)childShape.m_childShape, 1, childShape.m_childShapeType);
 			cellDmaWaitTagStatusAll(DMA_MASK(1));
@@ -793,10 +965,13 @@ void	handleCollisionPair(SpuCollisionPairInput& collisionPairInput, CollisionTas
 		cellDmaWaitTagStatusAll(DMA_MASK(1));
 		
 		int childShapeCount = spuCompoundShape->getNumChildShapes();
+		btAssert(childShapeCount< MAX_SPU_COMPOUND_SUBSHAPES);
+
 
 		for (int i = 0; i < childShapeCount; ++i)
 		{
 			btCompoundShapeChild& childShape = lsMem.compoundShapeData[0].gSubshapes[i];
+			btAssert(!btBroadphaseProxy::isCompound(childShape.m_childShapeType));
 			// Dma the child shape
 			dmaCollisionShape (&lsMem.compoundShapeData[0].gSubshapeShape[i], (ppu_address_t)childShape.m_childShape, 1, childShape.m_childShapeType);
 			cellDmaWaitTagStatusAll(DMA_MASK(1));
@@ -845,17 +1020,33 @@ void	handleCollisionPair(SpuCollisionPairInput& collisionPairInput, CollisionTas
 				cellDmaWaitTagStatusAll(DMA_MASK(1) | DMA_MASK(2));
 			}
 			
-			btConvexInternalShape* spuConvexShape0 = (btConvexInternalShape*)collisionShape0Loc;
-			btBvhTriangleMeshShape* trimeshShape = (btBvhTriangleMeshShape*)collisionShape1Loc;
+			if (collisionPairInput.m_shapeType1 == STATIC_PLANE_PROXYTYPE)
+			{
+				btConvexInternalShape* spuConvexShape0 = (btConvexInternalShape*)collisionShape0Loc;
+				btStaticPlaneShape* planeShape= (btStaticPlaneShape*)collisionShape1Loc;
 
-			btVector3 dim0 = spuConvexShape0->getImplicitShapeDimensions();
-			collisionPairInput.m_primitiveDimensions0 = dim0;
-			collisionPairInput.m_collisionShapes[0] = collisionShape0Ptr;
-			collisionPairInput.m_collisionShapes[1] = collisionShape1Ptr;
-			collisionPairInput.m_spuCollisionShapes[0] = spuConvexShape0;
-			collisionPairInput.m_spuCollisionShapes[1] = trimeshShape;
+				btVector3 dim0 = spuConvexShape0->getImplicitShapeDimensions();
+				collisionPairInput.m_primitiveDimensions0 = dim0;
+				collisionPairInput.m_collisionShapes[0] = collisionShape0Ptr;
+				collisionPairInput.m_collisionShapes[1] = collisionShape1Ptr;
+				collisionPairInput.m_spuCollisionShapes[0] = spuConvexShape0;
+				collisionPairInput.m_spuCollisionShapes[1] = planeShape;
 
-			ProcessConvexConcaveSpuCollision(&collisionPairInput,&lsMem,spuContacts);
+				ProcessConvexPlaneSpuCollision(&collisionPairInput,&lsMem,spuContacts);
+			} else
+			{
+				btConvexInternalShape* spuConvexShape0 = (btConvexInternalShape*)collisionShape0Loc;
+				btBvhTriangleMeshShape* trimeshShape = (btBvhTriangleMeshShape*)collisionShape1Loc;
+
+				btVector3 dim0 = spuConvexShape0->getImplicitShapeDimensions();
+				collisionPairInput.m_primitiveDimensions0 = dim0;
+				collisionPairInput.m_collisionShapes[0] = collisionShape0Ptr;
+				collisionPairInput.m_collisionShapes[1] = collisionShape1Ptr;
+				collisionPairInput.m_spuCollisionShapes[0] = spuConvexShape0;
+				collisionPairInput.m_spuCollisionShapes[1] = trimeshShape;
+
+				ProcessConvexConcaveSpuCollision(&collisionPairInput,&lsMem,spuContacts);
+			}
 		}
 
 	}
@@ -1033,7 +1224,7 @@ void	processCollisionTask(void* userPtr, void* lsMemPtr)
 											collisionPairInput.m_isSwapped);
 
 						
-									float distance=0.f;
+									//float distance=0.f;
 									btVector3 normalInB;
 
 
@@ -1054,38 +1245,64 @@ void	processCollisionTask(void* userPtr, void* lsMemPtr)
 												btScalar margin1 = lsMem.getlocalCollisionAlgorithm()->getCollisionMargin1();
 												btVector3 shapeDim0 = lsMem.getlocalCollisionAlgorithm()->getShapeDimensions0()+btVector3(margin0,margin0,margin0);
 												btVector3 shapeDim1 = lsMem.getlocalCollisionAlgorithm()->getShapeDimensions1()+btVector3(margin1,margin1,margin1);
+/*
+												//Box boxA(shapeDim0.getX(),shapeDim0.getY(),shapeDim0.getZ());
+												vmVector3 vmPos0 = getVmVector3(collisionPairInput.m_worldTransform0.getOrigin());
+												vmVector3 vmPos1 = getVmVector3(collisionPairInput.m_worldTransform1.getOrigin());
+												vmMatrix3 vmMatrix0 = getVmMatrix3(collisionPairInput.m_worldTransform0.getBasis());
+												vmMatrix3 vmMatrix1 = getVmMatrix3(collisionPairInput.m_worldTransform1.getBasis());
 
-												Box boxA(shapeDim0.getX(),shapeDim0.getY(),shapeDim0.getZ());
-												Vector3 vmPos0 = getVmVector3(collisionPairInput.m_worldTransform0.getOrigin());
-												Vector3 vmPos1 = getVmVector3(collisionPairInput.m_worldTransform1.getOrigin());
-												Matrix3 vmMatrix0 = getVmMatrix3(collisionPairInput.m_worldTransform0.getBasis());
-												Matrix3 vmMatrix1 = getVmMatrix3(collisionPairInput.m_worldTransform1.getBasis());
-
-												Transform3 transformA(vmMatrix0,vmPos0);
+												vmTransform3 transformA(vmMatrix0,vmPos0);
 												Box boxB(shapeDim1.getX(),shapeDim1.getY(),shapeDim1.getZ());
-												Transform3 transformB(vmMatrix1,vmPos1);
+												vmTransform3 transformB(vmMatrix1,vmPos1);
 												BoxPoint resultClosestBoxPointA;
 												BoxPoint resultClosestBoxPointB;
-												Vector3 resultNormal;
+												vmVector3 resultNormal;
+												*/
+
 #ifdef USE_SEPDISTANCE_UTIL
-												float distanceThreshold = FLT_MAX
+												float distanceThreshold = FLT_MAX;
 #else
-												float distanceThreshold = 0.f;
+												float distanceThreshold = spuManifold->getContactBreakingThreshold();//0.001f;
 #endif
 
 
-												distance = boxBoxDistance(resultNormal,resultClosestBoxPointA,resultClosestBoxPointB,  boxA, transformA, boxB,transformB,distanceThreshold);
+												vmVector3 n;
+												Box boxA;
+												vmVector3 hA(shapeDim0.getX(),shapeDim0.getY(),shapeDim0.getZ());
+												vmVector3 hB(shapeDim1.getX(),shapeDim1.getY(),shapeDim1.getZ());
+												boxA.mHalf= hA;
+												vmTransform3 trA;
+												trA.setTranslation(getVmVector3(collisionPairInput.m_worldTransform0.getOrigin()));
+												trA.setUpper3x3(getVmMatrix3(collisionPairInput.m_worldTransform0.getBasis()));
+												Box boxB;
+												boxB.mHalf = hB;
+												vmTransform3 trB;
+												trB.setTranslation(getVmVector3(collisionPairInput.m_worldTransform1.getOrigin()));
+												trB.setUpper3x3(getVmMatrix3(collisionPairInput.m_worldTransform1.getBasis()));
+												
-												normalInB = -getBtVector3(resultNormal);
+												// distanceThreshold is declared exactly once above, per USE_SEPDISTANCE_UTIL branch; a second declaration here would not compile when that macro is defined.
 
-												if(distance < spuManifold->getContactBreakingThreshold())
+
+												BoxPoint ptA,ptB;
+												float dist = boxBoxDistance(n, ptA, ptB,
+														   boxA, trA, boxB,	   trB,
+															distanceThreshold );
+
+
+//												float distance = boxBoxDistance(resultNormal,resultClosestBoxPointA,resultClosestBoxPointB,  boxA, transformA, boxB,transformB,distanceThreshold);
+												
+												normalInB = -getBtVector3(n);//resultNormal);
+
+												//if(dist < distanceThreshold)//spuManifold->getContactBreakingThreshold())
+												if(dist < spuManifold->getContactBreakingThreshold())
 												{
-													btVector3 pointOnB = collisionPairInput.m_worldTransform1(getBtVector3(resultClosestBoxPointB.localPoint));
+													btVector3 pointOnB = collisionPairInput.m_worldTransform1(getBtVector3(ptB.localPoint));
 
 													spuContacts.addContactPoint(
 														normalInB,
 														pointOnB,
-														distance);
+														dist);
 												}
 											} 
 #else									
@@ -1163,7 +1380,9 @@ void	processCollisionTask(void* userPtr, void* lsMemPtr)
 #endif //USE_SEPDISTANCE_UTIL
 											)
 										{
-											handleCollisionPair(collisionPairInput, lsMem, spuContacts,				(ppu_address_t)lsMem.getColObj0()->getCollisionShape(), &lsMem.gCollisionShapes[0].collisionShape,	(ppu_address_t)lsMem.getColObj1()->getCollisionShape(), &lsMem.gCollisionShapes[1].collisionShape);
+											handleCollisionPair(collisionPairInput, lsMem, spuContacts,
+												(ppu_address_t)lsMem.getColObj0()->getCollisionShape(), &lsMem.gCollisionShapes[0].collisionShape,
+												(ppu_address_t)lsMem.getColObj1()->getCollisionShape(), &lsMem.gCollisionShapes[1].collisionShape);
 										} else
 										{
 												//spu_printf("boxbox dist = %f\n",distance);
@@ -1209,3 +1428,5 @@ void	processCollisionTask(void* userPtr, void* lsMemPtr)
 
 	return;
 }
+
+
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.h b/Engine/lib/bullet/src/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.h
index bbaa555ee..64af964c1 100644
--- a/Engine/lib/bullet/src/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.h
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/SpuGatheringCollisionTask.h
@@ -47,7 +47,7 @@ __attribute__ ((aligned (128)))
 void	processCollisionTask(void* userPtr, void* lsMemory);
 
 void*	createCollisionLocalStoreMemory();
-
+void deleteCollisionLocalStoreMemory();
 
 #if defined(USE_LIBSPE2) && defined(__SPU__)
 #include "../SpuLibspe2Support.h"
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/boxBoxDistance.cpp b/Engine/lib/bullet/src/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/boxBoxDistance.cpp
index 30642a392..5e1202c01 100644
--- a/Engine/lib/bullet/src/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/boxBoxDistance.cpp
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/boxBoxDistance.cpp
@@ -15,7 +15,11 @@ subject to the following restrictions:
 */
 
 
-#include "Box.h"
+//#include "PfxContactBoxBox.h"
+
+#include <math.h>
+#include "../PlatformDefinitions.h"
+#include "boxBoxDistance.h"
 
 static inline float sqr( float a )
 {
@@ -114,18 +118,18 @@ VertexBFaceATest(
 	bool & inVoronoi,
 	float & t0,
 	float & t1,
-	const Vector3 & hA,
-	PE_REF(Vector3) faceOffsetAB,
-	PE_REF(Vector3) faceOffsetBA,
-	const Matrix3 & matrixAB,
-	const Matrix3 & matrixBA,
-	PE_REF(Vector3) signsB,
-	PE_REF(Vector3) scalesB )
+	const vmVector3 & hA,
+	PE_REF(vmVector3) faceOffsetAB,
+	PE_REF(vmVector3) faceOffsetBA,
+	const vmMatrix3 & matrixAB,
+	const vmMatrix3 & matrixBA,
+	PE_REF(vmVector3) signsB,
+	PE_REF(vmVector3) scalesB )
 {
 	// compute a corner of box B in A's coordinate system
 
-	Vector3 corner =
-		Vector3( faceOffsetAB + matrixAB.getCol0() * scalesB.getX() + matrixAB.getCol1() * scalesB.getY() );
+	vmVector3 corner =
+		vmVector3( faceOffsetAB + matrixAB.getCol0() * scalesB.getX() + matrixAB.getCol1() * scalesB.getY() );
 
 	// compute the parameters of the point on A, closest to this corner
 
@@ -144,8 +148,8 @@ VertexBFaceATest(
 	// do the Voronoi test: already know the point on B is in the Voronoi region of the
 	// point on A, check the reverse.
 
-	Vector3 facePointB =
-		Vector3( mulPerElem( faceOffsetBA + matrixBA.getCol0() * t0 + matrixBA.getCol1() * t1 - scalesB, signsB ) );
+	vmVector3 facePointB =
+		vmVector3( mulPerElem( faceOffsetBA + matrixBA.getCol0() * t0 + matrixBA.getCol1() * t1 - scalesB, signsB ) );
 
 	inVoronoi = ( ( facePointB[0] >= voronoiTol * facePointB[2] ) &&
 				  ( facePointB[1] >= voronoiTol * facePointB[0] ) &&
@@ -169,17 +173,17 @@ void
 VertexBFaceATests(
 	bool & done,
 	float & minDistSqr,
-	Point3 & localPointA,
-	Point3 & localPointB,
+	vmPoint3 & localPointA,
+	vmPoint3 & localPointB,
 	FeatureType & featureA,
 	FeatureType & featureB,
-	const Vector3 & hA,
-	PE_REF(Vector3) faceOffsetAB,
-	PE_REF(Vector3) faceOffsetBA,
-	const Matrix3 & matrixAB,
-	const Matrix3 & matrixBA,
-	PE_REF(Vector3) signsB,
-	PE_REF(Vector3) scalesB,
+	const vmVector3 & hA,
+	PE_REF(vmVector3) faceOffsetAB,
+	PE_REF(vmVector3) faceOffsetBA,
+	const vmMatrix3 & matrixAB,
+	const vmMatrix3 & matrixBA,
+	PE_REF(vmVector3) signsB,
+	PE_REF(vmVector3) scalesB,
 	bool first )
 {
 		
@@ -247,16 +251,16 @@ VertexAFaceBTest(
 	bool & inVoronoi,
 	float & t0,
 	float & t1,
-	const Vector3 & hB,
-	PE_REF(Vector3) faceOffsetAB,
-	PE_REF(Vector3) faceOffsetBA,
-	const Matrix3 & matrixAB,
-	const Matrix3 & matrixBA,
-	PE_REF(Vector3) signsA,
-	PE_REF(Vector3) scalesA )
+	const vmVector3 & hB,
+	PE_REF(vmVector3) faceOffsetAB,
+	PE_REF(vmVector3) faceOffsetBA,
+	const vmMatrix3 & matrixAB,
+	const vmMatrix3 & matrixBA,
+	PE_REF(vmVector3) signsA,
+	PE_REF(vmVector3) scalesA )
 {
-	Vector3 corner =
-		Vector3( faceOffsetBA + matrixBA.getCol0() * scalesA.getX() + matrixBA.getCol1() * scalesA.getY() );
+	vmVector3 corner =
+		vmVector3( faceOffsetBA + matrixBA.getCol0() * scalesA.getX() + matrixBA.getCol1() * scalesA.getY() );
 
 	t0 = corner[0];
 	t1 = corner[1];
@@ -270,8 +274,8 @@ VertexAFaceBTest(
 	else if ( t1 < -hB[1] )
 		t1 = -hB[1];
 
-	Vector3 facePointA =
-		Vector3( mulPerElem( faceOffsetAB + matrixAB.getCol0() * t0 + matrixAB.getCol1() * t1 - scalesA, signsA ) );
+	vmVector3 facePointA =
+		vmVector3( mulPerElem( faceOffsetAB + matrixAB.getCol0() * t0 + matrixAB.getCol1() * t1 - scalesA, signsA ) );
 
 	inVoronoi = ( ( facePointA[0] >= voronoiTol * facePointA[2] ) &&
 				  ( facePointA[1] >= voronoiTol * facePointA[0] ) &&
@@ -295,17 +299,17 @@ void
 VertexAFaceBTests(
 	bool & done,
 	float & minDistSqr,
-	Point3 & localPointA,
-	Point3 & localPointB,
+	vmPoint3 & localPointA,
+	vmPoint3 & localPointB,
 	FeatureType & featureA,
 	FeatureType & featureB,
-	const Vector3 & hB,
-	PE_REF(Vector3) faceOffsetAB,
-	PE_REF(Vector3) faceOffsetBA,
-	const Matrix3 & matrixAB,
-	const Matrix3 & matrixBA,
-	PE_REF(Vector3) signsA,
-	PE_REF(Vector3) scalesA,
+	const vmVector3 & hB,
+	PE_REF(vmVector3) faceOffsetAB,
+	PE_REF(vmVector3) faceOffsetBA,
+	const vmMatrix3 & matrixAB,
+	const vmMatrix3 & matrixBA,
+	PE_REF(vmVector3) signsA,
+	PE_REF(vmVector3) scalesA,
 	bool first )
 {
 	float t0, t1;
@@ -363,7 +367,7 @@ VertexAFaceBTests(
 }
 
 //-------------------------------------------------------------------------------------------------
-// EdgeEdgeTest:
+// CustomEdgeEdgeTest:
 //
 // tests whether a pair of edges are the closest features
 //
@@ -374,10 +378,10 @@ VertexAFaceBTests(
 // the dimension of the face normal is 2
 //-------------------------------------------------------------------------------------------------
 
-#define EdgeEdgeTest( ac, ac_letter, ad, ad_letter, bc, bc_letter, bd, bd_letter )              \
+#define CustomEdgeEdgeTest( ac, ac_letter, ad, ad_letter, bc, bc_letter, bd, bd_letter )              \
 {                                                                                               \
-   Vector3 edgeOffsetAB;                                                                          \
-   Vector3 edgeOffsetBA;                                                                          \
+   vmVector3 edgeOffsetAB;                                                                          \
+   vmVector3 edgeOffsetBA;                                                                          \
                                                                                                 \
    edgeOffsetAB = faceOffsetAB + matrixAB.getCol##bc() * scalesB.get##bc_letter();            \
    edgeOffsetAB.set##ac_letter( edgeOffsetAB.get##ac_letter() - scalesA.get##ac_letter() );  \
@@ -421,8 +425,8 @@ VertexAFaceBTests(
       else if ( tA > hA[ad] ) tA = hA[ad];                                                      \
    }                                                                                            \
                                                                                                 \
-   Vector3 edgeOffAB = Vector3( mulPerElem( edgeOffsetAB + matrixAB.getCol##bd() * tB, signsA ) );\
-   Vector3 edgeOffBA = Vector3( mulPerElem( edgeOffsetBA + matrixBA.getCol##ad() * tA, signsB ) );\
+   vmVector3 edgeOffAB = vmVector3( mulPerElem( edgeOffsetAB + matrixAB.getCol##bd() * tB, signsA ) );\
+   vmVector3 edgeOffBA = vmVector3( mulPerElem( edgeOffsetBA + matrixBA.getCol##ad() * tA, signsB ) );\
                                                                                                 \
    inVoronoi = ( edgeOffAB[ac] >= voronoiTol * edgeOffAB[2] ) &&                                \
                ( edgeOffAB[2] >= voronoiTol * edgeOffAB[ac] ) &&                                \
@@ -436,79 +440,79 @@ VertexAFaceBTests(
 }
 
 float
-EdgeEdgeTest_0101(
+CustomEdgeEdgeTest_0101(
 	bool & inVoronoi,
 	float & tA,
 	float & tB,
-	const Vector3 & hA,
-	const Vector3 & hB,
-	PE_REF(Vector3) faceOffsetAB,
-	PE_REF(Vector3) faceOffsetBA,
-	const Matrix3 & matrixAB,
-	const Matrix3 & matrixBA,
-	PE_REF(Vector3) signsA,
-	PE_REF(Vector3) signsB,
-	PE_REF(Vector3) scalesA,
-	PE_REF(Vector3) scalesB )
+	const vmVector3 & hA,
+	const vmVector3 & hB,
+	PE_REF(vmVector3) faceOffsetAB,
+	PE_REF(vmVector3) faceOffsetBA,
+	const vmMatrix3 & matrixAB,
+	const vmMatrix3 & matrixBA,
+	PE_REF(vmVector3) signsA,
+	PE_REF(vmVector3) signsB,
+	PE_REF(vmVector3) scalesA,
+	PE_REF(vmVector3) scalesB )
 {
-	EdgeEdgeTest( 0, X, 1, Y, 0, X, 1, Y );
+	CustomEdgeEdgeTest( 0, X, 1, Y, 0, X, 1, Y );
 }
 
 float
-EdgeEdgeTest_0110(
+CustomEdgeEdgeTest_0110(
 	bool & inVoronoi,
 	float & tA,
 	float & tB,
-	const Vector3 & hA,
-	const Vector3 & hB,
-	PE_REF(Vector3) faceOffsetAB,
-	PE_REF(Vector3) faceOffsetBA,
-	const Matrix3 & matrixAB,
-	const Matrix3 & matrixBA,
-	PE_REF(Vector3) signsA,
-	PE_REF(Vector3) signsB,
-	PE_REF(Vector3) scalesA,
-	PE_REF(Vector3) scalesB )
+	const vmVector3 & hA,
+	const vmVector3 & hB,
+	PE_REF(vmVector3) faceOffsetAB,
+	PE_REF(vmVector3) faceOffsetBA,
+	const vmMatrix3 & matrixAB,
+	const vmMatrix3 & matrixBA,
+	PE_REF(vmVector3) signsA,
+	PE_REF(vmVector3) signsB,
+	PE_REF(vmVector3) scalesA,
+	PE_REF(vmVector3) scalesB )
 {
-	EdgeEdgeTest( 0, X, 1, Y, 1, Y, 0, X );
+	CustomEdgeEdgeTest( 0, X, 1, Y, 1, Y, 0, X );
 }
 
 float
-EdgeEdgeTest_1001(
+CustomEdgeEdgeTest_1001(
 	bool & inVoronoi,
 	float & tA,
 	float & tB,
-	const Vector3 & hA,
-	const Vector3 & hB,
-	PE_REF(Vector3) faceOffsetAB,
-	PE_REF(Vector3) faceOffsetBA,
-	const Matrix3 & matrixAB,
-	const Matrix3 & matrixBA,
-	PE_REF(Vector3) signsA,
-	PE_REF(Vector3) signsB,
-	PE_REF(Vector3) scalesA,
-	PE_REF(Vector3) scalesB )
+	const vmVector3 & hA,
+	const vmVector3 & hB,
+	PE_REF(vmVector3) faceOffsetAB,
+	PE_REF(vmVector3) faceOffsetBA,
+	const vmMatrix3 & matrixAB,
+	const vmMatrix3 & matrixBA,
+	PE_REF(vmVector3) signsA,
+	PE_REF(vmVector3) signsB,
+	PE_REF(vmVector3) scalesA,
+	PE_REF(vmVector3) scalesB )
 {
-	EdgeEdgeTest( 1, Y, 0, X, 0, X, 1, Y );
+	CustomEdgeEdgeTest( 1, Y, 0, X, 0, X, 1, Y );
 }
 
 float
-EdgeEdgeTest_1010(
+CustomEdgeEdgeTest_1010(
 	bool & inVoronoi,
 	float & tA,
 	float & tB,
-	const Vector3 & hA,
-	const Vector3 & hB,
-	PE_REF(Vector3) faceOffsetAB,
-	PE_REF(Vector3) faceOffsetBA,
-	const Matrix3 & matrixAB,
-	const Matrix3 & matrixBA,
-	PE_REF(Vector3) signsA,
-	PE_REF(Vector3) signsB,
-	PE_REF(Vector3) scalesA,
-	PE_REF(Vector3) scalesB )
+	const vmVector3 & hA,
+	const vmVector3 & hB,
+	PE_REF(vmVector3) faceOffsetAB,
+	PE_REF(vmVector3) faceOffsetBA,
+	const vmMatrix3 & matrixAB,
+	const vmMatrix3 & matrixBA,
+	PE_REF(vmVector3) signsA,
+	PE_REF(vmVector3) signsB,
+	PE_REF(vmVector3) scalesA,
+	PE_REF(vmVector3) scalesB )
 {
-	EdgeEdgeTest( 1, Y, 0, X, 1, Y, 0, X );
+	CustomEdgeEdgeTest( 1, Y, 0, X, 1, Y, 0, X );
 }
 
 #define EdgeEdge_SetNewMin( ac_letter, ad_letter, bc_letter, bd_letter )   \
@@ -528,22 +532,22 @@ void
 EdgeEdgeTests(
 	bool & done,
 	float & minDistSqr,
-	Point3 & localPointA,
-	Point3 & localPointB,
+	vmPoint3 & localPointA,
+	vmPoint3 & localPointB,
 	int & otherFaceDimA,
 	int & otherFaceDimB,
 	FeatureType & featureA,
 	FeatureType & featureB,
-	const Vector3 & hA,
-	const Vector3 & hB,
-	PE_REF(Vector3) faceOffsetAB,
-	PE_REF(Vector3) faceOffsetBA,
-	const Matrix3 & matrixAB,
-	const Matrix3 & matrixBA,
-	PE_REF(Vector3) signsA,
-	PE_REF(Vector3) signsB,
-	PE_REF(Vector3) scalesA,
-	PE_REF(Vector3) scalesB,
+	const vmVector3 & hA,
+	const vmVector3 & hB,
+	PE_REF(vmVector3) faceOffsetAB,
+	PE_REF(vmVector3) faceOffsetBA,
+	const vmMatrix3 & matrixAB,
+	const vmMatrix3 & matrixBA,
+	PE_REF(vmVector3) signsA,
+	PE_REF(vmVector3) signsB,
+	PE_REF(vmVector3) scalesA,
+	PE_REF(vmVector3) scalesB,
 	bool first )
 {
 
@@ -555,7 +559,7 @@ EdgeEdgeTests(
 	testOtherFaceDimA = 0;
 	testOtherFaceDimB = 0;
 
-	distSqr = EdgeEdgeTest_0101( done, tA, tB, hA, hB, faceOffsetAB, faceOffsetBA,
+	distSqr = CustomEdgeEdgeTest_0101( done, tA, tB, hA, hB, faceOffsetAB, faceOffsetBA,
 								 matrixAB, matrixBA, signsA, signsB, scalesA, scalesB );
 
 	if ( first ) {
@@ -572,7 +576,7 @@ EdgeEdgeTests(
 	signsA.setX( -signsA.getX() );
 	scalesA.setX( -scalesA.getX() );
 
-	distSqr = EdgeEdgeTest_0101( done, tA, tB, hA, hB, faceOffsetAB, faceOffsetBA,
+	distSqr = CustomEdgeEdgeTest_0101( done, tA, tB, hA, hB, faceOffsetAB, faceOffsetBA,
 								 matrixAB, matrixBA, signsA, signsB, scalesA, scalesB );
 
 	if ( distSqr < minDistSqr ) {
@@ -585,7 +589,7 @@ EdgeEdgeTests(
 	signsB.setX( -signsB.getX() );
 	scalesB.setX( -scalesB.getX() );
 
-	distSqr = EdgeEdgeTest_0101( done, tA, tB, hA, hB, faceOffsetAB, faceOffsetBA,
+	distSqr = CustomEdgeEdgeTest_0101( done, tA, tB, hA, hB, faceOffsetAB, faceOffsetBA,
 								 matrixAB, matrixBA, signsA, signsB, scalesA, scalesB );
 
 	if ( distSqr < minDistSqr ) {
@@ -598,7 +602,7 @@ EdgeEdgeTests(
 	signsA.setX( -signsA.getX() );
 	scalesA.setX( -scalesA.getX() );
 
-	distSqr = EdgeEdgeTest_0101( done, tA, tB, hA, hB, faceOffsetAB, faceOffsetBA,
+	distSqr = CustomEdgeEdgeTest_0101( done, tA, tB, hA, hB, faceOffsetAB, faceOffsetBA,
 								 matrixAB, matrixBA, signsA, signsB, scalesA, scalesB );
 
 	if ( distSqr < minDistSqr ) {
@@ -613,7 +617,7 @@ EdgeEdgeTests(
 	signsB.setX( -signsB.getX() );
 	scalesB.setX( -scalesB.getX() );
 
-	distSqr = EdgeEdgeTest_1001( done, tA, tB, hA, hB, faceOffsetAB, faceOffsetBA,
+	distSqr = CustomEdgeEdgeTest_1001( done, tA, tB, hA, hB, faceOffsetAB, faceOffsetBA,
 								 matrixAB, matrixBA, signsA, signsB, scalesA, scalesB );
 
 	if ( distSqr < minDistSqr ) {
@@ -626,7 +630,7 @@ EdgeEdgeTests(
 	signsA.setY( -signsA.getY() );
 	scalesA.setY( -scalesA.getY() );
 
-	distSqr = EdgeEdgeTest_1001( done, tA, tB, hA, hB, faceOffsetAB, faceOffsetBA,
+	distSqr = CustomEdgeEdgeTest_1001( done, tA, tB, hA, hB, faceOffsetAB, faceOffsetBA,
 								 matrixAB, matrixBA, signsA, signsB, scalesA, scalesB );
 
 	if ( distSqr < minDistSqr ) {
@@ -639,7 +643,7 @@ EdgeEdgeTests(
 	signsB.setX( -signsB.getX() );
 	scalesB.setX( -scalesB.getX() );
 
-	distSqr = EdgeEdgeTest_1001( done, tA, tB, hA, hB, faceOffsetAB, faceOffsetBA,
+	distSqr = CustomEdgeEdgeTest_1001( done, tA, tB, hA, hB, faceOffsetAB, faceOffsetBA,
 								 matrixAB, matrixBA, signsA, signsB, scalesA, scalesB );
 
 	if ( distSqr < minDistSqr ) {
@@ -652,7 +656,7 @@ EdgeEdgeTests(
 	signsA.setY( -signsA.getY() );
 	scalesA.setY( -scalesA.getY() );
 
-	distSqr = EdgeEdgeTest_1001( done, tA, tB, hA, hB, faceOffsetAB, faceOffsetBA,
+	distSqr = CustomEdgeEdgeTest_1001( done, tA, tB, hA, hB, faceOffsetAB, faceOffsetBA,
 								 matrixAB, matrixBA, signsA, signsB, scalesA, scalesB );
 
 	if ( distSqr < minDistSqr ) {
@@ -667,7 +671,7 @@ EdgeEdgeTests(
 	signsB.setX( -signsB.getX() );
 	scalesB.setX( -scalesB.getX() );
 
-	distSqr = EdgeEdgeTest_0110( done, tA, tB, hA, hB, faceOffsetAB, faceOffsetBA,
+	distSqr = CustomEdgeEdgeTest_0110( done, tA, tB, hA, hB, faceOffsetAB, faceOffsetBA,
 								 matrixAB, matrixBA, signsA, signsB, scalesA, scalesB );
 
 	if ( distSqr < minDistSqr ) {
@@ -680,7 +684,7 @@ EdgeEdgeTests(
 	signsA.setX( -signsA.getX() );
 	scalesA.setX( -scalesA.getX() );
 
-	distSqr = EdgeEdgeTest_0110( done, tA, tB, hA, hB, faceOffsetAB, faceOffsetBA,
+	distSqr = CustomEdgeEdgeTest_0110( done, tA, tB, hA, hB, faceOffsetAB, faceOffsetBA,
 								 matrixAB, matrixBA, signsA, signsB, scalesA, scalesB );
 
 	if ( distSqr < minDistSqr ) {
@@ -693,7 +697,7 @@ EdgeEdgeTests(
 	signsB.setY( -signsB.getY() );
 	scalesB.setY( -scalesB.getY() );
 
-	distSqr = EdgeEdgeTest_0110( done, tA, tB, hA, hB, faceOffsetAB, faceOffsetBA,
+	distSqr = CustomEdgeEdgeTest_0110( done, tA, tB, hA, hB, faceOffsetAB, faceOffsetBA,
 								 matrixAB, matrixBA, signsA, signsB, scalesA, scalesB );
 
 	if ( distSqr < minDistSqr ) {
@@ -706,7 +710,7 @@ EdgeEdgeTests(
 	signsA.setX( -signsA.getX() );
 	scalesA.setX( -scalesA.getX() );
 
-	distSqr = EdgeEdgeTest_0110( done, tA, tB, hA, hB, faceOffsetAB, faceOffsetBA,
+	distSqr = CustomEdgeEdgeTest_0110( done, tA, tB, hA, hB, faceOffsetAB, faceOffsetBA,
 								 matrixAB, matrixBA, signsA, signsB, scalesA, scalesB );
 
 	if ( distSqr < minDistSqr ) {
@@ -721,7 +725,7 @@ EdgeEdgeTests(
 	signsB.setY( -signsB.getY() );
 	scalesB.setY( -scalesB.getY() );
 
-	distSqr = EdgeEdgeTest_1010( done, tA, tB, hA, hB, faceOffsetAB, faceOffsetBA,
+	distSqr = CustomEdgeEdgeTest_1010( done, tA, tB, hA, hB, faceOffsetAB, faceOffsetBA,
 								 matrixAB, matrixBA, signsA, signsB, scalesA, scalesB );
 
 	if ( distSqr < minDistSqr ) {
@@ -734,7 +738,7 @@ EdgeEdgeTests(
 	signsA.setY( -signsA.getY() );
 	scalesA.setY( -scalesA.getY() );
 
-	distSqr = EdgeEdgeTest_1010( done, tA, tB, hA, hB, faceOffsetAB, faceOffsetBA,
+	distSqr = CustomEdgeEdgeTest_1010( done, tA, tB, hA, hB, faceOffsetAB, faceOffsetBA,
 								 matrixAB, matrixBA, signsA, signsB, scalesA, scalesB );
 
 	if ( distSqr < minDistSqr ) {
@@ -747,7 +751,7 @@ EdgeEdgeTests(
 	signsB.setY( -signsB.getY() );
 	scalesB.setY( -scalesB.getY() );
 
-	distSqr = EdgeEdgeTest_1010( done, tA, tB, hA, hB, faceOffsetAB, faceOffsetBA,
+	distSqr = CustomEdgeEdgeTest_1010( done, tA, tB, hA, hB, faceOffsetAB, faceOffsetBA,
 								 matrixAB, matrixBA, signsA, signsB, scalesA, scalesB );
 
 	if ( distSqr < minDistSqr ) {
@@ -760,7 +764,7 @@ EdgeEdgeTests(
 	signsA.setY( -signsA.getY() );
 	scalesA.setY( -scalesA.getY() );
 
-	distSqr = EdgeEdgeTest_1010( done, tA, tB, hA, hB, faceOffsetAB, faceOffsetBA,
+	distSqr = CustomEdgeEdgeTest_1010( done, tA, tB, hA, hB, faceOffsetAB, faceOffsetBA,
 								 matrixAB, matrixBA, signsA, signsB, scalesA, scalesB );
 
 	if ( distSqr < minDistSqr ) {
@@ -768,27 +772,25 @@ EdgeEdgeTests(
 	}
 }
 
+
 float
-boxBoxDistance(
-	Vector3& normal,
-	BoxPoint& boxPointA,
-	BoxPoint& boxPointB,
-	PE_REF(Box) boxA, const Transform3& transformA,
-	PE_REF(Box) boxB, const Transform3& transformB,
-	float distanceThreshold )
+boxBoxDistance(vmVector3& normal, BoxPoint& boxPointA, BoxPoint& boxPointB,
+			   PE_REF(Box) boxA, const vmTransform3 & transformA, PE_REF(Box) boxB,
+			   const vmTransform3 & transformB,
+			   float distanceThreshold)
 {
-	Matrix3 identity;
-	identity = Matrix3::identity();
-	Vector3 ident[3];
+	vmMatrix3 identity;
+	identity = vmMatrix3::identity();
+	vmVector3 ident[3];
 	ident[0] = identity.getCol0();
 	ident[1] = identity.getCol1();
 	ident[2] = identity.getCol2();
 
 	// get relative transformations
 
-	Transform3 transformAB, transformBA;
-	Matrix3 matrixAB, matrixBA;
-	Vector3 offsetAB, offsetBA;
+	vmTransform3 transformAB, transformBA;
+	vmMatrix3 matrixAB, matrixBA;
+	vmVector3 offsetAB, offsetBA;
 
 	transformAB = orthoInverse(transformA) * transformB;
 	transformBA = orthoInverse(transformAB);
@@ -798,25 +800,25 @@ boxBoxDistance(
 	matrixBA = transformBA.getUpper3x3();
 	offsetBA = transformBA.getTranslation();
 
-	Matrix3 absMatrixAB = absPerElem(matrixAB);
-	Matrix3 absMatrixBA = absPerElem(matrixBA);
+	vmMatrix3 absMatrixAB = absPerElem(matrixAB);
+	vmMatrix3 absMatrixBA = absPerElem(matrixBA);
 
 	// find separating axis with largest gap between projections
 
 	BoxSepAxisType axisType;
-	Vector3 axisA(0.0f), axisB(0.0f);
+	vmVector3 axisA(0.0f), axisB(0.0f);
 	float gap, maxGap;
 	int faceDimA = 0, faceDimB = 0, edgeDimA = 0, edgeDimB = 0;
 
 	// face axes
 
-	Vector3  gapsA   = absPerElem(offsetAB) - boxA.half - absMatrixAB * boxB.half;
+	vmVector3  gapsA   = absPerElem(offsetAB) - boxA.mHalf - absMatrixAB * boxB.mHalf;
 
 	AaxisTest(0,X,true);
 	AaxisTest(1,Y,false);
 	AaxisTest(2,Z,false);
 
-	Vector3  gapsB   = absPerElem(offsetBA) - boxB.half - absMatrixBA * boxA.half;
+	vmVector3  gapsB   = absPerElem(offsetBA) - boxB.mHalf - absMatrixBA * boxA.mHalf;
 
 	BaxisTest(0,X);
 	BaxisTest(1,Y);
@@ -825,10 +827,10 @@ boxBoxDistance(
 	// cross product axes
 
-	// �O�ς��O�̂Ƃ��̑΍�
+	// guard against the case where a cross product is zero
-	absMatrixAB += Matrix3(1.0e-5f);
-	absMatrixBA += Matrix3(1.0e-5f);
+	absMatrixAB += vmMatrix3(1.0e-5f);
+	absMatrixBA += vmMatrix3(1.0e-5f);
 
-	Matrix3 lsqrs, projOffset, projAhalf, projBhalf;
+	vmMatrix3 lsqrs, projOffset, projAhalf, projBhalf;
 
 	lsqrs.setCol0( mulPerElem( matrixBA.getCol2(), matrixBA.getCol2() ) +
 				   mulPerElem( matrixBA.getCol1(), matrixBA.getCol1() ) );
@@ -841,15 +843,15 @@ boxBoxDistance(
 	projOffset.setCol1(matrixBA.getCol2() * offsetAB.getX() - matrixBA.getCol0() * offsetAB.getZ());
 	projOffset.setCol2(matrixBA.getCol0() * offsetAB.getY() - matrixBA.getCol1() * offsetAB.getX());
 
-	projAhalf.setCol0(absMatrixBA.getCol1() * boxA.half.getZ() + absMatrixBA.getCol2() * boxA.half.getY());
-	projAhalf.setCol1(absMatrixBA.getCol2() * boxA.half.getX() + absMatrixBA.getCol0() * boxA.half.getZ());
-	projAhalf.setCol2(absMatrixBA.getCol0() * boxA.half.getY() + absMatrixBA.getCol1() * boxA.half.getX());
+	projAhalf.setCol0(absMatrixBA.getCol1() * boxA.mHalf.getZ() + absMatrixBA.getCol2() * boxA.mHalf.getY());
+	projAhalf.setCol1(absMatrixBA.getCol2() * boxA.mHalf.getX() + absMatrixBA.getCol0() * boxA.mHalf.getZ());
+	projAhalf.setCol2(absMatrixBA.getCol0() * boxA.mHalf.getY() + absMatrixBA.getCol1() * boxA.mHalf.getX());
 
-	projBhalf.setCol0(absMatrixAB.getCol1() * boxB.half.getZ() + absMatrixAB.getCol2() * boxB.half.getY());
-	projBhalf.setCol1(absMatrixAB.getCol2() * boxB.half.getX() + absMatrixAB.getCol0() * boxB.half.getZ());
-	projBhalf.setCol2(absMatrixAB.getCol0() * boxB.half.getY() + absMatrixAB.getCol1() * boxB.half.getX());
+	projBhalf.setCol0(absMatrixAB.getCol1() * boxB.mHalf.getZ() + absMatrixAB.getCol2() * boxB.mHalf.getY());
+	projBhalf.setCol1(absMatrixAB.getCol2() * boxB.mHalf.getX() + absMatrixAB.getCol0() * boxB.mHalf.getZ());
+	projBhalf.setCol2(absMatrixAB.getCol0() * boxB.mHalf.getY() + absMatrixAB.getCol1() * boxB.mHalf.getX());
 
-	Matrix3 gapsAxB = absPerElem(projOffset) - projAhalf - transpose(projBhalf);
+	vmMatrix3 gapsAxB = absPerElem(projOffset) - projAhalf - transpose(projBhalf);
 
 	CrossAxisTest(0,0,X);
 	CrossAxisTest(0,1,Y);
@@ -872,7 +874,7 @@ boxBoxDistance(
 			axisA = -axisA;
 		axisB = matrixBA * -axisA;
 
-		Vector3 absAxisB = Vector3(absPerElem(axisB));
+		vmVector3 absAxisB = vmVector3(absPerElem(axisB));
 
 		if ( ( absAxisB[0] > absAxisB[1] ) && ( absAxisB[0] > absAxisB[2] ) )
 			faceDimB = 0;
@@ -885,7 +887,7 @@ boxBoxDistance(
 			axisB = -axisB;
 		axisA = matrixAB * -axisB;
 
-		Vector3 absAxisA = Vector3(absPerElem(axisA));
+		vmVector3 absAxisA = vmVector3(absPerElem(axisA));
 
 		if ( ( absAxisA[0] > absAxisA[1] ) && ( absAxisA[0] > absAxisA[2] ) )
 			faceDimA = 0;
@@ -900,8 +902,8 @@ boxBoxDistance(
 			axisA = -axisA;
 		axisB = matrixBA * -axisA;
 
-		Vector3 absAxisA = Vector3(absPerElem(axisA));
-		Vector3 absAxisB = Vector3(absPerElem(axisB));
+		vmVector3 absAxisA = vmVector3(absPerElem(axisA));
+		vmVector3 absAxisB = vmVector3(absPerElem(axisB));
 
 		dimA[1] = edgeDimA;
 		dimB[1] = edgeDimB;
@@ -966,7 +968,7 @@ boxBoxDistance(
 		dimB[1] = (faceDimB+2)%3;
 	}
 
-	Matrix3 aperm_col, bperm_col;
+	vmMatrix3 aperm_col, bperm_col;
 
 	aperm_col.setCol0(ident[dimA[0]]);
 	aperm_col.setCol1(ident[dimA[1]]);
@@ -976,32 +978,32 @@ boxBoxDistance(
 	bperm_col.setCol1(ident[dimB[1]]);
 	bperm_col.setCol2(ident[dimB[2]]);
 
-	Matrix3 aperm_row, bperm_row;
+	vmMatrix3 aperm_row, bperm_row;
 
 	aperm_row = transpose(aperm_col);
 	bperm_row = transpose(bperm_col);
 
 	// permute all box parameters to be in the face coordinate systems
 
-	Matrix3 matrixAB_perm = aperm_row * matrixAB * bperm_col;
-	Matrix3 matrixBA_perm = transpose(matrixAB_perm);
+	vmMatrix3 matrixAB_perm = aperm_row * matrixAB * bperm_col;
+	vmMatrix3 matrixBA_perm = transpose(matrixAB_perm);
 
-	Vector3 offsetAB_perm, offsetBA_perm;
+	vmVector3 offsetAB_perm, offsetBA_perm;
 
 	offsetAB_perm = aperm_row * offsetAB;
 	offsetBA_perm = bperm_row * offsetBA;
 
-	Vector3 halfA_perm, halfB_perm;
+	vmVector3 halfA_perm, halfB_perm;
 
-	halfA_perm = aperm_row * boxA.half;
-	halfB_perm = bperm_row * boxB.half;
+	halfA_perm = aperm_row * boxA.mHalf;
+	halfB_perm = bperm_row * boxB.mHalf;
 
 	// compute the vector between the centers of each face, in each face's coordinate frame
 
-	Vector3 signsA_perm, signsB_perm, scalesA_perm, scalesB_perm, faceOffsetAB_perm, faceOffsetBA_perm;
+	vmVector3 signsA_perm, signsB_perm, scalesA_perm, scalesB_perm, faceOffsetAB_perm, faceOffsetBA_perm;
 
-	signsA_perm = copySignPerElem(Vector3(1.0f),aperm_row * axisA);
-	signsB_perm = copySignPerElem(Vector3(1.0f),bperm_row * axisB);
+	signsA_perm = copySignPerElem(vmVector3(1.0f),aperm_row * axisA);
+	signsB_perm = copySignPerElem(vmVector3(1.0f),bperm_row * axisB);
 	scalesA_perm = mulPerElem( signsA_perm, halfA_perm );
 	scalesB_perm = mulPerElem( signsB_perm, halfB_perm );
 
@@ -1031,11 +1033,11 @@ boxBoxDistance(
 	// if for some reason no case passes the Voronoi test, the features with the minimum
 	// distance are returned.
 
-	Point3 localPointA_perm, localPointB_perm;
+	vmPoint3 localPointA_perm, localPointB_perm;
 	float minDistSqr;
 	bool done;
 
-	Vector3 hA_perm( halfA_perm ), hB_perm( halfB_perm );
+	vmVector3 hA_perm( halfA_perm ), hB_perm( halfB_perm );
 
 	localPointA_perm.setZ( scalesA_perm.getZ() );
 	localPointB_perm.setZ( scalesB_perm.getZ() );
@@ -1109,9 +1111,11 @@ boxBoxDistance(
 
 	// convert local points from face-local to box-local coordinate system
 
-	boxPointA.localPoint = Point3( aperm_col * Vector3( localPointA_perm ) );
-	boxPointB.localPoint = Point3( bperm_col * Vector3( localPointB_perm ) );
+	
+	boxPointA.localPoint = vmPoint3( aperm_col * vmVector3( localPointA_perm )) ;
+	boxPointB.localPoint = vmPoint3( bperm_col * vmVector3( localPointB_perm )) ;
 
+#if 0
 	// find which features of the boxes are involved.
 	// the only feature pairs which occur in this function are VF, FV, and EE, even though the
 	// closest points might actually lie on sub-features, as in a VF contact might be used for
@@ -1144,6 +1148,7 @@ boxBoxDistance(
 	} else {
 		boxPointB.setVertexFeature( sB[0], sB[1], sB[2] );
 	}
+#endif
 
 	normal = transformA * axisA;
 
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/boxBoxDistance.h b/Engine/lib/bullet/src/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/boxBoxDistance.h
index c58e257c0..0d4957dea 100644
--- a/Engine/lib/bullet/src/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/boxBoxDistance.h
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/SpuNarrowPhaseCollisionTask/boxBoxDistance.h
@@ -21,7 +21,6 @@ subject to the following restrictions:
 
 #include "Box.h"
 
-using namespace Vectormath::Aos;
 
 //---------------------------------------------------------------------------
 // boxBoxDistance:
@@ -38,7 +37,7 @@ using namespace Vectormath::Aos;
 //    positive or negative distance between two boxes.
 //
 // args:
-//    Vector3& normal: set to a unit contact normal pointing from box A to box B.
+//    vmVector3& normal: set to a unit contact normal pointing from box A to box B.
 //
 //    BoxPoint& boxPointA, BoxPoint& boxPointB:
 //       set to a closest point or point of penetration on each box.
@@ -46,7 +45,7 @@ using namespace Vectormath::Aos;
 //    Box boxA, Box boxB:
 //       boxes, represented as 3 half-widths
 //
-//    const Transform3& transformA, const Transform3& transformB:
+//    const vmTransform3& transformA, const vmTransform3& transformB:
 //       box transformations, in world coordinates
 //
 //    float distanceThreshold:
@@ -58,9 +57,9 @@ using namespace Vectormath::Aos;
 //---------------------------------------------------------------------------
 
 float
-boxBoxDistance(Vector3& normal, BoxPoint& boxPointA, BoxPoint& boxPointB,
-			   PE_REF(Box) boxA, const Transform3 & transformA, PE_REF(Box) boxB,
-			   const Transform3 & transformB,
+boxBoxDistance(vmVector3& normal, BoxPoint& boxPointA, BoxPoint& boxPointB,
+			   PE_REF(Box) boxA, const vmTransform3 & transformA, PE_REF(Box) boxB,
+			   const vmTransform3 & transformB,
 			   float distanceThreshold = FLT_MAX );
 
 #endif /* __BOXBOXDISTANCE_H__ */
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/SpuSampleTaskProcess.h b/Engine/lib/bullet/src/BulletMultiThreaded/SpuSampleTaskProcess.h
index d733a9a85..6173225ae 100644
--- a/Engine/lib/bullet/src/BulletMultiThreaded/SpuSampleTaskProcess.h
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/SpuSampleTaskProcess.h
@@ -13,8 +13,8 @@ subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-#ifndef SPU_SAMPLE_TASK_PROCESS_H
-#define SPU_SAMPLE_TASK_PROCESS_H
+#ifndef BT_SPU_SAMPLE_TASK_PROCESS_H
+#define BT_SPU_SAMPLE_TASK_PROCESS_H
 
 #include <assert.h>
 
@@ -149,5 +149,5 @@ int main(unsigned long long speid, addr64 argp, addr64 envp)
 
 
 
-#endif // SPU_SAMPLE_TASK_PROCESS_H
+#endif // BT_SPU_SAMPLE_TASK_PROCESS_H
 
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/SpuSync.h b/Engine/lib/bullet/src/BulletMultiThreaded/SpuSync.h
index b90d0fcbf..4157b8f0d 100644
--- a/Engine/lib/bullet/src/BulletMultiThreaded/SpuSync.h
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/SpuSync.h
@@ -15,8 +15,8 @@ subject to the following restrictions:
 Written by: Marten Svanfeldt
 */
 
-#ifndef SPU_SYNC_H
-#define	SPU_SYNC_H
+#ifndef BT_SPU_SYNC_H
+#define	BT_SPU_SYNC_H
 
 
 #include "PlatformDefinitions.h"
@@ -145,4 +145,5 @@ private:
 #endif
 
 
-#endif
+#endif //BT_SPU_SYNC_H
+
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/TrbDynBody.h b/Engine/lib/bullet/src/BulletMultiThreaded/TrbDynBody.h
new file mode 100644
index 000000000..a7f4bf1b3
--- /dev/null
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/TrbDynBody.h
@@ -0,0 +1,79 @@
+/*
+   Copyright (C) 2009 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+
+*/
+
+#ifndef BT_RB_DYN_BODY_H__
+#define BT_RB_DYN_BODY_H__
+
+#include "vectormath/vmInclude.h"
+using namespace Vectormath::Aos;
+
+#include "TrbStateVec.h"
+
+class CollObject;
+
+class TrbDynBody // Constant (non-state) rigid-body properties: mass, inertia, restitution and friction.
+{
+public:
+	TrbDynBody()
+	{
+		fMass = fMassInv = 0.0f; // keep the cached inverse defined until setMass()/setMassInv() is called
+		fCollObject = NULL;
+		fElasticity = 0.2f;
+		fFriction = 0.8f;
+	}
+
+	// Get methods
+	float          getMass() const {return fMass;}
+	float          getElasticity() const {return fElasticity;}
+	float          getFriction() const {return fFriction;}
+	CollObject*    getCollObject() const {return fCollObject;}
+	const Matrix3 &getBodyInertia() const {return fIBody;}
+	const Matrix3 &getBodyInertiaInv() const {return fIBodyInv;}
+	float          getMassInv() const {return fMassInv;}
+
+	// Set methods (the mass and inertia setters store both the value and its inverse)
+	void           setMass(float mass) {fMass=mass;fMassInv=mass>0.0f?1.0f/mass:0.0f;}
+	void           setBodyInertia(const Matrix3 bodyInertia) {fIBody = bodyInertia;fIBodyInv = inverse(bodyInertia);}
+	void           setElasticity(float elasticity) {fElasticity = elasticity;}
+	void           setFriction(float friction) {fFriction = friction;}
+	void           setCollObject(CollObject *collObj) {fCollObject = collObj;}
+	// Inverse-first variants: store the given inverse and derive the forward quantity from it.
+	void           setBodyInertiaInv(const Matrix3 bodyInertiaInv) 
+	{
+		fIBody = inverse(bodyInertiaInv);
+		fIBodyInv = bodyInertiaInv;
+	}
+	void           setMassInv(float invMass) {
+		fMass= invMass>0.0f ? 1.0f/invMass :0.0f;
+		fMassInv=invMass;
+	}
+
+
+private:
+	// Rigid Body constants
+	float          fMass;        // Rigid Body mass
+	float          fMassInv;     // Inverse of mass
+	Matrix3        fIBody;       // Inertia matrix in body's coords
+	Matrix3        fIBodyInv;    // Inertia matrix inverse in body's coords
+	float          fElasticity;  // Coefficient of restitution
+	float          fFriction;    // Coefficient of friction
+
+public:
+	CollObject*    fCollObject;  // Collision object corresponding to the RB
+} __attribute__ ((aligned(16)));
+
+#endif //BT_RB_DYN_BODY_H__
+
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/TrbStateVec.h b/Engine/lib/bullet/src/BulletMultiThreaded/TrbStateVec.h
new file mode 100644
index 000000000..b6d895e12
--- /dev/null
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/TrbStateVec.h
@@ -0,0 +1,339 @@
+/*
+   Copyright (C) 2009 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+
+*/
+
+#ifndef BT_TRBSTATEVEC_H__
+#define BT_TRBSTATEVEC_H__
+
+#include <stdlib.h>
+#ifdef PFX_USE_FREE_VECTORMATH
+#include "vecmath/vmInclude.h"
+#else
+#include "vectormath/vmInclude.h"
+#endif //PFX_USE_FREE_VECTORMATH
+
+
+#include "PlatformDefinitions.h"
+
+
+static inline vmVector3 read_Vector3(const float* p)
+{
+	vmVector3 loaded; // filled in by loadXYZ from the floats at p
+	loadXYZ(loaded, p);
+	return loaded;
+}
+
+static inline vmQuat read_Quat(const float* p)
+{
+	vmQuat loaded; // filled in by loadXYZW from the floats at p
+	loadXYZW(loaded, p);
+	return loaded;
+}
+
+static inline void store_Vector3(const vmVector3 &src, float* p)
+{
+	vmVector3 scratch(src); // storeXYZ is handed a local copy rather than the caller's reference
+	storeXYZ(scratch, p);
+}
+
+static inline void store_Quat(const vmQuat &src, float* p)
+{
+	vmQuat scratch(src); // storeXYZW is handed a local copy rather than the caller's reference
+	storeXYZW(scratch, p);
+}
+
+// Motion type of a rigid body. TrbState stores it in mMotionType and getMotionMask() uses it as a bit index.
+enum {
+	PfxMotionTypeFixed = 0,
+	PfxMotionTypeActive, // 1
+	PfxMotionTypeKeyframe, // 2
+	PfxMotionTypeOneWay, // 3
+	PfxMotionTypeTrigger, // 4
+	PfxMotionTypeCount // 5: number of motion types above
+};
+// Masks for TrbState::getMotionMask(), which sets bit (1<<motionType) and, while sleeping, bit 7.
+#define PFX_MOTION_MASK_DYNAMIC 0x0a // bits 1,3: Active,OneWay
+#define PFX_MOTION_MASK_STATIC  0x95 // bits 0,2,4,7: Fixed,Keyframe,Trigger,Sleeping
+#define PFX_MOTION_MASK_SLEEP   0x0e // bits 1,2,3: Active,Keyframe,OneWay (can sleep)
+#define PFX_MOTION_MASK_TYPE    0x7f // low 7 bits: every bit except the sleeping bit (bit 7)
+
+//
+// Rigid Body state: per-body pose, velocities, world AABB, damping, flags and contact filters.
+// Vector quantities are kept as plain float arrays; the accessors convert to/from vmVector3 / vmQuat.
+
+#ifdef __CELLOS_LV2__
+ATTRIBUTE_ALIGNED128(class) TrbState
+#else
+ATTRIBUTE_ALIGNED16(class) TrbState
+#endif
+
+{
+public:
+	TrbState() // NOTE: does not assign fX/fQ/fV/fOmega, center/half or the delta velocities
+	{
+		setMotionType(PfxMotionTypeActive);
+		contactFilterSelf=contactFilterTarget=0xffffffff;
+		deleted = 0;
+		mSleeping = 0;
+		useSleep = 1;
+		trbBodyIdx=0;
+		mSleepCount=0;
+		useCcd = 0;
+		useContactCallback = 0;
+		useSleepCallback = 0;
+		linearDamping = 1.0f;
+		angularDamping = 0.99f;
+	}
+
+	TrbState(const uint8_t m, const vmVector3& x, const vmQuat& q, const vmVector3& v, const vmVector3& omega );
+	
+	uint16_t	mSleepCount;
+	uint8_t		mMotionType; // one of the PfxMotionType* values
+	uint8_t		deleted            : 1; // one-bit flags
+	uint8_t		mSleeping           : 1;
+	uint8_t		useSleep           : 1;
+	uint8_t		useCcd		       : 1;
+	uint8_t		useContactCallback : 1;
+	uint8_t		useSleepCallback   : 1;
+
+	uint16_t	trbBodyIdx; // exposed as the "rigid body id" by get/setRigidBodyId()
+	uint32_t	contactFilterSelf;
+	uint32_t	contactFilterTarget;
+
+	float		center[3];		// AABB center(World)
+	float		half[3];		// AABB half(World)
+
+	float		linearDamping;
+	float		angularDamping;
+	
+	float		deltaLinearVelocity[3];
+	float		deltaAngularVelocity[3];
+
+	float     fX[3];				// position
+	float     fQ[4];				// orientation
+	float     fV[3];				// velocity
+	float     fOmega[3];			// angular velocity
+
+	inline void setZero();      // Zeroes out the elements
+	inline void setIdentity();  // Sets the rotation to identity and zeroes out the other elements
+
+	bool		isDeleted() const {return deleted==1;}
+
+	uint16_t	getRigidBodyId() const {return trbBodyIdx;}
+	void		setRigidBodyId(uint16_t i) {trbBodyIdx = i;}
+
+
+	uint32_t	getContactFilterSelf() const {return contactFilterSelf;}
+	void		setContactFilterSelf(uint32_t filter) {contactFilterSelf = filter;}
+
+	uint32_t	getContactFilterTarget() const {return contactFilterTarget;}
+	void		setContactFilterTarget(uint32_t filter) {contactFilterTarget = filter;}
+
+	float getLinearDamping() const {return linearDamping;}
+	float getAngularDamping() const {return angularDamping;}
+
+	void setLinearDamping(float damping) {linearDamping=damping;}
+	void setAngularDamping(float damping) {angularDamping=damping;}
+
+	// Changing the motion type also wakes the body: the sleeping flag and sleep counter are cleared.
+	uint8_t		getMotionType() const {return mMotionType;}
+	void		setMotionType(uint8_t t) {mMotionType = t;mSleeping=0;mSleepCount=0;}
+	// One bit per motion type (1<<mMotionType) plus bit 7 while sleeping; test with the PFX_MOTION_MASK_* macros.
+	uint8_t		getMotionMask() const {return (1<<mMotionType)|(mSleeping<<7);}
+
+	bool		isAsleep() const {return mSleeping==1;}
+	bool		isAwake() const {return mSleeping==0;}
+
+	void		wakeup() {mSleeping=0;mSleepCount=0;}
+	void		sleep() {if(useSleep) {mSleeping=1;mSleepCount=0;}} // no effect unless useSleep is set
+
+	uint8_t		getUseSleep() const {return useSleep;}
+	void		setUseSleep(uint8_t b) {useSleep=b;}
+
+	uint8_t		getUseCcd() const {return useCcd;}
+	void		setUseCcd(uint8_t b) {useCcd=b;}
+
+	uint8_t		getUseContactCallback() const {return useContactCallback;}
+	void		setUseContactCallback(uint8_t b) {useContactCallback=b;}
+
+	uint8_t		getUseSleepCallback() const {return useSleepCallback;}
+	void		setUseSleepCallback(uint8_t b) {useSleepCallback=b;}
+
+	void	 	incrementSleepCount() {mSleepCount++;}
+	void		resetSleepCount() {mSleepCount=0;}
+	uint16_t	getSleepCount() const {return mSleepCount;}
+	// Vector/quaternion views of the float arrays above.
+	vmVector3 getPosition() const {return read_Vector3(fX);}
+	vmQuat    getOrientation() const {return read_Quat(fQ);}
+	vmVector3 getLinearVelocity() const {return read_Vector3(fV);}
+	vmVector3 getAngularVelocity() const {return read_Vector3(fOmega);}
+	vmVector3 getDeltaLinearVelocity() const {return read_Vector3(deltaLinearVelocity);}
+	vmVector3 getDeltaAngularVelocity() const {return read_Vector3(deltaAngularVelocity);}
+
+	void setPosition(const vmVector3 &pos) {store_Vector3(pos, fX);}
+	void setLinearVelocity(const vmVector3 &vel) {store_Vector3(vel, fV);}
+	void setAngularVelocity(const vmVector3 &vel) {store_Vector3(vel, fOmega);}
+	void setDeltaLinearVelocity(const vmVector3 &vel) {store_Vector3(vel, deltaLinearVelocity);}
+	void setDeltaAngularVelocity(const vmVector3 &vel) {store_Vector3(vel, deltaAngularVelocity);}
+	void setOrientation(const vmQuat &rot) {store_Quat(rot, fQ);}
+	// The setAuxils* functions recompute center[] / half[] from a box given in local coordinates.
+	inline void setAuxils(const vmVector3 &centerLocal,const vmVector3 &halfLocal);
+	inline void	setAuxilsCcd(const vmVector3 &centerLocal,const vmVector3 &halfLocal,float timeStep);
+	inline	void reset();
+};
+
+// Builds a state from an explicit motion type (m), position (x), orientation (q),
+// linear velocity (v) and angular velocity (omega). Every other field assigned here
+// receives the same default value as in the default constructor and reset().
+inline
+TrbState::TrbState(const uint8_t m, const vmVector3& x, const vmQuat& q, const vmVector3& v, const vmVector3& omega)
+{
+	setMotionType(m); // also clears mSleeping and mSleepCount
+
+	fX[0] = x[0]; fX[1] = x[1]; fX[2] = x[2];
+	fQ[0] = q[0]; fQ[1] = q[1]; fQ[2] = q[2]; fQ[3] = q[3];
+	fV[0] = v[0]; fV[1] = v[1]; fV[2] = v[2];
+	fOmega[0] = omega[0]; fOmega[1] = omega[1]; fOmega[2] = omega[2];
+
+	// The filters are 32 bits wide: enable all of them, exactly as the default
+	// constructor and reset() do (0xffff would leave the upper 16 bits cleared).
+	contactFilterSelf=contactFilterTarget=0xffffffff;
+	trbBodyIdx=0;
+	mSleepCount=0;
+	deleted = 0;
+	mSleeping = 0;
+	useSleep = 1;
+	useCcd = 0;
+	useContactCallback = 0;
+	useSleepCallback = 0;
+
+	// NOTE: center/half and the delta velocities are not assigned by this constructor.
+	linearDamping = 1.0f;
+	angularDamping = 0.99f;
+}
+
+// Zeroes position, linear velocity and angular velocity, and sets the
+// orientation to the identity rotation, i.e. fQ = (0, 0, 0, 1).
+// Flags, filters, damping and the AABB fields are not touched.
+inline void
+TrbState::setIdentity()
+{
+	for (int i = 0; i < 3; ++i)
+	{
+		fX[i] = 0.0f;
+		fQ[i] = 0.0f;
+		fV[i] = 0.0f;
+		fOmega[i] = 0.0f;
+	}
+
+	// last quaternion component
+	fQ[3] = 1.0f;
+}
+
+// Clears position, orientation and both velocities to all-zero values.
+// Note that the orientation becomes the zero quaternion, not the identity.
+inline void
+TrbState::setZero()
+{
+	for (int i = 0; i < 3; ++i)
+	{
+		fX[i] = 0.0f;
+		fV[i] = 0.0f;
+		fOmega[i] = 0.0f;
+	}
+
+	for (int k = 0; k < 4; ++k)
+	{
+		fQ[k] = 0.0f;
+	}
+}
+
+// Recomputes the world-space AABB (center[] / half[]) from a box given in local coordinates.
+inline void
+TrbState::setAuxils(const vmVector3 &centerLocal,const vmVector3 &halfLocal)
+{
+	const vmQuat orient = getOrientation();
+	vmVector3 worldCenter = getPosition() + rotate(orient,centerLocal);
+	vmVector3 worldHalf = absPerElem(vmMatrix3(orient)) * halfLocal;
+	for (int axis = 0; axis < 3; ++axis) {
+		center[axis] = worldCenter[axis];
+		half[axis] = worldHalf[axis];
+	}
+}
+
+// CCD variant of setAuxils(): the stored AABB encloses the box both at its current
+// position and at the position reached after moving with the linear velocity for timeStep.
+inline void
+TrbState::setAuxilsCcd(const vmVector3 &centerLocal,const vmVector3 &halfLocal,float timeStep)
+{
+	const vmQuat orient = getOrientation();
+	vmVector3 startCenter = getPosition() + rotate(orient,centerLocal);
+	vmVector3 extent = absPerElem(vmMatrix3(orient)) * halfLocal;
+
+	// Box centre after advancing by the linear velocity for one time step.
+	vmVector3 endCenter = startCenter + getLinearVelocity()*timeStep;
+
+	// Union of the start box and the end box.
+	vmVector3 lo = minPerElem(endCenter - extent,startCenter - extent);
+	vmVector3 hi = maxPerElem(endCenter + extent,startCenter + extent);
+	vmVector3 sweptCenter = 0.5f * (lo + hi);
+	vmVector3 sweptHalf = 0.5f * (hi - lo);
+
+	for (int axis = 0; axis < 3; ++axis) {
+		center[axis] = sweptCenter[axis];
+		half[axis] = sweptHalf[axis];
+	}
+}
+
+inline
+void TrbState::reset() // restores motion type, flags, filters, body index and damping to their defaults
+{
+#if 0 // disabled: refers to several members (e.g. mDeleted, mPosition) that this class does not declare
+	mSleepCount = 0;
+	mMotionType = PfxMotionTypeActive;
+	mDeleted = 0;
+	mSleeping = 0;
+	mUseSleep = 1;
+	mUseCcd = 0;
+	mUseContactCallback = 0;
+	mUseSleepCallback = 0;
+	mRigidBodyId = 0;
+	mContactFilterSelf = 0xffffffff;
+	mContactFilterTarget = 0xffffffff;
+	mLinearDamping = 1.0f;
+	mAngularDamping = 0.99f;
+	mPosition = vmVector3(0.0f);
+	mOrientation = vmQuat::identity();
+	mLinearVelocity = vmVector3(0.0f);
+	mAngularVelocity = vmVector3(0.0f);
+#endif
+	// Same assignments as the default constructor; position, orientation and velocities are left untouched.
+	setMotionType(PfxMotionTypeActive);
+	contactFilterSelf=contactFilterTarget=0xffffffff;
+	deleted = 0;
+	mSleeping = 0;
+	useSleep = 1;
+	trbBodyIdx=0;
+	mSleepCount=0;
+	useCcd = 0;
+	useContactCallback = 0;
+	useSleepCallback = 0;
+	linearDamping = 1.0f;
+	angularDamping = 0.99f;
+}
+
+#endif //BT_TRBSTATEVEC_H__
+
+
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/Win32ThreadSupport.cpp b/Engine/lib/bullet/src/BulletMultiThreaded/Win32ThreadSupport.cpp
index ae3e68c0b..ae224b59e 100644
--- a/Engine/lib/bullet/src/BulletMultiThreaded/Win32ThreadSupport.cpp
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/Win32ThreadSupport.cpp
@@ -69,8 +69,8 @@ DWORD WINAPI Thread_no_1( LPVOID lpParam )
 		{
 			//exit Thread
 			status->m_status = 3;
-			SetEvent(status->m_eventCompletetHandle);
 			printf("Thread with taskId %i with handle %p exiting\n",status->m_taskId, status->m_threadHandle);
+			SetEvent(status->m_eventCompletetHandle);
 			break;
 		}
 		
@@ -176,6 +176,53 @@ void Win32ThreadSupport::waitForResponse(unsigned int *puiArgument0, unsigned in
 }
 
 
+///check for messages from SPUs
+///Waits up to timeOutInMilliseconds for any task to signal completion. On success returns true and
+///writes the task id to *puiArgument0 and the task's status field to *puiArgument1; returns false
+///when the wait timed out or failed.
+bool Win32ThreadSupport::isTaskCompleted(unsigned int *puiArgument0, unsigned int *puiArgument1, int timeOutInMilliseconds)
+{
+	///We should wait for (one of) the first tasks to finish (or other SPU messages), and report its response
+	
+	///A possible response can be 'yes, SPU handled it', or 'no, please do a PPU fallback'
+
+	btAssert(m_activeSpuStatus.size());
+
+	int last = -1;
+#ifndef SINGLE_THREADED
+	DWORD res = WaitForMultipleObjects(m_completeHandles.size(), &m_completeHandles[0], FALSE, timeOutInMilliseconds);
+
+	//WAIT_TIMEOUT is the documented timeout result. Returning early (instead of wrapping the rest
+	//in an if-block) keeps the braces balanced in both the threaded and the SINGLE_THREADED build.
+	if ((res == WAIT_TIMEOUT) || (res == WAIT_FAILED))
+	{
+		return false;
+	}
+
+	last = res - WAIT_OBJECT_0;
+	///need to find an active spu
+	btAssert(last>=0 && last<m_activeSpuStatus.size());
+
+	btSpuStatus& spuStatus = m_activeSpuStatus[last];
+	btAssert(spuStatus.m_threadHandle);
+	btAssert(spuStatus.m_eventCompletetHandle);
+
+	//WaitForSingleObject(spuStatus.m_eventCompletetHandle, INFINITE);
+	btAssert(spuStatus.m_status > 1);
+	spuStatus.m_status = 0;
+#else
+	(void)timeOutInMilliseconds; //nothing to wait for: task 0 is reported as completed
+	last=0;
+	btSpuStatus& spuStatus = m_activeSpuStatus[last];
+#endif //SINGLE_THREADED
+
+	*puiArgument0 = spuStatus.m_taskId;
+	//in the threaded path m_status was reset to 0 above, so that is the value reported here
+	*puiArgument1 = spuStatus.m_status;
+
+	return true;
+}
+
 
 void Win32ThreadSupport::startThreads(const Win32ThreadConstructionInfo& threadConstructionInfo)
 {
@@ -201,10 +248,10 @@ void Win32ThreadSupport::startThreads(const Win32ThreadConstructionInfo& threadC
 		spuStatus.m_userPtr=0;
 
 		sprintf(spuStatus.m_eventStartHandleName,"eventStart%s%d",threadConstructionInfo.m_uniqueName,i);
-		spuStatus.m_eventStartHandle = CreateEventA(0,false,false,spuStatus.m_eventStartHandleName);
+		spuStatus.m_eventStartHandle = CreateEventA (0,false,false,spuStatus.m_eventStartHandleName);
 
 		sprintf(spuStatus.m_eventCompletetHandleName,"eventComplete%s%d",threadConstructionInfo.m_uniqueName,i);
-		spuStatus.m_eventCompletetHandle = CreateEventA(0,false,false,spuStatus.m_eventCompletetHandleName);
+		spuStatus.m_eventCompletetHandle = CreateEventA (0,false,false,spuStatus.m_eventCompletetHandleName);
 
 		m_completeHandles[i] = spuStatus.m_eventCompletetHandle;
 
@@ -252,6 +299,7 @@ void Win32ThreadSupport::stopSPU()
 		CloseHandle(spuStatus.m_eventCompletetHandle);
 		CloseHandle(spuStatus.m_eventStartHandle);
 		CloseHandle(spuStatus.m_threadHandle);
+
 	}
 
 	m_activeSpuStatus.clear();
@@ -259,4 +307,152 @@ void Win32ThreadSupport::stopSPU()
 
 }
 
+
+
+class btWin32Barrier : public btBarrier // reusable barrier: sync() blocks until mMaxCount threads have called it
+{
+private:
+	CRITICAL_SECTION mExternalCriticalSection; // guards the counters below
+	CRITICAL_SECTION mLocalCriticalSection; // initialised and deleted, but not otherwise used in this class
+	HANDLE mRunEvent,mNotifyEvent; // mRunEvent releases a full round; mNotifyEvent signals that the previous round has drained
+	int mCounter,mEnableCounter; // arrivals in the current round / released waiters that have not yet left sync()
+	int mMaxCount; // number of threads that make up one round
+
+public:
+	btWin32Barrier()
+	{
+		mCounter = 0;
+		mMaxCount = 1;
+		mEnableCounter = 0;
+		InitializeCriticalSection(&mExternalCriticalSection);
+		InitializeCriticalSection(&mLocalCriticalSection);
+		mRunEvent = CreateEvent(NULL,TRUE,FALSE,NULL); // manual-reset, initially non-signalled, unnamed
+		mNotifyEvent = CreateEvent(NULL,TRUE,FALSE,NULL); // manual-reset, initially non-signalled, unnamed
+	}
+
+	virtual ~btWin32Barrier()
+	{
+		DeleteCriticalSection(&mExternalCriticalSection);
+		DeleteCriticalSection(&mLocalCriticalSection);
+		CloseHandle(mRunEvent);
+		CloseHandle(mNotifyEvent);
+	}
+
+	void sync()
+	{
+		int eventId;
+
+		EnterCriticalSection(&mExternalCriticalSection);
+
+		//PFX_PRINTF("enter taskId %d count %d stage %d phase %d mEnableCounter %d\n",taskId,mCounter,debug&0xff,debug>>16,mEnableCounter);
+		// Waiters of the previous round are still leaving: wait (outside the lock) until the last one signals mNotifyEvent.
+		if(mEnableCounter > 0) {
+			ResetEvent(mNotifyEvent);
+			LeaveCriticalSection(&mExternalCriticalSection);
+			WaitForSingleObject(mNotifyEvent,INFINITE); 
+			EnterCriticalSection(&mExternalCriticalSection);
+		}
+		// eventId is this thread's arrival index within the current round.
+		eventId = mCounter;
+		mCounter++;
+		// The last arrival releases the others and records how many of them still have to leave.
+		if(eventId == mMaxCount-1) {
+			SetEvent(mRunEvent);
+
+			mEnableCounter = mCounter-1;
+			mCounter = 0;
+		}
+		else {
+			ResetEvent(mRunEvent);
+			LeaveCriticalSection(&mExternalCriticalSection);
+			WaitForSingleObject(mRunEvent,INFINITE); 
+			EnterCriticalSection(&mExternalCriticalSection);
+			mEnableCounter--;
+		}
+		// Once no released waiter is outstanding, let threads blocked at the top start the next round.
+		if(mEnableCounter == 0) {
+			SetEvent(mNotifyEvent);
+		}
+
+		//PFX_PRINTF("leave taskId %d count %d stage %d phase %d mEnableCounter %d\n",taskId,mCounter,debug&0xff,debug>>16,mEnableCounter);
+
+		LeaveCriticalSection(&mExternalCriticalSection);
+	}
+
+	virtual void setMaxCount(int n) {mMaxCount = n;}
+	virtual int  getMaxCount() {return mMaxCount;}
+};
+
+class btWin32CriticalSection : public btCriticalSection // btCriticalSection backed by a Win32 CRITICAL_SECTION
+{
+private:
+	CRITICAL_SECTION mCriticalSection;
+
+public:
+	btWin32CriticalSection()
+	{
+		InitializeCriticalSection(&mCriticalSection);
+	}
+
+	~btWin32CriticalSection()
+	{
+		DeleteCriticalSection(&mCriticalSection);
+	}
+	// Shared parameters live in mCommonBuff[1..31]; mCommonBuff is not declared here (presumably inherited from btCriticalSection - TODO confirm).
+	unsigned int getSharedParam(int i)
+	{
+		btAssert(i>=0&&i<31);
+		return mCommonBuff[i+1];
+	}
+
+	void setSharedParam(int i,unsigned int p)
+	{
+		btAssert(i>=0&&i<31);
+		mCommonBuff[i+1] = p;
+	}
+	// lock()/unlock() additionally record the lock state in mCommonBuff[0] (1 while held, 0 after release).
+	void lock()
+	{
+		EnterCriticalSection(&mCriticalSection);
+		mCommonBuff[0] = 1;
+	}
+
+	void unlock()
+	{
+		mCommonBuff[0] = 0;
+		LeaveCriticalSection(&mCriticalSection);
+	}
+};
+
+
+btBarrier*	Win32ThreadSupport::createBarrier()
+{
+	void* storage = btAlignedAlloc(sizeof(btWin32Barrier),16); //16-byte aligned raw storage
+	btWin32Barrier* result = new(storage) btWin32Barrier(); //constructed in place; release with deleteBarrier()
+	result->setMaxCount(getNumTasks()); //one barrier round consists of all tasks
+	return result;
+}
+
+btCriticalSection* Win32ThreadSupport::createCriticalSection()
+{
+	//constructed in place inside 16-byte aligned storage; release with deleteCriticalSection()
+	void* storage = btAlignedAlloc(sizeof(btWin32CriticalSection),16);
+	return new(storage) btWin32CriticalSection();
+}
+
+void Win32ThreadSupport::deleteBarrier(btBarrier* barrier) //destroys a barrier obtained from createBarrier()
+{
+	barrier->~btBarrier(); //explicit destructor call: createBarrier() used placement new
+	btAlignedFree(barrier); //pairs with the btAlignedAlloc in createBarrier()
+}
+
+void Win32ThreadSupport::deleteCriticalSection(btCriticalSection* criticalSection) //destroys an object obtained from createCriticalSection()
+{
+	criticalSection->~btCriticalSection(); //explicit destructor call: createCriticalSection() used placement new
+	btAlignedFree(criticalSection); //pairs with the btAlignedAlloc in createCriticalSection()
+}
+
+
 #endif //USE_WIN32_THREADING
+
+
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/Win32ThreadSupport.h b/Engine/lib/bullet/src/BulletMultiThreaded/Win32ThreadSupport.h
index c61ad901c..f688e6c85 100644
--- a/Engine/lib/bullet/src/BulletMultiThreaded/Win32ThreadSupport.h
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/Win32ThreadSupport.h
@@ -18,8 +18,8 @@ subject to the following restrictions:
 
 #ifdef USE_WIN32_THREADING  //platform specific defines are defined in PlatformDefinitions.h
 
-#ifndef WIN32_THREAD_SUPPORT_H
-#define WIN32_THREAD_SUPPORT_H
+#ifndef BT_WIN32_THREAD_SUPPORT_H
+#define BT_WIN32_THREAD_SUPPORT_H
 
 #include "LinearMath/btAlignedObjectArray.h"
 
@@ -30,10 +30,6 @@ typedef void (*Win32ThreadFunc)(void* userPtr,void* lsMemory);
 typedef void* (*Win32lsMemorySetupFunc)();
 
 
-
-
-
-
 ///Win32ThreadSupport helps to initialize/shutdown libspe2, start/stop SPU tasks and communication
 class Win32ThreadSupport : public btThreadSupportInterface 
 {
@@ -70,7 +66,7 @@ public:
 
 	struct	Win32ThreadConstructionInfo
 	{
-		Win32ThreadConstructionInfo(char* uniqueName,
+		Win32ThreadConstructionInfo(const char* uniqueName,
 									Win32ThreadFunc userThreadFunc,
 									Win32lsMemorySetupFunc	lsMemoryFunc,
 									int numThreads=1,
@@ -85,7 +81,7 @@ public:
 
 		}
 
-		char*					m_uniqueName;
+		const char*				m_uniqueName;
 		Win32ThreadFunc			m_userThreadFunc;
 		Win32lsMemorySetupFunc	m_lsMemoryFunc;
 		int						m_numThreads;
@@ -109,6 +105,8 @@ public:
 ///check for messages from SPUs
 	virtual	void waitForResponse(unsigned int *puiArgument0, unsigned int *puiArgument1);
 
+	virtual bool isTaskCompleted(unsigned int *puiArgument0, unsigned int *puiArgument1, int timeOutInMilliseconds);
+
 ///start the spus (can be called at the beginning of each frame, to make sure that the right SPU program is loaded)
 	virtual	void startSPU();
 
@@ -125,8 +123,19 @@ public:
 		return m_maxNumTasks;
 	}
 
+	virtual void*	getThreadLocalMemory(int taskId) //returns the m_lsMemory pointer stored for the given task
+	{
+		return m_activeSpuStatus[taskId].m_lsMemory; //taskId indexes m_activeSpuStatus directly; no range check is done here
+	}
+	virtual btBarrier*	createBarrier();
+
+	virtual btCriticalSection* createCriticalSection();
+
+	virtual void deleteBarrier(btBarrier* barrier);
+
+        virtual void deleteCriticalSection(btCriticalSection* criticalSection);
 };
 
-#endif //WIN32_THREAD_SUPPORT_H
+#endif //BT_WIN32_THREAD_SUPPORT_H
 
 #endif //USE_WIN32_THREADING
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/btGpu3DGridBroadphase.cpp b/Engine/lib/bullet/src/BulletMultiThreaded/btGpu3DGridBroadphase.cpp
index 84a5e59f0..e1d0219d5 100644
--- a/Engine/lib/bullet/src/BulletMultiThreaded/btGpu3DGridBroadphase.cpp
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/btGpu3DGridBroadphase.cpp
@@ -81,7 +81,7 @@ btGpu3DGridBroadphase::btGpu3DGridBroadphase(	btOverlappingPairCache* overlappin
 btGpu3DGridBroadphase::~btGpu3DGridBroadphase()
 {
 	//btSimpleBroadphase will free memory of btSortedOverlappingPairCache, because m_ownsPairCache
-	assert(m_bInitialized);
+	btAssert(m_bInitialized);
 	_finalize();
 }
 
@@ -121,7 +121,7 @@ void btGpu3DGridBroadphase::_initialize(	const btVector3& worldAabbMin,const btV
 
 	m_LastLargeHandleIndex = -1;
 
-    assert(!m_bInitialized);
+    btAssert(!m_bInitialized);
     // allocate host storage
     m_hBodiesHash = new unsigned int[m_maxHandles * 2];
     memset(m_hBodiesHash, 0x00, m_maxHandles*2*sizeof(unsigned int));
@@ -175,7 +175,7 @@ void btGpu3DGridBroadphase::_initialize(	const btVector3& worldAabbMin,const btV
 
 void btGpu3DGridBroadphase::_finalize()
 {
-    assert(m_bInitialized);
+    btAssert(m_bInitialized);
     delete [] m_hBodiesHash;
     delete [] m_hCellStart;
     delete [] m_hPairBuffStartCurr;
@@ -393,7 +393,7 @@ void btGpu3DGridBroadphase::addLarge2LargePairsToCache(btDispatcher* dispatcher)
 
 
 
-void btGpu3DGridBroadphase::rayTest(const btVector3& rayFrom,const btVector3& rayTo, btBroadphaseRayCallback& rayCallback)
+void btGpu3DGridBroadphase::rayTest(const btVector3& rayFrom,const btVector3& rayTo, btBroadphaseRayCallback& rayCallback,const btVector3& aabbMin,const btVector3& aabbMax)
 {
 	btSimpleBroadphase::rayTest(rayFrom, rayTo, rayCallback);
 	for (int i=0; i <= m_LastLargeHandleIndex; i++)
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/btGpu3DGridBroadphase.h b/Engine/lib/bullet/src/BulletMultiThreaded/btGpu3DGridBroadphase.h
index 1d49a0557..1154a5fa6 100644
--- a/Engine/lib/bullet/src/BulletMultiThreaded/btGpu3DGridBroadphase.h
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/btGpu3DGridBroadphase.h
@@ -103,7 +103,9 @@ public:
 
 	virtual btBroadphaseProxy*	createProxy(const btVector3& aabbMin,  const btVector3& aabbMax,int shapeType,void* userPtr ,short int collisionFilterGroup,short int collisionFilterMask, btDispatcher* dispatcher,void* multiSapProxy);
 	virtual void	destroyProxy(btBroadphaseProxy* proxy,btDispatcher* dispatcher);
-	virtual void	rayTest(const btVector3& rayFrom,const btVector3& rayTo, btBroadphaseRayCallback& rayCallback);
+	virtual void	rayTest(const btVector3& rayFrom,const btVector3& rayTo, btBroadphaseRayCallback& rayCallback, const btVector3& aabbMin=btVector3(0,0,0),const btVector3& aabbMax=btVector3(0,0,0));
+
+    
 	virtual void	resetPool(btDispatcher* dispatcher);
 
 protected:
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/btGpu3DGridBroadphaseSharedDefs.h b/Engine/lib/bullet/src/BulletMultiThreaded/btGpu3DGridBroadphaseSharedDefs.h
index db1b6206e..607bda7ed 100644
--- a/Engine/lib/bullet/src/BulletMultiThreaded/btGpu3DGridBroadphaseSharedDefs.h
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/btGpu3DGridBroadphaseSharedDefs.h
@@ -57,4 +57,5 @@ void BT_GPU_PREF(squeezeOverlappingPairBuff)(unsigned int* pPairBuff, unsigned i
 
 //----------------------------------------------------------------------------------------
 
-#endif // BTGPU3DGRIDBROADPHASESHAREDDEFS_H
\ No newline at end of file
+#endif // BTGPU3DGRIDBROADPHASESHAREDDEFS_H
+
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/btGpu3DGridBroadphaseSharedTypes.h b/Engine/lib/bullet/src/BulletMultiThreaded/btGpu3DGridBroadphaseSharedTypes.h
index e1988841e..616a40094 100644
--- a/Engine/lib/bullet/src/BulletMultiThreaded/btGpu3DGridBroadphaseSharedTypes.h
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/btGpu3DGridBroadphaseSharedTypes.h
@@ -63,4 +63,5 @@ struct bt3DGrid3F1U
 
 //----------------------------------------------------------------------------------------
 
-#endif // BTGPU3DGRIDBROADPHASESHAREDTYPES_H
\ No newline at end of file
+#endif // BTGPU3DGRIDBROADPHASESHAREDTYPES_H
+
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/btGpuDefines.h b/Engine/lib/bullet/src/BulletMultiThreaded/btGpuDefines.h
index 3b5eac028..f9315ab64 100644
--- a/Engine/lib/bullet/src/BulletMultiThreaded/btGpuDefines.h
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/btGpuDefines.h
@@ -195,7 +195,7 @@ inline float3 operator-(const float3& v)
 #define BT_GPU_PREF(func) btGpu_##func
 #define BT_GPU_SAFE_CALL(func) func
 #define BT_GPU_Memset memset
-#define BT_GPU_MemcpyToSymbol(a, b, c) memcpy(a, b, c)
+#define BT_GPU_MemcpyToSymbol(a, b, c) memcpy(&a, b, c)
 #define BT_GPU_BindTexture(a, b, c, d)
 #define BT_GPU_UnbindTexture(a)
 
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/btGpuUtilsSharedCode.h b/Engine/lib/bullet/src/BulletMultiThreaded/btGpuUtilsSharedCode.h
index e6f583e33..5761e7901 100644
--- a/Engine/lib/bullet/src/BulletMultiThreaded/btGpuUtilsSharedCode.h
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/btGpuUtilsSharedCode.h
@@ -51,4 +51,5 @@ void BT_GPU_PREF(computeGridSize)(int n, int blockSize, int &numBlocks, int &num
 
 //----------------------------------------------------------------------------------------
 
-} // extern "C"
\ No newline at end of file
+} // extern "C"
+
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/btGpuUtilsSharedDefs.h b/Engine/lib/bullet/src/BulletMultiThreaded/btGpuUtilsSharedDefs.h
index a4790598a..dccfda54c 100644
--- a/Engine/lib/bullet/src/BulletMultiThreaded/btGpuUtilsSharedDefs.h
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/btGpuUtilsSharedDefs.h
@@ -1,6 +1,6 @@
 /*
 Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
-Copyright (C) 2006, 2009 Sony Computer Entertainment Inc. 
+Copyright (C) 2006, 2007 Sony Computer Entertainment Inc. 
 
 This software is provided 'as-is', without any express or implied warranty.
 In no event will the authors be held liable for any damages arising from the use of this software.
@@ -13,8 +13,6 @@ subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-
-
 // Shared definitions for GPU-based utilities
 
 //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
@@ -24,17 +22,14 @@ subject to the following restrictions:
 //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 
 
-
 #ifndef BTGPUUTILSDHAREDDEFS_H
 #define BTGPUUTILSDHAREDDEFS_H
 
 
-
 extern "C"
 {
 
 
-
 //Round a / b to nearest higher integer value
 int BT_GPU_PREF(iDivUp)(int a, int b);
 
@@ -45,15 +40,13 @@ void BT_GPU_PREF(allocateArray)(void** devPtr, unsigned int size);
 void BT_GPU_PREF(freeArray)(void* devPtr);
 void BT_GPU_PREF(copyArrayFromDevice)(void* host, const void* device, unsigned int size);
 void BT_GPU_PREF(copyArrayToDevice)(void* device, const void* host, unsigned int size);
-
-
-
-
+void BT_GPU_PREF(registerGLBufferObject(unsigned int vbo));
+void* BT_GPU_PREF(mapGLBufferObject(unsigned int vbo));
+void BT_GPU_PREF(unmapGLBufferObject(unsigned int vbo));
 
 
 } // extern "C"
 
 
-
 #endif // BTGPUUTILSDHAREDDEFS_H
 
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/btParallelConstraintSolver.cpp b/Engine/lib/bullet/src/BulletMultiThreaded/btParallelConstraintSolver.cpp
new file mode 100644
index 000000000..08d9a2ba5
--- /dev/null
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/btParallelConstraintSolver.cpp
@@ -0,0 +1,1552 @@
+/*
+   Copyright (C) 2010 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+
+*/
+
+
+#include "btParallelConstraintSolver.h"
+#include "BulletDynamics/ConstraintSolver/btContactSolverInfo.h"
+#include "BulletCollision/BroadphaseCollision/btDispatcher.h"
+#include "LinearMath/btPoolAllocator.h"
+#include "BulletCollision/NarrowPhaseCollision/btPersistentManifold.h"
+#include "BulletMultiThreaded/vectormath2bullet.h"
+
+#include "LinearMath/btQuickprof.h"
+#include "BulletMultiThreaded/btThreadSupportInterface.h"
+#ifdef PFX_USE_FREE_VECTORMATH
+#include "vecmath/vmInclude.h"
+#else
+#include "vectormath/vmInclude.h"
+#endif //PFX_USE_FREE_VECTORMATH
+
+#include "HeapManager.h"
+
+#include "PlatformDefinitions.h"
+
+//#include "PfxSimdUtils.h"
+#include "LinearMath/btScalar.h"
+
+#include "TrbStateVec.h"
+
+
+
+/////////////////
+
+
+#define TMP_BUFF_BYTES (15*1024*1024) // 15 MiB
+unsigned char ATTRIBUTE_ALIGNED128(tmp_buff[TMP_BUFF_BYTES]); // file-scope byte buffer of TMP_BUFF_BYTES bytes, declared through the ATTRIBUTE_ALIGNED128 macro
+
+
+
+// Project Gauss Seidel or the equivalent Sequential Impulse
+ inline void resolveSingleConstraintRowGeneric(PfxSolverBody& body1,PfxSolverBody& body2,const btSolverConstraint& c)
+{
+	// One sequential-impulse step for row c: compute the impulse change, clamp the running total, then apply it.
+	btScalar impulseStep = c.m_rhs-btScalar(c.m_appliedImpulse)*c.m_cfm;
+	const btScalar rowVelBody1	=	c.m_contactNormal.dot(getBtVector3(body1.mDeltaLinearVelocity)) 	+ c.m_relpos1CrossNormal.dot(getBtVector3(body1.mDeltaAngularVelocity));
+	const btScalar rowVelBody2	=	-c.m_contactNormal.dot(getBtVector3(body2.mDeltaLinearVelocity)) + c.m_relpos2CrossNormal.dot(getBtVector3(body2.mDeltaAngularVelocity));
+	// Subtract the velocity change both bodies have already accumulated along this row.
+	impulseStep	-=	rowVelBody1*c.m_jacDiagABInv;
+	impulseStep	-=	rowVelBody2*c.m_jacDiagABInv;
+
+	const btScalar candidateTotal = btScalar(c.m_appliedImpulse) + impulseStep;
+	if (!(candidateTotal < c.m_lowerLimit) && !(candidateTotal > c.m_upperLimit))
+	{
+		c.m_appliedImpulse = candidateTotal;
+	}
+	else if (candidateTotal < c.m_lowerLimit)
+	{
+		impulseStep = c.m_lowerLimit-c.m_appliedImpulse;
+		c.m_appliedImpulse = c.m_lowerLimit;
+	}
+	else
+	{
+		impulseStep = c.m_upperLimit-c.m_appliedImpulse;
+		c.m_appliedImpulse = c.m_upperLimit;
+	}
+
+	// A body whose mMassInv is zero receives no velocity update.
+	if (body1.mMassInv)
+	{
+		btVector3 linearDirA = c.m_contactNormal*body1.mMassInv;
+		body1.mDeltaLinearVelocity += vmVector3(linearDirA.getX()*impulseStep,linearDirA.getY()*impulseStep,linearDirA.getZ()*impulseStep);
+		btVector3 angularStepA=c.m_angularComponentA*(btVector3(impulseStep,impulseStep,impulseStep));
+		body1.mDeltaAngularVelocity += vmVector3(angularStepA.getX(),angularStepA.getY(),angularStepA.getZ());
+	}
+
+	if (body2.mMassInv)
+	{
+		btVector3 linearDirB = -c.m_contactNormal*body2.mMassInv;
+		body2.mDeltaLinearVelocity += vmVector3(linearDirB.getX()*impulseStep,linearDirB.getY()*impulseStep,linearDirB.getZ()*impulseStep);
+		btVector3 angularStepB = c.m_angularComponentB*((btVector3(impulseStep,impulseStep,impulseStep)));
+		body2.mDeltaAngularVelocity += vmVector3(angularStepB.getX(),angularStepB.getY(),angularStepB.getZ());
+	}
+
+	// Each branch above adds impulse-scaled linear and angular terms directly
+	// into the body's delta-velocity fields.
+
+}
+
+ 
+static SIMD_FORCE_INLINE
+void pfxSolveLinearConstraintRow(btConstraintRow &constraint,
+	vmVector3 &deltaLinearVelocityA,vmVector3 &deltaAngularVelocityA,
+	float massInvA,const vmMatrix3 &inertiaInvA,const vmVector3 &rA,
+	vmVector3 &deltaLinearVelocityB,vmVector3 &deltaAngularVelocityB,
+	float massInvB,const vmMatrix3 &inertiaInvB,const vmVector3 &rB)
+{
+	// Solve one row: correct by the relative point velocity along the row axis, clamp the total, apply to both bodies.
+	const vmVector3 axis(btReadVector3(constraint.m_normal));
+	const btScalar previousTotal = constraint.m_accumImpulse;
+	vmVector3 pointVelA = deltaLinearVelocityA + cross(deltaAngularVelocityA,rA);
+	vmVector3 pointVelB = deltaLinearVelocityB + cross(deltaAngularVelocityB,rB);
+	btScalar impulse = constraint.m_rhs;
+	impulse -= constraint.m_jacDiagInv * dot(axis,pointVelA-pointVelB);
+	constraint.m_accumImpulse = btClamped(previousTotal + impulse,constraint.m_lowerLimit,constraint.m_upperLimit);
+	impulse = constraint.m_accumImpulse - previousTotal;
+	deltaLinearVelocityA += impulse * massInvA * axis;
+	deltaAngularVelocityA += impulse * inertiaInvA * cross(rA,axis);
+	deltaLinearVelocityB -= impulse * massInvB * axis;
+	deltaAngularVelocityB -= impulse * inertiaInvB * cross(rB,axis);
+}
+ 
+void btSolveContactConstraint(
+	btConstraintRow &constraintResponse,
+	btConstraintRow &constraintFriction1,
+	btConstraintRow &constraintFriction2,
+	const vmVector3 &contactPointA,
+	const vmVector3 &contactPointB,
+	PfxSolverBody &solverBodyA,
+	PfxSolverBody &solverBodyB,
+	float friction
+	)
+{
+	// Contact offsets rotated by each body's orientation.
+	const vmVector3 armA = rotate(solverBodyA.mOrientation,contactPointA);
+	const vmVector3 armB = rotate(solverBodyB.mOrientation,contactPointB);
+
+	// Normal row first; its accumulated impulse sets the friction limits below.
+	pfxSolveLinearConstraintRow(constraintResponse,
+		solverBodyA.mDeltaLinearVelocity,solverBodyA.mDeltaAngularVelocity,solverBodyA.mMassInv,solverBodyA.mInertiaInv,armA,
+		solverBodyB.mDeltaLinearVelocity,solverBodyB.mDeltaAngularVelocity,solverBodyB.mMassInv,solverBodyB.mInertiaInv,armB);
+
+	// Both friction rows are limited to +/- friction * |accumulated normal impulse|.
+	const float frictionBound = friction*fabsf(constraintResponse.m_accumImpulse);
+	btConstraintRow* const frictionRows[2] = {&constraintFriction1,&constraintFriction2};
+	for(int f=0;f<2;f++) {
+		frictionRows[f]->m_lowerLimit = -frictionBound;
+		frictionRows[f]->m_upperLimit =  frictionBound;
+
+		pfxSolveLinearConstraintRow(*frictionRows[f],
+			solverBodyA.mDeltaLinearVelocity,solverBodyA.mDeltaAngularVelocity,solverBodyA.mMassInv,solverBodyA.mInertiaInv,armA,
+			solverBodyB.mDeltaLinearVelocity,solverBodyB.mDeltaAngularVelocity,solverBodyB.mMassInv,solverBodyB.mInertiaInv,armB);
+	}
+}
+
+
+void CustomSolveConstraintsTaskParallel(
+	const PfxParallelGroup *contactParallelGroup,const PfxParallelBatch *contactParallelBatches,
+	PfxConstraintPair *contactPairs,uint32_t numContactPairs,
+	btPersistentManifold* offsetContactManifolds,
+	btConstraintRow*	offsetContactConstraintRows,
+	const PfxParallelGroup *jointParallelGroup,const PfxParallelBatch *jointParallelBatches,
+	PfxConstraintPair *jointPairs,uint32_t numJointPairs,
+	btSolverConstraint* offsetSolverConstraints,
+	TrbState *offsetRigStates,
+	PfxSolverBody *offsetSolverBodies,
+	uint32_t numRigidBodies,
+	int iteration,unsigned int taskId,unsigned int numTasks,btBarrier *barrier)
+{
+	// Per-task solver loop: within every phase this task handles the batches with batchId%numTasks == taskId.
+	PfxSolverBody staticBody;
+	staticBody.mMassInv = 0.f;
+	staticBody.mDeltaAngularVelocity=vmVector3(0,0,0);
+	staticBody.mDeltaLinearVelocity =vmVector3(0,0,0);
+
+	// Pass k==0 only re-applies stored contact impulses; passes 1..iteration run the actual solve.
+	for(int k=0;k<iteration+1;k++) {
+		// Joint
+		for(uint32_t phaseId=0;phaseId<jointParallelGroup->numPhases;phaseId++) {
+			for(uint32_t batchId=0;batchId<jointParallelGroup->numBatches[phaseId];batchId++) {
+				uint32_t numPairs = jointParallelGroup->numPairs[phaseId*PFX_MAX_SOLVER_BATCHES+batchId];
+				if(batchId%numTasks == taskId && numPairs > 0) {
+					const PfxParallelBatch &batch = jointParallelBatches[phaseId*PFX_MAX_SOLVER_BATCHES+batchId];
+					for(uint32_t i=0;i<numPairs;i++) {
+						PfxConstraintPair &pair = jointPairs[batch.pairIndices[i]];
+						uint16_t iA = pfxGetRigidBodyIdA(pair);
+						uint16_t iB = pfxGetRigidBodyIdB(pair);
+
+						// Body id 65535 selects the local zero-inverse-mass placeholder instead of a real solver body.
+						PfxSolverBody &solverBodyA = iA != 65535 ? offsetSolverBodies[iA] : staticBody;
+						PfxSolverBody &solverBodyB = iB != 65535 ? offsetSolverBodies[iB] : staticBody;
+
+						if(k==0) {
+							// nothing is done for joint pairs on the first pass
+						}
+						else {
+							btSolverConstraint* constraintRow = &offsetSolverConstraints[pfxGetContactId1(pair)];
+							int numRows = pfxGetNumConstraints(pair);
+							int row;
+							for (row=0;row<numRows;row++)
+							{
+								resolveSingleConstraintRowGeneric(solverBodyA,solverBodyB,constraintRow[row]);
+							}
+							
+						}
+					}
+				}
+			}
+			if (barrier) // a null barrier is tolerated here, exactly as in the contact phase below
+				barrier->sync();
+		}
+
+		// Contact
+		for(uint32_t phaseId=0;phaseId<contactParallelGroup->numPhases;phaseId++) {
+			for(uint32_t batchId=0;batchId<contactParallelGroup->numBatches[phaseId];batchId++) {
+				uint32_t numPairs = contactParallelGroup->numPairs[phaseId*PFX_MAX_SOLVER_BATCHES+batchId];
+				if(batchId%numTasks == taskId && numPairs > 0) {
+					const PfxParallelBatch &batch = contactParallelBatches[phaseId*PFX_MAX_SOLVER_BATCHES+batchId];
+					for(uint32_t i=0;i<numPairs;i++) {
+						PfxConstraintPair &pair = contactPairs[batch.pairIndices[i]];
+						uint16_t iA = pfxGetRigidBodyIdA(pair);
+						uint16_t iB = pfxGetRigidBodyIdB(pair);
+
+						uint32_t contactIndex = pfxGetConstraintId1(pair);
+						btPersistentManifold& contact = offsetContactManifolds[contactIndex];
+						btConstraintRow* contactConstraintRows = &offsetContactConstraintRows[contactIndex*12];
+						// Rows are laid out 12 per manifold and 3 per contact point (normal, friction 1, friction 2).
+						PfxSolverBody &solverBodyA = offsetSolverBodies[iA];
+						PfxSolverBody &solverBodyB = offsetSolverBodies[iB];
+						// NOTE(review): unlike the joint loop, ids are not checked against 65535 here - confirm contact pairs never carry it.
+						for(int j=0;j<contact.getNumContacts();j++) {
+							btManifoldPoint& cp = contact.getContactPoint(j);
+							
+							if(k==0) {
+								vmVector3 rA = rotate(solverBodyA.mOrientation,btReadVector3(cp.m_localPointA));
+								vmVector3 rB = rotate(solverBodyB.mOrientation,btReadVector3(cp.m_localPointB));
+								// First pass: seed each row with the impulse stored on the contact point and apply it to both bodies.
+								float imp[3] = 
+								{
+									cp.m_appliedImpulse,
+									cp.m_appliedImpulseLateral1,
+									cp.m_appliedImpulseLateral2
+								};
+								for(int row=0;row<3;row++) 
+								{
+									vmVector3 normal = btReadVector3(contactConstraintRows[j*3+row].m_normal);
+									contactConstraintRows[j*3+row].m_accumImpulse = imp[row];
+									float deltaImpulse = contactConstraintRows[j*3+row].m_accumImpulse;
+									solverBodyA.mDeltaLinearVelocity += deltaImpulse * solverBodyA.mMassInv * normal;
+									solverBodyA.mDeltaAngularVelocity += deltaImpulse * solverBodyA.mInertiaInv * cross(rA,normal);
+									solverBodyB.mDeltaLinearVelocity -= deltaImpulse * solverBodyB.mMassInv * normal;
+									solverBodyB.mDeltaAngularVelocity -= deltaImpulse * solverBodyB.mInertiaInv * cross(rB,normal);
+								}
+							}
+							else {
+								btSolveContactConstraint(
+									contactConstraintRows[j*3],
+									contactConstraintRows[j*3+1],
+									contactConstraintRows[j*3+2],
+									btReadVector3(cp.m_localPointA),
+									btReadVector3(cp.m_localPointB),
+									solverBodyA,
+									solverBodyB,
+									cp.m_combinedFriction
+									);
+							}
+						}
+					}
+				}
+			}
+
+			if (barrier)
+				barrier->sync();
+		}
+	}
+}
+
+void CustomPostSolverTask(
+	TrbState *states,
+	PfxSolverBody *solverBodies,
+	uint32_t numRigidBodies)
+{
+	// Fold each solver body's accumulated velocity deltas into the rigid-body state at the same index.
+	for(uint32_t bodyIdx=0;bodyIdx!=numRigidBodies;++bodyIdx) {
+		PfxSolverBody &delta = solverBodies[bodyIdx];
+		states[bodyIdx].setLinearVelocity(states[bodyIdx].getLinearVelocity()+delta.mDeltaLinearVelocity);
+		states[bodyIdx].setAngularVelocity(states[bodyIdx].getAngularVelocity()+delta.mDeltaAngularVelocity);
+	}
+}
+
+void*	SolverlsMemoryFunc()
+{
+	void* const noLocalStore = 0;	// no local-store memory is created for this solver
+	return noLocalStore;
+}
+
+
+static SIMD_FORCE_INLINE
+void pfxGetPlaneSpace(const vmVector3& n, vmVector3& p, vmVector3& q)
+{
+	// Writes into p and q two vectors built from n; the branch taken depends on the size of n's z component.
+	if(fabsf(n[2]) > 0.707f) {
+		// p lies in the y-z plane
+		const float yzLenSq = n[1]*n[1] + n[2]*n[2];
+		const float yzInvLen = 1.0f/sqrtf(yzLenSq);
+		p[0] = 0;
+		p[1] = -n[2]*yzInvLen;
+		p[2] = n[1]*yzInvLen;
+		// q = n x p
+		q[0] = yzLenSq*yzInvLen;
+		q[1] = -n[0]*p[2];
+		q[2] = n[0]*p[1];
+		return;
+	}
+	// otherwise p lies in the x-y plane
+	const float xyLenSq = n[0]*n[0] + n[1]*n[1];
+	const float xyInvLen = 1.0f/sqrtf(xyLenSq);
+	p[0] = -n[1]*xyInvLen;
+	p[1] = n[0]*xyInvLen;
+	p[2] = 0;
+	// q = n x p
+	q[0] = -n[2]*p[1];
+	q[1] = n[2]*p[0];
+	q[2] = xyLenSq*xyInvLen;
+}
+
+
+
+#define PFX_CONTACT_SLOP 0.001f // added to the penetration depth before the position-error term is formed in btSetupContactConstraint
+
+void btSetupContactConstraint( // Fills the normal row and the two friction rows for a single contact point.
+	btConstraintRow &constraintResponse,
+	btConstraintRow &constraintFriction1,
+	btConstraintRow &constraintFriction2,
+	float penetrationDepth,
+	float restitution,
+	float friction,
+	const vmVector3 &contactNormal,
+	const vmVector3 &contactPointA,
+	const vmVector3 &contactPointB,
+	const TrbState &stateA,
+	const TrbState &stateB,
+	PfxSolverBody &solverBodyA,
+	PfxSolverBody &solverBodyB,
+	const vmVector3& linVelA, 
+	const vmVector3& angVelA,
+	const vmVector3& linVelB, 
+	const vmVector3& angVelB,
+	// the four velocities above feed only the *restitution locals computed in the body; friction is not read in the body
+	float separateBias,
+	float timeStep
+	)
+{
+	vmVector3 rA = rotate(solverBodyA.mOrientation,contactPointA);
+	vmVector3 rB = rotate(solverBodyB.mOrientation,contactPointB);
+	// K combines both inverse masses with the inertia terms about rA and rB; dot(K*n,n) is each row's denominator below.
+	vmMatrix3 K = vmMatrix3::scale(vmVector3(solverBodyA.mMassInv + solverBodyB.mMassInv)) - 
+			crossMatrix(rA) * solverBodyA.mInertiaInv * crossMatrix(rA) - 
+			crossMatrix(rB) * solverBodyB.mInertiaInv * crossMatrix(rB);
+	
+	//use the velocities without the applied (gravity and external) forces for restitution computation
+	vmVector3 vArestitution = linVelA + cross(angVelA,rA);
+	vmVector3 vBrestitution = linVelB + cross(angVelB,rB);
+	vmVector3 vABrestitution = vArestitution-vBrestitution;
+	// NOTE(review): vABrestitution is never read below; the restitution term uses vAB instead - confirm which is intended.
+	vmVector3 vA = stateA.getLinearVelocity() + cross(stateA.getAngularVelocity(),rA);
+	vmVector3 vB = stateB.getLinearVelocity() + cross(stateB.getAngularVelocity(),rB);
+	vmVector3 vAB = vA-vB;
+
+	// Two tangent directions for the friction rows are derived from the contact normal.
+	vmVector3 tangent1,tangent2;
+	btPlaneSpace1(contactNormal,tangent1,tangent2);
+	// The accumulated impulses are not reset here; the three assignments below are disabled.
+//	constraintResponse.m_accumImpulse = 0.f;
+//	constraintFriction1.m_accumImpulse = 0.f;
+//	constraintFriction2.m_accumImpulse = 0.f;
+
+	// Contact Constraint
+	{
+		vmVector3 normal = contactNormal;
+
+		float denom = dot(K*normal,normal);
+
+		constraintResponse.m_rhs = -(1.0f+restitution)*dot(vAB,normal); // velocity error
+		constraintResponse.m_rhs -= (separateBias * btMin(0.0f,penetrationDepth+PFX_CONTACT_SLOP)) / timeStep; // position error
+		constraintResponse.m_rhs /= denom;
+		constraintResponse.m_jacDiagInv = 1.0f/denom;
+		constraintResponse.m_lowerLimit = 0.0f;
+		constraintResponse.m_upperLimit = SIMD_INFINITY;
+		btStoreVector3(normal,constraintResponse.m_normal);
+	}
+
+	// Friction Constraint 1
+	{
+		vmVector3 normal = tangent1;
+
+		float denom = dot(K*normal,normal);
+
+		constraintFriction1.m_jacDiagInv = 1.0f/denom;
+		constraintFriction1.m_rhs = -dot(vAB,normal);
+		constraintFriction1.m_rhs *= constraintFriction1.m_jacDiagInv;
+		constraintFriction1.m_lowerLimit = 0.0f; // btSolveContactConstraint overwrites both limits before it solves this row
+		constraintFriction1.m_upperLimit = SIMD_INFINITY;
+		btStoreVector3(normal,constraintFriction1.m_normal);
+	}
+	
+	// Friction Constraint 2
+	{
+		vmVector3 normal = tangent2;
+
+		float denom = dot(K*normal,normal);
+
+		constraintFriction2.m_jacDiagInv = 1.0f/denom;
+		constraintFriction2.m_rhs = -dot(vAB,normal);
+		constraintFriction2.m_rhs *= constraintFriction2.m_jacDiagInv;
+		constraintFriction2.m_lowerLimit = 0.0f; // btSolveContactConstraint overwrites both limits before it solves this row
+		constraintFriction2.m_upperLimit = SIMD_INFINITY;
+		btStoreVector3(normal,constraintFriction2.m_normal);
+	}
+}
+
+
+void CustomSetupContactConstraintsTask(
+	PfxConstraintPair *contactPairs,uint32_t numContactPairs,
+	btPersistentManifold*	offsetContactManifolds,
+	btConstraintRow* offsetContactConstraintRows,
+	TrbState *offsetRigStates,
+	PfxSolverBody *offsetSolverBodies,
+	uint32_t numRigidBodies,
+	float separateBias,
+	float timeStep)
+{
+	for(uint32_t i=0;i<numContactPairs;i++) {
+		PfxConstraintPair &pair = contactPairs[i];
+		if(!pfxGetActive(pair) || pfxGetNumConstraints(pair) == 0 ||
+			((pfxGetMotionMaskA(pair)&PFX_MOTION_MASK_STATIC) && (pfxGetMotionMaskB(pair)&PFX_MOTION_MASK_STATIC)) ) {
+			continue;
+		}
+		// Only active pairs with at least one constraint and at most one static side reach this point.
+		uint16_t iA = pfxGetRigidBodyIdA(pair);
+		uint16_t iB = pfxGetRigidBodyIdB(pair);
+
+		int id = pfxGetConstraintId1(pair);
+		btPersistentManifold& contact = offsetContactManifolds[id];
+		btConstraintRow* contactConstraintRows = &offsetContactConstraintRows[id*12];
+
+		TrbState &stateA = offsetRigStates[iA];
+		// Rows are laid out 12 per manifold and 3 per contact point (see the j*3 indexing below).
+		PfxSolverBody &solverBodyA = offsetSolverBodies[iA];
+
+		TrbState &stateB = offsetRigStates[iB];
+		// State and solver body of each side are looked up by the pair's rigid-body ids.
+		PfxSolverBody &solverBodyB = offsetSolverBodies[iB];
+		
+		float restitution = 0.5f * (solverBodyA.restitution + solverBodyB.restitution);
+		// Restitution is the arithmetic mean of both bodies' values; friction is the geometric mean.
+		
+		float friction = sqrtf(solverBodyA.friction * solverBodyB.friction);
+
+		for(int j=0;j<contact.getNumContacts();j++) {
+			btManifoldPoint& cp = contact.getContactPoint(j);
+
+			//pass the velocities without the applied (gravity and external) forces for restitution computation
+			const btRigidBody* rbA = btRigidBody::upcast(contact.getBody0());
+			const btRigidBody* rbB = btRigidBody::upcast(contact.getBody1());
+
+			btVector3 linVelA, linVelB;
+			btVector3 angVelA, angVelB;
+			
+			if (rbA && (rbA->getInvMass()>0.f))
+			{
+				linVelA = rbA->getLinearVelocity();
+				angVelA = rbA->getAngularVelocity();
+			} else
+			{
+				linVelA.setValue(0,0,0);
+				angVelA.setValue(0,0,0);
+			}
+
+			if (rbB && (rbB->getInvMass()>0.f))
+			{
+				linVelB = rbB->getLinearVelocity();
+				angVelB = rbB->getAngularVelocity();
+			} else
+			{
+				linVelB.setValue(0,0,0);
+				angVelB.setValue(0,0,0);
+			}
+
+			// The btVector3 velocities are converted with btReadVector3, like the other vector arguments,
+			// instead of being reinterpreted in place as vmVector3.
+			btSetupContactConstraint(
+				contactConstraintRows[j*3],
+				contactConstraintRows[j*3+1],
+				contactConstraintRows[j*3+2],
+				cp.getDistance(),
+				restitution,
+				friction,
+				btReadVector3(cp.m_normalWorldOnB),
+				btReadVector3(cp.m_localPointA),
+				btReadVector3(cp.m_localPointB),
+				stateA,
+				stateB,
+				solverBodyA,
+				solverBodyB,
+				btReadVector3(linVelA), btReadVector3(angVelA),
+				btReadVector3(linVelB), btReadVector3(angVelB),
+				separateBias,
+				timeStep
+				);
+		}
+
+		// The combined friction value is not stored back on the manifold.
+	}
+}
+
+
+void CustomWritebackContactConstraintsTask(
+	PfxConstraintPair *contactPairs,uint32_t numContactPairs,
+	btPersistentManifold*	offsetContactManifolds,
+	btConstraintRow* offsetContactConstraintRows,
+	TrbState *offsetRigStates,
+	PfxSolverBody *offsetSolverBodies,
+	uint32_t numRigidBodies,
+	float separateBias,
+	float timeStep)
+{
+	// Copy each row's accumulated impulse back onto its manifold contact point.
+	for(uint32_t pairIdx=0;pairIdx<numContactPairs;pairIdx++) {
+		PfxConstraintPair &current = contactPairs[pairIdx];
+		if(pfxGetActive(current) && pfxGetNumConstraints(current) != 0 &&
+			!((pfxGetMotionMaskA(current)&PFX_MOTION_MASK_STATIC) && (pfxGetMotionMaskB(current)&PFX_MOTION_MASK_STATIC)) ) {
+			const int manifoldId = pfxGetConstraintId1(current);
+			btPersistentManifold& manifold = offsetContactManifolds[manifoldId];
+			btConstraintRow* manifoldRows = &offsetContactConstraintRows[manifoldId*12];
+			for(int pt=0;pt<manifold.getNumContacts();pt++) {
+				btManifoldPoint& point = manifold.getContactPoint(pt);
+				btConstraintRow* pointRows = manifoldRows + pt*3;
+				point.m_appliedImpulse = pointRows[0].m_accumImpulse;
+				point.m_appliedImpulseLateral1 = pointRows[1].m_accumImpulse;
+				point.m_appliedImpulseLateral2 = pointRows[2].m_accumImpulse;
+			}
+		}
+	}
+}
+
+void	SolverThreadFunc(void* userPtr,void* lsMemory)
+{
+	btConstraintSolverIO* io = (btConstraintSolverIO*)(userPtr);//arg->io);
+	btCriticalSection* criticalsection = io->setupContactConstraints.criticalSection;
+	// Worker entry point: userPtr is cast to btConstraintSolverIO and dispatched on io->cmd; lsMemory is not used in this body.
+	// criticalsection is read from the setupContactConstraints member before cmd is inspected, whatever the command is.
+	//CustomCriticalSection *criticalsection = &io->m_cs;
+	switch(io->cmd) {
+
+		case PFX_CONSTRAINT_SOLVER_CMD_SOLVE_CONSTRAINTS:
+		CustomSolveConstraintsTaskParallel(
+			io->solveConstraints.contactParallelGroup,
+			io->solveConstraints.contactParallelBatches,
+			io->solveConstraints.contactPairs,
+			io->solveConstraints.numContactPairs,
+			io->solveConstraints.offsetContactManifolds,
+			io->solveConstraints.offsetContactConstraintRows,
+
+			io->solveConstraints.jointParallelGroup,
+			io->solveConstraints.jointParallelBatches,
+			io->solveConstraints.jointPairs,
+			io->solveConstraints.numJointPairs,
+			io->solveConstraints.offsetSolverConstraints,
+			io->solveConstraints.offsetRigStates1,
+			io->solveConstraints.offsetSolverBodies,
+			io->solveConstraints.numRigidBodies,
+			io->solveConstraints.iteration,
+
+			io->solveConstraints.taskId,
+			io->maxTasks1,
+			io->solveConstraints.barrier
+			);
+		break;
+
+		case PFX_CONSTRAINT_SOLVER_CMD_POST_SOLVER:
+			CustomPostSolverTask(	io->postSolver.states,io->postSolver.solverBodies,	io->postSolver.numRigidBodies);
+			break;
+
+		// The two cases below pull windows of pairs from the shared (start, batch) parameters until none are left.
+		case PFX_CONSTRAINT_SOLVER_CMD_SETUP_CONTACT_CONSTRAINTS:
+		{
+			bool empty = false;
+			while(!empty) {
+				int start,batch;
+				// Claim the current window while holding the lock.
+				criticalsection->lock();
+
+				start = (int)criticalsection->getSharedParam(0);
+				batch = (int)criticalsection->getSharedParam(1);
+
+				//PFX_PRINTF("taskId %d start %d num %d\n",arg->taskId,start,batch);
+
+				// Publish the window the next claimant will take; its size shrinks to whatever is left.
+				int nextStart = start + batch;
+				int rest = btMax((int)io->setupContactConstraints.numContactPairs1 - nextStart,0);
+				int nextBatch = (rest > batch)?batch:rest;
+
+				criticalsection->setSharedParam(0,nextStart);
+                criticalsection->setSharedParam(1,nextBatch);
+
+				criticalsection->unlock();
+				// A claimed batch of zero ends this worker's loop.
+				if(batch > 0) {
+					CustomSetupContactConstraintsTask(
+						io->setupContactConstraints.offsetContactPairs+start,batch,
+						io->setupContactConstraints.offsetContactManifolds,
+						io->setupContactConstraints.offsetContactConstraintRows,
+						io->setupContactConstraints.offsetRigStates,
+//						io->setupContactConstraints.offsetRigBodies,
+						io->setupContactConstraints.offsetSolverBodies,
+						io->setupContactConstraints.numRigidBodies,
+						io->setupContactConstraints.separateBias,
+						io->setupContactConstraints.timeStep);
+				}
+				else {
+					empty = true;
+				}
+			}
+		}
+		break;
+
+		case PFX_CONSTRAINT_SOLVER_CMD_WRITEBACK_APPLIED_IMPULSES_CONTACT_CONSTRAINTS:
+		{
+			bool empty = false;
+			while(!empty) {
+				int start,batch;
+				// Same claim protocol as the setup case; the arguments are again read from io->setupContactConstraints.
+				criticalsection->lock();
+
+				start = (int)criticalsection->getSharedParam(0);
+				batch = (int)criticalsection->getSharedParam(1);
+
+				//PFX_PRINTF("taskId %d start %d num %d\n",arg->taskId,start,batch);
+
+				// Publish the window the next claimant will take; its size shrinks to whatever is left.
+				int nextStart = start + batch;
+				int rest = btMax((int)io->setupContactConstraints.numContactPairs1 - nextStart,0);
+				int nextBatch = (rest > batch)?batch:rest;
+
+				criticalsection->setSharedParam(0,nextStart);
+                criticalsection->setSharedParam(1,nextBatch);
+
+				criticalsection->unlock();
+				// A claimed batch of zero ends this worker's loop.
+				if(batch > 0) {
+					CustomWritebackContactConstraintsTask(
+						io->setupContactConstraints.offsetContactPairs+start,batch,
+						io->setupContactConstraints.offsetContactManifolds,
+						io->setupContactConstraints.offsetContactConstraintRows,
+						io->setupContactConstraints.offsetRigStates,
+//						io->setupContactConstraints.offsetRigBodies,
+						io->setupContactConstraints.offsetSolverBodies,
+						io->setupContactConstraints.numRigidBodies,
+						io->setupContactConstraints.separateBias,
+						io->setupContactConstraints.timeStep);
+				}
+				else {
+					empty = true;
+				}
+			}
+		}
+		break;
+
+		default:
+			{
+				btAssert(0); // any other command value is treated as a programming error
+			}
+	}
+
+}
+
+
+void CustomSetupContactConstraintsNew(
+	PfxConstraintPair *contactPairs1,uint32_t numContactPairs,
+	btPersistentManifold *offsetContactManifolds,
+	btConstraintRow* offsetContactConstraintRows,
+	TrbState *offsetRigStates,
+	PfxSolverBody *offsetSolverBodies,
+	uint32_t numRigidBodies,
+	float separationBias,
+	float timeStep,
+	class btThreadSupportInterface* threadSupport,
+	btCriticalSection* criticalSection,
+	btConstraintSolverIO *io ,
+	uint8_t cmd
+	)
+{
+	int maxTasks = threadSupport->getNumTasks();
+	// Pairs are handed out in windows of about numContactPairs/(4*maxTasks), capped at 64 per window.
+	int div = (int)maxTasks * 4;
+	int batch = ((int)numContactPairs + div - 1) / div;
+#ifdef __PPU__
+		BulletPE2ConstraintSolverSpursSupport* spursThread = (BulletPE2ConstraintSolverSpursSupport*) threadSupport;
+#endif
+	if (criticalSection)
+	{
+		criticalSection->setSharedParam(0,0);
+		criticalSection->setSharedParam(1,btMin(batch,64)); // batched number
+	} else
+	{
+#ifdef __PPU__
+		spursThread->setSharedParam(0,0);
+		spursThread->setSharedParam(1,btMin(batch,64)); // batched number
+#endif //__PPU__
+	}
+
+	for(int t=0;t<maxTasks;t++) {
+		io[t].cmd = cmd;
+		io[t].setupContactConstraints.offsetContactPairs = contactPairs1;
+		io[t].setupContactConstraints.numContactPairs1 = numContactPairs;
+		io[t].setupContactConstraints.offsetRigStates = offsetRigStates;
+		io[t].setupContactConstraints.offsetContactManifolds = offsetContactManifolds;	
+		io[t].setupContactConstraints.offsetContactConstraintRows = offsetContactConstraintRows;
+		io[t].setupContactConstraints.offsetSolverBodies = offsetSolverBodies;
+		io[t].setupContactConstraints.numRigidBodies = numRigidBodies;
+		io[t].setupContactConstraints.separateBias = separationBias;
+		io[t].setupContactConstraints.timeStep = timeStep;
+		io[t].setupContactConstraints.criticalSection = criticalSection;
+		io[t].maxTasks1 = maxTasks;
+#ifdef __PPU__
+		io[t].barrierAddr2 = (unsigned int)spursThread->getBarrierAddress();
+		io[t].criticalsectionAddr2 = (unsigned int)spursThread->getCriticalSectionAddress();
+#endif
+	
+		// With SEQUENTIAL_SETUP defined the setup task runs inline with its full argument list; otherwise io[t] goes to task t.
+//#define SEQUENTIAL_SETUP
+#ifdef SEQUENTIAL_SETUP
+		CustomSetupContactConstraintsTask(contactPairs1,numContactPairs,offsetContactManifolds,offsetContactConstraintRows,offsetRigStates,offsetSolverBodies,numRigidBodies,separationBias,timeStep);
+#else
+		threadSupport->sendRequest(1,(ppu_address_t)&io[t],t);
+#endif
+
+	}
+#ifndef SEQUENTIAL_SETUP
+	unsigned int arg0,arg1;
+	for(int t=0;t<maxTasks;t++) {
+		arg0 = t;
+		threadSupport->waitForResponse(&arg0,&arg1);
+	}
+#endif //SEQUENTIAL_SETUP
+	// Every request sent above has been waited for by the time this function returns.
+}
+
+
+void CustomSplitConstraints(
+	PfxConstraintPair *pairs,uint32_t numPairs,
+	PfxParallelGroup &group,PfxParallelBatch *batches,
+	uint32_t numTasks,
+	uint32_t numRigidBodies,
+	void *poolBuff,
+	uint32_t poolBytes
+	)
+{
+	HeapManager pool((unsigned char*)poolBuff,poolBytes);
+	// Splits the pairs into phases and batches so that, within one phase, no dynamic body is shared between batches.
+	// bodyTable: one byte per rigid body; 0xff means unassigned in the current phase, otherwise the owning batchId.
+	int bufSize = sizeof(uint8_t)*numRigidBodies;
+	bufSize = ((bufSize+127)>>7)<<7; // 128 bytes alignment
+	uint8_t *bodyTable = (uint8_t*)pool.allocate(bufSize,HeapManager::ALIGN128);
+
+	// pairTable: one bit per pair, set once the pair has been assigned to a batch or skipped.
+	uint32_t *pairTable;
+	size_t allocSize = sizeof(uint32_t)*((numPairs+31)/32);
+	pairTable = (uint32_t*)pool.allocate(allocSize);
+	memset(pairTable,0,allocSize);
+
+	// Target pairs per batch: numPairs/(2*numTasks), clamped to [PFX_MIN_SOLVER_PAIRS, PFX_MAX_SOLVER_PAIRS].
+	uint32_t targetCount = btMax(uint32_t(PFX_MIN_SOLVER_PAIRS),btMin(numPairs / (numTasks*2),uint32_t(PFX_MAX_SOLVER_PAIRS)));
+	uint32_t startIndex = 0;
+	
+	uint32_t phaseId;
+	uint32_t batchId;
+	uint32_t totalCount=0;
+	// NOTE(review): batchId is stored in a uint8_t with 0xff as sentinel - assumes PFX_MAX_SOLVER_BATCHES stays below 255.
+	uint32_t maxBatches = btMin(numTasks,uint32_t(PFX_MAX_SOLVER_BATCHES));
+	
+	for(phaseId=0;phaseId<PFX_MAX_SOLVER_PHASES&&totalCount<numPairs;phaseId++) {
+		bool startIndexCheck = true;
+		
+		group.numBatches[phaseId] = 0;
+		
+		uint32_t i = startIndex;
+		
+		// Mark every body as unassigned for the new phase.
+		memset(bodyTable,0xff,bufSize);
+		
+		for(batchId=0;i<numPairs&&totalCount<numPairs&&batchId<maxBatches;batchId++) {
+			uint32_t pairCount=0;
+			
+			PfxParallelBatch &batch = batches[phaseId*PFX_MAX_SOLVER_BATCHES+batchId];
+			uint32_t pairId = 0;
+			
+			for(;i<numPairs&&pairCount<targetCount;i++) {
+				uint32_t idxP = i>>5;
+				uint32_t maskP = uint32_t(1) << (i & 31); // unsigned shift: bit 31 must not pass through a signed long
+				
+				//pair is already assigned to a phase/batch
+				if(pairTable[idxP] & maskP) {
+					continue;
+				}
+				
+				uint32_t idxA = pfxGetRigidBodyIdA(pairs[i]);
+				uint32_t idxB = pfxGetRigidBodyIdB(pairs[i]);
+
+				// Inactive pairs, pairs without constraints and static-vs-static pairs are marked done without being batched.
+				if(!pfxGetActive(pairs[i]) || pfxGetNumConstraints(pairs[i]) == 0 ||
+					((pfxGetMotionMaskA(pairs[i])&PFX_MOTION_MASK_STATIC) && (pfxGetMotionMaskB(pairs[i])&PFX_MOTION_MASK_STATIC)) ) {
+					if(startIndexCheck) 
+						startIndex++;
+					//assign pair -> skip it because it has no constraints
+					pairTable[idxP] |= maskP;
+					totalCount++;
+					continue;
+				}
+				
+				// Dependency check: defer the pair if either body already belongs to a different batch of this phase.
+				if( (bodyTable[idxA] != batchId && bodyTable[idxA] != 0xff) || 
+					(bodyTable[idxB] != batchId && bodyTable[idxB] != 0xff) ) {
+					startIndexCheck = false;
+					//bodies of the pair are already assigned to another batch within this phase
+					continue;
+				}
+				
+				// Record this batch as the owner of each dynamic body of the pair.
+				if(pfxGetMotionMaskA(pairs[i])&PFX_MOTION_MASK_DYNAMIC) 
+						bodyTable[idxA] = batchId;
+				if(pfxGetMotionMaskB(pairs[i])&PFX_MOTION_MASK_DYNAMIC) 
+						bodyTable[idxB] = batchId;
+				
+				if(startIndexCheck) 
+					startIndex++;
+				
+				pairTable[idxP] |= maskP;
+				//add the pair 'i' to the current batch
+				batch.pairIndices[pairId++] = i;
+				pairCount++;
+			}
+
+			group.numPairs[phaseId*PFX_MAX_SOLVER_BATCHES+batchId] = (uint16_t)pairId;
+			totalCount += pairCount;
+		}
+
+		group.numBatches[phaseId] = batchId;
+	}
+
+	group.numPhases = phaseId;
+
+	pool.clear();
+}
+
+
+
+void CustomSolveConstraintsParallel(
+	PfxConstraintPair *contactPairs,uint32_t numContactPairs,
+	
+	PfxConstraintPair *jointPairs,uint32_t numJointPairs,
+	btPersistentManifold* offsetContactManifolds,
+	btConstraintRow* offsetContactConstraintRows,
+	btSolverConstraint* offsetSolverConstraints,
+	TrbState *offsetRigStates,
+	PfxSolverBody *offsetSolverBodies,
+	uint32_t numRigidBodies,
+	struct btConstraintSolverIO* io,
+	class btThreadSupportInterface* threadSupport,
+	int iteration,
+	void* poolBuf,
+	int poolBytes,
+	class btBarrier* barrier)
+	{
+	// Splits contact and joint pairs into batches, sends one SOLVE_CONSTRAINTS request per task, then one POST_SOLVER request per task; 'io' must hold one record per task.
+	int maxTasks = threadSupport->getNumTasks();
+//	config.taskManager->setTaskEntry(PFX_SOLVER_ENTRY);
+
+	HeapManager pool((unsigned char*)poolBuf,poolBytes);
+
+	{
+		PfxParallelGroup *cgroup = (PfxParallelGroup*)pool.allocate(sizeof(PfxParallelGroup));
+		PfxParallelBatch *cbatches = (PfxParallelBatch*)pool.allocate(sizeof(PfxParallelBatch)*(PFX_MAX_SOLVER_PHASES*PFX_MAX_SOLVER_BATCHES),128);
+		PfxParallelGroup *jgroup = (PfxParallelGroup*)pool.allocate(sizeof(PfxParallelGroup));
+		PfxParallelBatch *jbatches = (PfxParallelBatch*)pool.allocate(sizeof(PfxParallelBatch)*(PFX_MAX_SOLVER_PHASES*PFX_MAX_SOLVER_BATCHES),128);
+		// the remainder of the pool (total minus the group/batch sizes above and 128 extra bytes per set) becomes scratch space for the splitter
+		uint32_t tmpBytes = poolBytes - 2 * (sizeof(PfxParallelGroup) + sizeof(PfxParallelBatch)*(PFX_MAX_SOLVER_PHASES*PFX_MAX_SOLVER_BATCHES) + 128);
+		void *tmpBuff = pool.allocate(tmpBytes);
+		
+		{
+			BT_PROFILE("CustomSplitConstraints");
+			CustomSplitConstraints(contactPairs,numContactPairs,*cgroup,cbatches,maxTasks,numRigidBodies,tmpBuff,tmpBytes);
+			CustomSplitConstraints(jointPairs,numJointPairs,*jgroup,jbatches,maxTasks,numRigidBodies,tmpBuff,tmpBytes); // the same scratch buffer is reused for the joint pairs
+		}
+
+		{
+			BT_PROFILE("PFX_CONSTRAINT_SOLVER_CMD_SOLVE_CONSTRAINTS");
+//#define SOLVE_SEQUENTIAL
+#ifdef SOLVE_SEQUENTIAL
+		CustomSolveConstraintsTask(
+			io->solveConstraints.contactParallelGroup,
+			io->solveConstraints.contactParallelBatches,
+			io->solveConstraints.contactPairs,
+			io->solveConstraints.numContactPairs,
+			io->solveConstraints.offsetContactManifolds,
+
+			io->solveConstraints.jointParallelGroup,
+			io->solveConstraints.jointParallelBatches,
+			io->solveConstraints.jointPairs,
+			io->solveConstraints.numJointPairs,
+			io->solveConstraints.offsetSolverConstraints,
+
+			io->solveConstraints.offsetRigStates1,
+			io->solveConstraints.offsetSolverBodies,
+			io->solveConstraints.numRigidBodies,
+			io->solveConstraints.iteration,0,1,0);//arg->taskId,1,0);//,arg->maxTasks,arg->barrier);
+#else
+		for(int t=0;t<maxTasks;t++) { // every task receives the same solve description; only taskId differs
+			io[t].cmd = PFX_CONSTRAINT_SOLVER_CMD_SOLVE_CONSTRAINTS;
+			io[t].solveConstraints.contactParallelGroup = cgroup;
+			io[t].solveConstraints.contactParallelBatches = cbatches;
+			io[t].solveConstraints.contactPairs = contactPairs;
+			io[t].solveConstraints.numContactPairs = numContactPairs;
+			io[t].solveConstraints.offsetContactManifolds = offsetContactManifolds;
+			io[t].solveConstraints.offsetContactConstraintRows = offsetContactConstraintRows;
+			io[t].solveConstraints.jointParallelGroup = jgroup;
+			io[t].solveConstraints.jointParallelBatches = jbatches;
+			io[t].solveConstraints.jointPairs = jointPairs;
+			io[t].solveConstraints.numJointPairs = numJointPairs;
+			io[t].solveConstraints.offsetSolverConstraints = offsetSolverConstraints;
+			io[t].solveConstraints.offsetRigStates1 = offsetRigStates;
+			io[t].solveConstraints.offsetSolverBodies = offsetSolverBodies;
+			io[t].solveConstraints.numRigidBodies = numRigidBodies;
+			io[t].solveConstraints.iteration = iteration;
+			io[t].solveConstraints.taskId = t;
+			io[t].solveConstraints.barrier = barrier;
+
+		io[t].maxTasks1 = maxTasks;
+#ifdef __PPU__
+		BulletPE2ConstraintSolverSpursSupport* spursThread = (BulletPE2ConstraintSolverSpursSupport*) threadSupport;
+		io[t].barrierAddr2 = (unsigned int) spursThread->getBarrierAddress();
+		io[t].criticalsectionAddr2 = (unsigned int)spursThread->getCriticalSectionAddress();
+#endif
+
+			threadSupport->sendRequest(1,(ppu_address_t)&io[t],t);
+		}
+		// collect one response per request that was sent
+		unsigned int arg0,arg1;
+		for(int t=0;t<maxTasks;t++) {
+			arg0 = t;
+			threadSupport->waitForResponse(&arg0,&arg1);
+		}
+#endif
+		}
+		pool.clear();
+	}
+
+	{
+			BT_PROFILE("PFX_CONSTRAINT_SOLVER_CMD_POST_SOLVER");
+		int batch = ((int)numRigidBodies + maxTasks - 1) / maxTasks; // bodies per task, rounded up
+		int rest = (int)numRigidBodies;
+		int start = 0;
+		// each task gets a contiguous slice of at most 'batch' bodies; later tasks may receive zero
+		for(int t=0;t<maxTasks;t++) {
+			int num = (rest - batch ) > 0 ? batch : rest;
+			io[t].cmd = PFX_CONSTRAINT_SOLVER_CMD_POST_SOLVER;
+			io[t].postSolver.states = offsetRigStates + start;
+			io[t].postSolver.solverBodies = offsetSolverBodies + start;
+			io[t].postSolver.numRigidBodies = (uint32_t)num;
+		io[t].maxTasks1 = maxTasks;
+#ifdef __PPU__
+		BulletPE2ConstraintSolverSpursSupport* spursThread = (BulletPE2ConstraintSolverSpursSupport*) threadSupport;
+		io[t].barrierAddr2 = (unsigned int)spursThread->getBarrierAddress();
+		io[t].criticalsectionAddr2 = (unsigned int)spursThread->getCriticalSectionAddress();
+#endif
+
+#ifdef SOLVE_SEQUENTIAL
+			CustomPostSolverTask(	io[t].postSolver.states,io[t].postSolver.solverBodies,	io[t].postSolver.numRigidBodies);
+#else
+			threadSupport->sendRequest(1,(ppu_address_t)&io[t],t);
+#endif
+			rest -= num;
+			start += num;
+		}
+
+		unsigned int arg0,arg1;
+		for(int t=0;t<maxTasks;t++) {
+#ifndef SOLVE_SEQUENTIAL
+			arg0 = t;
+			threadSupport->waitForResponse(&arg0,&arg1);
+#endif
+		}
+	}
+
+}
+
+
+
+void BPE_customConstraintSolverSequentialNew(unsigned int new_num, PfxBroadphasePair *new_pairs1 ,
+									btPersistentManifold* offsetContactManifolds,
+									PfxConstraintRow*	offsetContactConstraintRows,
+									  TrbState* states,int numRigidBodies, 
+									  struct PfxSolverBody* solverBodies, 
+									  PfxConstraintPair* jointPairs, unsigned int numJoints,
+									  btSolverConstraint* offsetSolverConstraints,
+									  float separateBias,
+									  float timeStep,
+									  int iteration,
+									  btThreadSupportInterface* solverThreadSupport,
+									  btCriticalSection* criticalSection,
+									  struct btConstraintSolverIO* solverIO,
+									  btBarrier* barrier
+									  )
+{
+	// Runs the three solver stages in order: contact constraint setup, constraint solve, and write-back of applied impulses.
+	{
+		BT_PROFILE("pfxSetupConstraints");
+
+		for(uint32_t i=0;i<numJoints;i++) {
+			// refresh the motion masks of each joint pair from the current body states
+			PfxConstraintPair &pair = jointPairs[i];
+			int idA = pfxGetRigidBodyIdA(pair);
+
+			if (idA != 65535) // NOTE(review): 65535 is presumably the 16-bit encoding of "no body" (-1) - confirm
+			{
+				pfxSetMotionMaskA(pair,states[pfxGetRigidBodyIdA(pair)].getMotionMask());
+			}
+			else
+			{
+				pfxSetMotionMaskA(pair,PFX_MOTION_MASK_STATIC);
+			}
+			int idB = pfxGetRigidBodyIdB(pair);
+			if (idB!= 65535)
+			{
+				pfxSetMotionMaskB(pair,states[pfxGetRigidBodyIdB(pair)].getMotionMask());
+			} else
+			{
+				pfxSetMotionMaskB(pair,PFX_MOTION_MASK_STATIC);
+			}
+		}
+
+//		CustomSetupJointConstraintsSeq(			jointPairs,numJoints,joints,			states,			solverBodies,			numRigidBodies,			timeStep);
+
+#ifdef SEQUENTIAL_SETUP
+		CustomSetupContactConstraintsSeqNew(
+			(PfxConstraintPair*)new_pairs1,new_num,contacts,
+			states,
+			solverBodies,
+			numRigidBodies,
+			separateBias,
+			timeStep);
+#else
+
+		CustomSetupContactConstraintsNew(
+			(PfxConstraintPair*)new_pairs1,new_num,
+			offsetContactManifolds,
+			offsetContactConstraintRows,
+			states,
+			solverBodies,
+			numRigidBodies,
+			separateBias,
+			timeStep,
+			solverThreadSupport,
+			criticalSection,solverIO,
+			PFX_CONSTRAINT_SOLVER_CMD_SETUP_CONTACT_CONSTRAINTS
+			);
+
+#endif //SEQUENTIAL_SETUP
+
+	}
+	{
+		BT_PROFILE("pfxSolveConstraints");
+
+//#define SEQUENTIAL
+#ifdef SEQUENTIAL
+		CustomSolveConstraintsSeq(
+			(PfxConstraintPair*)new_pairs1,new_num,contacts,
+			jointPairs,numJoints,
+			states,
+			solverBodies,
+			numRigidBodies,
+			separateBias,
+			timeStep,
+			iteration);
+#else //SEQUENTIAL
+		CustomSolveConstraintsParallel(
+			(PfxConstraintPair*)new_pairs1,new_num,
+			jointPairs,numJoints,
+			offsetContactManifolds,
+			offsetContactConstraintRows,
+			offsetSolverConstraints,
+			states,
+			solverBodies,
+			numRigidBodies,
+			solverIO, solverThreadSupport,
+			iteration,
+			tmp_buff, // scratch pool and its size are defined outside this function
+			TMP_BUFF_BYTES,
+			barrier
+			);
+
+#endif //SEQUENTIAL
+	}
+
+	{
+		BT_PROFILE("writeback appliedImpulses");
+		// same dispatch routine as the setup stage, issued with the write-back command
+		CustomSetupContactConstraintsNew(
+			(PfxConstraintPair*)new_pairs1,new_num,
+			offsetContactManifolds,
+			offsetContactConstraintRows,
+			states,
+			solverBodies,
+			numRigidBodies,
+			separateBias,
+			timeStep,
+			solverThreadSupport,
+			criticalSection,solverIO,
+			PFX_CONSTRAINT_SOLVER_CMD_WRITEBACK_APPLIED_IMPULSES_CONTACT_CONSTRAINTS
+			);
+	}
+
+}
+
+
+struct	btParallelSolverMemoryCache // scratch arrays kept between solveGroup calls; resized there on every call
+{
+	btAlignedObjectArray<TrbState>	m_mystates; // one state per rigid body
+	btAlignedObjectArray<PfxSolverBody>  m_mysolverbodies; // one solver body per rigid body
+	btAlignedObjectArray<PfxBroadphasePair> m_mypairs; // contact pairs, sized to the manifold count
+	btAlignedObjectArray<PfxConstraintPair> m_jointPairs; // joint pairs, sized to the constraint count
+	btAlignedObjectArray<PfxConstraintRow> m_constraintRows; // contact rows, 12 per manifold
+	
+};
+
+
+btConstraintSolverIO* createSolverIO(int numThreads) // allocates one IO record per thread
+{
+	return new btConstraintSolverIO[numThreads]; // array form of new: the result must be released with delete[]
+}
+
+btParallelConstraintSolver::btParallelConstraintSolver(btThreadSupportInterface* solverThreadSupport)
+{
+	// Stores the caller-supplied thread support and creates the per-task IO array, barrier, critical section and memory cache.
+	m_solverThreadSupport = solverThreadSupport;//createSolverThreadSupport(maxNumThreads);
+	m_solverIO = createSolverIO(m_solverThreadSupport->getNumTasks());
+
+	m_barrier = m_solverThreadSupport->createBarrier();
+	m_criticalSection = m_solverThreadSupport->createCriticalSection();
+
+	m_memoryCache = new btParallelSolverMemoryCache();
+}
+	
+btParallelConstraintSolver::~btParallelConstraintSolver() // releases everything the constructor created; the thread support object itself is not deleted here
+{
+	delete m_memoryCache;
+	delete[] m_solverIO; // allocated with new[] in createSolverIO, so it must be freed with delete[]
+	m_solverThreadSupport->deleteBarrier(m_barrier); // barrier and critical section are handed back to the thread support that created them
+	m_solverThreadSupport->deleteCriticalSection(m_criticalSection);
+}
+
+
+
+btScalar btParallelConstraintSolver::solveGroup(btCollisionObject** bodies1,int numRigidBodies,btPersistentManifold** manifoldPtr,int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal, btIDebugDraw* debugDrawer, btStackAlloc* stackAlloc,btDispatcher* dispatcher)
+{
+	// Converts bodies, manifolds and constraints into the Pfx solver representation, runs the parallel solver, and copies the resulting velocities back. Always returns 0.
+/*	int sz = sizeof(PfxSolverBody);
+	int sz2 = sizeof(vmVector3);
+	int sz3 = sizeof(vmMatrix3);
+	int sz4 = sizeof(vmQuat);
+	int sz5 = sizeof(btConstraintRow);
+	int sz6 = sizeof(btSolverConstraint);
+	int sz7 = sizeof(TrbState);
+*/
+	// base address of the dispatcher's manifold pool; contact ids below are computed as offsets from it
+	btPersistentManifold* offsetContactManifolds= (btPersistentManifold*) dispatcher->getInternalManifoldPool()->getPoolAddress();
+
+		
+	m_memoryCache->m_mysolverbodies.resize(numRigidBodies);
+	m_memoryCache->m_mystates.resize(numRigidBodies);
+
+	{
+			BT_PROFILE("create states and solver bodies");
+	for (int i=0;i<numRigidBodies;i++)
+	{
+		btCollisionObject* obj = bodies1[i];
+		obj->setCompanionId(i); // the companion id is the body's index into the state/solver-body arrays
+
+		PfxSolverBody& solverBody = m_memoryCache->m_mysolverbodies[i];
+		btRigidBody* rb = btRigidBody::upcast(obj);
+		TrbState& state = m_memoryCache->m_mystates[i];
+	
+		state.reset();
+		const btQuaternion& orgOri = obj->getWorldTransform().getRotation();
+		vmQuat orn(orgOri.getX(),orgOri.getY(),orgOri.getZ(),orgOri.getW());
+		state.setPosition(getVmVector3(obj->getWorldTransform().getOrigin()));
+		state.setOrientation(orn);
+		state.setPosition(state.getPosition()); // NOTE(review): re-sets the position to its own value; looks redundant
+		state.setRigidBodyId(i);
+		state.setAngularDamping(0);
+		state.setLinearDamping(0);
+		
+		
+		solverBody.mOrientation = state.getOrientation();
+		solverBody.mDeltaLinearVelocity = vmVector3(0.0f);
+		solverBody.mDeltaAngularVelocity = vmVector3(0.0f);
+		solverBody.friction = obj->getFriction();
+		solverBody.restitution = obj->getRestitution();
+		
+		state.resetSleepCount();
+		
+		//if(state.getMotionMask()&PFX_MOTION_MASK_DYNAMIC) {
+		if (rb && (rb->getInvMass()>0.f)) // rigid body with non-zero inverse mass: add force/torque contributions to the velocities and build the world-space inverse inertia
+		{
+			btVector3 angVelPlusForces = rb->getAngularVelocity()+rb->getTotalTorque()*rb->getInvInertiaTensorWorld()*infoGlobal.m_timeStep;
+			btVector3 linVelPlusForces = rb->getLinearVelocity()+rb->getTotalForce()*rb->getInvMass()*infoGlobal.m_timeStep;
+
+			state.setAngularVelocity((const vmVector3&)angVelPlusForces); // reinterprets btVector3 as vmVector3 - NOTE(review): relies on both types sharing a memory layout
+			state.setLinearVelocity((const vmVector3&) linVelPlusForces);
+
+			state.setMotionType(PfxMotionTypeActive);
+			vmMatrix3 ori(solverBody.mOrientation);
+			vmMatrix3 localInvInertia = vmMatrix3::identity();
+			localInvInertia.setCol(0,vmVector3(rb->getInvInertiaDiagLocal().getX(),0,0));
+			localInvInertia.setCol(1,vmVector3(0, rb->getInvInertiaDiagLocal().getY(),0));
+			localInvInertia.setCol(2,vmVector3(0,0, rb->getInvInertiaDiagLocal().getZ()));
+
+			solverBody.mMassInv = rb->getInvMass();
+			solverBody.mInertiaInv = ori * localInvInertia * transpose(ori);
+		} else
+		{ // not a rigid body, or zero inverse mass: zero velocity, fixed motion type, zero inverse mass/inertia
+			state.setAngularVelocity(vmVector3(0));
+			state.setLinearVelocity(vmVector3(0));
+		
+			state.setMotionType(PfxMotionTypeFixed);
+			m_memoryCache->m_mysolverbodies[i].mMassInv = 0.f;
+			m_memoryCache->m_mysolverbodies[i].mInertiaInv = vmMatrix3(0.0f);
+		}
+
+	}
+	}
+
+
+
+	int totalPoints = 0;
+#ifndef USE_C_ARRAYS
+	m_memoryCache->m_mypairs.resize(numManifolds);
+	//4 points per manifold and 3 rows per point makes 12 rows per manifold
+	m_memoryCache->m_constraintRows.resize(numManifolds*12);
+	m_memoryCache->m_jointPairs.resize(numConstraints);
+#endif//USE_C_ARRAYS
+
+	int actualNumManifolds= 0; // number of pairs actually written to m_mypairs
+	{
+		BT_PROFILE("convert manifolds");
+		for (int i1=0;i1<numManifolds;i1++)
+		{
+			if (manifoldPtr[i1]->getNumContacts()>0)
+			{
+				btPersistentManifold* m = manifoldPtr[i1];
+				btCollisionObject* obA = (btCollisionObject*)m->getBody0();
+				btCollisionObject* obB = (btCollisionObject*)m->getBody1();
+				bool obAisActive = !obA->isStaticOrKinematicObject() && obA->isActive();
+				bool obBisActive = !obB->isStaticOrKinematicObject() && obB->isActive();
+
+				if (!obAisActive && !obBisActive)
+					continue; // neither body is an active non-static object: no pair is created
+
+
+				//int contactId = i1;//actualNumManifolds;
+				
+				PfxBroadphasePair& pair = m_memoryCache->m_mypairs[actualNumManifolds];
+				//init those
+		//		float compFric = obA->getFriction()*obB->getFriction();//@todo
+				int idA = obA->getCompanionId();
+				int idB = obB->getCompanionId();
+				
+				m->m_companionIdA = idA;
+				m->m_companionIdB = idB;
+				
+				
+			//	if ((mysolverbodies[idA].mMassInv!=0)&&(mysolverbodies[idB].mMassInv!=0))
+			//		continue;
+				int numPosPoints=0;
+				for (int p=0;p<m->getNumContacts();p++)
+				{
+					//btManifoldPoint& pt = m->getContactPoint(p);
+					//float dist = pt.getDistance();
+					//if (dist<0.001)
+						numPosPoints++; // every contact point is counted (the distance filter above is commented out)
+				}
+
+				
+				totalPoints+=numPosPoints;
+				pfxSetRigidBodyIdA(pair,idA);
+				pfxSetRigidBodyIdB(pair,idB);
+				pfxSetMotionMaskA(pair,m_memoryCache->m_mystates[idA].getMotionMask());
+				pfxSetMotionMaskB(pair,m_memoryCache->m_mystates[idB].getMotionMask());
+				pfxSetActive(pair,numPosPoints>0);
+				
+				pfxSetBroadphaseFlag(pair,0);
+				int contactId = m-offsetContactManifolds; // index of this manifold relative to the pool base address
+				//likely the contact pool is not contiguous, make sure to allocate large enough contact pool
+				btAssert(contactId>=0);
+				btAssert(contactId<dispatcher->getInternalManifoldPool()->getMaxCount());
+				
+				pfxSetContactId(pair,contactId);
+				pfxSetNumConstraints(pair,numPosPoints);//manifoldPtr[i]->getNumContacts());
+				actualNumManifolds++;
+			}
+
+		}
+	}
+
+	PfxConstraintPair* jointPairs=0;
+	jointPairs = numConstraints? &m_memoryCache->m_jointPairs[0]:0;
+	int actualNumJoints=0; // number of joint pairs actually written (constraints with at least one row)
+
+
+	btSolverConstraint* offsetSolverConstraints = 0;
+
+	//if (1)
+	{
+		
+		{
+			BT_PROFILE("convert constraints");
+
+			int totalNumRows = 0;
+			int i;
+			
+			m_tmpConstraintSizesPool.resize(numConstraints);
+			//calculate the total number of constraint rows
+			for (i=0;i<numConstraints;i++)
+			{
+				btTypedConstraint::btConstraintInfo1& info1 = m_tmpConstraintSizesPool[i];
+				constraints[i]->getInfo1(&info1);
+				totalNumRows += info1.m_numConstraintRows;
+			}
+			m_tmpSolverNonContactConstraintPool.resize(totalNumRows);
+			offsetSolverConstraints =totalNumRows? &m_tmpSolverNonContactConstraintPool[0]:0;
+
+			
+			///setup the btSolverConstraints
+			int currentRow = 0;
+
+			for (i=0;i<numConstraints;i++)
+			{
+				const btTypedConstraint::btConstraintInfo1& info1 = m_tmpConstraintSizesPool[i];
+				
+				if (info1.m_numConstraintRows)
+				{
+					btAssert(currentRow<totalNumRows);
+					btTypedConstraint* constraint = constraints[i];
+					btSolverConstraint* currentConstraintRow = &m_tmpSolverNonContactConstraintPool[currentRow];
+
+					btRigidBody& rbA = constraint->getRigidBodyA();
+					btRigidBody& rbB = constraint->getRigidBodyB();
+			
+					int idA = constraint->getRigidBodyA().getCompanionId();
+					int idB = constraint->getRigidBodyB().getCompanionId();
+			
+					
+					int j;
+					for ( j=0;j<info1.m_numConstraintRows;j++)
+					{
+						memset(&currentConstraintRow[j],0,sizeof(btSolverConstraint));
+						currentConstraintRow[j].m_lowerLimit = -FLT_MAX;
+						currentConstraintRow[j].m_upperLimit = FLT_MAX;
+						currentConstraintRow[j].m_appliedImpulse = 0.f;
+						currentConstraintRow[j].m_appliedPushImpulse = 0.f;
+						currentConstraintRow[j].m_solverBodyIdA = idA;
+						currentConstraintRow[j].m_solverBodyIdB = idB;
+					}
+
+				
+
+
+
+					btTypedConstraint::btConstraintInfo2 info2; // points getInfo2 at the fields of the first row; rowskip gives the stride to the following rows
+					info2.fps = 1.f/infoGlobal.m_timeStep;
+					info2.erp = infoGlobal.m_erp;
+					info2.m_J1linearAxis = currentConstraintRow->m_contactNormal;
+					info2.m_J1angularAxis = currentConstraintRow->m_relpos1CrossNormal;
+					info2.m_J2linearAxis = 0;
+					info2.m_J2angularAxis = currentConstraintRow->m_relpos2CrossNormal;
+					info2.rowskip = sizeof(btSolverConstraint)/sizeof(btScalar);//check this
+					///the size of btSolverConstraint needs be a multiple of btScalar
+					btAssert(info2.rowskip*sizeof(btScalar)== sizeof(btSolverConstraint));
+					info2.m_constraintError = &currentConstraintRow->m_rhs;
+					currentConstraintRow->m_cfm = infoGlobal.m_globalCfm;
+					info2.cfm = &currentConstraintRow->m_cfm;
+					info2.m_lowerLimit = &currentConstraintRow->m_lowerLimit;
+					info2.m_upperLimit = &currentConstraintRow->m_upperLimit;
+					info2.m_numIterations = infoGlobal.m_numIterations;
+					constraints[i]->getInfo2(&info2);
+
+					
+				
+
+					///finalize the constraint setup
+					for ( j=0;j<info1.m_numConstraintRows;j++)
+					{
+						btSolverConstraint& solverConstraint = currentConstraintRow[j];
+						solverConstraint.m_originalContactPoint = constraint;
+
+						solverConstraint.m_solverBodyIdA = idA;
+						solverConstraint.m_solverBodyIdB = idB;
+
+						{
+							const btVector3& ftorqueAxis1 = solverConstraint.m_relpos1CrossNormal;
+							solverConstraint.m_angularComponentA = constraint->getRigidBodyA().getInvInertiaTensorWorld()*ftorqueAxis1*constraint->getRigidBodyA().getAngularFactor();
+						}
+						{
+							const btVector3& ftorqueAxis2 = solverConstraint.m_relpos2CrossNormal;
+							solverConstraint.m_angularComponentB = constraint->getRigidBodyB().getInvInertiaTensorWorld()*ftorqueAxis2*constraint->getRigidBodyB().getAngularFactor();
+						}
+
+						{
+							btVector3 iMJlA = solverConstraint.m_contactNormal*rbA.getInvMass();
+							btVector3 iMJaA = rbA.getInvInertiaTensorWorld()*solverConstraint.m_relpos1CrossNormal;
+							btVector3 iMJlB = solverConstraint.m_contactNormal*rbB.getInvMass();//sign of normal?
+							btVector3 iMJaB = rbB.getInvInertiaTensorWorld()*solverConstraint.m_relpos2CrossNormal;
+
+							btScalar sum = iMJlA.dot(solverConstraint.m_contactNormal);
+							sum += iMJaA.dot(solverConstraint.m_relpos1CrossNormal);
+							sum += iMJlB.dot(solverConstraint.m_contactNormal);
+							sum += iMJaB.dot(solverConstraint.m_relpos2CrossNormal);
+
+							solverConstraint.m_jacDiagABInv = btScalar(1.)/sum; // NOTE(review): no guard against sum == 0
+						}
+
+
+						///fix rhs
+						///todo: add force/torque accelerators
+						{
+							btScalar rel_vel;
+							btScalar vel1Dotn = solverConstraint.m_contactNormal.dot(rbA.getLinearVelocity()) + solverConstraint.m_relpos1CrossNormal.dot(rbA.getAngularVelocity());
+							btScalar vel2Dotn = -solverConstraint.m_contactNormal.dot(rbB.getLinearVelocity()) + solverConstraint.m_relpos2CrossNormal.dot(rbB.getAngularVelocity());
+
+							rel_vel = vel1Dotn+vel2Dotn;
+
+							btScalar restitution = 0.f;
+							btScalar positionalError = solverConstraint.m_rhs;//already filled in by getConstraintInfo2
+							btScalar	velocityError = restitution - rel_vel;// * damping;
+							btScalar	penetrationImpulse = positionalError*solverConstraint.m_jacDiagABInv;
+							btScalar	velocityImpulse = velocityError *solverConstraint.m_jacDiagABInv;
+							solverConstraint.m_rhs = penetrationImpulse+velocityImpulse;
+							solverConstraint.m_appliedImpulse = 0.f;
+
+						}
+					}
+
+					PfxConstraintPair& pair = jointPairs[actualNumJoints];
+					
+					int numConstraintRows= info1.m_numConstraintRows;
+					pfxSetNumConstraints(pair,numConstraintRows);
+					
+
+
+					pfxSetRigidBodyIdA(pair,idA);
+					pfxSetRigidBodyIdB(pair,idB);
+					//is this needed?
+					if (idA>=0)
+						pfxSetMotionMaskA(pair,m_memoryCache->m_mystates[idA].getMotionMask());
+					if (idB>=0)
+						pfxSetMotionMaskB(pair,m_memoryCache->m_mystates[idB].getMotionMask());
+
+					pfxSetActive(pair,true);
+					int id = currentConstraintRow-offsetSolverConstraints; // index of this constraint's first row within the row pool
+					pfxSetContactId(pair,id);
+					actualNumJoints++;
+
+
+				}
+				currentRow+=m_tmpConstraintSizesPool[i].m_numConstraintRows;
+			}
+		}
+	}
+
+
+	
+	float separateBias=0.1;//info.m_erp;//or m_erp2?
+	float timeStep=infoGlobal.m_timeStep;
+	int iteration=infoGlobal.m_numIterations;
+
+	//create a pair for each constraints, copy over info etc
+	
+	
+
+
+	
+	{
+		BT_PROFILE("compute num contacts");
+		int totalContacts =0; // NOTE(review): accumulated below but never read outside the commented-out printf
+
+		for (int i=0;i<actualNumManifolds;i++)
+		{
+			PfxConstraintPair* pair = &m_memoryCache->m_mypairs[i];
+			totalContacts += pfxGetNumConstraints(*pair);
+		}
+		//printf("numManifolds = %d\n",numManifolds);
+		//printf("totalContacts=%d\n",totalContacts);
+	}
+	
+
+
+//	printf("actualNumManifolds=%d\n",actualNumManifolds);
+	{
+		BT_PROFILE("BPE_customConstraintSolverSequentialNew");
+		if (numRigidBodies>0 && (actualNumManifolds+actualNumJoints)>0) // nothing to solve without bodies or without any pair
+		{
+//			PFX_PRINTF("num points = %d\n",totalPoints);
+//			PFX_PRINTF("num points PFX = %d\n",total);
+			
+			
+			PfxConstraintRow* contactRows = actualNumManifolds? &m_memoryCache->m_constraintRows[0] : 0;
+			 PfxBroadphasePair* actualPairs = m_memoryCache->m_mypairs.size() ? &m_memoryCache->m_mypairs[0] : 0;
+			BPE_customConstraintSolverSequentialNew(
+				actualNumManifolds,
+				actualPairs,
+				offsetContactManifolds,
+				contactRows,
+				&m_memoryCache->m_mystates[0],numRigidBodies,
+				&m_memoryCache->m_mysolverbodies[0],
+				jointPairs,actualNumJoints,
+				offsetSolverConstraints,
+				separateBias,timeStep,iteration,
+				m_solverThreadSupport,m_criticalSection,m_solverIO,m_barrier);
+		}
+	}
+
+	//copy results back to bodies
+	{
+		BT_PROFILE("copy back");
+		for (int i=0;i<numRigidBodies;i++)
+		{
+			btCollisionObject* obj = bodies1[i];
+			btRigidBody* rb = btRigidBody::upcast(obj);
+			TrbState& state = m_memoryCache->m_mystates[i];
+			if (rb && (rb->getInvMass()>0.f)) // same condition as the setup loop: only these bodies get velocities written back
+			{
+				rb->setLinearVelocity(btVector3(state.getLinearVelocity().getX(),state.getLinearVelocity().getY(),state.getLinearVelocity().getZ()));
+				rb->setAngularVelocity(btVector3(state.getAngularVelocity().getX(),state.getAngularVelocity().getY(),state.getAngularVelocity().getZ()));
+			}
+		}
+	}
+
+
+	return 0.f;
+}
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/btParallelConstraintSolver.h b/Engine/lib/bullet/src/BulletMultiThreaded/btParallelConstraintSolver.h
new file mode 100644
index 000000000..af42a8380
--- /dev/null
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/btParallelConstraintSolver.h
@@ -0,0 +1,288 @@
+/*
+   Copyright (C) 2010 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+
+*/
+
+#ifndef __BT_PARALLEL_CONSTRAINT_SOLVER_H
+#define __BT_PARALLEL_CONSTRAINT_SOLVER_H
+
+#include "BulletDynamics/ConstraintSolver/btSequentialImpulseConstraintSolver.h"
+
+
+
+
+#include "LinearMath/btScalar.h"
+#include "PlatformDefinitions.h"
+
+
+#define PFX_MAX_SOLVER_PHASES 64
+#define PFX_MAX_SOLVER_BATCHES 16
+#define PFX_MAX_SOLVER_PAIRS  128
+#define PFX_MIN_SOLVER_PAIRS  16
+
+#ifdef __CELLOS_LV2__
+ATTRIBUTE_ALIGNED128(struct) PfxParallelBatch {
+#else
+ATTRIBUTE_ALIGNED16(struct) PfxParallelBatch {
+#endif
+	uint16_t pairIndices[PFX_MAX_SOLVER_PAIRS]; // indices of the pairs assigned to this batch (16-bit, at most PFX_MAX_SOLVER_PAIRS entries)
+};
+
+#ifdef __CELLOS_LV2__
+ATTRIBUTE_ALIGNED128(struct) PfxParallelGroup {
+#else
+ATTRIBUTE_ALIGNED16(struct) PfxParallelGroup {
+#endif
+	uint16_t numPhases; // number of phases in use
+	uint16_t numBatches[PFX_MAX_SOLVER_PHASES]; // batch count for each phase
+	uint16_t numPairs[PFX_MAX_SOLVER_PHASES*PFX_MAX_SOLVER_BATCHES]; // pair count per batch, indexed by phase*PFX_MAX_SOLVER_BATCHES+batch
+};
+
+
+
+ATTRIBUTE_ALIGNED16(struct) PfxSortData16 { // 16-byte record that can be viewed as 16 bytes, 8 16-bit words or 4 32-bit words
+	union {
+		uint8_t   i8data[16];
+		uint16_t  i16data[8];
+		uint32_t  i32data[4];
+#ifdef __SPU__
+		vec_uint4 vdata;
+#endif
+	};
+	// element accessors: 'elem' indexes the 8-, 16- or 32-bit view respectively; no bounds checking is done
+#ifdef __SPU__
+	void set8(int elem,uint8_t data)   {vdata=(vec_uint4)spu_insert(data,(vec_uchar16)vdata,elem);}
+	void set16(int elem,uint16_t data) {vdata=(vec_uint4)spu_insert(data,(vec_ushort8)vdata,elem);}
+	void set32(int elem,uint32_t data) {vdata=(vec_uint4)spu_insert(data,(vec_uint4)vdata,elem);}
+	uint8_t get8(int elem)   const {return spu_extract((vec_uchar16)vdata,elem);}
+	uint16_t get16(int elem) const {return spu_extract((vec_ushort8)vdata,elem);}
+	uint32_t get32(int elem) const {return spu_extract((vec_uint4)vdata,elem);}
+#else
+	void set8(int elem,uint8_t data)   {i8data[elem] = data;}
+	void set16(int elem,uint16_t data) {i16data[elem] = data;}
+	void set32(int elem,uint32_t data) {i32data[elem] = data;}
+	uint8_t get8(int elem)   const {return i8data[elem];}
+	uint16_t get16(int elem) const {return i16data[elem];}
+	uint32_t get32(int elem) const {return i32data[elem];}
+#endif
+};
+
+typedef PfxSortData16 PfxConstraintPair;
+// PfxConstraintPair and PfxBroadphasePair are both PfxSortData16 and share one field layout:
+//   16-bit slot 0 / 1: rigid body id A / B;  byte 4 / 5: motion mask A / B;  byte 6: low nibble = broadphase flag, high nibble = active;  byte 7: constraint count
+//   32-bit slot 2: constraint id / contact id (pfxSetConstraintId and pfxSetContactId write the same slot)
+
+SIMD_FORCE_INLINE void pfxSetConstraintId(PfxConstraintPair &pair,uint32_t i)	{pair.set32(2,i);}
+SIMD_FORCE_INLINE void pfxSetNumConstraints(PfxConstraintPair &pair,uint8_t n)	{pair.set8(7,n);}
+
+SIMD_FORCE_INLINE uint32_t pfxGetConstraintId1(const PfxConstraintPair &pair)	{return pair.get32(2);}
+SIMD_FORCE_INLINE uint8_t  pfxGetNumConstraints(const PfxConstraintPair &pair)	{return pair.get8(7);}
+
+typedef PfxSortData16 PfxBroadphasePair;
+
+SIMD_FORCE_INLINE void pfxSetRigidBodyIdA(PfxBroadphasePair &pair,uint16_t i)	{pair.set16(0,i);}
+SIMD_FORCE_INLINE void pfxSetRigidBodyIdB(PfxBroadphasePair &pair,uint16_t i)	{pair.set16(1,i);}
+SIMD_FORCE_INLINE void pfxSetMotionMaskA(PfxBroadphasePair &pair,uint8_t i)		{pair.set8(4,i);}
+SIMD_FORCE_INLINE void pfxSetMotionMaskB(PfxBroadphasePair &pair,uint8_t i)		{pair.set8(5,i);}
+SIMD_FORCE_INLINE void pfxSetBroadphaseFlag(PfxBroadphasePair &pair,uint8_t f)	{pair.set8(6,(pair.get8(6)&0xf0)|(f&0x0f));} // keeps the high nibble, replaces the low nibble
+SIMD_FORCE_INLINE void pfxSetActive(PfxBroadphasePair &pair,bool b)			{pair.set8(6,(pair.get8(6)&0x0f)|((b?1:0)<<4));} // keeps the low nibble, replaces the high nibble with 0 or 1
+SIMD_FORCE_INLINE void pfxSetContactId(PfxBroadphasePair &pair,uint32_t i)		{pair.set32(2,i);}
+
+SIMD_FORCE_INLINE uint16_t pfxGetRigidBodyIdA(const PfxBroadphasePair &pair)	{return pair.get16(0);}
+SIMD_FORCE_INLINE uint16_t pfxGetRigidBodyIdB(const PfxBroadphasePair &pair)	{return pair.get16(1);}
+SIMD_FORCE_INLINE uint8_t  pfxGetMotionMaskA(const PfxBroadphasePair &pair)		{return pair.get8(4);}
+SIMD_FORCE_INLINE uint8_t  pfxGetMotionMaskB(const PfxBroadphasePair &pair)		{return pair.get8(5);}
+SIMD_FORCE_INLINE uint8_t  pfxGetBroadphaseFlag(const PfxBroadphasePair &pair)	{return pair.get8(6)&0x0f;}
+SIMD_FORCE_INLINE bool     pfxGetActive(const PfxBroadphasePair &pair)			{return (pair.get8(6)>>4)!=0;}
+SIMD_FORCE_INLINE uint32_t pfxGetContactId1(const PfxBroadphasePair &pair)		{return pair.get32(2);}
+
+
+
+#if defined(__PPU__) || defined (__SPU__)
+ATTRIBUTE_ALIGNED128(struct) PfxSolverBody {
+#else
+ATTRIBUTE_ALIGNED16(struct) PfxSolverBody {
+#endif
+	vmVector3 mDeltaLinearVelocity; // zeroed by solveGroup before solving
+	vmVector3 mDeltaAngularVelocity; // zeroed by solveGroup before solving
+	vmMatrix3 mInertiaInv; // inverse inertia (zero matrix for fixed bodies)
+	vmQuat    mOrientation;
+	float   mMassInv; // inverse mass (0 for fixed bodies)
+	float   friction;
+	float   restitution;
+	float   unused; // NOTE(review): the unused* fields are presumably padding - confirm before removing
+	float   unused2;
+	float   unused3;
+	float   unused4;
+	float   unused5;
+};
+
+
+#ifdef __PPU__
+#include "SpuDispatch/BulletPE2ConstraintSolverSpursSupport.h"
+#endif
+
+static SIMD_FORCE_INLINE vmVector3 btReadVector3(const double* p) // builds a vmVector3 from three doubles
+{
+	float tmp[3] = {float(p[0]),float(p[1]),float(p[2])}; // narrow to float first: loadXYZ is fed a float array here
+	vmVector3 v;
+	loadXYZ(v, tmp);
+	return v;
+}
+
+static SIMD_FORCE_INLINE vmQuat btReadQuat(const double* p) // builds a vmQuat from four doubles p[0..3] (x,y,z,w)
+{
+	float tmp[4] = {float(p[0]),float(p[1]),float(p[2]),float(p[3])}; // fourth component is p[3]; the previous p[4] read past the quaternion
+	vmQuat vq;
+	loadXYZW(vq, tmp);
+	return vq;
+}
+
+static SIMD_FORCE_INLINE void btStoreVector3(const vmVector3 &src, double* p) // writes the three components of src to p[0..2] as doubles
+{
+	float tmp[3];
+	vmVector3 v = src;
+	storeXYZ(v, tmp); // store as floats first, then widen
+	p[0] = tmp[0];
+	p[1] = tmp[1];
+	p[2] = tmp[2];
+}
+
+
+static SIMD_FORCE_INLINE vmVector3 btReadVector3(const float* p) // float overload: loads straight from p
+{
+	vmVector3 v;
+	loadXYZ(v, p);
+	return v;
+}
+
+static SIMD_FORCE_INLINE vmQuat btReadQuat(const float* p) // float overload: loads straight from p
+{
+	vmQuat vq;
+	loadXYZW(vq, p);
+	return vq;
+}
+
+static SIMD_FORCE_INLINE void btStoreVector3(const vmVector3 &src, float* p) // float overload: stores straight into p
+{
+	vmVector3 v = src; // local copy is what gets passed to storeXYZ
+	storeXYZ(v, p);
+}
+
+
+
+
+class btPersistentManifold;
+
+enum { // command codes stored in btConstraintSolverIO::cmd
+	PFX_CONSTRAINT_SOLVER_CMD_SETUP_SOLVER_BODIES,
+	PFX_CONSTRAINT_SOLVER_CMD_SETUP_CONTACT_CONSTRAINTS,
+	PFX_CONSTRAINT_SOLVER_CMD_WRITEBACK_APPLIED_IMPULSES_CONTACT_CONSTRAINTS,
+	PFX_CONSTRAINT_SOLVER_CMD_SETUP_JOINT_CONSTRAINTS,
+	PFX_CONSTRAINT_SOLVER_CMD_SOLVE_CONSTRAINTS, // request payload is btConstraintSolverIO::solveConstraints
+	PFX_CONSTRAINT_SOLVER_CMD_POST_SOLVER // request payload is btConstraintSolverIO::postSolver
+};
+
+
+struct PfxSetupContactConstraintsIO { // request payload for the contact-constraint setup command - NOTE(review): the code that fills it is outside this chunk
+	PfxConstraintPair *offsetContactPairs;
+	uint32_t numContactPairs1;
+	btPersistentManifold*	offsetContactManifolds;
+	btConstraintRow* offsetContactConstraintRows;
+	class TrbState *offsetRigStates;
+	struct PfxSolverBody *offsetSolverBodies;
+	uint32_t numRigidBodies;
+	float separateBias;
+	float timeStep;
+	class btCriticalSection* criticalSection;
+};
+
+
+
+struct PfxSolveConstraintsIO { // request payload for PFX_CONSTRAINT_SOLVER_CMD_SOLVE_CONSTRAINTS
+	PfxParallelGroup *contactParallelGroup; // phase/batch layout of the contact pairs
+	PfxParallelBatch *contactParallelBatches;
+	PfxConstraintPair *contactPairs;
+	uint32_t numContactPairs;
+	btPersistentManifold *offsetContactManifolds;
+	btConstraintRow*	offsetContactConstraintRows;
+	PfxParallelGroup *jointParallelGroup; // phase/batch layout of the joint pairs
+	PfxParallelBatch *jointParallelBatches;
+	PfxConstraintPair *jointPairs;
+	uint32_t numJointPairs;
+	struct btSolverConstraint* offsetSolverConstraints;
+	TrbState *offsetRigStates1;
+	PfxSolverBody *offsetSolverBodies;
+	uint32_t numRigidBodies;
+	uint32_t iteration;
+
+	uint32_t	taskId; // index of the task this request is sent to
+	
+	class btBarrier* barrier;
+
+};
+
+struct PfxPostSolverIO { // request payload for PFX_CONSTRAINT_SOLVER_CMD_POST_SOLVER
+	TrbState *states; // first state of the slice handled by this task
+	PfxSolverBody *solverBodies; // first solver body of the same slice
+	uint32_t numRigidBodies; // number of bodies in the slice
+};
+
+ATTRIBUTE_ALIGNED16(struct) btConstraintSolverIO { // one request record per solver task
+	uint8_t cmd; // one of the PFX_CONSTRAINT_SOLVER_CMD_* codes
+	union { // payloads share storage; the sender fills the member that goes with cmd
+		PfxSetupContactConstraintsIO setupContactConstraints;
+		PfxSolveConstraintsIO solveConstraints;
+		PfxPostSolverIO postSolver;
+	};
+	
+	//SPU only
+	uint32_t barrierAddr2;
+	uint32_t criticalsectionAddr2;
+	uint32_t maxTasks1; // task count; NOTE(review): assigned on every platform, not only under __PPU__
+};
+
+
+
+
+void	SolverThreadFunc(void* userPtr,void* lsMemory);
+void*	SolverlsMemoryFunc();
+///The btParallelConstraintSolver performs computations on constraint rows in parallel
+///Using the cross-platform threading it supports Windows, Linux, Mac OSX and PlayStation 3 Cell SPUs
+class btParallelConstraintSolver : public btSequentialImpulseConstraintSolver
+{
+	
+protected:
+	struct btParallelSolverMemoryCache*	m_memoryCache; // owned; created in the constructor, deleted in the destructor
+
+	class btThreadSupportInterface*	m_solverThreadSupport; // supplied by the caller; not deleted by this class
+
+	struct btConstraintSolverIO* m_solverIO; // array with one record per task of m_solverThreadSupport
+	class btBarrier*			m_barrier; // created and deleted through m_solverThreadSupport
+	class btCriticalSection*	m_criticalSection; // created and deleted through m_solverThreadSupport
+
+
+public:
+
+	btParallelConstraintSolver(class btThreadSupportInterface* solverThreadSupport);
+	
+	virtual ~btParallelConstraintSolver();
+
+	virtual btScalar solveGroup(btCollisionObject** bodies,int numBodies,btPersistentManifold** manifold,int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& info, btIDebugDraw* debugDrawer, btStackAlloc* stackAlloc,btDispatcher* dispatcher);
+
+};
+
+
+
+#endif //__BT_PARALLEL_CONSTRAINT_SOLVER_H
\ No newline at end of file
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/btThreadSupportInterface.h b/Engine/lib/bullet/src/BulletMultiThreaded/btThreadSupportInterface.h
index 730ffa9ea..54f1769cf 100644
--- a/Engine/lib/bullet/src/BulletMultiThreaded/btThreadSupportInterface.h
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/btThreadSupportInterface.h
@@ -13,14 +13,39 @@ subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-#ifndef THREAD_SUPPORT_INTERFACE_H
-#define THREAD_SUPPORT_INTERFACE_H
+#ifndef BT_THREAD_SUPPORT_INTERFACE_H
+#define BT_THREAD_SUPPORT_INTERFACE_H
 
 
-//#include <LinearMath/btScalar.h> //for uint32_t etc.
+#include <LinearMath/btScalar.h> //for ATTRIBUTE_ALIGNED16
 #include "PlatformDefinitions.h"
 #include "PpuAddressSpace.h"
 
+class btBarrier { // abstract barrier interface; instances are handed out by btThreadSupportInterface::createBarrier()
+public:
+	btBarrier() {}
+	virtual ~btBarrier() {}
+	// sync(): presumably blocks the caller until getMaxCount() participants have arrived - confirm per backend.
+	virtual void sync() = 0;
+	virtual void setMaxCount(int n) = 0;
+	virtual int  getMaxCount() = 0;
+};
+
+class btCriticalSection { // abstract lock interface; instances are handed out by btThreadSupportInterface::createCriticalSection()
+public:
+	btCriticalSection() {}
+	virtual ~btCriticalSection() {}
+	// 32-word buffer declared with ATTRIBUTE_ALIGNED16; presumably the storage behind get/setSharedParam - TODO confirm.
+	ATTRIBUTE_ALIGNED16(unsigned int mCommonBuff[32]);
+	// Shared-parameter accessors; the valid range of i is not visible here (presumably 0..31).
+	virtual unsigned int getSharedParam(int i) = 0;
+	virtual void setSharedParam(int i,unsigned int p) = 0;
+	// lock()/unlock(): presumably used in matched pairs around the protected region - confirm per backend.
+	virtual void lock() = 0;
+	virtual void unlock() = 0;
+};
+
+
 class btThreadSupportInterface
 {
 public:
@@ -33,6 +58,10 @@ public:
 ///check for messages from SPUs
 	virtual	void waitForResponse(unsigned int *puiArgument0, unsigned int *puiArgument1) =0;
 
+
+	///non-blocking test if a task is completed. First implement all versions, and then enable this API
+	///virtual bool isTaskCompleted(unsigned int *puiArgument0, unsigned int *puiArgument1, int timeOutInMilliseconds)=0;
+
 ///start the spus (can be called at the beginning of each frame, to make sure that the right SPU program is loaded)
 	virtual	void startSPU() =0;
 
@@ -44,7 +73,17 @@ public:
 
 	virtual int		getNumTasks() const = 0;
 
+	virtual btBarrier*	createBarrier() = 0;
+
+	virtual btCriticalSection* createCriticalSection() = 0;
+
+	virtual void deleteBarrier(btBarrier* barrier)=0;
+
+    virtual void deleteCriticalSection(btCriticalSection* criticalSection)=0;
+	
+	virtual void*	getThreadLocalMemory(int taskId) { return 0; }
+
 };
 
-#endif //THREAD_SUPPORT_INTERFACE_H
+#endif //BT_THREAD_SUPPORT_INTERFACE_H
 
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/vectormath2bullet.h b/Engine/lib/bullet/src/BulletMultiThreaded/vectormath2bullet.h
index efc7a3c92..4cc72ac58 100644
--- a/Engine/lib/bullet/src/BulletMultiThreaded/vectormath2bullet.h
+++ b/Engine/lib/bullet/src/BulletMultiThreaded/vectormath2bullet.h
@@ -27,19 +27,17 @@
    POSSIBILITY OF SUCH DAMAGE.
 */
 
-#ifndef AOS_VECTORMATH_BULLET_CONVERT_H
-#define AOS_VECTORMATH_BULLET_CONVERT_H
-
-#include <vectormath_aos.h>
-//#include "BulletMultiThreaded/vectormath/scalar/cpp/vectormath_aos.h"
+#ifndef BT_AOS_VECTORMATH_BULLET_CONVERT_H
+#define BT_AOS_VECTORMATH_BULLET_CONVERT_H
 
+#include "PlatformDefinitions.h"
 #include "LinearMath/btVector3.h"
 #include "LinearMath/btQuaternion.h"
 #include "LinearMath/btMatrix3x3.h"
 
 inline Vectormath::Aos::Vector3	getVmVector3(const btVector3& bulletVec)
 {
-	return Vectormath::Aos::Vector3(bulletVec.getX(),bulletVec.getY(),bulletVec.getZ());
+	return Vectormath::Aos::Vector3((float)bulletVec.getX(),(float)bulletVec.getY(),(float)bulletVec.getZ());
 }
 
 inline btVector3 getBtVector3(const Vectormath::Aos::Vector3& vmVec)
@@ -53,7 +51,7 @@ inline btVector3 getBtVector3(const Vectormath::Aos::Point3& vmVec)
 
 inline Vectormath::Aos::Quat	getVmQuat(const btQuaternion& bulletQuat)
 {
-	Vectormath::Aos::Quat vmQuat(bulletQuat.getX(),bulletQuat.getY(),bulletQuat.getZ(),bulletQuat.getW());
+	Vectormath::Aos::Quat vmQuat((float)bulletQuat.getX(),(float)bulletQuat.getY(),(float)bulletQuat.getZ(),(float)bulletQuat.getW());
 	return vmQuat;
 }
 
@@ -72,4 +70,4 @@ inline Vectormath::Aos::Matrix3	getVmMatrix3(const btMatrix3x3& btMat)
 }
 
 
-#endif //AOS_VECTORMATH_BULLET_CONVERT_H
+#endif //BT_AOS_VECTORMATH_BULLET_CONVERT_H
diff --git a/Engine/lib/bullet/src/BulletSoftBody/CMakeLists.txt b/Engine/lib/bullet/src/BulletSoftBody/CMakeLists.txt
index 70f0fb446..105379582 100644
--- a/Engine/lib/bullet/src/BulletSoftBody/CMakeLists.txt
+++ b/Engine/lib/bullet/src/BulletSoftBody/CMakeLists.txt
@@ -1,8 +1,11 @@
 
 INCLUDE_DIRECTORIES(
-${BULLET_PHYSICS_SOURCE_DIR}/src }
+${BULLET_PHYSICS_SOURCE_DIR}/src
+	
 )
 
+#SUBDIRS( Solvers )
+
 SET(BulletSoftBody_SRCS
 	btSoftBody.cpp
 	btSoftBodyConcaveCollisionAlgorithm.cpp
@@ -11,10 +14,13 @@ SET(BulletSoftBody_SRCS
 	btSoftRigidCollisionAlgorithm.cpp
 	btSoftRigidDynamicsWorld.cpp
 	btSoftSoftCollisionAlgorithm.cpp
+	btDefaultSoftBodySolver.cpp
+
 )
 
 SET(BulletSoftBody_HDRS
 	btSoftBody.h
+	btSoftBodyData.h
 	btSoftBodyConcaveCollisionAlgorithm.h
 	btSoftBodyHelpers.h
 	btSoftBodyRigidBodyCollisionConfiguration.h
@@ -22,6 +28,11 @@ SET(BulletSoftBody_HDRS
 	btSoftRigidDynamicsWorld.h
 	btSoftSoftCollisionAlgorithm.h
 	btSparseSDF.h
+
+	btSoftBodySolvers.h
+	btDefaultSoftBodySolver.h
+
+	btSoftBodySolverVertexBuffer.h
 )
 
 
@@ -33,16 +44,22 @@ IF (BUILD_SHARED_LIBS)
 	TARGET_LINK_LIBRARIES(BulletSoftBody BulletDynamics)
 ENDIF (BUILD_SHARED_LIBS)
 
-IF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5)
-	IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
-		INSTALL(TARGETS BulletSoftBody DESTINATION .)
-	ELSE (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
-	INSTALL(TARGETS BulletSoftBody DESTINATION lib)
-        INSTALL(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} DESTINATION include FILES_MATCHING PATTERN "*.h")
-	ENDIF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
-ENDIF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5)
+IF (INSTALL_LIBS)
+	IF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES)
+		IF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5)
+			IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
+				INSTALL(TARGETS BulletSoftBody DESTINATION .)
+			ELSE (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
+			INSTALL(TARGETS BulletSoftBody DESTINATION lib${LIB_SUFFIX})
+				INSTALL(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+DESTINATION ${INCLUDE_INSTALL_DIR} FILES_MATCHING PATTERN "*.h"  PATTERN
+".svn" EXCLUDE PATTERN "CMakeFiles" EXCLUDE)
+			ENDIF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
+		ENDIF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5)
 
-IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
-	SET_TARGET_PROPERTIES(BulletSoftBody PROPERTIES FRAMEWORK true)
-	SET_TARGET_PROPERTIES(BulletSoftBody PROPERTIES PUBLIC_HEADER "${BulletSoftBody_HDRS}")
-ENDIF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
+		IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
+			SET_TARGET_PROPERTIES(BulletSoftBody PROPERTIES FRAMEWORK true)
+			SET_TARGET_PROPERTIES(BulletSoftBody PROPERTIES PUBLIC_HEADER "${BulletSoftBody_HDRS}")
+		ENDIF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
+	ENDIF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES)
+ENDIF (INSTALL_LIBS)
diff --git a/Engine/lib/bullet/src/BulletSoftBody/Jamfile b/Engine/lib/bullet/src/BulletSoftBody/Jamfile
deleted file mode 100644
index bb6061928..000000000
--- a/Engine/lib/bullet/src/BulletSoftBody/Jamfile
+++ /dev/null
@@ -1,9 +0,0 @@
-
-SubDir TOP src BulletSoftBody ;
-
-Description bulletsoftbody : "Bullet Softbody Dynamics" ;
-Library bulletsoftbody : 
-  [ Wildcard *.h *.cpp ]
-;
-
-LibDepends bulletsoftbody : bulletdynamics bulletcollision  ;
diff --git a/Engine/lib/bullet/src/BulletSoftBody/btDefaultSoftBodySolver.cpp b/Engine/lib/bullet/src/BulletSoftBody/btDefaultSoftBodySolver.cpp
new file mode 100644
index 000000000..e90d24e6e
--- /dev/null
+++ b/Engine/lib/bullet/src/BulletSoftBody/btDefaultSoftBodySolver.cpp
@@ -0,0 +1,151 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include "BulletCollision/CollisionShapes/btTriangleIndexVertexArray.h"
+#include "BulletCollision/CollisionDispatch/btCollisionObject.h"
+#include "BulletCollision/CollisionShapes/btCollisionShape.h"
+
+#include "btDefaultSoftBodySolver.h"
+#include "BulletCollision/CollisionShapes/btCapsuleShape.h"
+#include "BulletSoftBody/btSoftBody.h"
+
+
+btDefaultSoftBodySolver::btDefaultSoftBodySolver()
+	: m_updateSolverConstants(true)
+{
+	// Solver constants have not been computed yet, so the update flag starts out set.
+	// The flag is currently shared by all cloths linked with this solver; making it
+	// body specific is a possible optimisation once it is clearer when constants must be refreshed.
+}
+
+btDefaultSoftBodySolver::~btDefaultSoftBodySolver() // empty body: the soft bodies referenced by m_softBodySet are not deleted here
+{
+}
+
+// In this case the data is already in the soft bodies so there is no need for us to do anything
+void btDefaultSoftBodySolver::copyBackToSoftBodies(bool bMove)
+{
+	// Intentionally empty; bMove is not used by this solver.
+}
+
+void btDefaultSoftBodySolver::optimize( btAlignedObjectArray< btSoftBody * > &softBodies , bool forceUpdate)
+{
+	m_softBodySet.copyFromArray( softBodies ); // copies the caller's pointer array into the tracked set; forceUpdate is not used here
+}
+
+void btDefaultSoftBodySolver::updateSoftBodies( )
+{
+	// Integrate motion for every active soft body tracked by this solver.
+	for ( int bodyIndex = 0; bodyIndex < m_softBodySet.size(); ++bodyIndex )
+	{
+		btSoftBody* const body = m_softBodySet[bodyIndex];
+		if ( !body->isActive() )
+			continue;	// inactive bodies are left untouched
+		body->integrateMotion();
+	}
+} // updateSoftBodies
+
+bool btDefaultSoftBodySolver::checkInitialized()
+{
+	return true; // unconditional: this solver performs no initialization check
+}
+
+void btDefaultSoftBodySolver::solveConstraints( float solverdt )
+{
+	// Each active soft body solves its own constraints;
+	// solverdt is not used by this solver.
+	for ( int bodyIndex = 0; bodyIndex < m_softBodySet.size(); ++bodyIndex )
+	{
+		btSoftBody* const body = m_softBodySet[bodyIndex];
+		if ( !body->isActive() )
+			continue;
+		body->solveConstraints();
+	}
+} // btDefaultSoftBodySolver::solveConstraints
+
+
+void btDefaultSoftBodySolver::copySoftBodyToVertexBuffer( const btSoftBody *const softBody, btVertexBufferDescriptor *vertexBuffer )
+{
+	// Copies node positions and/or normals of softBody into a CPU-side float buffer.
+	// Only CPU output buffers are handled; any other buffer type is left untouched.
+	// TODO: check for DX11 buffers. Take all offsets into the same DX11 buffer
+	// and use them together on a single kernel call if possible by setting up a
+	// per-cloth target buffer array for the copy kernel.
+	if( vertexBuffer->getBufferType() != btVertexBufferDescriptor::CPU_BUFFER )
+		return;
+
+	const btAlignedObjectArray<btSoftBody::Node> &nodes = softBody->m_nodes;
+	const int nodeCount = nodes.size();
+
+	const btCPUVertexBufferDescriptor *cpuDescriptor = static_cast< btCPUVertexBufferDescriptor* >(vertexBuffer);
+	float *const bufferStart = cpuDescriptor->getBasePointer();
+
+	if( vertexBuffer->hasVertexPositions() )
+	{
+		// Offset and stride are applied as float-element counts on the base pointer.
+		const int positionOffset = cpuDescriptor->getVertexOffset();
+		const int positionStride = cpuDescriptor->getVertexStride();
+		float *dst = bufferStart + positionOffset;
+
+		for( int i = 0; i < nodeCount; ++i, dst += positionStride )
+		{
+			const btVector3 position = nodes[i].m_x;
+			dst[0] = position.getX();
+			dst[1] = position.getY();
+			dst[2] = position.getZ();
+		}
+	}
+
+	if( vertexBuffer->hasNormals() )
+	{
+		const int normalOffset = cpuDescriptor->getNormalOffset();
+		const int normalStride = cpuDescriptor->getNormalStride();
+		float *dst = bufferStart + normalOffset;
+
+		for( int i = 0; i < nodeCount; ++i, dst += normalStride )
+		{
+			const btVector3 normal = nodes[i].m_n;
+			dst[0] = normal.getX();
+			dst[1] = normal.getY();
+			dst[2] = normal.getZ();
+		}
+	}
+} // btDefaultSoftBodySolver::copySoftBodyToVertexBuffer
+
+void btDefaultSoftBodySolver::processCollision( btSoftBody* softBody, btSoftBody* otherSoftBody)
+{
+	softBody->defaultCollisionHandler( otherSoftBody); // soft-vs-soft: delegate to the first body's default handler
+}
+
+// For the default solver just leave the soft body to do its collision processing
+void btDefaultSoftBodySolver::processCollision( btSoftBody *softBody, const btCollisionObjectWrapper* collisionObjectWrap )
+{
+	softBody->defaultCollisionHandler( collisionObjectWrap ); // collision-object overload of the body's default handler
+} // btDefaultSoftBodySolver::processCollision
+
+
+void btDefaultSoftBodySolver::predictMotion( float timeStep )
+{
+	// Forward the time step to every active soft body tracked by this solver;
+	// inactive bodies are skipped.
+	for ( int bodyIndex = 0; bodyIndex < m_softBodySet.size(); ++bodyIndex )
+	{
+		btSoftBody* const body = m_softBodySet[bodyIndex];
+		if ( !body->isActive() )
+			continue;
+		body->predictMotion(timeStep);
+	}
+}
+
diff --git a/Engine/lib/bullet/src/BulletSoftBody/btDefaultSoftBodySolver.h b/Engine/lib/bullet/src/BulletSoftBody/btDefaultSoftBodySolver.h
new file mode 100644
index 000000000..1c17ffcbb
--- /dev/null
+++ b/Engine/lib/bullet/src/BulletSoftBody/btDefaultSoftBodySolver.h
@@ -0,0 +1,63 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_SOFT_BODY_DEFAULT_SOLVER_H
+#define BT_SOFT_BODY_DEFAULT_SOLVER_H
+
+
+#include "BulletSoftBody/btSoftBodySolvers.h"
+#include "btSoftBodySolverVertexBuffer.h"
+struct btCollisionObjectWrapper;
+
+class btDefaultSoftBodySolver : public btSoftBodySolver // solver whose steps are forwarded to the tracked soft bodies themselves
+{
+protected:		
+	/** Variable to define whether we need to update solver constants on the next iteration */
+	bool m_updateSolverConstants;
+	/** Soft bodies tracked by this solver; filled by optimize() */
+	btAlignedObjectArray< btSoftBody * > m_softBodySet;
+
+
+public:
+	btDefaultSoftBodySolver();
+	/** Empty destructor body; the tracked soft bodies are not deleted */
+	virtual ~btDefaultSoftBodySolver();
+	/** Identifies this solver as DEFAULT_SOLVER */
+	virtual SolverTypes getSolverType() const
+	{
+		return DEFAULT_SOLVER;
+	}
+	/** Always returns true */
+	virtual bool checkInitialized();
+	/** Calls integrateMotion() on every active tracked soft body */
+	virtual void updateSoftBodies( );
+	/** Copies softBodies into the tracked set; forceUpdate is not used by this solver */
+	virtual void optimize( btAlignedObjectArray< btSoftBody * > &softBodies,bool forceUpdate=false );
+	/** No-op: the data already lives in the soft bodies */
+	virtual void copyBackToSoftBodies(bool bMove = true);
+	/** Calls solveConstraints() on every active tracked soft body; solverdt is not used */
+	virtual void solveConstraints( float solverdt );
+	/** Calls predictMotion(solverdt) on every active tracked soft body */
+	virtual void predictMotion( float solverdt );
+	/** Writes node positions and/or normals into a CPU vertex buffer; other buffer types are ignored */
+	virtual void copySoftBodyToVertexBuffer( const btSoftBody *const softBody, btVertexBufferDescriptor *vertexBuffer );
+	/** Forwards to btSoftBody::defaultCollisionHandler (collision-object overload) */
+	virtual void processCollision( btSoftBody *, const btCollisionObjectWrapper* );
+	/** Forwards to btSoftBody::defaultCollisionHandler (soft-body overload) */
+	virtual void processCollision( btSoftBody*, btSoftBody* );
+
+};
+
+#endif // #ifndef BT_SOFT_BODY_DEFAULT_SOLVER_H
diff --git a/Engine/lib/bullet/src/BulletSoftBody/btSoftBody.cpp b/Engine/lib/bullet/src/BulletSoftBody/btSoftBody.cpp
index 431546f17..8fe34658e 100644
--- a/Engine/lib/bullet/src/BulletSoftBody/btSoftBody.cpp
+++ b/Engine/lib/bullet/src/BulletSoftBody/btSoftBody.cpp
@@ -15,12 +15,52 @@ subject to the following restrictions:
 ///btSoftBody implementation by Nathanael Presson
 
 #include "btSoftBodyInternals.h"
+#include "BulletSoftBody/btSoftBodySolvers.h"
+#include "btSoftBodyData.h"
+#include "LinearMath/btSerializer.h"
+
 
 //
 btSoftBody::btSoftBody(btSoftBodyWorldInfo*	worldInfo,int node_count,  const btVector3* x,  const btScalar* m)
-:m_worldInfo(worldInfo)
+:m_softBodySolver(0),m_worldInfo(worldInfo)
 {	
 	/* Init		*/ 
+	initDefaults();
+
+	/* Default material	*/ 
+	Material*	pm=appendMaterial();
+	pm->m_kLST	=	1;
+	pm->m_kAST	=	1;
+	pm->m_kVST	=	1;
+	pm->m_flags	=	fMaterial::Default;
+
+	/* Nodes			*/ 
+	const btScalar		margin=getCollisionShape()->getMargin();
+	m_nodes.resize(node_count);
+	for(int i=0,ni=node_count;i<ni;++i)
+	{	
+		Node&	n=m_nodes[i];
+		ZeroInitialize(n);
+		n.m_x		=	x?*x++:btVector3(0,0,0);
+		n.m_q		=	n.m_x;
+		n.m_im		=	m?*m++:1;
+		n.m_im		=	n.m_im>0?1/n.m_im:0;
+		n.m_leaf	=	m_ndbvt.insert(btDbvtVolume::FromCR(n.m_x,margin),&n);
+		n.m_material=	pm;
+	}
+	updateBounds();	
+
+}
+
+btSoftBody::btSoftBody(btSoftBodyWorldInfo*	worldInfo)
+:m_softBodySolver(0),m_worldInfo(worldInfo)	// no solver attached yet; same initial state as the node-based constructor
+{
+	initDefaults();	// shared defaults only: unlike the node-based constructor, nothing is appended afterwards
+}
+
+
+void	btSoftBody::initDefaults()
+{
 	m_internalType		=	CO_SOFT_BODY;
 	m_cfg.aeromodel		=	eAeroModel::V_Point;
 	m_cfg.kVCF			=	1;
@@ -61,33 +101,16 @@ btSoftBody::btSoftBody(btSoftBodyWorldInfo*	worldInfo,int node_count,  const btV
 	m_bounds[1]			=	btVector3(0,0,0);
 	m_worldTransform.setIdentity();
 	setSolver(eSolverPresets::Positions);
-	/* Default material	*/ 
-	Material*	pm=appendMaterial();
-	pm->m_kLST	=	1;
-	pm->m_kAST	=	1;
-	pm->m_kVST	=	1;
-	pm->m_flags	=	fMaterial::Default;
+	
 	/* Collision shape	*/ 
 	///for now, create a collision shape internally
 	m_collisionShape = new btSoftBodyCollisionShape(this);
-	m_collisionShape->setMargin(0.25);
-	/* Nodes			*/ 
-	const btScalar		margin=getCollisionShape()->getMargin();
-	m_nodes.resize(node_count);
-	for(int i=0,ni=node_count;i<ni;++i)
-	{	
-		Node&	n=m_nodes[i];
-		ZeroInitialize(n);
-		n.m_x		=	x?*x++:btVector3(0,0,0);
-		n.m_q		=	n.m_x;
-		n.m_im		=	m?*m++:1;
-		n.m_im		=	n.m_im>0?1/n.m_im:0;
-		n.m_leaf	=	m_ndbvt.insert(btDbvtVolume::FromCR(n.m_x,margin),&n);
-		n.m_material=	pm;
-	}
-	updateBounds();	
-
+	m_collisionShape->setMargin(0.25f);
+	
 	m_initialWorldTransform.setIdentity();
+
+	m_windVelocity = btVector3(0,0,0);
+	m_restLengthScale = btScalar(1.0);
 }
 
 //
@@ -334,7 +357,15 @@ void			btSoftBody::appendTetra(int node0,
 }
 
 //
-void			btSoftBody::appendAnchor(int node,btRigidBody* body, bool disableCollisionBetweenLinkedBodies)
+
+void			btSoftBody::appendAnchor(int node,btRigidBody* body, bool disableCollisionBetweenLinkedBodies,btScalar influence)
+{
+	const btTransform worldToBody = body->getWorldTransform().inverse();	// inverse of the body's world transform
+	appendAnchor(node,body,worldToBody*m_nodes[node].m_x,disableCollisionBetweenLinkedBodies,influence);
+}
+
+//
+void			btSoftBody::appendAnchor(int node,btRigidBody* body, const btVector3& localPivot,bool disableCollisionBetweenLinkedBodies,btScalar influence)
 {
 	if (disableCollisionBetweenLinkedBodies)
 	{
@@ -347,8 +378,9 @@ void			btSoftBody::appendAnchor(int node,btRigidBody* body, bool disableCollisio
 	Anchor	a;
 	a.m_node			=	&m_nodes[node];
 	a.m_body			=	body;
-	a.m_local			=	body->getInterpolationWorldTransform().inverse()*a.m_node->m_x;
+	a.m_local			=	localPivot;
 	a.m_node->m_battach	=	1;
+	a.m_influence = influence;
 	m_anchors.push_back(a);
 }
 
@@ -421,6 +453,191 @@ void			btSoftBody::addForce(const btVector3& force,int node)
 	}
 }
 
+// Adds wind-induced aerodynamic force to one node. Only the vertex-based aero models
+// (m_cfg.aeromodel below F_TwoSided) are handled, and only when lift (kLF) or drag (kDG)
+// is positive; nodes with zero inverse mass are skipped. windVelocity replaces the medium velocity.
+void			btSoftBody::addAeroForceToNode(const btVector3& windVelocity,int nodeIndex)
+{
+	btAssert(nodeIndex >= 0 && nodeIndex < m_nodes.size());
+	const btScalar dt = m_sst.sdt;
+	const btScalar kLF = m_cfg.kLF;
+	const btScalar kDG = m_cfg.kDG;
+	const bool as_lift = kLF>0;
+	const bool as_drag = kDG>0;
+	const bool as_aero = as_lift || as_drag;
+	const bool as_vaero = as_aero && (m_cfg.aeromodel < btSoftBody::eAeroModel::F_TwoSided);
+
+	Node& n = m_nodes[nodeIndex];
+	if( n.m_im>0 )
+	{
+		btSoftBody::sMedium	medium;
+
+		EvaluateMedium(m_worldInfo, n.m_x, medium);
+		medium.m_velocity = windVelocity;
+		medium.m_density = m_worldInfo->air_density;
+
+		/* Aerodynamics			*/ 
+		if(as_vaero)
+		{
+			const btVector3	rel_v = n.m_v - medium.m_velocity;
+			const btScalar rel_v_len = rel_v.length();
+			const btScalar	rel_v2 = rel_v.length2();
+
+			if(rel_v2>SIMD_EPSILON)
+			{
+				const btVector3 rel_v_nrm = rel_v.normalized();
+				btVector3	nrm = n.m_n;
+
+				if (m_cfg.aeromodel == btSoftBody::eAeroModel::V_TwoSidedLiftDrag)
+				{
+					nrm *= (btScalar)( (btDot(nrm,rel_v) < 0) ? -1 : +1);
+					btVector3 fDrag(0, 0, 0);
+					btVector3 fLift(0, 0, 0);
+
+					btScalar n_dot_v = nrm.dot(rel_v_nrm);
+					btScalar tri_area = 0.5f * n.m_area;
+
+					fDrag = 0.5f * kDG * medium.m_density * rel_v2 * tri_area * n_dot_v * (-rel_v_nrm);
+
+					// Check angle of attack
+					// cos(10 degrees) = 0.98480
+					if ( 0 < n_dot_v && n_dot_v < 0.98480f)
+						fLift = 0.5f * kLF * medium.m_density * rel_v_len * tri_area * btSqrt(1.0f-n_dot_v*n_dot_v) * (nrm.cross(rel_v_nrm).cross(rel_v_nrm));
+
+					// Check if the velocity change resulted by aero drag force exceeds the current velocity of the node.
+					btVector3 del_v_by_fDrag = fDrag*n.m_im*dt;
+					btScalar del_v_by_fDrag_len2 = del_v_by_fDrag.length2();
+					btScalar v_len2 = n.m_v.length2();
+
+					if (del_v_by_fDrag_len2 >= v_len2 && del_v_by_fDrag_len2 > 0)
+					{
+						btScalar del_v_by_fDrag_len = del_v_by_fDrag.length();
+						btScalar v_len = n.m_v.length();
+						fDrag *= btScalar(0.8)*(v_len / del_v_by_fDrag_len);
+					}
+
+					n.m_f += fDrag;
+					n.m_f += fLift;
+				}
+				else if (m_cfg.aeromodel == btSoftBody::eAeroModel::V_Point || m_cfg.aeromodel == btSoftBody::eAeroModel::V_OneSided || m_cfg.aeromodel == btSoftBody::eAeroModel::V_TwoSided)
+				{
+					// Flip the normal only for the two-sided model (the bare enum constant tested before was always true).
+					if (m_cfg.aeromodel == btSoftBody::eAeroModel::V_TwoSided)
+						nrm *= (btScalar)( (btDot(nrm,rel_v) < 0) ? -1 : +1);
+
+					const btScalar dvn = btDot(rel_v,nrm);
+					/* Compute forces	*/ 
+					if(dvn>0)
+					{
+						btVector3		force(0,0,0);
+						const btScalar	c0	=	n.m_area * dvn * rel_v2/2;
+						const btScalar	c1	=	c0 * medium.m_density;
+						force	+=	nrm*(-c1*kLF);
+						force	+=	rel_v.normalized() * (-c1 * kDG);
+						ApplyClampedForce(n, force, dt);
+					}
+				}
+			}
+		}
+	}
+}
+
+// Adds wind-induced aerodynamic force to the three nodes of one face. Only the face-based
+// aero models (m_cfg.aeromodel at or above F_TwoSided) are handled, and only when lift (kLF)
+// or drag (kDG) is positive. windVelocity replaces the velocity of the evaluated medium.
+void			btSoftBody::addAeroForceToFace(const btVector3& windVelocity,int faceIndex)
+{
+	btAssert(faceIndex >= 0 && faceIndex < m_faces.size());
+	const btScalar dt = m_sst.sdt;
+	const btScalar kLF = m_cfg.kLF;
+	const btScalar kDG = m_cfg.kDG;
+	const bool as_lift = kLF>0;
+	const bool as_drag = kDG>0;
+	const bool as_aero = as_lift || as_drag;
+	const bool as_faero = as_aero && (m_cfg.aeromodel >= btSoftBody::eAeroModel::F_TwoSided);
+
+	if(as_faero)
+	{
+		btSoftBody::Face&	f=m_faces[faceIndex];
+		btSoftBody::sMedium	medium;
+		const btVector3	v=(f.m_n[0]->m_v+f.m_n[1]->m_v+f.m_n[2]->m_v)/3;
+		const btVector3	x=(f.m_n[0]->m_x+f.m_n[1]->m_x+f.m_n[2]->m_x)/3;
+		EvaluateMedium(m_worldInfo,x,medium);
+		medium.m_velocity = windVelocity;
+		medium.m_density = m_worldInfo->air_density;
+		const btVector3	rel_v=v-medium.m_velocity;
+		const btScalar rel_v_len = rel_v.length();
+		const btScalar	rel_v2=rel_v.length2();
+
+		if(rel_v2>SIMD_EPSILON)
+		{
+			const btVector3 rel_v_nrm = rel_v.normalized();
+			btVector3	nrm = f.m_normal;
+
+			if (m_cfg.aeromodel == btSoftBody::eAeroModel::F_TwoSidedLiftDrag)
+			{
+				nrm *= (btScalar)( (btDot(nrm,rel_v) < 0) ? -1 : +1);
+
+				btVector3 fDrag(0, 0, 0);
+				btVector3 fLift(0, 0, 0);
+
+				btScalar n_dot_v = nrm.dot(rel_v_nrm);
+				btScalar tri_area = 0.5f * f.m_ra;
+				fDrag = 0.5f * kDG * medium.m_density * rel_v2 * tri_area * n_dot_v * (-rel_v_nrm);
+
+				// Check angle of attack
+				// cos(10 degrees) = 0.98480
+				if ( 0 < n_dot_v && n_dot_v < 0.98480f)
+					fLift = 0.5f * kLF * medium.m_density * rel_v_len * tri_area * btSqrt(1.0f-n_dot_v*n_dot_v) * (nrm.cross(rel_v_nrm).cross(rel_v_nrm));
+
+				fDrag /= 3;
+				fLift /= 3;
+
+				for(int j=0;j<3;++j) 
+				{
+					if (f.m_n[j]->m_im>0)
+					{
+						// Check if the velocity change resulted by aero drag force exceeds the current velocity of the node.
+						btVector3 del_v_by_fDrag = fDrag*f.m_n[j]->m_im*dt;
+						btScalar del_v_by_fDrag_len2 = del_v_by_fDrag.length2();
+						btScalar v_len2 = f.m_n[j]->m_v.length2();
+
+						if (del_v_by_fDrag_len2 >= v_len2 && del_v_by_fDrag_len2 > 0)
+						{
+							btScalar del_v_by_fDrag_len = del_v_by_fDrag.length();
+							btScalar v_len = f.m_n[j]->m_v.length();
+							// NOTE(review): this rescales the shared fDrag, so the reduction also applies to later nodes of the face - confirm intended.
+							fDrag *= btScalar(0.8)*(v_len / del_v_by_fDrag_len);
+						}
+
+						f.m_n[j]->m_f += fDrag; 
+						f.m_n[j]->m_f += fLift;
+					}
+				}
+			}
+			else if (m_cfg.aeromodel == btSoftBody::eAeroModel::F_OneSided || m_cfg.aeromodel == btSoftBody::eAeroModel::F_TwoSided)
+			{
+				// Flip the normal only for the two-sided model (the bare enum constant tested before was always true).
+				if (m_cfg.aeromodel == btSoftBody::eAeroModel::F_TwoSided)
+					nrm *= (btScalar)( (btDot(nrm,rel_v) < 0) ? -1 : +1);
+
+				const btScalar	dvn=btDot(rel_v,nrm);
+				/* Compute forces	*/ 
+				if(dvn>0)
+				{
+					btVector3		force(0,0,0);
+					const btScalar	c0	=	f.m_ra*dvn*rel_v2;
+					const btScalar	c1	=	c0*medium.m_density;
+					force	+=	nrm*(-c1*kLF);
+					force	+=	rel_v.normalized()*(-c1*kDG);
+					force	/=	3;
+					for(int j=0;j<3;++j) ApplyClampedForce(*f.m_n[j],force,dt);
+				}
+			}
+		}
+	}
+}
+
 //
 void			btSoftBody::addVelocity(const btVector3& velocity)
 {
@@ -523,11 +740,13 @@ void			btSoftBody::setVolumeMass(btScalar mass)
 {
 btAlignedObjectArray<btScalar>	ranks;
 ranks.resize(m_nodes.size(),0);
-for(int i=0;i<m_nodes.size();++i)
+int i;
+
+for(i=0;i<m_nodes.size();++i)
 	{
 	m_nodes[i].m_im=0;
 	}
-for(int i=0;i<m_tetras.size();++i)
+for(i=0;i<m_tetras.size();++i)
 	{
 	const Tetra& t=m_tetras[i];
 	for(int j=0;j<4;++j)
@@ -536,7 +755,7 @@ for(int i=0;i<m_tetras.size();++i)
 		ranks[int(t.m_n[j]-&m_nodes[0])]+=1;
 		}
 	}
-for(int i=0;i<m_nodes.size();++i)
+for( i=0;i<m_nodes.size();++i)
 	{
 	if(m_nodes[i].m_im>0)
 		{
@@ -604,6 +823,7 @@ void			btSoftBody::rotate(	const btQuaternion& rot)
 //
 void			btSoftBody::scale(const btVector3& scl)
 {
+
 	const btScalar	margin=getCollisionShape()->getMargin();
 	ATTRIBUTE_ALIGNED16(btDbvtVolume)	vol;
 	
@@ -620,6 +840,27 @@ void			btSoftBody::scale(const btVector3& scl)
 	updateConstants();
 }
 
+// Returns the rest-length scale last stored by setRestLengthScale() (initDefaults() sets it to 1).
+btScalar btSoftBody::getRestLengthScale()
+{
+	return m_restLengthScale;
+}
+
+// Rescales every link's rest length from the current scale to restLengthScale; calls activate() if the body is ISLAND_SLEEPING.
+void btSoftBody::setRestLengthScale(btScalar restLengthScale)
+{
+	const int linkCount = m_links.size();
+	for(int linkIndex=0; linkIndex<linkCount; ++linkIndex)
+	{
+		Link&		link = m_links[linkIndex];
+		// Divide out the previous scale before applying the new one; m_c1 keeps the squared rest length.
+		link.m_rl	=	link.m_rl / m_restLengthScale * restLengthScale;
+		link.m_c1	=	link.m_rl*link.m_rl;
+	}
+	m_restLengthScale = restLengthScale;
+	if (ISLAND_SLEEPING == getActivationState()) { activate(); }
+}
+
 //
 void			btSoftBody::setPose(bool bvolume,bool bframe)
 {
@@ -667,9 +908,20 @@ void			btSoftBody::setPose(bool bvolume,bool bframe)
 		m_pose.m_aqq[2]+=mq.z()*q;
 	}
 	m_pose.m_aqq=m_pose.m_aqq.inverse();
+	
 	updateConstants();
 }
 
+void				btSoftBody::resetLinkRestLengths()
+{
+	for(int linkIndex=0, linkCount=m_links.size(); linkIndex<linkCount; ++linkIndex)	// every link of the body
+	{
+		Link&	link = m_links[linkIndex];
+		link.m_rl	=	(link.m_n[0]->m_x - link.m_n[1]->m_x).length();	// current distance between the two end nodes
+		link.m_c1	=	link.m_rl*link.m_rl;	// squared rest length
+	}
+}
+
 //
 btScalar		btSoftBody::getVolume() const
 {
@@ -1192,12 +1444,12 @@ void			btSoftBody::refine(ImplicitFn* ifn,btScalar accurary,bool cut)
 							m=mc*f;
 						}
 						else
-						{ a.m_im/=0.5;m=1/a.m_im; }
+						{ a.m_im/=0.5f;m=1/a.m_im; }
 					}
 					else
 					{
 						if(b.m_im>0)
-						{ b.m_im/=0.5;m=1/b.m_im; }
+						{ b.m_im/=0.5f;m=1/b.m_im; }
 						else
 							m=0;
 					}
@@ -1277,7 +1529,7 @@ void			btSoftBody::refine(ImplicitFn* ifn,btScalar accurary,bool cut)
 			{
 				const btVector3	v=m_nodes[i].m_v;
 				btScalar		m=getMass(i);
-				if(m>0) { m*=0.5;m_nodes[i].m_im/=0.5; }
+				if(m>0) { m*=0.5f;m_nodes[i].m_im/=0.5f; }
 				appendNode(x,m);
 				cnodes[i]=m_nodes.size()-1;
 				m_nodes[cnodes[i]].m_v=v;
@@ -1391,7 +1643,7 @@ bool			btSoftBody::cutLink(int node0,int node1,btScalar position)
 {
 	bool			done=false;
 	int i,ni;
-	const btVector3	d=m_nodes[node0].m_x-m_nodes[node1].m_x;
+//	const btVector3	d=m_nodes[node0].m_x-m_nodes[node1].m_x;
 	const btVector3	x=Lerp(m_nodes[node0].m_x,m_nodes[node1].m_x,position);
 	const btVector3	v=Lerp(m_nodes[node0].m_v,m_nodes[node1].m_v,position);
 	const btScalar	m=1;
@@ -1486,6 +1738,7 @@ void			btSoftBody::setSolver(eSolverPresets::_ preset)
 //
 void			btSoftBody::predictMotion(btScalar dt)
 {
+
 	int i,ni;
 
 	/* Update				*/ 
@@ -1577,6 +1830,7 @@ void			btSoftBody::predictMotion(btScalar dt)
 //
 void			btSoftBody::solveConstraints()
 {
+
 	/* Apply clusters		*/ 
 	applyClusters(false);
 	/* Prepare links		*/ 
@@ -1785,7 +2039,7 @@ btScalar			btSoftBody::RayFromToCaster::rayFromToTriangle(	const btVector3& rayF
 void				btSoftBody::pointersToIndices()
 {
 #define	PTR2IDX(_p_,_b_)	reinterpret_cast<btSoftBody::Node*>((_p_)-(_b_))
-	btSoftBody::Node*	base=&m_nodes[0];
+	btSoftBody::Node*	base=m_nodes.size() ? &m_nodes[0] : 0;
 	int i,ni;
 
 	for(i=0,ni=m_nodes.size();i<ni;++i)
@@ -1829,7 +2083,7 @@ void				btSoftBody::indicesToPointers(const int* map)
 {
 #define	IDX2PTR(_p_,_b_)	map?(&(_b_)[map[(((char*)_p_)-(char*)0)]]):	\
 	(&(_b_)[(((char*)_p_)-(char*)0)])
-	btSoftBody::Node*	base=&m_nodes[0];
+	btSoftBody::Node*	base=m_nodes.size() ? &m_nodes[0]:0;
 	int i,ni;
 
 	for(i=0,ni=m_nodes.size();i<ni;++i)
@@ -1873,11 +2127,12 @@ int					btSoftBody::rayTest(const btVector3& rayFrom,const btVector3& rayTo,
 										btScalar& mint,eFeature::_& feature,int& index,bool bcountonly) const
 {
 	int	cnt=0;
+	btVector3 dir = rayTo-rayFrom;
+	
+
 	if(bcountonly||m_fdbvt.empty())
 	{/* Full search	*/ 
-		btVector3 dir = rayTo-rayFrom;
-		dir.normalize();
-
+		
 		for(int i=0,ni=m_faces.size();i<ni;++i)
 		{
 			const btSoftBody::Face&	f=m_faces[i];
@@ -1912,6 +2167,37 @@ int					btSoftBody::rayTest(const btVector3& rayFrom,const btVector3& rayTo,
 			cnt=1;
 		}
 	}
+
+	for (int i=0;i<m_tetras.size();i++)
+	{
+		const btSoftBody::Tetra& tet = m_tetras[i];
+		int tetfaces[4][3] = {{0,1,2},{0,1,3},{1,2,3},{0,2,3}};
+		for (int f=0;f<4;f++)
+		{
+
+			int index0=tetfaces[f][0];
+			int index1=tetfaces[f][1];
+			int index2=tetfaces[f][2];
+			btVector3 v0=tet.m_n[index0]->m_x;
+			btVector3 v1=tet.m_n[index1]->m_x;
+			btVector3 v2=tet.m_n[index2]->m_x;
+
+
+		const btScalar			t=RayFromToCaster::rayFromToTriangle(	rayFrom,rayTo,dir,
+			v0,v1,v2,
+				mint);
+		if(t>0)
+			{
+				++cnt;
+				if(!bcountonly)
+				{
+					feature=btSoftBody::eFeature::Tetra;
+					index=i;
+					mint=t;
+				}
+			}
+		}
+	}
 	return(cnt);
 }
 
@@ -1941,25 +2227,29 @@ btVector3		btSoftBody::evaluateCom() const
 }
 
 //
-bool				btSoftBody::checkContact(	btCollisionObject* colObj,
+bool				btSoftBody::checkContact(	const btCollisionObjectWrapper* colObjWrap,
 											 const btVector3& x,
 											 btScalar margin,
 											 btSoftBody::sCti& cti) const
 {
-	btVector3			nrm;
-	btCollisionShape*	shp=colObj->getCollisionShape();
-	btRigidBody* tmpRigid = btRigidBody::upcast(colObj);
-	const btTransform&	wtr=tmpRigid? tmpRigid->getInterpolationWorldTransform() : colObj->getWorldTransform();
-	btScalar			dst=m_worldInfo->m_sparsesdf.Evaluate(	wtr.invXform(x),
-		shp,
-		nrm,
-		margin);
+	btVector3 nrm;
+	const btCollisionShape *shp = colObjWrap->getCollisionShape();
+//	const btRigidBody *tmpRigid = btRigidBody::upcast(colObjWrap->getCollisionObject());
+	//const btTransform &wtr = tmpRigid ? tmpRigid->getWorldTransform() : colObjWrap->getWorldTransform();
+	const btTransform &wtr = colObjWrap->getWorldTransform();
+	//todo: check which transform is needed here
+
+	btScalar dst = 
+		m_worldInfo->m_sparsesdf.Evaluate(	
+			wtr.invXform(x),
+			shp,
+			nrm,
+			margin);
 	if(dst<0)
 	{
-		cti.m_colObj		=	colObj;
-		cti.m_normal	=	wtr.getBasis()*nrm;
-		cti.m_offset	=	-btDot(	cti.m_normal,
-			x-cti.m_normal*dst);
+		cti.m_colObj = colObjWrap->getCollisionObject();
+		cti.m_normal = wtr.getBasis()*nrm;
+		cti.m_offset = -btDot( cti.m_normal, x - cti.m_normal * dst );
 		return(true);
 	}
 	return(false);
@@ -1968,6 +2258,7 @@ bool				btSoftBody::checkContact(	btCollisionObject* colObj,
 //
 void					btSoftBody::updateNormals()
 {
+
 	const btVector3	zv(0,0,0);
 	int i,ni;
 
@@ -1996,29 +2287,42 @@ void					btSoftBody::updateNormals()
 //
 void					btSoftBody::updateBounds()
 {
-	if(m_ndbvt.m_root)
+	/*if( m_acceleratedSoftBody )
 	{
-		const btVector3&	mins=m_ndbvt.m_root->volume.Mins();
-		const btVector3&	maxs=m_ndbvt.m_root->volume.Maxs();
-		const btScalar		csm=getCollisionShape()->getMargin();
-		const btVector3		mrg=btVector3(	csm,
-			csm,
-			csm)*1; // ??? to investigate...
-		m_bounds[0]=mins-mrg;
-		m_bounds[1]=maxs+mrg;
-		if(0!=getBroadphaseHandle())
-		{					
-			m_worldInfo->m_broadphase->setAabb(	getBroadphaseHandle(),
-				m_bounds[0],
-				m_bounds[1],
-				m_worldInfo->m_dispatcher);
+		// If we have an accelerated softbody we need to obtain the bounds correctly
+		// For now (slightly hackily) just have a very large AABB
+		// TODO: Write get bounds kernel
+		// If that is updating in place, atomic collisions might be low (when the cloth isn't perfectly aligned to an axis) and we could
+		// probably do a test and exchange reasonably efficiently.
+
+		m_bounds[0] = btVector3(-1000, -1000, -1000);
+		m_bounds[1] = btVector3(1000, 1000, 1000);
+
+	} else {*/
+		if(m_ndbvt.m_root)
+		{
+			const btVector3&	mins=m_ndbvt.m_root->volume.Mins();
+			const btVector3&	maxs=m_ndbvt.m_root->volume.Maxs();
+			const btScalar		csm=getCollisionShape()->getMargin();
+			const btVector3		mrg=btVector3(	csm,
+				csm,
+				csm)*1; // ??? to investigate...
+			m_bounds[0]=mins-mrg;
+			m_bounds[1]=maxs+mrg;
+			if(0!=getBroadphaseHandle())
+			{					
+				m_worldInfo->m_broadphase->setAabb(	getBroadphaseHandle(),
+					m_bounds[0],
+					m_bounds[1],
+					m_worldInfo->m_dispatcher);
+			}
 		}
-	}
-	else
-	{
-		m_bounds[0]=
-			m_bounds[1]=btVector3(0,0,0);
-	}		
+		else
+		{
+			m_bounds[0]=
+				m_bounds[1]=btVector3(0,0,0);
+		}		
+	//}
 }
 
 
@@ -2059,51 +2363,93 @@ void					btSoftBody::updatePose()
 }
 
 //
-void				btSoftBody::updateConstants()
+void				btSoftBody::updateArea(bool averageArea)
 {
 	int i,ni;
 
+	// Pass 1: cache each triangle's area, computed by AreaOf() from its three node positions.
+	for(i=0,ni=m_faces.size();i<ni;++i)
+	{
+		Face&	face=m_faces[i];
+		face.m_ra=AreaOf(face.m_n[0]->m_x,face.m_n[1]->m_x,face.m_n[2]->m_x);
+	}
+
+	// Pass 2: rebuild every node's m_area from the faces that reference it.
+	// averageArea selects the mean of the adjacent face areas; otherwise 0.3333333 of their sum is used.
+	if (!averageArea)
+	{
+		// clear the accumulators before summing
+		for(i=0,ni=m_nodes.size();i<ni;++i)
+		{
+			m_nodes[i].m_area=0;
+		}
+
+		for(i=0,ni=m_faces.size();i<ni;++i)
+		{
+			Face&	face=m_faces[i];
+			// add the face area to each of its three corner nodes
+			for(int corner=0;corner<3;++corner)
+			{
+				face.m_n[corner]->m_area+=face.m_ra;
+			}
+		}
+
+		for(i=0,ni=m_nodes.size();i<ni;++i)
+		{
+			m_nodes[i].m_area*=0.3333333f;
+		}
+	}
+	else
+	{
+		btAlignedObjectArray<int>	faceCounts;
+		faceCounts.resize(m_nodes.size(),0);
+		for(i=0,ni=m_nodes.size();i<ni;++i)
+		{
+			m_nodes[i].m_area=0;
+		}
+		for(i=0,ni=m_faces.size();i<ni;++i)
+		{
+			Face&	face=m_faces[i];
+			for(int corner=0;corner<3;++corner)
+			{
+				Node*	node=face.m_n[corner];
+				faceCounts[(int)(node-&m_nodes[0])]++;
+				node->m_area+=btFabs(face.m_ra);
+			}
+		}
+		for(i=0,ni=m_nodes.size();i<ni;++i)
+		{
+			// divide the summed area by the number of contributing faces; nodes used by no face get 0
+			Node&	node=m_nodes[i];
+			const int	touching=faceCounts[i];
+			node.m_area=(touching>0)?node.m_area/(btScalar)touching:btScalar(0);
+		}
+	}
+}
+
+
+void				btSoftBody::updateLinkConstants()
+{	
+	int i,ni;
+
 	/* Links		*/ 
 	for(i=0,ni=m_links.size();i<ni;++i)
 	{
 		Link&		l=m_links[i];
 		Material&	m=*l.m_material;
-		l.m_rl	=	(l.m_n[0]->m_x-l.m_n[1]->m_x).length();
 		l.m_c0	=	(l.m_n[0]->m_im+l.m_n[1]->m_im)/m.m_kLST;
-		l.m_c1	=	l.m_rl*l.m_rl;
-	}
-	/* Faces		*/ 
-	for(i=0,ni=m_faces.size();i<ni;++i)
-	{
-		Face&		f=m_faces[i];
-		f.m_ra	=	AreaOf(f.m_n[0]->m_x,f.m_n[1]->m_x,f.m_n[2]->m_x);
-	}
-	/* Area's		*/ 
-	btAlignedObjectArray<int>	counts;
-	counts.resize(m_nodes.size(),0);
-	for(i=0,ni=m_nodes.size();i<ni;++i)
-	{
-		m_nodes[i].m_area	=	0;
-	}
-	for(i=0,ni=m_faces.size();i<ni;++i)
-	{
-		btSoftBody::Face&	f=m_faces[i];
-		for(int j=0;j<3;++j)
-		{
-			const int index=(int)(f.m_n[j]-&m_nodes[0]);
-			counts[index]++;
-			f.m_n[j]->m_area+=btFabs(f.m_ra);
-		}
-	}
-	for(i=0,ni=m_nodes.size();i<ni;++i)
-	{
-		if(counts[i]>0)
-			m_nodes[i].m_area/=(btScalar)counts[i];
-		else
-			m_nodes[i].m_area=0;
 	}
 }
 
+// Refreshes the cached constants by calling, in this order, resetLinkRestLengths(),
+// updateLinkConstants() and updateArea() (the latter with its default averageArea argument, declared elsewhere).
+void				btSoftBody::updateConstants()
+{
+	resetLinkRestLengths();
+	updateLinkConstants();
+	updateArea();
+}
+
 //
 void					btSoftBody::initializeClusters()
 {
@@ -2325,7 +2671,7 @@ void					btSoftBody::solveClusters(btScalar sor)
 void					btSoftBody::applyClusters(bool drift)
 {
 	BT_PROFILE("ApplyClusters");
-	const btScalar					f0=m_sst.sdt;
+//	const btScalar					f0=m_sst.sdt;
 	//const btScalar					f1=f0/2;
 	btAlignedObjectArray<btVector3> deltas;
 	btAlignedObjectArray<btScalar> weights;
@@ -2365,7 +2711,10 @@ void					btSoftBody::applyClusters(bool drift)
 	}
 	for(i=0;i<deltas.size();++i)
 	{
-		if(weights[i]>0) m_nodes[i].m_x+=deltas[i]/weights[i];
+		if(weights[i]>0) 
+		{
+			m_nodes[i].m_x+=deltas[i]/weights[i];
+		}
 	}
 }
 
@@ -2569,27 +2918,27 @@ void				btSoftBody::applyForces()
 {
 
 	BT_PROFILE("SoftBody applyForces");
-	const btScalar					dt=m_sst.sdt;
-	const btScalar					kLF=m_cfg.kLF;
-	const btScalar					kDG=m_cfg.kDG;
-	const btScalar					kPR=m_cfg.kPR;
-	const btScalar					kVC=m_cfg.kVC;
-	const bool						as_lift=kLF>0;
-	const bool						as_drag=kDG>0;
-	const bool						as_pressure=kPR!=0;
-	const bool						as_volume=kVC>0;
-	const bool						as_aero=	as_lift		||
-		as_drag		;
-	const bool						as_vaero=	as_aero		&&
-		(m_cfg.aeromodel<btSoftBody::eAeroModel::F_TwoSided);
-	const bool						as_faero=	as_aero		&&
-		(m_cfg.aeromodel>=btSoftBody::eAeroModel::F_TwoSided);
-	const bool						use_medium=	as_aero;
-	const bool						use_volume=	as_pressure	||
+//	const btScalar					dt =			m_sst.sdt;
+	const btScalar					kLF =			m_cfg.kLF;
+	const btScalar					kDG =			m_cfg.kDG;
+	const btScalar					kPR =			m_cfg.kPR;
+	const btScalar					kVC =			m_cfg.kVC;
+	const bool						as_lift =		kLF>0;
+	const bool						as_drag =		kDG>0;
+	const bool						as_pressure =	kPR!=0;
+	const bool						as_volume =		kVC>0;
+	const bool						as_aero =		as_lift	||
+													as_drag		;
+	//const bool						as_vaero =		as_aero	&&
+	//												(m_cfg.aeromodel < btSoftBody::eAeroModel::F_TwoSided);
+	//const bool						as_faero =		as_aero	&&
+	//												(m_cfg.aeromodel >= btSoftBody::eAeroModel::F_TwoSided);
+	const bool						use_medium =	as_aero;
+	const bool						use_volume =	as_pressure	||
 		as_volume	;
-	btScalar						volume=0;
-	btScalar						ivolumetp=0;
-	btScalar						dvolumetv=0;
+	btScalar						volume =		0;
+	btScalar						ivolumetp =		0;
+	btScalar						dvolumetv =		0;
 	btSoftBody::sMedium	medium;
 	if(use_volume)
 	{
@@ -2607,36 +2956,8 @@ void				btSoftBody::applyForces()
 		{
 			if(use_medium)
 			{
-				EvaluateMedium(m_worldInfo,n.m_x,medium);
 				/* Aerodynamics			*/ 
-				if(as_vaero)
-				{				
-					const btVector3	rel_v=n.m_v-medium.m_velocity;
-					const btScalar	rel_v2=rel_v.length2();
-					if(rel_v2>SIMD_EPSILON)
-					{
-						btVector3	nrm=n.m_n;
-						/* Setup normal		*/ 
-						switch(m_cfg.aeromodel)
-						{
-						case	btSoftBody::eAeroModel::V_Point:
-							nrm=NormalizeAny(rel_v);break;
-						case	btSoftBody::eAeroModel::V_TwoSided:
-							nrm*=(btScalar)(btDot(nrm,rel_v)<0?-1:+1);break;
-						}
-						const btScalar	dvn=btDot(rel_v,nrm);
-						/* Compute forces	*/ 
-						if(dvn>0)
-						{
-							btVector3		force(0,0,0);
-							const btScalar	c0	=	n.m_area*dvn*rel_v2/2;
-							const btScalar	c1	=	c0*medium.m_density;
-							force	+=	nrm*(-c1*kLF);
-							force	+=	rel_v.normalized()*(-c1*kDG);
-							ApplyClampedForce(n,force,dt);
-						}
-					}
-				}
+				addAeroForceToNode(m_windVelocity, i);
 			}
 			/* Pressure				*/ 
 			if(as_pressure)
@@ -2650,40 +2971,14 @@ void				btSoftBody::applyForces()
 			}
 		}
 	}
+
 	/* Per face forces				*/ 
 	for(i=0,ni=m_faces.size();i<ni;++i)
 	{
-		btSoftBody::Face&	f=m_faces[i];
-		if(as_faero)
-		{
-			const btVector3	v=(f.m_n[0]->m_v+f.m_n[1]->m_v+f.m_n[2]->m_v)/3;
-			const btVector3	x=(f.m_n[0]->m_x+f.m_n[1]->m_x+f.m_n[2]->m_x)/3;
-			EvaluateMedium(m_worldInfo,x,medium);
-			const btVector3	rel_v=v-medium.m_velocity;
-			const btScalar	rel_v2=rel_v.length2();
-			if(rel_v2>SIMD_EPSILON)
-			{
-				btVector3	nrm=f.m_normal;
-				/* Setup normal		*/ 
-				switch(m_cfg.aeromodel)
-				{
-				case	btSoftBody::eAeroModel::F_TwoSided:
-					nrm*=(btScalar)(btDot(nrm,rel_v)<0?-1:+1);break;
-				}
-				const btScalar	dvn=btDot(rel_v,nrm);
-				/* Compute forces	*/ 
-				if(dvn>0)
-				{
-					btVector3		force(0,0,0);
-					const btScalar	c0	=	f.m_ra*dvn*rel_v2;
-					const btScalar	c1	=	c0*medium.m_density;
-					force	+=	nrm*(-c1*kLF);
-					force	+=	rel_v.normalized()*(-c1*kDG);
-					force	/=	3;
-					for(int j=0;j<3;++j) ApplyClampedForce(*f.m_n[j],force,dt);
-				}
-			}
-		}
+	//	btSoftBody::Face&	f=m_faces[i];
+
+		/* Aerodynamics			*/ 
+		addAeroForceToFace(m_windVelocity, i);	
 	}
 }
 
@@ -2695,39 +2990,40 @@ void				btSoftBody::PSolve_Anchors(btSoftBody* psb,btScalar kst,btScalar ti)
 	for(int i=0,ni=psb->m_anchors.size();i<ni;++i)
 	{
 		const Anchor&		a=psb->m_anchors[i];
-		const btTransform&	t=a.m_body->getInterpolationWorldTransform();
+		const btTransform&	t=a.m_body->getWorldTransform();
 		Node&				n=*a.m_node;
 		const btVector3		wa=t*a.m_local;
 		const btVector3		va=a.m_body->getVelocityInLocalPoint(a.m_c1)*dt;
 		const btVector3		vb=n.m_x-n.m_q;
 		const btVector3		vr=(va-vb)+(wa-n.m_x)*kAHR;
-		const btVector3		impulse=a.m_c0*vr;
+		const btVector3		impulse=a.m_c0*vr*a.m_influence;
 		n.m_x+=impulse*a.m_c2;
 		a.m_body->applyImpulse(-impulse,a.m_c1);
 	}
 }
 
 //
-void				btSoftBody::PSolve_RContacts(btSoftBody* psb,btScalar kst,btScalar ti)
+void btSoftBody::PSolve_RContacts(btSoftBody* psb, btScalar kst, btScalar ti)
 {
-	const btScalar	dt=psb->m_sst.sdt;
-	const btScalar	mrg=psb->getCollisionShape()->getMargin();
+	const btScalar	dt = psb->m_sst.sdt;
+	const btScalar	mrg = psb->getCollisionShape()->getMargin();
 	for(int i=0,ni=psb->m_rcontacts.size();i<ni;++i)
 	{
-		const RContact&		c=psb->m_rcontacts[i];
-		const sCti&			cti=c.m_cti;	
-		btRigidBody* tmpRigid = btRigidBody::upcast(cti.m_colObj);
+		const RContact&		c = psb->m_rcontacts[i];
+		const sCti&			cti = c.m_cti;	
+		btRigidBody* tmpRigid = (btRigidBody*)btRigidBody::upcast(cti.m_colObj);
 
-		const btVector3		va=tmpRigid ? tmpRigid->getVelocityInLocalPoint(c.m_c1)*dt : btVector3(0,0,0);
-		const btVector3		vb=c.m_node->m_x-c.m_node->m_q;	
-		const btVector3		vr=vb-va;
-		const btScalar		dn=btDot(vr,cti.m_normal);		
+		const btVector3		va = tmpRigid ? tmpRigid->getVelocityInLocalPoint(c.m_c1)*dt : btVector3(0,0,0);
+		const btVector3		vb = c.m_node->m_x-c.m_node->m_q;	
+		const btVector3		vr = vb-va;
+		const btScalar		dn = btDot(vr, cti.m_normal);		
 		if(dn<=SIMD_EPSILON)
 		{
-			const btScalar		dp=btMin(btDot(c.m_node->m_x,cti.m_normal)+cti.m_offset,mrg);
-			const btVector3		fv=vr-cti.m_normal*dn;
-			const btVector3		impulse=c.m_c0*((vr-fv*c.m_c3+cti.m_normal*(dp*c.m_c4))*kst);
-			c.m_node->m_x-=impulse*c.m_c2;
+			const btScalar		dp = btMin( (btDot(c.m_node->m_x, cti.m_normal) + cti.m_offset), mrg );
+			const btVector3		fv = vr - (cti.m_normal * dn);
+			// c0 is the impulse matrix, c3 is 1 - the friction coefficient or 0, c4 is the contact hardness coefficient
+			const btVector3		impulse = c.m_c0 * ( (vr - (fv * c.m_c3) + (cti.m_normal * (dp * c.m_c4))) * kst );
+			c.m_node->m_x -= impulse * c.m_c2;
 			if (tmpRigid)
 				tmpRigid->applyImpulse(impulse,c.m_c1);
 		}
@@ -2779,10 +3075,12 @@ void				btSoftBody::PSolve_Links(btSoftBody* psb,btScalar kst,btScalar ti)
 			Node&			b=*l.m_n[1];
 			const btVector3	del=b.m_x-a.m_x;
 			const btScalar	len=del.length2();
-			const btScalar	k=((l.m_c1-len)/(l.m_c0*(l.m_c1+len)))*kst;
-			//const btScalar	t=k*a.m_im;
-			a.m_x-=del*(k*a.m_im);
-			b.m_x+=del*(k*b.m_im);
+			if (l.m_c1+len > SIMD_EPSILON)
+			{
+				const btScalar	k=((l.m_c1-len)/(l.m_c0*(l.m_c1+len)))*kst;
+				a.m_x-=del*(k*a.m_im);
+				b.m_x+=del*(k*b.m_im);
+			}
 		}
 	}
 }
@@ -2813,6 +3111,9 @@ btSoftBody::psolver_t	btSoftBody::getSolver(ePSolver::_ solver)
 		return(&btSoftBody::PSolve_RContacts);
 	case	ePSolver::SContacts:	
 		return(&btSoftBody::PSolve_SContacts);	
+		default:
+		{
+		}
 	}
 	return(0);
 }
@@ -2823,34 +3124,38 @@ btSoftBody::vsolver_t	btSoftBody::getSolver(eVSolver::_ solver)
 	switch(solver)
 	{
 	case	eVSolver::Linear:		return(&btSoftBody::VSolve_Links);
+		default:
+		{
+		}
 	}
 	return(0);
 }
 
 //
-void			btSoftBody::defaultCollisionHandler(btCollisionObject* pco)
+void			btSoftBody::defaultCollisionHandler(const btCollisionObjectWrapper* pcoWrap)
 {
+
 	switch(m_cfg.collisions&fCollision::RVSmask)
 	{
 	case	fCollision::SDF_RS:
 		{
 			btSoftColliders::CollideSDF_RS	docollide;		
-			btRigidBody*		prb1=btRigidBody::upcast(pco);
-			btTransform	wtr=prb1 ? prb1->getInterpolationWorldTransform() : pco->getWorldTransform();
+			btRigidBody*		prb1=(btRigidBody*) btRigidBody::upcast(pcoWrap->getCollisionObject());
+			btTransform	wtr=pcoWrap->getWorldTransform();
 
-			const btTransform	ctr=pco->getWorldTransform();
+			const btTransform	ctr=pcoWrap->getWorldTransform();
 			const btScalar		timemargin=(wtr.getOrigin()-ctr.getOrigin()).length();
 			const btScalar		basemargin=getCollisionShape()->getMargin();
 			btVector3			mins;
 			btVector3			maxs;
 			ATTRIBUTE_ALIGNED16(btDbvtVolume)		volume;
-			pco->getCollisionShape()->getAabb(	pco->getInterpolationWorldTransform(),
+			pcoWrap->getCollisionShape()->getAabb(	pcoWrap->getWorldTransform(),
 				mins,
 				maxs);
 			volume=btDbvtVolume::FromMM(mins,maxs);
 			volume.Expand(btVector3(basemargin,basemargin,basemargin));		
 			docollide.psb		=	this;
-			docollide.m_colObj1 = pco;
+			docollide.m_colObj1Wrap = pcoWrap;
 			docollide.m_rigidBody = prb1;
 
 			docollide.dynmargin	=	basemargin+timemargin;
@@ -2861,7 +3166,7 @@ void			btSoftBody::defaultCollisionHandler(btCollisionObject* pco)
 	case	fCollision::CL_RS:
 		{
 			btSoftColliders::CollideCL_RS	collider;
-			collider.Process(this,pco);
+			collider.ProcessColObj(this,pcoWrap);
 		}
 		break;
 	}
@@ -2880,7 +3185,7 @@ void			btSoftBody::defaultCollisionHandler(btSoftBody* psb)
 			if (this!=psb || psb->m_cfg.collisions&fCollision::CL_SELF)
 			{
 				btSoftColliders::CollideCL_SS	docollide;
-				docollide.Process(this,psb);
+				docollide.ProcessSoftSoft(this,psb);
 			}
 			
 		}
@@ -2915,3 +3220,420 @@ void			btSoftBody::defaultCollisionHandler(btSoftBody* psb)
 		}
 	}
 }
+
+
+// Stores the given velocity in m_windVelocity, the value applyForces() hands to the aero-force helpers.
+void btSoftBody::setWindVelocity( const btVector3 &velocity )
+{
+	m_windVelocity = velocity;
+}
+
+// Returns a const reference to the stored wind velocity (m_windVelocity).
+const btVector3& btSoftBody::getWindVelocity()
+{
+	return m_windVelocity;
+}
+
+
+// Size in bytes of the btSoftBodyData record that serialize() fills in.
+int	btSoftBody::calculateSerializeBufferSize()	const
+{
+	// only the fixed-size record is counted; serialize() allocates its arrays as separate chunks
+	return (int)sizeof(btSoftBodyData);
+}
+
+	///fills the dataBuffer (a btSoftBodyData) and writes each non-empty array as its own serializer chunk; returns the struct name (no path here returns 0)
+const char*	btSoftBody::serialize(void* dataBuffer, class btSerializer* serializer) const
+{
+	btSoftBodyData* sbd = (btSoftBodyData*) dataBuffer;
+
+	btCollisionObject::serialize(&sbd->m_collisionObjectData, serializer);
+
+	btHashMap<btHashPtr,int>	m_nodeIndexMap;	// local despite the m_ prefix: node address -> index, filled in the node loop, read in the cluster loop
+
+	sbd->m_numMaterials = m_materials.size();
+	sbd->m_materials = sbd->m_numMaterials? (SoftBodyMaterialData**) serializer->getUniquePointer((void*)&m_materials): 0;
+
+	if (sbd->m_materials)
+	{
+		int sz = sizeof(SoftBodyMaterialData*);
+		int numElem = sbd->m_numMaterials;
+		btChunk* chunk = serializer->allocate(sz,numElem);
+		// this chunk is a table of material pointers; each material not yet known to the serializer is also written as its own chunk
+		SoftBodyMaterialData** memPtr = (SoftBodyMaterialData**)chunk->m_oldPtr;
+		for (int i=0;i<numElem;i++,memPtr++)
+		{
+			btSoftBody::Material* mat = m_materials[i];
+			*memPtr = mat ? (SoftBodyMaterialData*)serializer->getUniquePointer((void*)mat) : 0;
+			if (mat && !serializer->findPointer(mat))
+			{
+				// a null entry stays a null pointer in the table and must not be dereferenced here
+				btChunk* matChunk = serializer->allocate(sizeof(SoftBodyMaterialData),1);
+				SoftBodyMaterialData* matData = (SoftBodyMaterialData*)matChunk->m_oldPtr;
+				matData->m_flags = mat->m_flags;
+				matData->m_angularStiffness = mat->m_kAST;
+				matData->m_linearStiffness = mat->m_kLST;
+				matData->m_volumeStiffness = mat->m_kVST;
+				serializer->finalizeChunk(matChunk,"SoftBodyMaterialData",BT_SBMATERIAL_CODE,mat);
+			}
+		}
+		serializer->finalizeChunk(chunk,"SoftBodyMaterialData",BT_ARRAY_CODE,(void*) &m_materials);
+	}
+
+
+
+	// nodes; each node's address is also recorded in m_nodeIndexMap for the cluster section
+	sbd->m_numNodes = m_nodes.size();
+	sbd->m_nodes = sbd->m_numNodes ? (SoftBodyNodeData*)serializer->getUniquePointer((void*)&m_nodes): 0;
+	if (sbd->m_nodes)
+	{
+		int sz = sizeof(SoftBodyNodeData);
+		int numElem = sbd->m_numNodes;
+		btChunk* chunk = serializer->allocate(sz,numElem);
+		SoftBodyNodeData* memPtr = (SoftBodyNodeData*)chunk->m_oldPtr;
+		for (int i=0;i<numElem;i++,memPtr++)
+		{
+			m_nodes[i].m_f.serializeFloat( memPtr->m_accumulatedForce);
+			memPtr->m_area = m_nodes[i].m_area;
+			memPtr->m_attach = m_nodes[i].m_battach;
+			memPtr->m_inverseMass = m_nodes[i].m_im;
+			memPtr->m_material = m_nodes[i].m_material? (SoftBodyMaterialData*)serializer->getUniquePointer((void*) m_nodes[i].m_material):0;
+			m_nodes[i].m_n.serializeFloat(memPtr->m_normal);
+			m_nodes[i].m_x.serializeFloat(memPtr->m_position);
+			m_nodes[i].m_q.serializeFloat(memPtr->m_previousPosition);
+			m_nodes[i].m_v.serializeFloat(memPtr->m_velocity);
+			m_nodeIndexMap.insert(&m_nodes[i],i);
+		}
+		serializer->finalizeChunk(chunk,"SoftBodyNodeData",BT_SBNODE_CODE,(void*) &m_nodes);
+	}
+	// links; node pointers are stored as offsets from &m_nodes[0], or -1 for a null node
+	sbd->m_numLinks = m_links.size();
+	sbd->m_links = sbd->m_numLinks? (SoftBodyLinkData*) serializer->getUniquePointer((void*)&m_links[0]):0;
+	if (sbd->m_links)
+	{
+		int sz = sizeof(SoftBodyLinkData);
+		int numElem = sbd->m_numLinks;
+		btChunk* chunk = serializer->allocate(sz,numElem);
+		SoftBodyLinkData* memPtr = (SoftBodyLinkData*)chunk->m_oldPtr;
+		for (int i=0;i<numElem;i++,memPtr++)
+		{
+			memPtr->m_bbending = m_links[i].m_bbending;
+			memPtr->m_material = m_links[i].m_material? (SoftBodyMaterialData*)serializer->getUniquePointer((void*) m_links[i].m_material):0;
+			memPtr->m_nodeIndices[0] = m_links[i].m_n[0] ? m_links[i].m_n[0] - &m_nodes[0]: -1;
+			memPtr->m_nodeIndices[1] = m_links[i].m_n[1] ? m_links[i].m_n[1] - &m_nodes[0]: -1;
+			btAssert(memPtr->m_nodeIndices[0]<m_nodes.size());
+			btAssert(memPtr->m_nodeIndices[1]<m_nodes.size());
+			memPtr->m_restLength = m_links[i].m_rl;
+		}
+		serializer->finalizeChunk(chunk,"SoftBodyLinkData",BT_ARRAY_CODE,(void*) &m_links[0]);
+
+	}
+
+	// faces
+	sbd->m_numFaces = m_faces.size();
+	sbd->m_faces = sbd->m_numFaces? (SoftBodyFaceData*) serializer->getUniquePointer((void*)&m_faces[0]):0;
+	if (sbd->m_faces)
+	{
+		int sz = sizeof(SoftBodyFaceData);
+		int numElem = sbd->m_numFaces;
+		btChunk* chunk = serializer->allocate(sz,numElem);
+		SoftBodyFaceData* memPtr = (SoftBodyFaceData*)chunk->m_oldPtr;
+		for (int i=0;i<numElem;i++,memPtr++)
+		{
+			memPtr->m_material = m_faces[i].m_material ?  (SoftBodyMaterialData*) serializer->getUniquePointer((void*)m_faces[i].m_material): 0;
+			m_faces[i].m_normal.serializeFloat(	memPtr->m_normal);
+			for (int j=0;j<3;j++)
+			{
+				memPtr->m_nodeIndices[j] = m_faces[i].m_n[j]? m_faces[i].m_n[j] - &m_nodes[0]: -1;
+			}
+			memPtr->m_restArea = m_faces[i].m_ra;
+		}
+		serializer->finalizeChunk(chunk,"SoftBodyFaceData",BT_ARRAY_CODE,(void*) &m_faces[0]);
+	}
+
+	// tetrahedra
+	sbd->m_numTetrahedra = m_tetras.size();
+	sbd->m_tetrahedra = sbd->m_numTetrahedra ? (SoftBodyTetraData*) serializer->getUniquePointer((void*)&m_tetras[0]):0;
+	if (sbd->m_tetrahedra)
+	{
+		int sz = sizeof(SoftBodyTetraData);
+		int numElem = sbd->m_numTetrahedra;
+		btChunk* chunk = serializer->allocate(sz,numElem);
+		SoftBodyTetraData* memPtr = (SoftBodyTetraData*)chunk->m_oldPtr;
+		for (int i=0;i<numElem;i++,memPtr++)
+		{
+			for (int j=0;j<4;j++)
+			{
+				m_tetras[i].m_c0[j].serializeFloat(	memPtr->m_c0[j] );
+				memPtr->m_nodeIndices[j] = m_tetras[i].m_n[j]? m_tetras[i].m_n[j]-&m_nodes[0] : -1;	// tetra i, corner j
+			}
+			memPtr->m_c1 = m_tetras[i].m_c1;
+			memPtr->m_c2 = m_tetras[i].m_c2;
+			memPtr->m_material = m_tetras[i].m_material ? (SoftBodyMaterialData*)serializer->getUniquePointer((void*) m_tetras[i].m_material): 0;
+			memPtr->m_restVolume = m_tetras[i].m_rv;
+		}
+		serializer->finalizeChunk(chunk,"SoftBodyTetraData",BT_ARRAY_CODE,(void*) &m_tetras[0]);
+	}
+	// anchors
+	sbd->m_numAnchors = m_anchors.size();
+	sbd->m_anchors = sbd->m_numAnchors ? (SoftRigidAnchorData*) serializer->getUniquePointer((void*)&m_anchors[0]):0;
+	if (sbd->m_anchors)
+	{
+		int sz = sizeof(SoftRigidAnchorData);
+		int numElem = sbd->m_numAnchors;
+		btChunk* chunk = serializer->allocate(sz,numElem);
+		SoftRigidAnchorData* memPtr = (SoftRigidAnchorData*)chunk->m_oldPtr;
+		for (int i=0;i<numElem;i++,memPtr++)
+		{
+			m_anchors[i].m_c0.serializeFloat(memPtr->m_c0);
+			m_anchors[i].m_c1.serializeFloat(memPtr->m_c1);
+			memPtr->m_c2 = m_anchors[i].m_c2;
+			m_anchors[i].m_local.serializeFloat(memPtr->m_localFrame);
+			memPtr->m_nodeIndex = m_anchors[i].m_node? m_anchors[i].m_node-&m_nodes[0]: -1;
+
+			memPtr->m_rigidBody = m_anchors[i].m_body? (btRigidBodyData*)  serializer->getUniquePointer((void*)m_anchors[i].m_body): 0;
+			btAssert(memPtr->m_nodeIndex < m_nodes.size());
+		}
+		serializer->finalizeChunk(chunk,"SoftRigidAnchorData",BT_ARRAY_CODE,(void*) &m_anchors[0]);
+	}
+
+	// configuration values are copied field by field from m_cfg
+	sbd->m_config.m_dynamicFriction = m_cfg.kDF;
+	sbd->m_config.m_baumgarte = m_cfg.kVCF;
+	sbd->m_config.m_pressure = m_cfg.kPR;
+	sbd->m_config.m_aeroModel = this->m_cfg.aeromodel;
+	sbd->m_config.m_lift = m_cfg.kLF;
+	sbd->m_config.m_drag = m_cfg.kDG;
+	sbd->m_config.m_positionIterations = m_cfg.piterations;
+	sbd->m_config.m_driftIterations = m_cfg.diterations;
+	sbd->m_config.m_clusterIterations = m_cfg.citerations;
+	sbd->m_config.m_velocityIterations = m_cfg.viterations;
+	sbd->m_config.m_maxVolume = m_cfg.maxvolume;
+	sbd->m_config.m_damping = m_cfg.kDP;
+	sbd->m_config.m_poseMatch = m_cfg.kMT;
+	sbd->m_config.m_collisionFlags = m_cfg.collisions;
+	sbd->m_config.m_volume = m_cfg.kVC;
+	sbd->m_config.m_rigidContactHardness = m_cfg.kCHR;
+	sbd->m_config.m_kineticContactHardness = m_cfg.kKHR;
+	sbd->m_config.m_softContactHardness = m_cfg.kSHR;
+	sbd->m_config.m_anchorHardness = m_cfg.kAHR;
+	sbd->m_config.m_timeScale = m_cfg.timescale;
+	sbd->m_config.m_maxVolume = m_cfg.maxvolume;
+	sbd->m_config.m_softRigidClusterHardness = m_cfg.kSRHR_CL;
+	sbd->m_config.m_softKineticClusterHardness = m_cfg.kSKHR_CL;
+	sbd->m_config.m_softSoftClusterHardness = m_cfg.kSSHR_CL;
+	sbd->m_config.m_softRigidClusterImpulseSplit = m_cfg.kSR_SPLT_CL;
+	sbd->m_config.m_softKineticClusterImpulseSplit = m_cfg.kSK_SPLT_CL;
+	sbd->m_config.m_softSoftClusterImpulseSplit = m_cfg.kSS_SPLT_CL;
+
+	//pose for shape matching
+	{
+		sbd->m_pose = (SoftBodyPoseData*)serializer->getUniquePointer((void*)&m_pose);
+
+		int sz = sizeof(SoftBodyPoseData);
+		btChunk* chunk = serializer->allocate(sz,1);
+		SoftBodyPoseData* memPtr = (SoftBodyPoseData*)chunk->m_oldPtr;
+
+		m_pose.m_aqq.serializeFloat(memPtr->m_aqq);
+		memPtr->m_bframe = m_pose.m_bframe;
+		memPtr->m_bvolume = m_pose.m_bvolume;
+		m_pose.m_com.serializeFloat(memPtr->m_com);
+
+		memPtr->m_numPositions = m_pose.m_pos.size();
+		memPtr->m_positions = memPtr->m_numPositions ? (btVector3FloatData*)serializer->getUniquePointer((void*)&m_pose.m_pos[0]): 0;
+		if (memPtr->m_numPositions)
+		{
+			int numPositions = memPtr->m_numPositions;
+			int posSize = sizeof(btVector3FloatData);	// element size must match the btVector3FloatData records written below
+			btChunk* posChunk = serializer->allocate(posSize,numPositions);
+			btVector3FloatData* posPtr = (btVector3FloatData*)posChunk->m_oldPtr;
+			for (int i=0;i<numPositions;i++,posPtr++)
+			{
+				m_pose.m_pos[i].serializeFloat(*posPtr);
+			}
+			serializer->finalizeChunk(posChunk,"btVector3FloatData",BT_ARRAY_CODE,(void*)&m_pose.m_pos[0]);
+		}
+		memPtr->m_restVolume = m_pose.m_volume;
+		m_pose.m_rot.serializeFloat(memPtr->m_rot);
+		m_pose.m_scl.serializeFloat(memPtr->m_scale);
+
+		memPtr->m_numWeigts = m_pose.m_wgh.size();
+		memPtr->m_weights = memPtr->m_numWeigts? (float*) serializer->getUniquePointer((void*) &m_pose.m_wgh[0]) : 0;
+		if (memPtr->m_numWeigts)
+		{
+			// weights are written as plain floats
+			int numWeights = memPtr->m_numWeigts;
+			int wghSize = sizeof(float);
+			btChunk* wghChunk = serializer->allocate(wghSize,numWeights);
+			float* wghPtr = (float*) wghChunk->m_oldPtr;
+			for (int i=0;i<numWeights;i++,wghPtr++)
+			{
+				*wghPtr = m_pose.m_wgh[i];
+			}
+			serializer->finalizeChunk(wghChunk,"float",BT_ARRAY_CODE,(void*)&m_pose.m_wgh[0]);
+		}
+
+		serializer->finalizeChunk(chunk,"SoftBodyPoseData",BT_ARRAY_CODE,(void*)&m_pose);
+	}
+
+	//clusters for convex-cluster collision detection
+
+	sbd->m_numClusters = m_clusters.size();
+	sbd->m_clusters = sbd->m_numClusters? (SoftBodyClusterData*) serializer->getUniquePointer((void*)m_clusters[0]) : 0;
+	if (sbd->m_numClusters)
+	{
+		int numElem = sbd->m_numClusters;
+		int sz = sizeof(SoftBodyClusterData);
+		btChunk* chunk = serializer->allocate(sz,numElem);
+		SoftBodyClusterData* memPtr = (SoftBodyClusterData*) chunk->m_oldPtr;
+		for (int i=0;i<numElem;i++,memPtr++)
+		{
+			memPtr->m_adamping= m_clusters[i]->m_adamping;
+			m_clusters[i]->m_av.serializeFloat(memPtr->m_av);
+			memPtr->m_clusterIndex = m_clusters[i]->m_clusterIndex;
+			memPtr->m_collide = m_clusters[i]->m_collide;
+			m_clusters[i]->m_com.serializeFloat(memPtr->m_com);
+			memPtr->m_containsAnchor = m_clusters[i]->m_containsAnchor;
+			m_clusters[i]->m_dimpulses[0].serializeFloat(memPtr->m_dimpulses[0]);
+			m_clusters[i]->m_dimpulses[1].serializeFloat(memPtr->m_dimpulses[1]);
+			m_clusters[i]->m_framexform.serializeFloat(memPtr->m_framexform);
+			memPtr->m_idmass = m_clusters[i]->m_idmass;
+			memPtr->m_imass = m_clusters[i]->m_imass;
+			m_clusters[i]->m_invwi.serializeFloat(memPtr->m_invwi);
+			memPtr->m_ldamping = m_clusters[i]->m_ldamping;
+			m_clusters[i]->m_locii.serializeFloat(memPtr->m_locii);
+			m_clusters[i]->m_lv.serializeFloat(memPtr->m_lv);
+			memPtr->m_matching = m_clusters[i]->m_matching;
+			memPtr->m_maxSelfCollisionImpulse = m_clusters[i]->m_maxSelfCollisionImpulse;
+			memPtr->m_ndamping = m_clusters[i]->m_ndamping;
+			memPtr->m_ldamping = m_clusters[i]->m_ldamping;
+			memPtr->m_adamping = m_clusters[i]->m_adamping;
+			memPtr->m_selfCollisionImpulseFactor = m_clusters[i]->m_selfCollisionImpulseFactor;
+
+			memPtr->m_numFrameRefs = m_clusters[i]->m_framerefs.size();
+			memPtr->m_numMasses = m_clusters[i]->m_masses.size();
+			memPtr->m_numNodes = m_clusters[i]->m_nodes.size();
+
+			memPtr->m_nvimpulses = m_clusters[i]->m_nvimpulses;
+			m_clusters[i]->m_vimpulses[0].serializeFloat(memPtr->m_vimpulses[0]);
+			m_clusters[i]->m_vimpulses[1].serializeFloat(memPtr->m_vimpulses[1]);
+			memPtr->m_ndimpulses = m_clusters[i]->m_ndimpulses;
+
+			// the three variable-length arrays of this cluster follow, each written as its own chunk
+
+			memPtr->m_framerefs = memPtr->m_numFrameRefs? (btVector3FloatData*)serializer->getUniquePointer((void*)&m_clusters[i]->m_framerefs[0]) : 0;
+			if (memPtr->m_framerefs)
+			{
+				int numRefs = memPtr->m_numFrameRefs;
+				int refSize = sizeof(btVector3FloatData);
+				btChunk* refChunk = serializer->allocate(refSize,numRefs);
+				btVector3FloatData* refPtr = (btVector3FloatData*) refChunk->m_oldPtr;
+				for (int j=0;j<numRefs;j++,refPtr++)
+				{
+					m_clusters[i]->m_framerefs[j].serializeFloat(*refPtr);
+				}
+				serializer->finalizeChunk(refChunk,"btVector3FloatData",BT_ARRAY_CODE,(void*)&m_clusters[i]->m_framerefs[0]);
+			}
+
+			memPtr->m_masses = memPtr->m_numMasses ? (float*) serializer->getUniquePointer((void*)&m_clusters[i]->m_masses[0]): 0;
+			if (memPtr->m_masses)
+			{
+				int numMasses = memPtr->m_numMasses;
+				int massSize = sizeof(float);
+				btChunk* massChunk = serializer->allocate(massSize,numMasses);
+				float* massPtr = (float*) massChunk->m_oldPtr;
+				for (int j=0;j<numMasses;j++,massPtr++)
+				{
+					*massPtr = m_clusters[i]->m_masses[j];
+				}
+				serializer->finalizeChunk(massChunk,"float",BT_ARRAY_CODE,(void*)&m_clusters[i]->m_masses[0]);
+			}
+
+			memPtr->m_nodeIndices  = memPtr->m_numNodes ? (int*) serializer->getUniquePointer((void*) &m_clusters[i]->m_nodes) : 0;
+			if (memPtr->m_nodeIndices )
+			{
+				int numNodes = memPtr->m_numNodes;	// one index per cluster node (not per mass)
+				int idxSize = sizeof(int);
+				btChunk* idxChunk = serializer->allocate(idxSize,numNodes);
+				int* idxPtr = (int*) idxChunk->m_oldPtr;
+				for (int j=0;j<numNodes;j++,idxPtr++)
+				{
+					int* indexPtr = m_nodeIndexMap.find(m_clusters[i]->m_nodes[j]);
+					btAssert(indexPtr);
+					*idxPtr = indexPtr ? *indexPtr : -1;	// -1 when the node is unknown and asserts are compiled out
+				}
+				serializer->finalizeChunk(idxChunk,"int",BT_ARRAY_CODE,(void*)&m_clusters[i]->m_nodes);
+			}
+		}
+		serializer->finalizeChunk(chunk,"SoftBodyClusterData",BT_ARRAY_CODE,(void*)m_clusters[0]);
+
+	}
+
+
+	// joints; for each side the last non-null of m_soft, m_collisionObject, m_rigid decides the stored body type and pointer
+	sbd->m_numJoints = m_joints.size();
+	sbd->m_joints = m_joints.size()? (btSoftBodyJointData*) serializer->getUniquePointer((void*)&m_joints[0]) : 0;
+
+	if (sbd->m_joints)
+	{
+		int sz = sizeof(btSoftBodyJointData);
+		int numElem = m_joints.size();
+		btChunk* chunk = serializer->allocate(sz,numElem);
+		btSoftBodyJointData* memPtr = (btSoftBodyJointData*)chunk->m_oldPtr;
+
+		for (int i=0;i<numElem;i++,memPtr++)
+		{
+			memPtr->m_jointType = (int)m_joints[i]->Type();
+			m_joints[i]->m_refs[0].serializeFloat(memPtr->m_refs[0]);
+			m_joints[i]->m_refs[1].serializeFloat(memPtr->m_refs[1]);
+			memPtr->m_cfm = m_joints[i]->m_cfm;
+			memPtr->m_erp = m_joints[i]->m_erp;
+			memPtr->m_split = m_joints[i]->m_split;
+			memPtr->m_delete = m_joints[i]->m_delete;
+
+			for (int j=0;j<4;j++)
+			{
+				memPtr->m_relPosition[0].m_floats[j] = 0.f;
+				memPtr->m_relPosition[1].m_floats[j] = 0.f;
+			}
+			memPtr->m_bodyA = 0;
+			memPtr->m_bodyB = 0;
+			if (m_joints[i]->m_bodies[0].m_soft)
+			{
+				memPtr->m_bodyAtype = BT_JOINT_SOFT_BODY_CLUSTER;
+				memPtr->m_bodyA = serializer->getUniquePointer((void*)m_joints[i]->m_bodies[0].m_soft);
+			}
+			if (m_joints[i]->m_bodies[0].m_collisionObject)
+			{
+				memPtr->m_bodyAtype = BT_JOINT_COLLISION_OBJECT;
+				memPtr->m_bodyA = serializer->getUniquePointer((void*)m_joints[i]->m_bodies[0].m_collisionObject);
+			}
+			if (m_joints[i]->m_bodies[0].m_rigid)
+			{
+				memPtr->m_bodyAtype = BT_JOINT_RIGID_BODY;
+				memPtr->m_bodyA = serializer->getUniquePointer((void*)m_joints[i]->m_bodies[0].m_rigid);
+			}
+
+			if (m_joints[i]->m_bodies[1].m_soft)
+			{
+				memPtr->m_bodyBtype = BT_JOINT_SOFT_BODY_CLUSTER;
+				memPtr->m_bodyB = serializer->getUniquePointer((void*)m_joints[i]->m_bodies[1].m_soft);
+			}
+			if (m_joints[i]->m_bodies[1].m_collisionObject)
+			{
+				memPtr->m_bodyBtype = BT_JOINT_COLLISION_OBJECT;
+				memPtr->m_bodyB = serializer->getUniquePointer((void*)m_joints[i]->m_bodies[1].m_collisionObject);
+			}
+			if (m_joints[i]->m_bodies[1].m_rigid)
+			{
+				memPtr->m_bodyBtype = BT_JOINT_RIGID_BODY;
+				memPtr->m_bodyB = serializer->getUniquePointer((void*)m_joints[i]->m_bodies[1].m_rigid);
+			}
+		}
+		serializer->finalizeChunk(chunk,"btSoftBodyJointData",BT_ARRAY_CODE,(void*) &m_joints[0]);
+	}
+
+
+	return btSoftBodyDataName;
+}
+
diff --git a/Engine/lib/bullet/src/BulletSoftBody/btSoftBody.h b/Engine/lib/bullet/src/BulletSoftBody/btSoftBody.h
index d69e835f1..2116c34f0 100644
--- a/Engine/lib/bullet/src/BulletSoftBody/btSoftBody.h
+++ b/Engine/lib/bullet/src/BulletSoftBody/btSoftBody.h
@@ -27,8 +27,17 @@ subject to the following restrictions:
 #include "btSparseSDF.h"
 #include "BulletCollision/BroadphaseCollision/btDbvt.h"
 
+//#ifdef BT_USE_DOUBLE_PRECISION
+//#define btRigidBodyData	btRigidBodyDoubleData
+//#define btRigidBodyDataName	"btRigidBodyDoubleData"
+//#else
+#define btSoftBodyData	btSoftBodyFloatData
+#define btSoftBodyDataName	"btSoftBodyFloatData"
+//#endif //BT_USE_DOUBLE_PRECISION
+
 class btBroadphaseInterface;
 class btDispatcher;
+class btSoftBodySolver;
 
 /* btSoftBodyWorldInfo	*/ 
 struct	btSoftBodyWorldInfo
@@ -41,6 +50,17 @@ struct	btSoftBodyWorldInfo
 	btDispatcher*	m_dispatcher;
 	btVector3				m_gravity;
 	btSparseSdf<3>			m_sparsesdf;
+
+	btSoftBodyWorldInfo()	// Default constructor: gives each member named in the initializer list a defined starting value
+		:air_density((btScalar)1.2),
+		water_density(0),	// water parameters start zeroed
+		water_offset(0),
+		water_normal(0,0,0),
+		m_broadphase(0),	// initialized to 0 (null)
+		m_dispatcher(0),	// initialized to 0 (null)
+		m_gravity(0,-10,0)	// default gravity: -10 along the Y axis
+	{
+	}	// m_sparsesdf is not in the initializer list, so it is default-constructed
 };	
 
 
@@ -49,7 +69,10 @@ struct	btSoftBodyWorldInfo
 class	btSoftBody : public btCollisionObject
 {
 public:
-	btAlignedObjectArray<class btCollisionObject*> m_collisionDisabledObjects;
+	btAlignedObjectArray<const class btCollisionObject*> m_collisionDisabledObjects;
+
+	// The solver object that handles this soft body
+	btSoftBodySolver *m_softBodySolver;
 
 	//
 	// Enumerations
@@ -57,11 +80,13 @@ public:
 
 	///eAeroModel 
 	struct eAeroModel { enum _ {
-		V_Point,	///Vertex normals are oriented toward velocity
-		V_TwoSided,	///Vertex normals are fliped to match velocity	
-		V_OneSided,	///Vertex normals are taken as it is	
-		F_TwoSided,	///Face normals are fliped to match velocity
-		F_OneSided,	///Face normals are taken as it is
+		V_Point,			///Vertex normals are oriented toward velocity
+		V_TwoSided,			///Vertex normals are flipped to match velocity
+		V_TwoSidedLiftDrag, ///Vertex normals are flipped to match velocity, and lift and drag forces are applied
+		V_OneSided,			///Vertex normals are used as they are
+		F_TwoSided,			///Face normals are flipped to match velocity
+		F_TwoSidedLiftDrag,	///Face normals are flipped to match velocity, and lift and drag forces are applied
+		F_OneSided,			///Face normals are used as they are
 		END
 	};};
 
@@ -94,6 +119,7 @@ public:
 		Node,
 		Link,
 		Face,
+		Tetra,
 		END
 	};};
 
@@ -156,7 +182,7 @@ public:
 	/* sCti is Softbody contact info	*/ 
 	struct	sCti
 	{
-		btCollisionObject*	m_colObj;		/* Rigid body			*/ 
+		const btCollisionObject*	m_colObj;		/* Rigid body			*/ 
 		btVector3		m_normal;	/* Outward normal		*/ 
 		btScalar		m_offset;	/* Offset from origin	*/ 
 	};	
@@ -259,6 +285,7 @@ public:
 		Node*					m_node;			// Node pointer
 		btVector3				m_local;		// Anchor position in body space
 		btRigidBody*			m_body;			// Body
+		btScalar				m_influence;
 		btMatrix3x3				m_c0;			// Impulse matrix
 		btVector3				m_c1;			// Relative anchor
 		btScalar				m_c2;			// ima*dt
@@ -287,9 +314,9 @@ public:
 	};
 	/* Cluster		*/ 
 	struct	Cluster
-	{		
-		btAlignedObjectArray<Node*>	m_nodes;		
+	{
 		tScalarArray				m_masses;
+		btAlignedObjectArray<Node*>	m_nodes;		
 		tVector3Array				m_framerefs;
 		btTransform					m_framexform;
 		btScalar					m_idmass;
@@ -347,18 +374,22 @@ public:
 	{
 		Cluster*			m_soft;
 		btRigidBody*		m_rigid;
-		btCollisionObject*	m_collisionObject;
+		const btCollisionObject*	m_collisionObject;
 
 		Body() : m_soft(0),m_rigid(0),m_collisionObject(0)				{}
 		Body(Cluster* p) : m_soft(p),m_rigid(0),m_collisionObject(0)	{}
-		Body(btCollisionObject* colObj) : m_soft(0),m_collisionObject(colObj)
+		Body(const btCollisionObject* colObj) : m_soft(0),m_collisionObject(colObj)
 		{
-			m_rigid = btRigidBody::upcast(m_collisionObject);
+			m_rigid = (btRigidBody*)btRigidBody::upcast(m_collisionObject);
 		}
 
 		void						activate() const
 		{
-			if(m_rigid) m_rigid->activate();
+			if(m_rigid) 
+				m_rigid->activate();
+			if (m_collisionObject)
+				m_collisionObject->activate();
+
 		}
 		const btMatrix3x3&			invWorldInertia() const
 		{
@@ -376,7 +407,7 @@ public:
 		const btTransform&			xform() const
 		{
 			static const btTransform	identity=btTransform::getIdentity();		
-			if(m_collisionObject) return(m_collisionObject->getInterpolationWorldTransform());
+			if(m_collisionObject) return(m_collisionObject->getWorldTransform());
 			if(m_soft)	return(m_soft->m_framexform);
 			return(identity);
 		}
@@ -450,7 +481,7 @@ public:
 	struct	Joint
 	{
 		struct eType { enum _ {
-			Linear,
+			Linear=0,
 			Angular,
 			Contact
 		};};
@@ -589,7 +620,7 @@ public:
 	};
 
 	//
-	// Typedef's
+	// Typedefs
 	//
 
 	typedef void								(*psolver_t)(btSoftBody*,btScalar,btScalar);
@@ -639,14 +670,22 @@ public:
 
 	btTransform			m_initialWorldTransform;
 
+	btVector3			m_windVelocity;
+	
+	btScalar        m_restLengthScale;
+	
 	//
 	// Api
 	//
 
 	/* ctor																	*/ 
-	btSoftBody(	btSoftBodyWorldInfo* worldInfo,int node_count,
-		const btVector3* x,
-		const btScalar* m);
+	btSoftBody(	btSoftBodyWorldInfo* worldInfo,int node_count,		const btVector3* x,		const btScalar* m);
+
+	/* ctor																	*/ 
+	btSoftBody(	btSoftBodyWorldInfo* worldInfo);
+
+	void	initDefaults();
+
 	/* dtor																	*/ 
 	virtual ~btSoftBody();
 	/* Check for existing link												*/ 
@@ -720,7 +759,8 @@ public:
 
 	/* Append anchor														*/ 
 	void				appendAnchor(	int node,
-		btRigidBody* body, bool disableCollisionBetweenLinkedBodies=false);
+		btRigidBody* body, bool disableCollisionBetweenLinkedBodies=false,btScalar influence = 1);
+	void			appendAnchor(int node,btRigidBody* body, const btVector3& localPivot,bool disableCollisionBetweenLinkedBodies=false,btScalar influence = 1);
 	/* Append linear joint													*/ 
 	void				appendLinearJoint(const LJoint::Specs& specs,Cluster* body0,Body body1);
 	void				appendLinearJoint(const LJoint::Specs& specs,Body body=Body());
@@ -734,6 +774,12 @@ public:
 	/* Add force (or gravity) to a node of the body							*/ 
 	void				addForce(		const btVector3& force,
 		int node);
+	/* Add aero force to a node of the body */
+	void			    addAeroForceToNode(const btVector3& windVelocity,int nodeIndex);
+
+	/* Add aero force to a face of the body */
+	void			    addAeroForceToFace(const btVector3& windVelocity,int faceIndex);
+
 	/* Add velocity to the entire body										*/ 
 	void				addVelocity(	const btVector3& velocity);
 
@@ -767,9 +813,15 @@ public:
 	void				rotate(	const btQuaternion& rot);
 	/* Scale																*/ 
 	void				scale(	const btVector3& scl);
+	/* Get link resting lengths scale										*/
+	btScalar			getRestLengthScale();
+	/* Scale resting length of all springs									*/
+	void				setRestLengthScale(btScalar restLength);
 	/* Set current state as pose											*/ 
 	void				setPose(		bool bvolume,
 		bool bframe);
+	/* Set current link lengths as resting lengths							*/ 
+	void				resetLinkRestLengths();
 	/* Return the volume													*/ 
 	btScalar			getVolume() const;
 	/* Cluster count														*/ 
@@ -824,9 +876,52 @@ public:
 	/* integrateMotion														*/ 
 	void				integrateMotion();
 	/* defaultCollisionHandlers												*/ 
-	void				defaultCollisionHandler(btCollisionObject* pco);
+	void				defaultCollisionHandler(const btCollisionObjectWrapper* pcoWrap);
 	void				defaultCollisionHandler(btSoftBody* psb);
 
+
+
+	//
+	// Functionality to deal with new accelerated solvers.
+	//
+
+	/**
+	 * Set a wind velocity for interaction with the air.
+	 */
+	void setWindVelocity( const btVector3 &velocity );
+
+
+	/**
+	 * Return the wind velocity for interaction with the air.
+	 */
+	const btVector3& getWindVelocity();
+
+	//
+	// Set the solver that handles this soft body (stores the pointer only; nothing is allocated or freed here).
+	// Should not be allowed to get out of sync with reality.
+	// Currently called internally on addition to the world.
+	void setSoftBodySolver( btSoftBodySolver *softBodySolver )
+	{
+		m_softBodySolver = softBodySolver;
+	}
+
+	//
+	// Return the solver that handles this soft body
+	// (non-const overload; returns the pointer stored by setSoftBodySolver)
+	btSoftBodySolver *getSoftBodySolver()
+	{
+		return m_softBodySolver;
+	}
+
+	//
+	// Return the solver that handles this soft body
+	// (const overload: callable on a const btSoftBody, but still returns a non-const solver pointer)
+	btSoftBodySolver *getSoftBodySolver() const
+	{
+		return m_softBodySolver;
+	}
+
+
 	//
 	// Cast
 	//
@@ -863,11 +958,13 @@ public:
 		btScalar& mint,eFeature::_& feature,int& index,bool bcountonly) const;
 	void				initializeFaceTree();
 	btVector3			evaluateCom() const;
-	bool				checkContact(btCollisionObject* colObj,const btVector3& x,btScalar margin,btSoftBody::sCti& cti) const;
+	bool				checkContact(const btCollisionObjectWrapper* colObjWrap,const btVector3& x,btScalar margin,btSoftBody::sCti& cti) const;
 	void				updateNormals();
 	void				updateBounds();
 	void				updatePose();
 	void				updateConstants();
+	void				updateLinkConstants();
+	void				updateArea(bool averageArea = true);
 	void				initializeClusters();
 	void				updateClusters();
 	void				cleanupClusters();
@@ -884,8 +981,18 @@ public:
 	static psolver_t	getSolver(ePSolver::_ solver);
 	static vsolver_t	getSolver(eVSolver::_ solver);
 
+
+	virtual	int	calculateSerializeBufferSize()	const;
+
+	///fills the dataBuffer and returns the struct name (and 0 on failure)
+	virtual	const char*	serialize(void* dataBuffer,  class btSerializer* serializer) const;
+
+	//virtual void serializeSingleObject(class btSerializer* serializer) const;
+
+
 };
 
 
 
+
 #endif //_BT_SOFT_BODY_H
diff --git a/Engine/lib/bullet/src/BulletSoftBody/btSoftBodyConcaveCollisionAlgorithm.cpp b/Engine/lib/bullet/src/BulletSoftBody/btSoftBodyConcaveCollisionAlgorithm.cpp
index 02e8186be..6e94d0a81 100644
--- a/Engine/lib/bullet/src/BulletSoftBody/btSoftBodyConcaveCollisionAlgorithm.cpp
+++ b/Engine/lib/bullet/src/BulletSoftBody/btSoftBodyConcaveCollisionAlgorithm.cpp
@@ -25,7 +25,7 @@ subject to the following restrictions:
 #include "BulletCollision/CollisionShapes/btSphereShape.h"
 #include "BulletCollision/CollisionShapes/btTetrahedronShape.h"
 #include "BulletCollision/CollisionShapes/btConvexHullShape.h"
-
+#include "BulletCollision/CollisionDispatch/btCollisionObjectWrapper.h"
 
 
 #include "LinearMath/btIDebugDraw.h"
@@ -34,10 +34,10 @@ subject to the following restrictions:
 
 #define BT_SOFTBODY_TRIANGLE_EXTRUSION btScalar(0.06)//make this configurable
 
-btSoftBodyConcaveCollisionAlgorithm::btSoftBodyConcaveCollisionAlgorithm( const btCollisionAlgorithmConstructionInfo& ci, btCollisionObject* body0,btCollisionObject* body1,bool isSwapped)
+btSoftBodyConcaveCollisionAlgorithm::btSoftBodyConcaveCollisionAlgorithm( const btCollisionAlgorithmConstructionInfo& ci, const btCollisionObjectWrapper* body0Wrap,const btCollisionObjectWrapper* body1Wrap,bool isSwapped)
 : btCollisionAlgorithm(ci),
 m_isSwapped(isSwapped),
-m_btSoftBodyTriangleCallback(ci.m_dispatcher1,body0,body1,isSwapped)
+m_btSoftBodyTriangleCallback(ci.m_dispatcher1,body0Wrap,body1Wrap,isSwapped)
 {
 }
 
@@ -49,12 +49,12 @@ btSoftBodyConcaveCollisionAlgorithm::~btSoftBodyConcaveCollisionAlgorithm()
 
 
 
-btSoftBodyTriangleCallback::btSoftBodyTriangleCallback(btDispatcher*  dispatcher,btCollisionObject* body0,btCollisionObject* body1,bool isSwapped):
+btSoftBodyTriangleCallback::btSoftBodyTriangleCallback(btDispatcher*  dispatcher,const btCollisionObjectWrapper* body0Wrap,const btCollisionObjectWrapper* body1Wrap,bool isSwapped):
 m_dispatcher(dispatcher),
 m_dispatchInfoPtr(0)
 {
-	m_softBody = (btSoftBody*) (isSwapped? body1:body0);
-	m_triBody = isSwapped? body0:body1;
+	m_softBody = (isSwapped? (btSoftBody*)body1Wrap->getCollisionObject():(btSoftBody*)body0Wrap->getCollisionObject());
+	m_triBody = isSwapped? body0Wrap->getCollisionObject():body1Wrap->getCollisionObject();
 
 	//
 	// create the manifold from the dispatcher 'manifold pool'
@@ -90,15 +90,15 @@ void btSoftBodyTriangleCallback::processTriangle(btVector3* triangle,int partId,
 {
 	//just for debugging purposes
 	//printf("triangle %d",m_triangleCount++);
-	btCollisionObject* ob = static_cast<btCollisionObject*>(m_triBody);
+	
 	btCollisionAlgorithmConstructionInfo ci;
 	ci.m_dispatcher1 = m_dispatcher;
 
 	///debug drawing of the overlapping triangles
-	if (m_dispatchInfoPtr && m_dispatchInfoPtr->m_debugDraw && m_dispatchInfoPtr->m_debugDraw->getDebugMode() &btIDebugDraw::DBG_DrawWireframe)
+	if (m_dispatchInfoPtr && m_dispatchInfoPtr->m_debugDraw && (m_dispatchInfoPtr->m_debugDraw->getDebugMode() &btIDebugDraw::DBG_DrawWireframe))
 	{
-		btVector3 color(255,255,0);
-		btTransform& tr = ob->getWorldTransform();
+		btVector3 color(1,1,0);
+		const btTransform& tr = m_triBody->getWorldTransform();
 		m_dispatchInfoPtr->m_debugDraw->drawLine(tr(triangle[0]),tr(triangle[1]),color);
 		m_dispatchInfoPtr->m_debugDraw->drawLine(tr(triangle[1]),tr(triangle[2]),color);
 		m_dispatchInfoPtr->m_debugDraw->drawLine(tr(triangle[2]),tr(triangle[0]),color);
@@ -115,18 +115,18 @@ void btSoftBodyTriangleCallback::processTriangle(btVector3* triangle,int partId,
 		btAssert(tm);
 
 		//copy over user pointers to temporary shape
-		tm->setUserPointer(ob->getRootCollisionShape()->getUserPointer());
+		tm->setUserPointer(m_triBody->getCollisionShape()->getUserPointer());
 
-		btCollisionShape* tmpShape = ob->getCollisionShape();
-		ob->internalSetTemporaryCollisionShape( tm );
+		btCollisionObjectWrapper softBody(0,m_softBody->getCollisionShape(),m_softBody,m_softBody->getWorldTransform());
+		//btCollisionObjectWrapper triBody(0,tm, ob, btTransform::getIdentity());//ob->getWorldTransform());//??
+		btCollisionObjectWrapper triBody(0,tm, m_triBody, m_triBody->getWorldTransform());
 
+		btCollisionAlgorithm* colAlgo = ci.m_dispatcher1->findAlgorithm(&softBody,&triBody,0);//m_manifoldPtr);
 
-		btCollisionAlgorithm* colAlgo = ci.m_dispatcher1->findAlgorithm(m_softBody,m_triBody,0);//m_manifoldPtr);
-
-		colAlgo->processCollision(m_softBody,m_triBody,*m_dispatchInfoPtr,m_resultOut);
+		colAlgo->processCollision(&softBody,&triBody,*m_dispatchInfoPtr,m_resultOut);
 		colAlgo->~btCollisionAlgorithm();
 		ci.m_dispatcher1->freeCollisionAlgorithm(colAlgo);
-		ob->internalSetTemporaryCollisionShape( tmpShape);
+		
 		return;
 	}
 
@@ -158,24 +158,18 @@ void btSoftBodyTriangleCallback::processTriangle(btVector3* triangle,int partId,
 		//	tm.setMargin(m_collisionMarginTriangle);
 
 		//copy over user pointers to temporary shape
-		tm->setUserPointer(ob->getRootCollisionShape()->getUserPointer());
+		tm->setUserPointer(m_triBody->getCollisionShape()->getUserPointer());
 
-		btCollisionShape* tmpShape = ob->getCollisionShape();
-		ob->internalSetTemporaryCollisionShape( tm );
+		
+		btCollisionObjectWrapper softBody(0,m_softBody->getCollisionShape(),m_softBody,m_softBody->getWorldTransform());
+		btCollisionObjectWrapper triBody(0,tm, m_triBody, m_triBody->getWorldTransform());//btTransform::getIdentity());//??
 
+		btCollisionAlgorithm* colAlgo = ci.m_dispatcher1->findAlgorithm(&softBody,&triBody,0);//m_manifoldPtr);
 
-		btCollisionAlgorithm* colAlgo = ci.m_dispatcher1->findAlgorithm(m_softBody,m_triBody,0);//m_manifoldPtr);
-		///this should use the btDispatcher, so the actual registered algorithm is used
-		//		btConvexConvexAlgorithm cvxcvxalgo(m_manifoldPtr,ci,m_convexBody,m_triBody);
-
-		//m_resultOut->setShapeIdentifiersB(partId,triangleIndex);
-		//		cvxcvxalgo.processCollision(m_convexBody,m_triBody,*m_dispatchInfoPtr,m_resultOut);
-		colAlgo->processCollision(m_softBody,m_triBody,*m_dispatchInfoPtr,m_resultOut);
+		colAlgo->processCollision(&softBody,&triBody,*m_dispatchInfoPtr,m_resultOut);
 		colAlgo->~btCollisionAlgorithm();
 		ci.m_dispatcher1->freeCollisionAlgorithm(colAlgo);
 
-
-		ob->internalSetTemporaryCollisionShape( tmpShape );
 		triIndex.m_childShape = tm;
 		m_shapeCache.insert(triKey,triIndex);
 
@@ -187,7 +181,7 @@ void btSoftBodyTriangleCallback::processTriangle(btVector3* triangle,int partId,
 
 
 
-void	btSoftBodyTriangleCallback::setTimeStepAndCounters(btScalar collisionMarginTriangle,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut)
+void	btSoftBodyTriangleCallback::setTimeStepAndCounters(btScalar collisionMarginTriangle,const btCollisionObjectWrapper* triBodyWrap, const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut)
 {
 	m_dispatchInfoPtr = &dispatchInfo;
 	m_collisionMarginTriangle = collisionMarginTriangle+btScalar(BT_SOFTBODY_TRIANGLE_EXTRUSION);
@@ -204,7 +198,7 @@ void	btSoftBodyTriangleCallback::setTimeStepAndCounters(btScalar collisionMargin
 	softTransform.setOrigin(softBodyCenter);
 
 	btTransform convexInTriangleSpace;
-	convexInTriangleSpace = m_triBody->getWorldTransform().inverse() * softTransform;
+	convexInTriangleSpace = triBodyWrap->getWorldTransform().inverse() * softTransform;
 	btTransformAabb(halfExtents,m_collisionMarginTriangle,convexInTriangleSpace,m_aabbMin,m_aabbMax);
 }
 
@@ -214,33 +208,28 @@ void btSoftBodyConcaveCollisionAlgorithm::clearCache()
 
 }
 
-void btSoftBodyConcaveCollisionAlgorithm::processCollision (btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut)
+void btSoftBodyConcaveCollisionAlgorithm::processCollision (const btCollisionObjectWrapper* body0Wrap,const btCollisionObjectWrapper* body1Wrap,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut)
 {
 
 
 	//btCollisionObject* convexBody = m_isSwapped ? body1 : body0;
-	btCollisionObject* triBody = m_isSwapped ? body0 : body1;
+	const btCollisionObjectWrapper* triBody = m_isSwapped ? body0Wrap : body1Wrap;
 
 	if (triBody->getCollisionShape()->isConcave())
 	{
 
 
-		btCollisionObject*	triOb = triBody;
-		btConcaveShape* concaveShape = static_cast<btConcaveShape*>( triOb->getCollisionShape());
+		const btCollisionObject*	triOb = triBody->getCollisionObject();
+		const btConcaveShape* concaveShape = static_cast<const btConcaveShape*>( triOb->getCollisionShape());
 
 		//	if (convexBody->getCollisionShape()->isConvex())
 		{
 			btScalar collisionMarginTriangle = concaveShape->getMargin();
 
 			//			resultOut->setPersistentManifold(m_btSoftBodyTriangleCallback.m_manifoldPtr);
-			m_btSoftBodyTriangleCallback.setTimeStepAndCounters(collisionMarginTriangle,dispatchInfo,resultOut);
-
-			//Disable persistency. previously, some older algorithm calculated all contacts in one go, so you can clear it here.
-			//m_dispatcher->clearManifold(m_btSoftBodyTriangleCallback.m_manifoldPtr);
-
-			//			m_btSoftBodyTriangleCallback.m_manifoldPtr->setBodies(convexBody,triBody);
-
+			m_btSoftBodyTriangleCallback.setTimeStepAndCounters(collisionMarginTriangle,triBody,dispatchInfo,resultOut);
 
+		
 			concaveShape->processAllTriangles( &m_btSoftBodyTriangleCallback,m_btSoftBodyTriangleCallback.getAabbMin(),m_btSoftBodyTriangleCallback.getAabbMax());
 
 			//	resultOut->refreshContactPoints();
diff --git a/Engine/lib/bullet/src/BulletSoftBody/btSoftBodyConcaveCollisionAlgorithm.h b/Engine/lib/bullet/src/BulletSoftBody/btSoftBodyConcaveCollisionAlgorithm.h
index a6ea33717..11c7b88f9 100644
--- a/Engine/lib/bullet/src/BulletSoftBody/btSoftBodyConcaveCollisionAlgorithm.h
+++ b/Engine/lib/bullet/src/BulletSoftBody/btSoftBodyConcaveCollisionAlgorithm.h
@@ -13,8 +13,8 @@ subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-#ifndef SOFT_BODY_CONCAVE_COLLISION_ALGORITHM_H
-#define SOFT_BODY_CONCAVE_COLLISION_ALGORITHM_H
+#ifndef BT_SOFT_BODY_CONCAVE_COLLISION_ALGORITHM_H
+#define BT_SOFT_BODY_CONCAVE_COLLISION_ALGORITHM_H
 
 #include "BulletCollision/BroadphaseCollision/btCollisionAlgorithm.h"
 #include "BulletCollision/BroadphaseCollision/btDispatcher.h"
@@ -45,7 +45,9 @@ struct btTriIndex
 	int	getTriangleIndex() const
 	{
 		// Get only the lower bits where the triangle index is stored
-		return (m_PartIdTriangleIndex&~((~0)<<(31-MAX_NUM_PARTS_IN_BITS)));
+		unsigned int x = 0;	// unsigned zero, used only so the all-ones mask below is built in unsigned arithmetic
+		unsigned int y = (~(x&0))<<(31-MAX_NUM_PARTS_IN_BITS);	// ones in every bit at or above position (31-MAX_NUM_PARTS_IN_BITS), zeros below it
+		return (m_PartIdTriangleIndex&~(y));	// keep only the bits below that position
 	}
 	int	getPartId() const
 	{
@@ -63,7 +65,7 @@ struct btTriIndex
 class btSoftBodyTriangleCallback : public btTriangleCallback
 {
 	btSoftBody* m_softBody;
-	btCollisionObject* m_triBody;
+	const btCollisionObject* m_triBody;
 
 	btVector3	m_aabbMin;
 	btVector3	m_aabbMax ;
@@ -81,9 +83,9 @@ public:
 
 	//	btPersistentManifold*	m_manifoldPtr;
 
-	btSoftBodyTriangleCallback(btDispatcher* dispatcher,btCollisionObject* body0,btCollisionObject* body1,bool isSwapped);
+	btSoftBodyTriangleCallback(btDispatcher* dispatcher,const btCollisionObjectWrapper* body0Wrap,const btCollisionObjectWrapper* body1Wrap,bool isSwapped);
 
-	void	setTimeStepAndCounters(btScalar collisionMarginTriangle,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
+	void	setTimeStepAndCounters(btScalar collisionMarginTriangle,const btCollisionObjectWrapper* triObjWrap,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
 
 	virtual ~btSoftBodyTriangleCallback();
 
@@ -115,11 +117,11 @@ class btSoftBodyConcaveCollisionAlgorithm  : public btCollisionAlgorithm
 
 public:
 
-	btSoftBodyConcaveCollisionAlgorithm( const btCollisionAlgorithmConstructionInfo& ci,btCollisionObject* body0,btCollisionObject* body1,bool isSwapped);
+	btSoftBodyConcaveCollisionAlgorithm( const btCollisionAlgorithmConstructionInfo& ci,const btCollisionObjectWrapper* body0Wrap,const btCollisionObjectWrapper* body1Wrap,bool isSwapped);
 
 	virtual ~btSoftBodyConcaveCollisionAlgorithm();
 
-	virtual void processCollision (btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
+	virtual void processCollision (const btCollisionObjectWrapper* body0Wrap,const btCollisionObjectWrapper* body1Wrap,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
 
 	btScalar	calculateTimeOfImpact(btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
 
@@ -132,22 +134,22 @@ public:
 
 	struct CreateFunc :public 	btCollisionAlgorithmCreateFunc
 	{
-		virtual	btCollisionAlgorithm* CreateCollisionAlgorithm(btCollisionAlgorithmConstructionInfo& ci, btCollisionObject* body0,btCollisionObject* body1)
+		virtual	btCollisionAlgorithm* CreateCollisionAlgorithm(btCollisionAlgorithmConstructionInfo& ci, const btCollisionObjectWrapper* body0Wrap,const btCollisionObjectWrapper* body1Wrap)
 		{
 			void* mem = ci.m_dispatcher1->allocateCollisionAlgorithm(sizeof(btSoftBodyConcaveCollisionAlgorithm));
-			return new(mem) btSoftBodyConcaveCollisionAlgorithm(ci,body0,body1,false);
+			return new(mem) btSoftBodyConcaveCollisionAlgorithm(ci,body0Wrap,body1Wrap,false);
 		}
 	};
 
 	struct SwappedCreateFunc :public 	btCollisionAlgorithmCreateFunc
 	{
-		virtual	btCollisionAlgorithm* CreateCollisionAlgorithm(btCollisionAlgorithmConstructionInfo& ci, btCollisionObject* body0,btCollisionObject* body1)
+		virtual	btCollisionAlgorithm* CreateCollisionAlgorithm(btCollisionAlgorithmConstructionInfo& ci, const btCollisionObjectWrapper* body0Wrap,const btCollisionObjectWrapper* body1Wrap)
 		{
 			void* mem = ci.m_dispatcher1->allocateCollisionAlgorithm(sizeof(btSoftBodyConcaveCollisionAlgorithm));
-			return new(mem) btSoftBodyConcaveCollisionAlgorithm(ci,body0,body1,true);
+			return new(mem) btSoftBodyConcaveCollisionAlgorithm(ci,body0Wrap,body1Wrap,true);
 		}
 	};
 
 };
 
-#endif //SOFT_BODY_CONCAVE_COLLISION_ALGORITHM_H
+#endif //BT_SOFT_BODY_CONCAVE_COLLISION_ALGORITHM_H
diff --git a/Engine/lib/bullet/src/BulletSoftBody/btSoftBodyData.h b/Engine/lib/bullet/src/BulletSoftBody/btSoftBodyData.h
new file mode 100644
index 000000000..87d8841cf
--- /dev/null
+++ b/Engine/lib/bullet/src/BulletSoftBody/btSoftBodyData.h
@@ -0,0 +1,217 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_SOFTBODY_FLOAT_DATA
+#define BT_SOFTBODY_FLOAT_DATA
+
+#include "BulletCollision/CollisionDispatch/btCollisionObject.h"
+#include "BulletDynamics/Dynamics/btRigidBody.h"
+
+
+struct	SoftBodyMaterialData	// Serialization record for a soft body material: three stiffness values plus flags
+{
+	float	m_linearStiffness;
+	float	m_angularStiffness;
+	float	m_volumeStiffness;
+	int		m_flags;	// presumably btSoftBody::fMaterial bits - TODO confirm
+};
+
+struct	SoftBodyNodeData	// Serialization record for one soft body node (float precision)
+{
+	SoftBodyMaterialData		*m_material;
+	btVector3FloatData			m_position;
+	btVector3FloatData			m_previousPosition;
+	btVector3FloatData			m_velocity;
+	btVector3FloatData			m_accumulatedForce;
+	btVector3FloatData			m_normal;
+	float						m_inverseMass;
+	float						m_area;
+	int							m_attach;
+	int							m_pad;	// padding
+};
+
+struct	SoftBodyLinkData	// Serialization record for one link between two nodes
+{
+	SoftBodyMaterialData	*m_material;
+	int						m_nodeIndices[2];			// Indices of the two linked nodes (ints, not pointers)
+	float					m_restLength;			// Rest length
+	int						m_bbending;		// Bending link
+};
+
+struct	SoftBodyFaceData	// Serialization record for one triangular face
+{
+	btVector3FloatData		m_normal;		// Normal
+	SoftBodyMaterialData	*m_material;
+	int						m_nodeIndices[3];			// Indices of the three face nodes (ints, not pointers)
+	float					m_restArea;			// Rest area
+};	
+
+struct	SoftBodyTetraData	// Serialization record for one tetrahedron
+{
+	btVector3FloatData		m_c0[4];		// gradients
+	SoftBodyMaterialData	*m_material;
+	int						m_nodeIndices[4];			// Indices of the four tetrahedron nodes (ints, not pointers)
+	float					m_restVolume;			// Rest volume
+	float					m_c1;			// (4*kVST)/(im0+im1+im2+im3)
+	float					m_c2;			// m_c1/sum(|g0..3|^2)
+	int						m_pad;			// padding
+};
+
+struct	SoftRigidAnchorData	// Serialization record for a node-to-rigid-body anchor. NOTE(review): has no field for btSoftBody::Anchor::m_influence - confirm that is intentional
+{
+	btMatrix3x3FloatData	m_c0;			// Impulse matrix
+	btVector3FloatData		m_c1;			// Relative anchor
+	btVector3FloatData		m_localFrame;		// Anchor position in body space
+	btRigidBodyData			*m_rigidBody;
+	int						m_nodeIndex;			// Index of the anchored node (an int, not a pointer)
+	float					m_c2;			// ima*dt
+};
+
+
+
+struct	SoftBodyConfigData	// Serialization record for the soft body configuration parameters
+{
+	int					m_aeroModel;		// Aerodynamic model (default: V_Point)
+	float				m_baumgarte;			// Velocities correction factor (Baumgarte)
+	float				m_damping;			// Damping coefficient [0,1]
+	float				m_drag;			// Drag coefficient [0,+inf]
+	float				m_lift;			// Lift coefficient [0,+inf]
+	float				m_pressure;			// Pressure coefficient [-inf,+inf]
+	float				m_volume;			// Volume conservation coefficient [0,+inf]
+	float				m_dynamicFriction;			// Dynamic friction coefficient [0,1]
+	float				m_poseMatch;			// Pose matching coefficient [0,1]
+	float				m_rigidContactHardness;			// Rigid contacts hardness [0,1]
+	float				m_kineticContactHardness;			// Kinetic contacts hardness [0,1]
+	float				m_softContactHardness;			// Soft contacts hardness [0,1]
+	float				m_anchorHardness;			// Anchors hardness [0,1]
+	float				m_softRigidClusterHardness;		// Soft vs rigid hardness [0,1] (cluster only)
+	float				m_softKineticClusterHardness;		// Soft vs kinetic hardness [0,1] (cluster only)
+	float				m_softSoftClusterHardness;		// Soft vs soft hardness [0,1] (cluster only)
+	float				m_softRigidClusterImpulseSplit;	// Soft vs rigid impulse split [0,1] (cluster only)
+	float				m_softKineticClusterImpulseSplit;	// Soft vs kinetic impulse split [0,1] (cluster only)
+	float				m_softSoftClusterImpulseSplit;	// Soft vs soft impulse split [0,1] (cluster only)
+	float				m_maxVolume;		// Maximum volume ratio for pose
+	float				m_timeScale;		// Time scale
+	int					m_velocityIterations;	// Velocities solver iterations
+	int					m_positionIterations;	// Positions solver iterations
+	int					m_driftIterations;	// Drift solver iterations
+	int					m_clusterIterations;	// Cluster solver iterations
+	int					m_collisionFlags;	// Collisions flags
+};
+
+struct	SoftBodyPoseData	// Serialization record for the soft body pose
+{
+	btMatrix3x3FloatData	m_rot;			// Rotation
+	btMatrix3x3FloatData	m_scale;			// Scale
+	btMatrix3x3FloatData	m_aqq;			// Base scaling
+	btVector3FloatData		m_com;			// COM
+
+	btVector3FloatData		*m_positions;			// Reference positions
+	float					*m_weights;	// Weights
+	int						m_numPositions;
+	int						m_numWeigts;	// NOTE(review): "Weigts" is misspelled; presumably the name must stay as-is to match the serialization DNA - confirm before renaming
+
+	int						m_bvolume;		// Is valid
+	int						m_bframe;		// Is frame
+	float					m_restVolume;		// Rest volume
+	int						m_pad;		// padding
+};
+
+struct	SoftBodyClusterData	// Serialization record for one soft body cluster
+{
+		btTransformFloatData		m_framexform;
+		btMatrix3x3FloatData		m_locii;
+		btMatrix3x3FloatData		m_invwi;
+		btVector3FloatData			m_com;
+		btVector3FloatData			m_vimpulses[2];
+		btVector3FloatData			m_dimpulses[2];
+		btVector3FloatData			m_lv;
+		btVector3FloatData			m_av;
+		
+		btVector3FloatData			*m_framerefs;
+		int							*m_nodeIndices;	// node indices (ints, not node pointers)
+		float						*m_masses;
+
+		int							m_numFrameRefs;	// presumably the entry counts for the three arrays above - TODO confirm
+		int							m_numNodes;
+		int							m_numMasses;
+
+		float						m_idmass;
+		float						m_imass;
+		int							m_nvimpulses;
+		int							m_ndimpulses;
+		float						m_ndamping;
+		float						m_ldamping;
+		float						m_adamping;
+		float						m_matching;
+		float						m_maxSelfCollisionImpulse;
+		float						m_selfCollisionImpulseFactor;
+		int							m_containsAnchor;
+		int							m_collide;
+		int							m_clusterIndex;
+};
+
+
+enum	btSoftJointBodyType	// Tag stored in btSoftBodyJointData::m_bodyAtype / m_bodyBtype; values start at 1
+{
+	BT_JOINT_SOFT_BODY_CLUSTER=1,	// joint body is a soft body cluster
+	BT_JOINT_RIGID_BODY,	// joint body is a rigid body
+	BT_JOINT_COLLISION_OBJECT	// joint body is a plain collision object
+};
+
+struct	btSoftBodyJointData	// Serialization record for one soft body joint
+{
+	void						*m_bodyA;	// serializer unique pointer for the first body; interpret via m_bodyAtype
+	void						*m_bodyB;	// serializer unique pointer for the second body; interpret via m_bodyBtype
+	btVector3FloatData			m_refs[2];
+	float						m_cfm;
+	float						m_erp;
+	float						m_split;
+	int							m_delete;
+	btVector3FloatData			m_relPosition[2];//linear
+	int							m_bodyAtype;	// btSoftJointBodyType of m_bodyA
+	int							m_bodyBtype;	// btSoftJointBodyType of m_bodyB
+	int							m_jointType;
+	int							m_pad;	// padding
+};
+
+///Do not change these serialization structures: any change requires an updated sBulletDNAstr/sBulletDNAstr64
+struct	btSoftBodyFloatData
+{
+	btCollisionObjectFloatData	m_collisionObjectData;
+
+	SoftBodyPoseData		*m_pose;
+	SoftBodyMaterialData	**m_materials;	// pointer to material pointers (note the double indirection)
+	SoftBodyNodeData		*m_nodes;
+	SoftBodyLinkData		*m_links;
+	SoftBodyFaceData		*m_faces;
+	SoftBodyTetraData		*m_tetrahedra;
+	SoftRigidAnchorData		*m_anchors;
+	SoftBodyClusterData		*m_clusters;
+	btSoftBodyJointData		*m_joints;
+
+	int						m_numMaterials;	// presumably the entry counts for the arrays above - TODO confirm
+	int						m_numNodes;
+	int						m_numLinks;
+	int						m_numFaces;
+	int						m_numTetrahedra;
+	int						m_numAnchors;
+	int						m_numClusters;
+	int						m_numJoints;
+	SoftBodyConfigData		m_config;
+};
+
+#endif //BT_SOFTBODY_FLOAT_DATA
+
diff --git a/Engine/lib/bullet/src/BulletSoftBody/btSoftBodyHelpers.cpp b/Engine/lib/bullet/src/BulletSoftBody/btSoftBodyHelpers.cpp
index 448d3c81f..0fb3560e9 100644
--- a/Engine/lib/bullet/src/BulletSoftBody/btSoftBodyHelpers.cpp
+++ b/Engine/lib/bullet/src/BulletSoftBody/btSoftBodyHelpers.cpp
@@ -19,6 +19,8 @@ subject to the following restrictions:
 #include <string.h>
 #include "btSoftBodyHelpers.h"
 #include "LinearMath/btConvexHull.h"
+#include "LinearMath/btConvexHullComputer.h"
+
 
 //
 static void				drawVertex(	btIDebugDraw* idraw,
@@ -165,57 +167,181 @@ void			btSoftBodyHelpers::Draw(	btSoftBody* psb,
 	const btVector3		ccolor=btVector3(1,0,0);
 	int i,j,nj;
 
-	/* Nodes	*/ 
-	if(0!=(drawflags&fDrawFlags::Nodes))
+		/* Clusters	*/ 
+	if(0!=(drawflags&fDrawFlags::Clusters))
 	{
-		for(i=0;i<psb->m_nodes.size();++i)
+		srand(1806);
+		for(i=0;i<psb->m_clusters.size();++i)
 		{
-			const btSoftBody::Node&	n=psb->m_nodes[i];
-			if(0==(n.m_material->m_flags&btSoftBody::fMaterial::DebugDraw)) continue;
-			idraw->drawLine(n.m_x-btVector3(scl,0,0),n.m_x+btVector3(scl,0,0),btVector3(1,0,0));
-			idraw->drawLine(n.m_x-btVector3(0,scl,0),n.m_x+btVector3(0,scl,0),btVector3(0,1,0));
-			idraw->drawLine(n.m_x-btVector3(0,0,scl),n.m_x+btVector3(0,0,scl),btVector3(0,0,1));
+			if(psb->m_clusters[i]->m_collide)
+			{
+				btVector3						color(	rand()/(btScalar)RAND_MAX,
+					rand()/(btScalar)RAND_MAX,
+					rand()/(btScalar)RAND_MAX);
+				color=color.normalized()*0.75;
+				btAlignedObjectArray<btVector3>	vertices;
+				vertices.resize(psb->m_clusters[i]->m_nodes.size());
+				for(j=0,nj=vertices.size();j<nj;++j)
+				{				
+					vertices[j]=psb->m_clusters[i]->m_nodes[j]->m_x;
+				}
+#define USE_NEW_CONVEX_HULL_COMPUTER
+#ifdef USE_NEW_CONVEX_HULL_COMPUTER
+				btConvexHullComputer	computer;
+				int stride = sizeof(btVector3);
+				int count = vertices.size();
+				btScalar shrink=0.f;
+				btScalar shrinkClamp=0.f;
+				computer.compute(&vertices[0].getX(),stride,count,shrink,shrinkClamp);
+				for (int i=0;i<computer.faces.size();i++)
+				{
+
+					int face = computer.faces[i];
+					//printf("face=%d\n",face);
+					const btConvexHullComputer::Edge*  firstEdge = &computer.edges[face];
+					const btConvexHullComputer::Edge*  edge = firstEdge->getNextEdgeOfFace();
+
+					int v0 = firstEdge->getSourceVertex();
+					int v1 = firstEdge->getTargetVertex();
+					while (edge!=firstEdge)
+					{
+						int v2 = edge->getTargetVertex();
+						idraw->drawTriangle(computer.vertices[v0],computer.vertices[v1],computer.vertices[v2],color,1);
+						edge = edge->getNextEdgeOfFace();
+						v0=v1;
+						v1=v2;
+					};
+				}
+#else
+
+				HullDesc		hdsc(QF_TRIANGLES,vertices.size(),&vertices[0]);
+				HullResult		hres;
+				HullLibrary		hlib;
+				hdsc.mMaxVertices=vertices.size();
+				hlib.CreateConvexHull(hdsc,hres);
+				const btVector3	center=average(hres.m_OutputVertices);
+				add(hres.m_OutputVertices,-center);
+				mul(hres.m_OutputVertices,(btScalar)1);
+				add(hres.m_OutputVertices,center);
+				for(j=0;j<(int)hres.mNumFaces;++j)
+				{
+					const int idx[]={hres.m_Indices[j*3+0],hres.m_Indices[j*3+1],hres.m_Indices[j*3+2]};
+					idraw->drawTriangle(hres.m_OutputVertices[idx[0]],
+						hres.m_OutputVertices[idx[1]],
+						hres.m_OutputVertices[idx[2]],
+						color,1);
+				}
+				hlib.ReleaseResult(hres);
+#endif
+
+			}
+			/* Velocities	*/ 
+#if 0
+			for(int j=0;j<psb->m_clusters[i].m_nodes.size();++j)
+			{
+				const btSoftBody::Cluster&	c=psb->m_clusters[i];
+				const btVector3				r=c.m_nodes[j]->m_x-c.m_com;
+				const btVector3				v=c.m_lv+btCross(c.m_av,r);
+				idraw->drawLine(c.m_nodes[j]->m_x,c.m_nodes[j]->m_x+v,btVector3(1,0,0));
+			}
+#endif
+			/* Frame		*/ 
+	//		btSoftBody::Cluster& c=*psb->m_clusters[i];
+	//		idraw->drawLine(c.m_com,c.m_framexform*btVector3(10,0,0),btVector3(1,0,0));
+	//		idraw->drawLine(c.m_com,c.m_framexform*btVector3(0,10,0),btVector3(0,1,0));
+	//		idraw->drawLine(c.m_com,c.m_framexform*btVector3(0,0,10),btVector3(0,0,1));
 		}
 	}
-	/* Links	*/ 
-	if(0!=(drawflags&fDrawFlags::Links))
+	else
 	{
-		for(i=0;i<psb->m_links.size();++i)
+		/* Nodes	*/ 
+		if(0!=(drawflags&fDrawFlags::Nodes))
 		{
-			const btSoftBody::Link&	l=psb->m_links[i];
-			if(0==(l.m_material->m_flags&btSoftBody::fMaterial::DebugDraw)) continue;
-			idraw->drawLine(l.m_n[0]->m_x,l.m_n[1]->m_x,lcolor);
+			for(i=0;i<psb->m_nodes.size();++i)
+			{
+				const btSoftBody::Node&	n=psb->m_nodes[i];
+				if(0==(n.m_material->m_flags&btSoftBody::fMaterial::DebugDraw)) continue;
+				idraw->drawLine(n.m_x-btVector3(scl,0,0),n.m_x+btVector3(scl,0,0),btVector3(1,0,0));
+				idraw->drawLine(n.m_x-btVector3(0,scl,0),n.m_x+btVector3(0,scl,0),btVector3(0,1,0));
+				idraw->drawLine(n.m_x-btVector3(0,0,scl),n.m_x+btVector3(0,0,scl),btVector3(0,0,1));
+			}
 		}
-	}
-	/* Normals	*/ 
-	if(0!=(drawflags&fDrawFlags::Normals))
-	{
-		for(i=0;i<psb->m_nodes.size();++i)
+		/* Links	*/ 
+		if(0!=(drawflags&fDrawFlags::Links))
 		{
-			const btSoftBody::Node&	n=psb->m_nodes[i];
-			if(0==(n.m_material->m_flags&btSoftBody::fMaterial::DebugDraw)) continue;
-			const btVector3			d=n.m_n*nscl;
-			idraw->drawLine(n.m_x,n.m_x+d,ncolor);
-			idraw->drawLine(n.m_x,n.m_x-d,ncolor*0.5);
+			for(i=0;i<psb->m_links.size();++i)
+			{
+				const btSoftBody::Link&	l=psb->m_links[i];
+				if(0==(l.m_material->m_flags&btSoftBody::fMaterial::DebugDraw)) continue;
+				idraw->drawLine(l.m_n[0]->m_x,l.m_n[1]->m_x,lcolor);
+			}
 		}
-	}
-	/* Contacts	*/ 
-	if(0!=(drawflags&fDrawFlags::Contacts))
+		/* Normals	*/ 
+		if(0!=(drawflags&fDrawFlags::Normals))
+		{
+			for(i=0;i<psb->m_nodes.size();++i)
+			{
+				const btSoftBody::Node&	n=psb->m_nodes[i];
+				if(0==(n.m_material->m_flags&btSoftBody::fMaterial::DebugDraw)) continue;
+				const btVector3			d=n.m_n*nscl;
+				idraw->drawLine(n.m_x,n.m_x+d,ncolor);
+				idraw->drawLine(n.m_x,n.m_x-d,ncolor*0.5);
+			}
+		}
+		/* Contacts	*/ 
+		if(0!=(drawflags&fDrawFlags::Contacts))
+		{
+			static const btVector3		axis[]={btVector3(1,0,0),
+				btVector3(0,1,0),
+				btVector3(0,0,1)};
+			for(i=0;i<psb->m_rcontacts.size();++i)
+			{		
+				const btSoftBody::RContact&	c=psb->m_rcontacts[i];
+				const btVector3				o=	c.m_node->m_x-c.m_cti.m_normal*
+					(btDot(c.m_node->m_x,c.m_cti.m_normal)+c.m_cti.m_offset);
+				const btVector3				x=btCross(c.m_cti.m_normal,axis[c.m_cti.m_normal.minAxis()]).normalized();
+				const btVector3				y=btCross(x,c.m_cti.m_normal).normalized();
+				idraw->drawLine(o-x*nscl,o+x*nscl,ccolor);
+				idraw->drawLine(o-y*nscl,o+y*nscl,ccolor);
+				idraw->drawLine(o,o+c.m_cti.m_normal*nscl*3,btVector3(1,1,0));
+			}
+		}
+		/* Faces	*/ 
+	if(0!=(drawflags&fDrawFlags::Faces))
 	{
-		static const btVector3		axis[]={btVector3(1,0,0),
-			btVector3(0,1,0),
-			btVector3(0,0,1)};
-		for(i=0;i<psb->m_rcontacts.size();++i)
-		{		
-			const btSoftBody::RContact&	c=psb->m_rcontacts[i];
-			const btVector3				o=	c.m_node->m_x-c.m_cti.m_normal*
-				(btDot(c.m_node->m_x,c.m_cti.m_normal)+c.m_cti.m_offset);
-			const btVector3				x=btCross(c.m_cti.m_normal,axis[c.m_cti.m_normal.minAxis()]).normalized();
-			const btVector3				y=btCross(x,c.m_cti.m_normal).normalized();
-			idraw->drawLine(o-x*nscl,o+x*nscl,ccolor);
-			idraw->drawLine(o-y*nscl,o+y*nscl,ccolor);
-			idraw->drawLine(o,o+c.m_cti.m_normal*nscl*3,btVector3(1,1,0));
-		}
+		const btScalar	scl=(btScalar)0.8;
+		const btScalar	alp=(btScalar)1;
+		const btVector3	col(0,(btScalar)0.7,0);
+		for(i=0;i<psb->m_faces.size();++i)
+		{
+			const btSoftBody::Face&	f=psb->m_faces[i];
+			if(0==(f.m_material->m_flags&btSoftBody::fMaterial::DebugDraw)) continue;
+			const btVector3			x[]={f.m_n[0]->m_x,f.m_n[1]->m_x,f.m_n[2]->m_x};
+			const btVector3			c=(x[0]+x[1]+x[2])/3;
+			idraw->drawTriangle((x[0]-c)*scl+c,
+				(x[1]-c)*scl+c,
+				(x[2]-c)*scl+c,
+				col,alp);
+		}	
+	}
+	/* Tetras	*/ 
+	if(0!=(drawflags&fDrawFlags::Tetras))
+	{
+		const btScalar	scl=(btScalar)0.8;
+		const btScalar	alp=(btScalar)1;
+		const btVector3	col((btScalar)0.3,(btScalar)0.3,(btScalar)0.7);
+		for(int i=0;i<psb->m_tetras.size();++i)
+		{
+			const btSoftBody::Tetra&	t=psb->m_tetras[i];
+			if(0==(t.m_material->m_flags&btSoftBody::fMaterial::DebugDraw)) continue;
+			const btVector3				x[]={t.m_n[0]->m_x,t.m_n[1]->m_x,t.m_n[2]->m_x,t.m_n[3]->m_x};
+			const btVector3				c=(x[0]+x[1]+x[2]+x[3])/4;
+			idraw->drawTriangle((x[0]-c)*scl+c,(x[1]-c)*scl+c,(x[2]-c)*scl+c,col,alp);
+			idraw->drawTriangle((x[0]-c)*scl+c,(x[1]-c)*scl+c,(x[3]-c)*scl+c,col,alp);
+			idraw->drawTriangle((x[1]-c)*scl+c,(x[2]-c)*scl+c,(x[3]-c)*scl+c,col,alp);
+			idraw->drawTriangle((x[2]-c)*scl+c,(x[0]-c)*scl+c,(x[3]-c)*scl+c,col,alp);
+		}	
+	}
 	}
 	/* Anchors	*/ 
 	if(0!=(drawflags&fDrawFlags::Anchors))
@@ -238,97 +364,7 @@ void			btSoftBodyHelpers::Draw(	btSoftBody* psb,
 			}
 		}
 	}
-	/* Faces	*/ 
-	if(0!=(drawflags&fDrawFlags::Faces))
-	{
-		const btScalar	scl=(btScalar)0.8;
-		const btScalar	alp=(btScalar)1;
-		const btVector3	col(0,(btScalar)0.7,0);
-		for(i=0;i<psb->m_faces.size();++i)
-		{
-			const btSoftBody::Face&	f=psb->m_faces[i];
-			if(0==(f.m_material->m_flags&btSoftBody::fMaterial::DebugDraw)) continue;
-			const btVector3			x[]={f.m_n[0]->m_x,f.m_n[1]->m_x,f.m_n[2]->m_x};
-			const btVector3			c=(x[0]+x[1]+x[2])/3;
-			idraw->drawTriangle((x[0]-c)*scl+c,
-				(x[1]-c)*scl+c,
-				(x[2]-c)*scl+c,
-				col,alp);
-		}	
-	}
-	/* Clusters	*/ 
-	if(0!=(drawflags&fDrawFlags::Clusters))
-	{
-		srand(1806);
-		for(i=0;i<psb->m_clusters.size();++i)
-		{
-			if(psb->m_clusters[i]->m_collide)
-			{
-				btVector3						color(	rand()/(btScalar)RAND_MAX,
-					rand()/(btScalar)RAND_MAX,
-					rand()/(btScalar)RAND_MAX);
-				color=color.normalized()*0.75;
-				btAlignedObjectArray<btVector3>	vertices;
-				vertices.resize(psb->m_clusters[i]->m_nodes.size());
-				for(j=0,nj=vertices.size();j<nj;++j)
-				{				
-					vertices[j]=psb->m_clusters[i]->m_nodes[j]->m_x;
-				}
-				HullDesc		hdsc(QF_TRIANGLES,vertices.size(),&vertices[0]);
-				HullResult		hres;
-				HullLibrary		hlib;
-				hdsc.mMaxVertices=vertices.size();
-				hlib.CreateConvexHull(hdsc,hres);
-				const btVector3	center=average(hres.m_OutputVertices);
-				add(hres.m_OutputVertices,-center);
-				mul(hres.m_OutputVertices,(btScalar)1);
-				add(hres.m_OutputVertices,center);
-				for(j=0;j<(int)hres.mNumFaces;++j)
-				{
-					const int idx[]={hres.m_Indices[j*3+0],hres.m_Indices[j*3+1],hres.m_Indices[j*3+2]};
-					idraw->drawTriangle(hres.m_OutputVertices[idx[0]],
-						hres.m_OutputVertices[idx[1]],
-						hres.m_OutputVertices[idx[2]],
-						color,1);
-				}
-				hlib.ReleaseResult(hres);
-			}
-			/* Velocities	*/ 
-#if 0
-			for(int j=0;j<psb->m_clusters[i].m_nodes.size();++j)
-			{
-				const btSoftBody::Cluster&	c=psb->m_clusters[i];
-				const btVector3				r=c.m_nodes[j]->m_x-c.m_com;
-				const btVector3				v=c.m_lv+btCross(c.m_av,r);
-				idraw->drawLine(c.m_nodes[j]->m_x,c.m_nodes[j]->m_x+v,btVector3(1,0,0));
-			}
-#endif
-			/* Frame		*/ 
-			btSoftBody::Cluster& c=*psb->m_clusters[i];
-			idraw->drawLine(c.m_com,c.m_framexform*btVector3(10,0,0),btVector3(1,0,0));
-			idraw->drawLine(c.m_com,c.m_framexform*btVector3(0,10,0),btVector3(0,1,0));
-			idraw->drawLine(c.m_com,c.m_framexform*btVector3(0,0,10),btVector3(0,0,1));
-		}
-	}
-
-	/* Tetras	*/ 
-	if(0!=(drawflags&fDrawFlags::Tetras))
-	{
-		const btScalar	scl=(btScalar)0.8;
-		const btScalar	alp=(btScalar)1;
-		const btVector3	col((btScalar)0.7,(btScalar)0.7,(btScalar)0.7);
-		for(int i=0;i<psb->m_tetras.size();++i)
-		{
-			const btSoftBody::Tetra&	t=psb->m_tetras[i];
-			if(0==(t.m_material->m_flags&btSoftBody::fMaterial::DebugDraw)) continue;
-			const btVector3				x[]={t.m_n[0]->m_x,t.m_n[1]->m_x,t.m_n[2]->m_x,t.m_n[3]->m_x};
-			const btVector3				c=(x[0]+x[1]+x[2]+x[3])/4;
-			idraw->drawTriangle((x[0]-c)*scl+c,(x[1]-c)*scl+c,(x[2]-c)*scl+c,col,alp);
-			idraw->drawTriangle((x[0]-c)*scl+c,(x[1]-c)*scl+c,(x[3]-c)*scl+c,col,alp);
-			idraw->drawTriangle((x[1]-c)*scl+c,(x[2]-c)*scl+c,(x[3]-c)*scl+c,col,alp);
-			idraw->drawTriangle((x[2]-c)*scl+c,(x[0]-c)*scl+c,(x[3]-c)*scl+c,col,alp);
-		}	
-	}
+	
 
 	/* Notes	*/ 
 	if(0!=(drawflags&fDrawFlags::Notes))
@@ -380,7 +416,12 @@ void			btSoftBodyHelpers::Draw(	btSoftBody* psb,
 					idraw->drawLine(o0,o0+a1*10,btVector3(1,1,0));
 					idraw->drawLine(o1,o1+a0*10,btVector3(0,1,1));
 					idraw->drawLine(o1,o1+a1*10,btVector3(0,1,1));
+					break;
 				}
+				default:
+				{
+				}
+					
 			}		
 		}
 	}
@@ -813,7 +854,7 @@ btSoftBody*		btSoftBodyHelpers::CreateEllipsoid(btSoftBodyWorldInfo& worldInfo,c
 //
 btSoftBody*		btSoftBodyHelpers::CreateFromTriMesh(btSoftBodyWorldInfo& worldInfo,const btScalar*	vertices,
 													 const int* triangles,
-													 int ntriangles)
+													 int ntriangles, bool randomizeConstraints)
 {
 	int		maxidx=0;
 	int i,j,ni;
@@ -848,13 +889,18 @@ btSoftBody*		btSoftBodyHelpers::CreateFromTriMesh(btSoftBodyWorldInfo& worldInfo
 #undef IDX
 		psb->appendFace(idx[0],idx[1],idx[2]);
 	}
-	psb->randomizeConstraints();
+
+	if (randomizeConstraints)
+	{
+		psb->randomizeConstraints();
+	}
+
 	return(psb);
 }
 
 //
 btSoftBody*		btSoftBodyHelpers::CreateFromConvexHull(btSoftBodyWorldInfo& worldInfo,	const btVector3* vertices,
-														int nvertices)
+														int nvertices, bool randomizeConstraints)
 {
 	HullDesc		hdsc(QF_TRIANGLES,nvertices,vertices);
 	HullResult		hres;
@@ -874,7 +920,10 @@ btSoftBody*		btSoftBodyHelpers::CreateFromConvexHull(btSoftBodyWorldInfo& worldI
 		psb->appendFace(idx[0],idx[1],idx[2]);
 	}
 	hlib.ReleaseResult(hres);
-	psb->randomizeConstraints();
+	if (randomizeConstraints)
+	{
+		psb->randomizeConstraints();
+	}
 	return(psb);
 }
 
diff --git a/Engine/lib/bullet/src/BulletSoftBody/btSoftBodyHelpers.h b/Engine/lib/bullet/src/BulletSoftBody/btSoftBodyHelpers.h
index 54f81f354..620a52fe3 100644
--- a/Engine/lib/bullet/src/BulletSoftBody/btSoftBodyHelpers.h
+++ b/Engine/lib/bullet/src/BulletSoftBody/btSoftBodyHelpers.h
@@ -13,8 +13,8 @@ subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-#ifndef SOFT_BODY_HELPERS_H
-#define SOFT_BODY_HELPERS_H
+#ifndef BT_SOFT_BODY_HELPERS_H
+#define BT_SOFT_BODY_HELPERS_H
 
 #include "btSoftBody.h"
 
@@ -109,24 +109,26 @@ struct	btSoftBodyHelpers
 	static	btSoftBody*		CreateFromTriMesh(	btSoftBodyWorldInfo& worldInfo,
 		const btScalar*	vertices,
 		const int* triangles,
-		int ntriangles);
+		int ntriangles,
+		bool randomizeConstraints = true);
 	/* Create from convex-hull												*/ 
 	static	btSoftBody*		CreateFromConvexHull(	btSoftBodyWorldInfo& worldInfo,
 		const btVector3* vertices,
-		int nvertices);
+		int nvertices,
+		bool randomizeConstraints = true);
 
 
 	/* Export TetGen compatible .smesh file									*/ 
-	static void				ExportAsSMeshFile(	btSoftBody* psb,
-												const char* filename);	
+//	static void				ExportAsSMeshFile(	btSoftBody* psb,
+//												const char* filename);	
 	/* Create from TetGen .ele, .face, .node files							*/ 
-	static btSoftBody*		CreateFromTetGenFile(	btSoftBodyWorldInfo& worldInfo,
-													const char* ele,
-													const char* face,
-													const char* node,
-													bool bfacelinks,
-													bool btetralinks,
-													bool bfacesfromtetras);
+//	static btSoftBody*		CreateFromTetGenFile(	btSoftBodyWorldInfo& worldInfo,
+//													const char* ele,
+//													const char* face,
+//													const char* node,
+//													bool bfacelinks,
+//													bool btetralinks,
+//													bool bfacesfromtetras);
 	/* Create from TetGen .ele, .face, .node data							*/ 
 	static btSoftBody*		CreateFromTetGenData(	btSoftBodyWorldInfo& worldInfo,
 													const char* ele,
@@ -138,4 +140,4 @@ struct	btSoftBodyHelpers
 	
 };
 
-#endif //SOFT_BODY_HELPERS_H
+#endif //BT_SOFT_BODY_HELPERS_H
diff --git a/Engine/lib/bullet/src/BulletSoftBody/btSoftBodyInternals.h b/Engine/lib/bullet/src/BulletSoftBody/btSoftBodyInternals.h
index 2cb7744cb..19d0543ef 100644
--- a/Engine/lib/bullet/src/BulletSoftBody/btSoftBodyInternals.h
+++ b/Engine/lib/bullet/src/BulletSoftBody/btSoftBodyInternals.h
@@ -21,11 +21,12 @@ subject to the following restrictions:
 
 
 #include "LinearMath/btQuickprof.h"
+#include "LinearMath/btPolarDecomposition.h"
 #include "BulletCollision/BroadphaseCollision/btBroadphaseInterface.h"
 #include "BulletCollision/CollisionDispatch/btCollisionDispatcher.h"
 #include "BulletCollision/CollisionShapes/btConvexInternalShape.h"
 #include "BulletCollision/NarrowPhaseCollision/btGjkEpa2.h"
-
+#include <string.h> //for memset
 //
 // btSymMatrix
 //
@@ -70,7 +71,7 @@ public:
 	///getAabb returns the axis aligned bounding box in the coordinate frame of the given transform t.
 	virtual void getAabb(const btTransform& t,btVector3& aabbMin,btVector3& aabbMax) const
 	{
-		/* t should be identity, but better be safe than...fast? */ 
+		/* t is usually identity, except when colliding against btCompoundShape. See Issue 512 */
 		const btVector3	mins=m_body->m_bounds[0];
 		const btVector3	maxs=m_body->m_bounds[1];
 		const btVector3	crns[]={t*btVector3(mins.x(),mins.y(),mins.z()),
@@ -172,8 +173,7 @@ public:
 template <typename T>
 static inline void			ZeroInitialize(T& value)
 {
-	static const T	zerodummy;
-	value=zerodummy;
+	memset(&value,0,sizeof(T));
 }
 //
 template <typename T>
@@ -615,32 +615,8 @@ private:
 //
 static inline int			PolarDecompose(	const btMatrix3x3& m,btMatrix3x3& q,btMatrix3x3& s)
 {
-	static const btScalar	half=(btScalar)0.5;
-	static const btScalar	accuracy=(btScalar)0.0001;
-	static const int		maxiterations=16;
-	int						i=0;
-	btScalar				det=0;
-	q	=	Mul(m,1/btVector3(m[0][0],m[1][1],m[2][2]).length());
-	det	=	q.determinant();
-	if(!btFuzzyZero(det))
-	{
-		for(;i<maxiterations;++i)
-		{
-			q=Mul(Add(q,Mul(q.adjoint(),1/det).transpose()),half);
-			const btScalar	ndet=q.determinant();
-			if(Sq(ndet-det)>accuracy) det=ndet; else break;
-		}
-		/* Final orthogonalization	*/ 
-		Orthogonalize(q);
-		/* Compute 'S'				*/ 
-		s=q.transpose()*m;
-	}
-	else
-	{
-		q.setIdentity();
-		s.setIdentity();
-	}
-	return(i);
+	static const btPolarDecomposition polar;  
+	return polar.decompose(m, q, s);
 }
 
 //
@@ -667,7 +643,7 @@ struct btSoftColliders
 			threshold	=(btScalar)0;
 		}
 		bool				SolveContact(	const btGjkEpaSolver2::sResults& res,
-			btSoftBody::Body ba,btSoftBody::Body bb,
+			btSoftBody::Body ba,const btSoftBody::Body bb,
 			btSoftBody::CJoint& joint)
 		{
 			if(res.distance<m_margin)
@@ -703,7 +679,7 @@ struct btSoftColliders
 				joint.m_normal		=	norm;
 //				printf("normal=%f,%f,%f\n",res.normal.getX(),res.normal.getY(),res.normal.getZ());
 				joint.m_delete		=	false;
-				joint.m_friction	=	fv.length2()<(-rvac*friction)?1:friction;
+				joint.m_friction	=	fv.length2()<(rvac*friction*rvac*friction)?1:friction;
 				joint.m_massmatrix	=	ImpulseMatrix(	ba.invMass(),ba.invWorldInertia(),joint.m_rpos[0],
 					bb.invMass(),bb.invWorldInertia(),joint.m_rpos[1]);
 
@@ -718,30 +694,30 @@ struct btSoftColliders
 	struct	CollideCL_RS : ClusterBase
 	{
 		btSoftBody*		psb;
-		
-		btCollisionObject*	m_colObj;
+		const btCollisionObjectWrapper*	m_colObjWrap;
+
 		void		Process(const btDbvtNode* leaf)
 		{
 			btSoftBody::Cluster*		cluster=(btSoftBody::Cluster*)leaf->data;
 			btSoftClusterCollisionShape	cshape(cluster);
 			
-			const btConvexShape*		rshape=(const btConvexShape*)m_colObj->getCollisionShape();
+			const btConvexShape*		rshape=(const btConvexShape*)m_colObjWrap->getCollisionShape();
 
 			///don't collide an anchored cluster with a static/kinematic object
-			if(m_colObj->isStaticOrKinematicObject() && cluster->m_containsAnchor)
+			if(m_colObjWrap->getCollisionObject()->isStaticOrKinematicObject() && cluster->m_containsAnchor)
 				return;
 
 			btGjkEpaSolver2::sResults	res;		
 			if(btGjkEpaSolver2::SignedDistance(	&cshape,btTransform::getIdentity(),
-				rshape,m_colObj->getInterpolationWorldTransform(),
+				rshape,m_colObjWrap->getWorldTransform(),
 				btVector3(1,0,0),res))
 			{
 				btSoftBody::CJoint	joint;
-				if(SolveContact(res,cluster,m_colObj,joint))//prb,joint))
+				if(SolveContact(res,cluster,m_colObjWrap->getCollisionObject(),joint))//prb,joint))
 				{
 					btSoftBody::CJoint*	pj=new(btAlignedAlloc(sizeof(btSoftBody::CJoint),16)) btSoftBody::CJoint();
 					*pj=joint;psb->m_joints.push_back(pj);
-					if(m_colObj->isStaticOrKinematicObject())
+					if(m_colObjWrap->getCollisionObject()->isStaticOrKinematicObject())
 					{
 						pj->m_erp	*=	psb->m_cfg.kSKHR_CL;
 						pj->m_split	*=	psb->m_cfg.kSK_SPLT_CL;
@@ -754,19 +730,19 @@ struct btSoftColliders
 				}
 			}
 		}
-		void		Process(btSoftBody* ps,btCollisionObject* colOb)
+		void		ProcessColObj(btSoftBody* ps,const btCollisionObjectWrapper* colObWrap)
 		{
 			psb			=	ps;
-			m_colObj			=	colOb;
+			m_colObjWrap			=	colObWrap;
 			idt			=	ps->m_sst.isdt;
-			m_margin		=	m_colObj->getCollisionShape()->getMargin()+psb->getCollisionShape()->getMargin();
+			m_margin		=	m_colObjWrap->getCollisionShape()->getMargin()+psb->getCollisionShape()->getMargin();
 			///Bullet rigid body uses multiply instead of minimum to determine combined friction. Some customization would be useful.
-			friction	=	btMin(psb->m_cfg.kDF,m_colObj->getFriction());
+			friction	=	btMin(psb->m_cfg.kDF,m_colObjWrap->getCollisionObject()->getFriction());
 			btVector3			mins;
 			btVector3			maxs;
 
 			ATTRIBUTE_ALIGNED16(btDbvtVolume)		volume;
-			colOb->getCollisionShape()->getAabb(colOb->getInterpolationWorldTransform(),mins,maxs);
+			colObWrap->getCollisionShape()->getAabb(colObWrap->getWorldTransform(),mins,maxs);
 			volume=btDbvtVolume::FromMM(mins,maxs);
 			volume.Expand(btVector3(1,1,1)*m_margin);
 			ps->m_cdbvt.collideTV(ps->m_cdbvt.m_root,volume,*this);
@@ -816,7 +792,7 @@ struct btSoftColliders
 				
 			}
 		}
-		void		Process(btSoftBody* psa,btSoftBody* psb)
+		void		ProcessSoftSoft(btSoftBody* psa,btSoftBody* psb)
 		{
 			idt			=	psa->m_sst.isdt;
 			//m_margin		=	(psa->getCollisionShape()->getMargin()+psb->getCollisionShape()->getMargin())/2;
@@ -841,15 +817,16 @@ struct btSoftColliders
 		{
 			const btScalar			m=n.m_im>0?dynmargin:stamargin;
 			btSoftBody::RContact	c;
+
 			if(	(!n.m_battach)&&
-				psb->checkContact(m_colObj1,n.m_x,m,c.m_cti))
+				psb->checkContact(m_colObj1Wrap,n.m_x,m,c.m_cti))
 			{
 				const btScalar	ima=n.m_im;
 				const btScalar	imb= m_rigidBody? m_rigidBody->getInvMass() : 0.f;
 				const btScalar	ms=ima+imb;
 				if(ms>0)
 				{
-					const btTransform&	wtr=m_rigidBody?m_rigidBody->getInterpolationWorldTransform() : m_colObj1->getWorldTransform();
+					const btTransform&	wtr=m_rigidBody?m_rigidBody->getWorldTransform() : m_colObj1Wrap->getCollisionObject()->getWorldTransform();
 					static const btMatrix3x3	iwiStatic(0,0,0,0,0,0,0,0,0);
 					const btMatrix3x3&	iwi=m_rigidBody?m_rigidBody->getInvInertiaTensorWorld() : iwiStatic;
 					const btVector3		ra=n.m_x-wtr.getOrigin();
@@ -858,13 +835,13 @@ struct btSoftColliders
 					const btVector3		vr=vb-va;
 					const btScalar		dn=btDot(vr,c.m_cti.m_normal);
 					const btVector3		fv=vr-c.m_cti.m_normal*dn;
-					const btScalar		fc=psb->m_cfg.kDF*m_colObj1->getFriction();
+					const btScalar		fc=psb->m_cfg.kDF*m_colObj1Wrap->getCollisionObject()->getFriction();
 					c.m_node	=	&n;
 					c.m_c0		=	ImpulseMatrix(psb->m_sst.sdt,ima,imb,iwi,ra);
 					c.m_c1		=	ra;
 					c.m_c2		=	ima*psb->m_sst.sdt;
-					c.m_c3		=	fv.length2()<(btFabs(dn)*fc)?0:1-fc;
-					c.m_c4		=	m_colObj1->isStaticOrKinematicObject()?psb->m_cfg.kKHR:psb->m_cfg.kCHR;
+			        c.m_c3		=	fv.length2()<(dn*fc*dn*fc)?0:1-fc;
+					c.m_c4		=	m_colObj1Wrap->getCollisionObject()->isStaticOrKinematicObject()?psb->m_cfg.kKHR:psb->m_cfg.kCHR;
 					psb->m_rcontacts.push_back(c);
 					if (m_rigidBody)
 						m_rigidBody->activate();
@@ -872,7 +849,7 @@ struct btSoftColliders
 			}
 		}
 		btSoftBody*		psb;
-		btCollisionObject*	m_colObj1;
+		const btCollisionObjectWrapper*	m_colObj1Wrap;
 		btRigidBody*	m_rigidBody;
 		btScalar		dynmargin;
 		btScalar		stamargin;
diff --git a/Engine/lib/bullet/src/BulletSoftBody/btSoftBodySolverVertexBuffer.h b/Engine/lib/bullet/src/BulletSoftBody/btSoftBodySolverVertexBuffer.h
new file mode 100644
index 000000000..c4733d640
--- /dev/null
+++ b/Engine/lib/bullet/src/BulletSoftBody/btSoftBodySolverVertexBuffer.h
@@ -0,0 +1,165 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_SOFT_BODY_SOLVER_VERTEX_BUFFER_H
+#define BT_SOFT_BODY_SOLVER_VERTEX_BUFFER_H
+
+/** Describes how vertex positions and normals are laid out (offset and stride) in a vertex buffer of a given type. */
+class btVertexBufferDescriptor
+{
+public:
+	enum BufferTypes
+	{
+		CPU_BUFFER,
+		DX11_BUFFER,
+		OPENGL_BUFFER
+	};
+
+protected:	
+	// Presence flags and layout values; all start out false/0 in the constructor below.
+	bool m_hasVertexPositions;
+	bool m_hasNormals;
+
+	int m_vertexOffset;
+	int m_vertexStride;
+
+	int m_normalOffset;
+	int m_normalStride;
+
+public:
+	btVertexBufferDescriptor()
+	{
+		m_hasVertexPositions = false;
+		m_hasNormals = false;
+		m_vertexOffset = 0;
+		m_vertexStride = 0;
+		m_normalOffset = 0;
+		m_normalStride = 0;
+	}
+
+	virtual ~btVertexBufferDescriptor()
+	{
+
+	}
+
+	virtual bool hasVertexPositions() const
+	{
+		return m_hasVertexPositions;
+	}
+
+	virtual bool hasNormals() const
+	{
+		return m_hasNormals;
+	}
+
+	/**
+	 * Return the type of the vertex buffer descriptor.
+	 */
+	virtual BufferTypes getBufferType() const = 0;
+
+	/**
+	 * Return the vertex offset in floats from the base pointer.
+	 */
+	virtual int getVertexOffset() const
+	{
+		return m_vertexOffset;
+	}
+
+	/**
+	 * Return the vertex stride in number of floats between vertices.
+	 */
+	virtual int getVertexStride() const
+	{
+		return m_vertexStride;
+	}
+
+	/**
+	 * Return the normal offset from the base pointer (presumably in floats, like the vertex offset - TODO confirm).
+	 */
+	virtual int getNormalOffset() const
+	{
+		return m_normalOffset;
+	}
+
+	/**
+	 * Return the normal stride between normals (presumably in floats, like the vertex stride - TODO confirm).
+	 */
+	virtual int getNormalStride() const
+	{
+		return m_normalStride;
+	}
+};
+
+/** Vertex buffer descriptor that reports CPU_BUFFER and addresses its data through a float base pointer. */
+class btCPUVertexBufferDescriptor : public btVertexBufferDescriptor
+{
+protected:
+	float *m_basePointer;
+
+public:
+	/**
+	 * Positions-only layout: basePointer is pointer to beginning of the buffer,
+	 * vertexOffset is the offset in floats to the first vertex,
+	 * vertexStride is the stride in floats between vertices.
+	 */
+	btCPUVertexBufferDescriptor( float *basePointer, int vertexOffset, int vertexStride )
+	{
+		m_basePointer = basePointer;
+		m_vertexOffset = vertexOffset;
+		m_vertexStride = vertexStride;
+		m_hasVertexPositions = true;
+	}
+
+	/**
+	 * Positions-and-normals layout: basePointer is pointer to beginning of the buffer,
+	 * vertexOffset/vertexStride locate the positions (in floats),
+	 * normalOffset/normalStride locate the normals (presumably also in floats - TODO confirm).
+	 */
+	btCPUVertexBufferDescriptor( float *basePointer, int vertexOffset, int vertexStride, int normalOffset, int normalStride )
+	{
+		m_basePointer = basePointer;
+
+		m_vertexOffset = vertexOffset;
+		m_vertexStride = vertexStride;
+		m_hasVertexPositions = true;
+
+		m_normalOffset = normalOffset;
+		m_normalStride = normalStride;
+		m_hasNormals = true;
+	}
+
+	virtual ~btCPUVertexBufferDescriptor()
+	{
+
+	}
+
+	/**
+	 * Return the type of the vertex buffer descriptor.
+	 */
+	virtual BufferTypes getBufferType() const
+	{
+		return CPU_BUFFER;
+	}
+
+	/**
+	 * Return the base pointer in memory to the first vertex.
+	 */
+	virtual float *getBasePointer() const
+	{
+		return m_basePointer;
+	}
+};
+
+#endif // #ifndef BT_SOFT_BODY_SOLVER_VERTEX_BUFFER_H
diff --git a/Engine/lib/bullet/src/BulletSoftBody/btSoftBodySolvers.h b/Engine/lib/bullet/src/BulletSoftBody/btSoftBodySolvers.h
new file mode 100644
index 000000000..6947bc27d
--- /dev/null
+++ b/Engine/lib/bullet/src/BulletSoftBody/btSoftBodySolvers.h
@@ -0,0 +1,154 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_SOFT_BODY_SOLVERS_H
+#define BT_SOFT_BODY_SOLVERS_H
+
+#include "BulletCollision/CollisionShapes/btTriangleIndexVertexArray.h"
+
+
+class btSoftBodyTriangleData;
+class btSoftBodyLinkData;
+class btSoftBodyVertexData;
+class btVertexBufferDescriptor;
+class btCollisionObject;
+class btSoftBody;
+
+/** Abstract interface for soft body solver back-ends (see SolverTypes); holds the shared iteration counts and time scale. */
+class btSoftBodySolver
+{
+public:
+	enum SolverTypes
+	{
+		DEFAULT_SOLVER,
+		CPU_SOLVER,
+		CL_SOLVER,
+		CL_SIMD_SOLVER,
+		DX_SOLVER,
+		DX_SIMD_SOLVER
+	};
+
+
+protected:
+	int m_numberOfPositionIterations;
+	int m_numberOfVelocityIterations;
+	// Simulation timescale
+	float m_timeScale;
+	
+public:
+	btSoftBodySolver() :
+		m_numberOfPositionIterations( 5 ),
+		m_numberOfVelocityIterations( 0 ),
+		m_timeScale( 1 )
+	{
+		// Defaults: 5 position iterations, 0 velocity iterations, time scale 1 (initializers follow declaration order).
+	}
+
+	virtual ~btSoftBodySolver()
+	{
+	}
+	
+	/**
+	 * Return the type of the solver.
+	 */
+	virtual SolverTypes getSolverType() const = 0;
+
+
+	/** Ensure that this solver is initialized. */
+	virtual bool checkInitialized() = 0;
+
+	/** Optimize soft bodies in this solver. */
+	virtual void optimize( btAlignedObjectArray< btSoftBody * > &softBodies , bool forceUpdate=false) = 0;
+
+	/** Copy necessary data back to the original soft body source objects. */
+	virtual void copyBackToSoftBodies(bool bMove = true) = 0;
+
+	/** Predict motion of soft bodies into next timestep */
+	virtual void predictMotion( float solverdt ) = 0;
+
+	/** Solve constraints for a set of soft bodies */
+	virtual void solveConstraints( float solverdt ) = 0;
+
+	/** Perform necessary per-step updates of soft bodies such as recomputing normals and bounding boxes */
+	virtual void updateSoftBodies() = 0;
+
+	/** Process a collision between one of the world's soft bodies and another collision object */
+	virtual void processCollision( btSoftBody *, const struct btCollisionObjectWrapper* ) = 0;
+
+	/** Process a collision between two soft bodies */
+	virtual void processCollision( btSoftBody*, btSoftBody* ) = 0;
+
+	/** Set the number of position solver iterations this solver uses. */
+	virtual void setNumberOfPositionIterations( int iterations )
+	{
+		m_numberOfPositionIterations = iterations;
+	}
+
+	/** Get the number of position solver iterations this solver uses. */
+	virtual int getNumberOfPositionIterations()
+	{
+		return m_numberOfPositionIterations;
+	}
+
+	/** Set the number of velocity constraint solver iterations this solver uses. */
+	virtual void setNumberOfVelocityIterations( int iterations )
+	{
+		m_numberOfVelocityIterations = iterations;
+	}
+
+	/** Get the number of velocity constraint solver iterations this solver uses. */
+	virtual int getNumberOfVelocityIterations()
+	{
+		return m_numberOfVelocityIterations;
+	}
+
+	/** Return the timescale that the simulation is using */
+	float getTimeScale()
+	{
+		return m_timeScale;
+	}
+
+#if 0
+	/**
+	 * Add a collision object to be used by the indicated softbody.
+	 */
+	virtual void addCollisionObjectForSoftBody( int clothIdentifier, btCollisionObject *collisionObject ) = 0;
+#endif
+};
+
+/** 
+ * Class to manage movement of data from a solver to a given target.
+ * This version is abstract. Subclasses will have custom pairings for different combinations.
+ */
+class btSoftBodySolverOutput
+{
+protected:
+
+public:
+	btSoftBodySolverOutput()
+	{
+	}
+
+	virtual ~btSoftBodySolverOutput()
+	{
+	}
+
+
+	/** Output current computed vertex data for the given soft body to the given vertex buffer. */
+	virtual void copySoftBodyToVertexBuffer( const btSoftBody * const softBody, btVertexBufferDescriptor *vertexBuffer ) = 0;
+};
+
+
+#endif // #ifndef BT_SOFT_BODY_SOLVERS_H
diff --git a/Engine/lib/bullet/src/BulletSoftBody/btSoftRigidCollisionAlgorithm.cpp b/Engine/lib/bullet/src/BulletSoftBody/btSoftRigidCollisionAlgorithm.cpp
index 11ad9e7da..01c148a2c 100644
--- a/Engine/lib/bullet/src/BulletSoftBody/btSoftRigidCollisionAlgorithm.cpp
+++ b/Engine/lib/bullet/src/BulletSoftBody/btSoftRigidCollisionAlgorithm.cpp
@@ -19,12 +19,15 @@ subject to the following restrictions:
 #include "BulletCollision/CollisionShapes/btBoxShape.h"
 #include "BulletCollision/CollisionDispatch/btCollisionObject.h"
 #include "btSoftBody.h"
+#include "BulletSoftBody/btSoftBodySolvers.h"
+#include "BulletCollision/CollisionDispatch/btCollisionObjectWrapper.h"
+
 ///TODO: include all the shapes that the softbody can collide with
 ///alternatively, implement special case collision algorithms (just like for rigid collision shapes)
 
 //#include <stdio.h>
 
-btSoftRigidCollisionAlgorithm::btSoftRigidCollisionAlgorithm(btPersistentManifold* /*mf*/,const btCollisionAlgorithmConstructionInfo& ci,btCollisionObject* /*col0*/,btCollisionObject* /*col1*/, bool isSwapped)
+btSoftRigidCollisionAlgorithm::btSoftRigidCollisionAlgorithm(btPersistentManifold* /*mf*/,const btCollisionAlgorithmConstructionInfo& ci,const btCollisionObjectWrapper* ,const btCollisionObjectWrapper* , bool isSwapped)
 : btCollisionAlgorithm(ci),
 //m_ownManifold(false),
 //m_manifoldPtr(mf),
@@ -50,18 +53,19 @@ btSoftRigidCollisionAlgorithm::~btSoftRigidCollisionAlgorithm()
 
 #include <stdio.h>
 
-void btSoftRigidCollisionAlgorithm::processCollision (btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut)
+void btSoftRigidCollisionAlgorithm::processCollision (const btCollisionObjectWrapper* body0Wrap,const btCollisionObjectWrapper* body1Wrap,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut)
 {
 	(void)dispatchInfo;
 	(void)resultOut;
 	//printf("btSoftRigidCollisionAlgorithm\n");
-
-	btSoftBody* softBody =  m_isSwapped? (btSoftBody*)body1 : (btSoftBody*)body0;
-	btCollisionObject* rigidCollisionObject = m_isSwapped? body0 : body1;
+//	const btCollisionObjectWrapper* softWrap = m_isSwapped?body1Wrap:body0Wrap;
+//	const btCollisionObjectWrapper* rigidWrap = m_isSwapped?body0Wrap:body1Wrap;
+	btSoftBody* softBody =  m_isSwapped? (btSoftBody*)body1Wrap->getCollisionObject() : (btSoftBody*)body0Wrap->getCollisionObject();
+	const btCollisionObjectWrapper* rigidCollisionObjectWrap = m_isSwapped? body0Wrap : body1Wrap;
 	
-	if (softBody->m_collisionDisabledObjects.findLinearSearch(rigidCollisionObject)==softBody->m_collisionDisabledObjects.size())
+	if (softBody->m_collisionDisabledObjects.findLinearSearch(rigidCollisionObjectWrap->getCollisionObject())==softBody->m_collisionDisabledObjects.size())
 	{
-		softBody->defaultCollisionHandler(rigidCollisionObject);
+		softBody->getSoftBodySolver()->processCollision(softBody, rigidCollisionObjectWrap);
 	}
 
 
diff --git a/Engine/lib/bullet/src/BulletSoftBody/btSoftRigidCollisionAlgorithm.h b/Engine/lib/bullet/src/BulletSoftBody/btSoftRigidCollisionAlgorithm.h
index adc3844e3..a9b513e36 100644
--- a/Engine/lib/bullet/src/BulletSoftBody/btSoftRigidCollisionAlgorithm.h
+++ b/Engine/lib/bullet/src/BulletSoftBody/btSoftRigidCollisionAlgorithm.h
@@ -13,8 +13,8 @@ subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-#ifndef SOFT_RIGID_COLLISION_ALGORITHM_H
-#define SOFT_RIGID_COLLISION_ALGORITHM_H
+#ifndef BT_SOFT_RIGID_COLLISION_ALGORITHM_H
+#define BT_SOFT_RIGID_COLLISION_ALGORITHM_H
 
 #include "BulletCollision/BroadphaseCollision/btCollisionAlgorithm.h"
 #include "BulletCollision/BroadphaseCollision/btBroadphaseProxy.h"
@@ -39,11 +39,11 @@ class btSoftRigidCollisionAlgorithm : public btCollisionAlgorithm
 
 public:
 
-	btSoftRigidCollisionAlgorithm(btPersistentManifold* mf,const btCollisionAlgorithmConstructionInfo& ci,btCollisionObject* col0,btCollisionObject* col1, bool isSwapped);
+	btSoftRigidCollisionAlgorithm(btPersistentManifold* mf,const btCollisionAlgorithmConstructionInfo& ci,const btCollisionObjectWrapper* col0,const btCollisionObjectWrapper* col1Wrap, bool isSwapped);
 
 	virtual ~btSoftRigidCollisionAlgorithm();
 
-	virtual void processCollision (btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
+	virtual void processCollision (const btCollisionObjectWrapper* body0Wrap,const btCollisionObjectWrapper* body1Wrap,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
 
 	virtual btScalar calculateTimeOfImpact(btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
 
@@ -55,21 +55,21 @@ public:
 
 	struct CreateFunc :public 	btCollisionAlgorithmCreateFunc
 	{
-		virtual	btCollisionAlgorithm* CreateCollisionAlgorithm(btCollisionAlgorithmConstructionInfo& ci, btCollisionObject* body0,btCollisionObject* body1)
+		virtual	btCollisionAlgorithm* CreateCollisionAlgorithm(btCollisionAlgorithmConstructionInfo& ci, const btCollisionObjectWrapper* body0Wrap,const btCollisionObjectWrapper* body1Wrap)
 		{
 			void* mem = ci.m_dispatcher1->allocateCollisionAlgorithm(sizeof(btSoftRigidCollisionAlgorithm));
 			if (!m_swapped)
 			{
-				return new(mem) btSoftRigidCollisionAlgorithm(0,ci,body0,body1,false);
+				return new(mem) btSoftRigidCollisionAlgorithm(0,ci,body0Wrap,body1Wrap,false);
 			} else
 			{
-				return new(mem) btSoftRigidCollisionAlgorithm(0,ci,body0,body1,true);
+				return new(mem) btSoftRigidCollisionAlgorithm(0,ci,body0Wrap,body1Wrap,true);
 			}
 		}
 	};
 
 };
 
-#endif //SOFT_RIGID_COLLISION_ALGORITHM_H
+#endif //BT_SOFT_RIGID_COLLISION_ALGORITHM_H
 
 
diff --git a/Engine/lib/bullet/src/BulletSoftBody/btSoftRigidDynamicsWorld.cpp b/Engine/lib/bullet/src/BulletSoftBody/btSoftRigidDynamicsWorld.cpp
index 8f3656829..5f3593545 100644
--- a/Engine/lib/bullet/src/BulletSoftBody/btSoftRigidDynamicsWorld.cpp
+++ b/Engine/lib/bullet/src/BulletSoftBody/btSoftRigidDynamicsWorld.cpp
@@ -20,14 +20,28 @@ subject to the following restrictions:
 //softbody & helpers
 #include "btSoftBody.h"
 #include "btSoftBodyHelpers.h"
+#include "btSoftBodySolvers.h"
+#include "btDefaultSoftBodySolver.h"
+#include "LinearMath/btSerializer.h"
 
 
-
-
-
-btSoftRigidDynamicsWorld::btSoftRigidDynamicsWorld(btDispatcher* dispatcher,btBroadphaseInterface* pairCache,btConstraintSolver* constraintSolver,btCollisionConfiguration* collisionConfiguration)
-:btDiscreteDynamicsWorld(dispatcher,pairCache,constraintSolver,collisionConfiguration)
+btSoftRigidDynamicsWorld::btSoftRigidDynamicsWorld(
+	btDispatcher* dispatcher,
+	btBroadphaseInterface* pairCache,
+	btConstraintSolver* constraintSolver,
+	btCollisionConfiguration* collisionConfiguration,
+	btSoftBodySolver *softBodySolver ) : 
+		btDiscreteDynamicsWorld(dispatcher,pairCache,constraintSolver,collisionConfiguration),
+		m_softBodySolver( softBodySolver ),
+		m_ownsSolver(false)
 {
+	if( !m_softBodySolver )
+	{
+		void* ptr = btAlignedAlloc(sizeof(btDefaultSoftBodySolver),16);
+		m_softBodySolver = new(ptr) btDefaultSoftBodySolver();
+		m_ownsSolver = true;
+	}
+
 	m_drawFlags			=	fDrawFlags::Std;
 	m_drawNodeTree		=	true;
 	m_drawFaceTree		=	false;
@@ -37,31 +51,50 @@ btSoftRigidDynamicsWorld::btSoftRigidDynamicsWorld(btDispatcher* dispatcher,btBr
 	m_sbi.m_sparsesdf.Initialize();
 	m_sbi.m_sparsesdf.Reset();
 
+	m_sbi.air_density		=	(btScalar)1.2;
+	m_sbi.water_density	=	0;
+	m_sbi.water_offset		=	0;
+	m_sbi.water_normal		=	btVector3(0,0,0);
+	m_sbi.m_gravity.setValue(0,-10,0);
+
+	m_sbi.m_sparsesdf.Initialize();
+
+
 }
 
 btSoftRigidDynamicsWorld::~btSoftRigidDynamicsWorld()
 {
-
+	if (m_ownsSolver)	// true only when the constructor created the default solver itself
+	{
+		m_softBodySolver->~btSoftBodySolver();	// constructed with placement new, so the destructor is invoked explicitly
+		btAlignedFree(m_softBodySolver);	// release the storage obtained from btAlignedAlloc in the constructor
+	}
 }
 
 void	btSoftRigidDynamicsWorld::predictUnconstraintMotion(btScalar timeStep)
 {
-	btDiscreteDynamicsWorld::predictUnconstraintMotion( timeStep);
-
-	for ( int i=0;i<m_softBodies.size();++i)
+	btDiscreteDynamicsWorld::predictUnconstraintMotion( timeStep );
 	{
-		btSoftBody*	psb= m_softBodies[i];
-
-		psb->predictMotion(timeStep);		
+		BT_PROFILE("predictUnconstraintMotionSoftBody");
+		m_softBodySolver->predictMotion( timeStep );
 	}
 }
 
-void	btSoftRigidDynamicsWorld::internalSingleStepSimulation( btScalar timeStep)
+void	btSoftRigidDynamicsWorld::internalSingleStepSimulation( btScalar timeStep )
 {
+
+	// Let the solver grab the soft bodies and if necessary optimize for it
+	m_softBodySolver->optimize( getSoftBodyArray() );
+
+	if( !m_softBodySolver->checkInitialized() )
+	{
+		btAssert( 0 && "Solver initialization failed\n" );	// '0 &&' forces the failure; a bare string literal is a non-null pointer and would never trip the assert
+	}
+
 	btDiscreteDynamicsWorld::internalSingleStepSimulation( timeStep );
 
 	///solve soft bodies constraints
-	solveSoftBodiesConstraints();
+	solveSoftBodiesConstraints( timeStep );
 
 	//self collisions
 	for ( int i=0;i<m_softBodies.size();i++)
@@ -71,22 +104,14 @@ void	btSoftRigidDynamicsWorld::internalSingleStepSimulation( btScalar timeStep)
 	}
 
 	///update soft bodies
-	updateSoftBodies();
+	m_softBodySolver->updateSoftBodies( );
+	
+	// End solver-wise simulation step
+	// ///////////////////////////////
 
 }
 
-void	btSoftRigidDynamicsWorld::updateSoftBodies()
-{
-	BT_PROFILE("updateSoftBodies");
-
-	for ( int i=0;i<m_softBodies.size();i++)
-	{
-		btSoftBody*	psb=(btSoftBody*)m_softBodies[i];
-		psb->integrateMotion();	
-	}
-}
-
-void	btSoftRigidDynamicsWorld::solveSoftBodiesConstraints()
+void	btSoftRigidDynamicsWorld::solveSoftBodiesConstraints( btScalar timeStep )
 {
 	BT_PROFILE("solveSoftConstraints");
 
@@ -95,17 +120,19 @@ void	btSoftRigidDynamicsWorld::solveSoftBodiesConstraints()
 		btSoftBody::solveClusters(m_softBodies);
 	}
 
-	for(int i=0;i<m_softBodies.size();++i)
-	{
-		btSoftBody*	psb=(btSoftBody*)m_softBodies[i];
-		psb->solveConstraints();
-	}	
+	// Solve constraints solver-wise
+	m_softBodySolver->solveConstraints( timeStep * m_softBodySolver->getTimeScale() );
+
 }
 
 void	btSoftRigidDynamicsWorld::addSoftBody(btSoftBody* body,short int collisionFilterGroup,short int collisionFilterMask)
 {
 	m_softBodies.push_back(body);
 
+	// Set the soft body solver that will deal with this body
+	// to be the world's solver
+	body->setSoftBodySolver( m_softBodySolver );
+
 	btCollisionWorld::addCollisionObject(body,
 		collisionFilterGroup,
 		collisionFilterMask);
@@ -138,8 +165,12 @@ void	btSoftRigidDynamicsWorld::debugDrawWorld()
 		for (  i=0;i<this->m_softBodies.size();i++)
 		{
 			btSoftBody*	psb=(btSoftBody*)this->m_softBodies[i];
-			btSoftBodyHelpers::DrawFrame(psb,m_debugDrawer);
-			btSoftBodyHelpers::Draw(psb,m_debugDrawer,m_drawFlags);
+			if (getDebugDrawer() && (getDebugDrawer()->getDebugMode() & (btIDebugDraw::DBG_DrawWireframe)))
+			{
+				btSoftBodyHelpers::DrawFrame(psb,m_debugDrawer);
+				btSoftBodyHelpers::Draw(psb,m_debugDrawer,m_drawFlags);
+			}
+			
 			if (m_debugDrawer && (m_debugDrawer->getDebugMode() & btIDebugDraw::DBG_DrawAabb))
 			{
 				if(m_drawNodeTree)		btSoftBodyHelpers::DrawNodeTree(psb,m_debugDrawer);
@@ -149,3 +180,188 @@ void	btSoftRigidDynamicsWorld::debugDrawWorld()
 		}		
 	}	
 }
+
+
+
+/// Broadphase ray callback for btSoftRigidDynamicsWorld::rayTest(): forwards each accepted proxy to rayTestSingle().
+struct btSoftSingleRayCallback : public btBroadphaseRayCallback
+{
+	btVector3	m_rayFromWorld;
+	btVector3	m_rayToWorld;
+	btTransform	m_rayFromTrans;
+	btTransform	m_rayToTrans;
+	btVector3	m_hitNormal;	// only referenced by the commented-out btRayAabb test in process()
+
+	const btSoftRigidDynamicsWorld*	m_world;
+	btCollisionWorld::RayResultCallback&	m_resultCallback;
+
+	btSoftSingleRayCallback(const btVector3& rayFromWorld,const btVector3& rayToWorld,const btSoftRigidDynamicsWorld* world,btCollisionWorld::RayResultCallback& resultCallback)
+	:m_rayFromWorld(rayFromWorld),
+	m_rayToWorld(rayToWorld),
+	m_world(world),
+	m_resultCallback(resultCallback)
+	{
+		m_rayFromTrans.setIdentity();
+		m_rayFromTrans.setOrigin(m_rayFromWorld);
+		m_rayToTrans.setIdentity();
+		m_rayToTrans.setOrigin(m_rayToWorld);
+
+		btVector3 rayDir = (rayToWorld-rayFromWorld);
+		// m_rayDirectionInverse, m_signs and m_lambda_max are not declared in this struct — presumably inherited from btBroadphaseRayCallback (verify)
+		rayDir.normalize ();	// NOTE(review): no guard for rayFromWorld == rayToWorld (zero-length direction) — confirm callers never pass one
+		///a zero direction component would divide by zero, so its inverse is set to a huge value (1e30) instead
+		m_rayDirectionInverse[0] = rayDir[0] == btScalar(0.0) ? btScalar(1e30) : btScalar(1.0) / rayDir[0];
+		m_rayDirectionInverse[1] = rayDir[1] == btScalar(0.0) ? btScalar(1e30) : btScalar(1.0) / rayDir[1];
+		m_rayDirectionInverse[2] = rayDir[2] == btScalar(0.0) ? btScalar(1e30) : btScalar(1.0) / rayDir[2];
+		m_signs[0] = m_rayDirectionInverse[0] < 0.0;	// each sign flag is set when that direction component is negative
+		m_signs[1] = m_rayDirectionInverse[1] < 0.0;
+		m_signs[2] = m_rayDirectionInverse[2] < 0.0;
+
+		m_lambda_max = rayDir.dot(m_rayToWorld-m_rayFromWorld);	// projection of the full ray onto its normalized direction, i.e. the ray length
+
+	}
+
+	
+	/// Invoked for each broadphase proxy; returns false to stop further ray tests, true to continue.
+	virtual bool	process(const btBroadphaseProxy* proxy)
+	{
+		///terminate further ray tests, once the closestHitFraction reached zero
+		if (m_resultCallback.m_closestHitFraction == btScalar(0.f))
+			return false;
+
+		btCollisionObject*	collisionObject = (btCollisionObject*)proxy->m_clientObject;
+
+		//only perform raycast if filterMask matches
+		if(m_resultCallback.needsCollision(collisionObject->getBroadphaseHandle())) 
+		{
+			//RigidcollisionObject* collisionObject = ctrl->GetRigidcollisionObject();
+			//btVector3 collisionObjectAabbMin,collisionObjectAabbMax;
+#if 0
+#ifdef RECALCULATE_AABB
+			btVector3 collisionObjectAabbMin,collisionObjectAabbMax;
+			collisionObject->getCollisionShape()->getAabb(collisionObject->getWorldTransform(),collisionObjectAabbMin,collisionObjectAabbMax);
+#else
+			//getBroadphase()->getAabb(collisionObject->getBroadphaseHandle(),collisionObjectAabbMin,collisionObjectAabbMax);
+			const btVector3& collisionObjectAabbMin = collisionObject->getBroadphaseHandle()->m_aabbMin;
+			const btVector3& collisionObjectAabbMax = collisionObject->getBroadphaseHandle()->m_aabbMax;
+#endif
+#endif
+			//btScalar hitLambda = m_resultCallback.m_closestHitFraction;
+			//culling already done by broadphase
+			//if (btRayAabb(m_rayFromWorld,m_rayToWorld,collisionObjectAabbMin,collisionObjectAabbMax,hitLambda,m_hitNormal))
+			{
+				m_world->rayTestSingle(m_rayFromTrans,m_rayToTrans,
+					collisionObject,
+						collisionObject->getCollisionShape(),
+						collisionObject->getWorldTransform(),
+						m_resultCallback);
+			}
+		}
+		return true;
+	}
+};
+
+void	btSoftRigidDynamicsWorld::rayTest(const btVector3& rayFromWorld, const btVector3& rayToWorld, RayResultCallback& resultCallback) const
+{
+	BT_PROFILE("rayTest");
+	// Every proxy passed to this callback gets an exact ray test via rayTestSingle().
+	btSoftSingleRayCallback perProxyRayTest(rayFromWorld, rayToWorld, this, resultCallback);
+
+#ifdef USE_BRUTEFORCE_RAYBROADPHASE
+	// Brute force: hand every collision object's broadphase proxy to the callback.
+	for (int objectIndex = 0; objectIndex < this->getNumCollisionObjects(); ++objectIndex)
+	{
+		perProxyRayTest.process(m_collisionObjects[objectIndex]->getBroadphaseHandle());
+	}
+#else
+	// Default: let the broadphase select candidate proxies from their AABBs.
+	m_broadphasePairCache->rayTest(rayFromWorld, rayToWorld, perProxyRayTest);
+#endif // USE_BRUTEFORCE_RAYBROADPHASE
+}
+
+/// Ray test against one collision object: soft bodies use btSoftBody::rayTest, everything else defers to btCollisionWorld::rayTestSingle.
+void	btSoftRigidDynamicsWorld::rayTestSingle(const btTransform& rayFromTrans,const btTransform& rayToTrans,
+					  btCollisionObject* collisionObject,
+					  const btCollisionShape* collisionShape,
+					  const btTransform& colObjWorldTransform,
+					  RayResultCallback& resultCallback)
+{
+	if (collisionShape->isSoftBody()) {
+		btSoftBody* softBody = btSoftBody::upcast(collisionObject);
+		if (softBody) {
+			btSoftBody::sRayCast softResult;
+			if (softBody->rayTest(rayFromTrans.getOrigin(), rayToTrans.getOrigin(), softResult)) 
+			{
+				// report the hit only if it is at least as close as the best fraction recorded so far
+				if (softResult.fraction<= resultCallback.m_closestHitFraction)
+				{
+					// describe the hit feature; softResult.index is reported as the triangle index
+					btCollisionWorld::LocalShapeInfo shapeInfo;
+					shapeInfo.m_shapePart = 0;
+					shapeInfo.m_triangleIndex = softResult.index;
+					// default normal: the reversed, normalized ray direction
+					btVector3 rayDir = rayToTrans.getOrigin() - rayFromTrans.getOrigin();
+					btVector3 normal=-rayDir;
+					normal.normalize();
+
+					if (softResult.feature == btSoftBody::eFeature::Face)	// a face was hit: use that face's own normal instead
+					{
+						normal = softBody->m_faces[softResult.index].m_normal;
+						if (normal.dot(rayDir) > 0) {
+							// flip so the normal always points toward the origin of the ray
+							normal = -normal;
+						}
+					}
+	
+					btCollisionWorld::LocalRayResult rayResult
+						(collisionObject,
+						 &shapeInfo,
+						 normal,
+						 softResult.fraction);
+					bool	normalInWorldSpace = true;
+					resultCallback.addSingleResult(rayResult,normalInWorldSpace);
+				}
+			}
+		}
+	} 
+	else {
+		btCollisionWorld::rayTestSingle(rayFromTrans,rayToTrans,collisionObject,collisionShape,colObjWorldTransform,resultCallback);
+	}
+}
+
+
+void	btSoftRigidDynamicsWorld::serializeSoftBodies(btSerializer* serializer)
+{
+	// Scan every collision object in the world and write one BT_SOFTBODY_CODE
+	// chunk for each object whose internal type has the CO_SOFT_BODY bit set.
+	for (int objectIndex = 0; objectIndex < m_collisionObjects.size(); ++objectIndex)
+	{
+		btCollisionObject* candidate = m_collisionObjects[objectIndex];
+		const bool isSoftBody = (candidate->getInternalType() & btCollisionObject::CO_SOFT_BODY) != 0;
+		if (!isSoftBody) continue;
+
+		// Size and allocate the chunk, let the object fill it, then finalize it.
+		const int bufferSize = candidate->calculateSerializeBufferSize();
+		btChunk* softBodyChunk = serializer->allocate(bufferSize, 1);
+		const char* structName = candidate->serialize(softBodyChunk->m_oldPtr, serializer);
+		serializer->finalizeChunk(softBodyChunk, structName, BT_SOFTBODY_CODE, candidate);
+	}
+}
+
+void	btSoftRigidDynamicsWorld::serialize(btSerializer* serializer)
+{
+	// Write the whole world into 'serializer', bracketed by startSerialization()/finishSerialization().
+	serializer->startSerialization();
+
+	serializeDynamicsWorldInfo( serializer);
+	// Soft bodies are written before the rigid bodies and the remaining collision objects.
+	serializeSoftBodies(serializer);
+
+	serializeRigidBodies(serializer);
+
+	serializeCollisionObjects(serializer);
+
+	serializer->finishSerialization();
+}
+
+
diff --git a/Engine/lib/bullet/src/BulletSoftBody/btSoftRigidDynamicsWorld.h b/Engine/lib/bullet/src/BulletSoftBody/btSoftRigidDynamicsWorld.h
index 6b7ed5dea..3e0efafd6 100644
--- a/Engine/lib/bullet/src/BulletSoftBody/btSoftRigidDynamicsWorld.h
+++ b/Engine/lib/bullet/src/BulletSoftBody/btSoftRigidDynamicsWorld.h
@@ -21,6 +21,8 @@ subject to the following restrictions:
 
 typedef	btAlignedObjectArray<btSoftBody*> btSoftBodyArray;
 
+class btSoftBodySolver;
+
 class btSoftRigidDynamicsWorld : public btDiscreteDynamicsWorld
 {
 
@@ -30,6 +32,9 @@ class btSoftRigidDynamicsWorld : public btDiscreteDynamicsWorld
 	bool			m_drawFaceTree;
 	bool			m_drawClusterTree;
 	btSoftBodyWorldInfo m_sbi;
+	///Solver classes that encapsulate multiple soft bodies for solving
+	btSoftBodySolver *m_softBodySolver;
+	bool			m_ownsSolver;
 
 protected:
 
@@ -37,14 +42,13 @@ protected:
 
 	virtual void	internalSingleStepSimulation( btScalar timeStep);
 
-	void	updateSoftBodies();
-
-	void	solveSoftBodiesConstraints();
+	void	solveSoftBodiesConstraints( btScalar timeStep );
 
+	void	serializeSoftBodies(btSerializer* serializer);
 
 public:
 
-	btSoftRigidDynamicsWorld(btDispatcher* dispatcher,btBroadphaseInterface* pairCache,btConstraintSolver* constraintSolver,btCollisionConfiguration* collisionConfiguration);
+	btSoftRigidDynamicsWorld(btDispatcher* dispatcher,btBroadphaseInterface* pairCache,btConstraintSolver* constraintSolver, btCollisionConfiguration* collisionConfiguration, btSoftBodySolver *softBodySolver = 0 );
 
 	virtual ~btSoftRigidDynamicsWorld();
 
@@ -69,6 +73,10 @@ public:
 		return m_sbi;
 	}
 
+	virtual btDynamicsWorldType	getWorldType() const
+	{
+		return	BT_SOFT_RIGID_DYNAMICS_WORLD;
+	}
 
 	btSoftBodyArray& getSoftBodyArray()
 	{
@@ -80,6 +88,20 @@ public:
 		return m_softBodies;
 	}
 
+
+	virtual void rayTest(const btVector3& rayFromWorld, const btVector3& rayToWorld, RayResultCallback& resultCallback) const; 
+
+	/// rayTestSingle performs a raycast call and calls the resultCallback. It is used internally by rayTest.
+	/// In a future implementation, we consider moving the ray test as a virtual method in btCollisionShape.
+	/// This allows more customization.
+	static void	rayTestSingle(const btTransform& rayFromTrans,const btTransform& rayToTrans,
+					  btCollisionObject* collisionObject,
+					  const btCollisionShape* collisionShape,
+					  const btTransform& colObjWorldTransform,
+					  RayResultCallback& resultCallback);
+
+	virtual	void	serialize(btSerializer* serializer);
+
 };
 
 #endif //BT_SOFT_RIGID_DYNAMICS_WORLD_H
diff --git a/Engine/lib/bullet/src/BulletSoftBody/btSoftSoftCollisionAlgorithm.cpp b/Engine/lib/bullet/src/BulletSoftBody/btSoftSoftCollisionAlgorithm.cpp
index 85a727944..72043e69e 100644
--- a/Engine/lib/bullet/src/BulletSoftBody/btSoftSoftCollisionAlgorithm.cpp
+++ b/Engine/lib/bullet/src/BulletSoftBody/btSoftSoftCollisionAlgorithm.cpp
@@ -17,11 +17,13 @@ subject to the following restrictions:
 #include "BulletCollision/CollisionDispatch/btCollisionDispatcher.h"
 #include "BulletCollision/CollisionShapes/btBoxShape.h"
 #include "BulletCollision/CollisionDispatch/btCollisionObject.h"
+#include "BulletSoftBody/btSoftBodySolvers.h"
 #include "btSoftBody.h"
+#include "BulletCollision/CollisionDispatch/btCollisionObjectWrapper.h"
 
 #define USE_PERSISTENT_CONTACTS 1
 
-btSoftSoftCollisionAlgorithm::btSoftSoftCollisionAlgorithm(btPersistentManifold* /*mf*/,const btCollisionAlgorithmConstructionInfo& ci,btCollisionObject* /*obj0*/,btCollisionObject* /*obj1*/)
+btSoftSoftCollisionAlgorithm::btSoftSoftCollisionAlgorithm(btPersistentManifold* /*mf*/,const btCollisionAlgorithmConstructionInfo& ci,const btCollisionObjectWrapper* /*obj0*/,const btCollisionObjectWrapper* /*obj1*/)
 : btCollisionAlgorithm(ci)
 //m_ownManifold(false),
 //m_manifoldPtr(mf)
@@ -32,11 +34,11 @@ btSoftSoftCollisionAlgorithm::~btSoftSoftCollisionAlgorithm()
 {
 }
 
-void btSoftSoftCollisionAlgorithm::processCollision (btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& /*dispatchInfo*/,btManifoldResult* /*resultOut*/)
+void btSoftSoftCollisionAlgorithm::processCollision (const btCollisionObjectWrapper* body0Wrap,const btCollisionObjectWrapper* body1Wrap,const btDispatcherInfo& /*dispatchInfo*/,btManifoldResult* /*resultOut*/)
 {
-	btSoftBody* soft0 =	(btSoftBody*)body0;
-	btSoftBody* soft1 =	(btSoftBody*)body1;
-	soft0->defaultCollisionHandler(soft1);
+	btSoftBody* soft0 =	(btSoftBody*)body0Wrap->getCollisionObject();
+	btSoftBody* soft1 =	(btSoftBody*)body1Wrap->getCollisionObject();
+	soft0->getSoftBodySolver()->processCollision(soft0, soft1);
 }
 
 btScalar btSoftSoftCollisionAlgorithm::calculateTimeOfImpact(btCollisionObject* /*body0*/,btCollisionObject* /*body1*/,const btDispatcherInfo& /*dispatchInfo*/,btManifoldResult* /*resultOut*/)
diff --git a/Engine/lib/bullet/src/BulletSoftBody/btSoftSoftCollisionAlgorithm.h b/Engine/lib/bullet/src/BulletSoftBody/btSoftSoftCollisionAlgorithm.h
index 1b34e0af6..43b1439cc 100644
--- a/Engine/lib/bullet/src/BulletSoftBody/btSoftSoftCollisionAlgorithm.h
+++ b/Engine/lib/bullet/src/BulletSoftBody/btSoftSoftCollisionAlgorithm.h
@@ -13,8 +13,8 @@ subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-#ifndef SOFT_SOFT_COLLISION_ALGORITHM_H
-#define SOFT_SOFT_COLLISION_ALGORITHM_H
+#ifndef BT_SOFT_SOFT_COLLISION_ALGORITHM_H
+#define BT_SOFT_SOFT_COLLISION_ALGORITHM_H
 
 #include "BulletCollision/BroadphaseCollision/btCollisionAlgorithm.h"
 #include "BulletCollision/BroadphaseCollision/btBroadphaseProxy.h"
@@ -38,7 +38,7 @@ public:
 	btSoftSoftCollisionAlgorithm(const btCollisionAlgorithmConstructionInfo& ci)
 		: btCollisionAlgorithm(ci) {}
 
-	virtual void processCollision (btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
+	virtual void processCollision (const btCollisionObjectWrapper* body0Wrap,const btCollisionObjectWrapper* body1Wrap,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
 
 	virtual btScalar calculateTimeOfImpact(btCollisionObject* body0,btCollisionObject* body1,const btDispatcherInfo& dispatchInfo,btManifoldResult* resultOut);
 
@@ -48,22 +48,22 @@ public:
 			manifoldArray.push_back(m_manifoldPtr);
 	}
 
-	btSoftSoftCollisionAlgorithm(btPersistentManifold* mf,const btCollisionAlgorithmConstructionInfo& ci,btCollisionObject* body0,btCollisionObject* body1);
+	btSoftSoftCollisionAlgorithm(btPersistentManifold* mf,const btCollisionAlgorithmConstructionInfo& ci,const btCollisionObjectWrapper* body0Wrap,const btCollisionObjectWrapper* body1Wrap);
 
 	virtual ~btSoftSoftCollisionAlgorithm();
 
 	struct CreateFunc :public 	btCollisionAlgorithmCreateFunc
 	{
-		virtual	btCollisionAlgorithm* CreateCollisionAlgorithm(btCollisionAlgorithmConstructionInfo& ci, btCollisionObject* body0,btCollisionObject* body1)
+		virtual	btCollisionAlgorithm* CreateCollisionAlgorithm(btCollisionAlgorithmConstructionInfo& ci, const btCollisionObjectWrapper* body0Wrap,const btCollisionObjectWrapper* body1Wrap)
 		{
 			int bbsize = sizeof(btSoftSoftCollisionAlgorithm);
 			void* ptr = ci.m_dispatcher1->allocateCollisionAlgorithm(bbsize);
-			return new(ptr) btSoftSoftCollisionAlgorithm(0,ci,body0,body1);
+			return new(ptr) btSoftSoftCollisionAlgorithm(0,ci,body0Wrap,body1Wrap);
 		}
 	};
 
 };
 
-#endif //SOFT_SOFT_COLLISION_ALGORITHM_H
+#endif //BT_SOFT_SOFT_COLLISION_ALGORITHM_H
 
 
diff --git a/Engine/lib/bullet/src/BulletSoftBody/btSparseSDF.h b/Engine/lib/bullet/src/BulletSoftBody/btSparseSDF.h
index cc4266732..180e3c218 100644
--- a/Engine/lib/bullet/src/BulletSoftBody/btSparseSDF.h
+++ b/Engine/lib/bullet/src/BulletSoftBody/btSparseSDF.h
@@ -14,8 +14,8 @@ subject to the following restrictions:
 */
 ///btSparseSdf implementation by Nathanael Presson
 
-#ifndef _14F9D17F_EAE8_4aba_B41C_292DB2AA70F3_
-#define _14F9D17F_EAE8_4aba_B41C_292DB2AA70F3_
+#ifndef BT_SPARSE_SDF_H
+#define BT_SPARSE_SDF_H
 
 #include "BulletCollision/CollisionDispatch/btCollisionObject.h"
 #include "BulletCollision/NarrowPhaseCollision/btGjkEpa2.h"
@@ -58,7 +58,7 @@ struct	btSparseSdf
 		int					c[3];
 		int					puid;
 		unsigned			hash;
-		btCollisionShape*	pclient;
+		const btCollisionShape*	pclient;
 		Cell*				next;
 	};
 	//
@@ -152,7 +152,7 @@ struct	btSparseSdf
 	}
 	//
 	btScalar				Evaluate(	const btVector3& x,
-		btCollisionShape* shape,
+		const btCollisionShape* shape,
 		btVector3& normal,
 		btScalar margin)
 	{
@@ -248,14 +248,14 @@ struct	btSparseSdf
 	}
 	//
 	static inline btScalar	DistanceToShape(const btVector3& x,
-		btCollisionShape* shape)
+		const btCollisionShape* shape)
 	{
 		btTransform	unit;
 		unit.setIdentity();
 		if(shape->isConvex())
 		{
 			btGjkEpaSolver2::sResults	res;
-			btConvexShape*				csh=static_cast<btConvexShape*>(shape);
+			const btConvexShape*				csh=static_cast<const btConvexShape*>(shape);
 			return(btGjkEpaSolver2::SignedDistance(x,0,csh,unit,res));
 		}
 		return(0);
@@ -282,7 +282,7 @@ struct	btSparseSdf
 
 
 	//
-	static inline unsigned int	Hash(int x,int y,int z,btCollisionShape* shape)
+	static inline unsigned int	Hash(int x,int y,int z,const btCollisionShape* shape)
 	{
 		struct btS
 		{ 
@@ -292,7 +292,7 @@ struct	btSparseSdf
 
 		btS myset;
 
-		myset.x=x;myset.y=y;myset.z=z;myset.p=shape;
+		myset.x=x;myset.y=y;myset.z=z;myset.p=(void*)shape;
 		const void* ptr = &myset;
 
 		unsigned int result = HsiehHash<sizeof(btS)/4> (ptr);
@@ -303,4 +303,4 @@ struct	btSparseSdf
 };
 
 
-#endif
+#endif //BT_SPARSE_SDF_H
diff --git a/Engine/lib/bullet/src/BulletSoftBody/premake4.lua b/Engine/lib/bullet/src/BulletSoftBody/premake4.lua
new file mode 100644
index 000000000..339043f5f
--- /dev/null
+++ b/Engine/lib/bullet/src/BulletSoftBody/premake4.lua
@@ -0,0 +1,11 @@
+	project "BulletSoftBody"
+		
+	kind "StaticLib"
+	targetdir "../../lib"
+	includedirs {
+		"..",
+	}
+	files {
+		"**.cpp",
+		"**.h"
+	}
\ No newline at end of file
diff --git a/Engine/lib/bullet/src/CMakeLists.txt b/Engine/lib/bullet/src/CMakeLists.txt
index d901bb341..3a736b42d 100644
--- a/Engine/lib/bullet/src/CMakeLists.txt
+++ b/Engine/lib/bullet/src/CMakeLists.txt
@@ -1,15 +1,32 @@
-if (CMAKE_SIZEOF_VOID_P MATCHES "8")
-SUBDIRS( BulletSoftBody BulletCollision BulletDynamics LinearMath )
-else (CMAKE_SIZEOF_VOID_P MATCHES "8")
-SUBDIRS( BulletMultiThreaded BulletSoftBody BulletCollision BulletDynamics LinearMath )
-endif (CMAKE_SIZEOF_VOID_P MATCHES "8")
+SUBDIRS(  BulletSoftBody BulletCollision BulletDynamics LinearMath )
 
-#INSTALL of other files requires CMake 2.6
-IF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5)
-	IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
-		# Don't actually need to install any common files, the frameworks include everything
-	ELSE (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
-        INSTALL(FILES btBulletCollisionCommon.h btBulletDynamicsCommon.h Bullet-C-Api.h DESTINATION include)
-	ENDIF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
-ENDIF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5)
+IF(BUILD_MULTITHREADING)
+	SUBDIRS(MiniCL BulletMultiThreaded)
+ENDIF()
 
+if(INSTALL_LIBS)
+	# install() of these loose header files requires CMake 2.6 or newer
+	if(${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5)
+		if(APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
+			# Framework builds need no common files installed: the frameworks include everything
+		else()
+			# Common headers that live directly in this directory
+			install(FILES btBulletCollisionCommon.h btBulletDynamicsCommon.h Bullet-C-Api.h DESTINATION ${INCLUDE_INSTALL_DIR})
+			# vectormath entry header
+			install(FILES vectormath/vmInclude.h DESTINATION ${INCLUDE_INSTALL_DIR}/vectormath)
+			# vectormath/scalar headers
+			install(FILES
+					vectormath/scalar/boolInVec.h vectormath/scalar/floatInVec.h
+					vectormath/scalar/mat_aos.h vectormath/scalar/quat_aos.h
+					vectormath/scalar/vec_aos.h vectormath/scalar/vectormath_aos.h
+				DESTINATION ${INCLUDE_INSTALL_DIR}/vectormath/scalar)
+			# vectormath/sse headers
+			install(FILES
+					vectormath/sse/boolInVec.h vectormath/sse/floatInVec.h
+					vectormath/sse/mat_aos.h vectormath/sse/quat_aos.h
+					vectormath/sse/vec_aos.h vectormath/sse/vecidx_aos.h
+					vectormath/sse/vectormath_aos.h
+				DESTINATION ${INCLUDE_INSTALL_DIR}/vectormath/sse)
+		endif()
+	endif()
+endif()
diff --git a/Engine/lib/bullet/src/Jamfile b/Engine/lib/bullet/src/Jamfile
deleted file mode 100644
index 5689366b2..000000000
--- a/Engine/lib/bullet/src/Jamfile
+++ /dev/null
@@ -1,7 +0,0 @@
-SubDir TOP src ;
-SubInclude TOP src BulletMultiThreaded ;
-SubInclude TOP src BulletSoftBody ;
-SubInclude TOP src BulletCollision ;
-SubInclude TOP src BulletDynamics ;
-SubInclude TOP src LinearMath ;
-Recurse InstallHeader : .h  ;
diff --git a/Engine/lib/bullet/src/LinearMath/CMakeLists.txt b/Engine/lib/bullet/src/LinearMath/CMakeLists.txt
index e93add4dd..cc77583a0 100644
--- a/Engine/lib/bullet/src/LinearMath/CMakeLists.txt
+++ b/Engine/lib/bullet/src/LinearMath/CMakeLists.txt
@@ -1,37 +1,45 @@
 
 INCLUDE_DIRECTORIES(
-	${BULLET_PHYSICS_SOURCE_DIR}/src }
+	${BULLET_PHYSICS_SOURCE_DIR}/src
 )
 
 SET(LinearMath_SRCS
 	btAlignedAllocator.cpp
-		btConvexHull.cpp
-		btGeometryUtil.cpp
+	btConvexHull.cpp
+	btConvexHullComputer.cpp
+	btGeometryUtil.cpp
+	btPolarDecomposition.cpp
 	btQuickprof.cpp
+	btSerializer.cpp
+	btVector3.cpp
 )
 
 SET(LinearMath_HDRS
 	btAabbUtil2.h
 	btAlignedAllocator.h
-		btAlignedObjectArray.h
+	btAlignedObjectArray.h
 	btConvexHull.h
-		btDefaultMotionState.h
+	btConvexHullComputer.h
+	btDefaultMotionState.h
 	btGeometryUtil.h
-		btHashMap.h
+	btGrahamScan2dConvexHull.h
+	btHashMap.h
 	btIDebugDraw.h
 	btList.h
 	btMatrix3x3.h
-		btMinMax.h
+	btMinMax.h
 	btMotionState.h
+	btPolarDecomposition.h
 	btPoolAllocator.h
 	btQuadWord.h
-		btQuaternion.h
+	btQuaternion.h
 	btQuickprof.h
 	btRandom.h
 	btScalar.h
-		btStackAlloc.h
-		btTransform.h
-		btTransformUtil.h
+	btSerializer.h
+	btStackAlloc.h
+	btTransform.h
+	btTransformUtil.h
 	btVector3.h
 )
 
@@ -39,17 +47,23 @@ ADD_LIBRARY(LinearMath ${LinearMath_SRCS} ${LinearMath_HDRS})
 SET_TARGET_PROPERTIES(LinearMath PROPERTIES VERSION ${BULLET_VERSION})
 SET_TARGET_PROPERTIES(LinearMath PROPERTIES SOVERSION ${BULLET_VERSION})
 
-#FILES_MATCHING requires CMake 2.6
-IF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5)
-	IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
-		INSTALL(TARGETS LinearMath DESTINATION .)
-	ELSE (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
-	INSTALL(TARGETS LinearMath DESTINATION lib)
-	INSTALL(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} DESTINATION include FILES_MATCHING PATTERN "*.h")
-	ENDIF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
-ENDIF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5)
+IF (INSTALL_LIBS)
+	IF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES)
+		# INSTALL(DIRECTORY ... FILES_MATCHING) requires CMake 2.6, hence the version guard on the next line
+		IF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5)
+			IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
+				INSTALL(TARGETS LinearMath DESTINATION .)
+			ELSE (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
+				INSTALL(TARGETS LinearMath DESTINATION lib${LIB_SUFFIX})
+				INSTALL(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+DESTINATION ${INCLUDE_INSTALL_DIR} FILES_MATCHING PATTERN "*.h"  PATTERN
+".svn" EXCLUDE PATTERN "CMakeFiles" EXCLUDE)
+			ENDIF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
+		ENDIF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5)
 
-IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
-	SET_TARGET_PROPERTIES(LinearMath PROPERTIES FRAMEWORK true)
-	SET_TARGET_PROPERTIES(LinearMath PROPERTIES PUBLIC_HEADER "${LinearMath_HDRS}")
-ENDIF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
+		IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK) # NOTE(review): nested here, these properties are only set when INSTALL_LIBS is enabled - confirm intended
+			SET_TARGET_PROPERTIES(LinearMath PROPERTIES FRAMEWORK true)
+			SET_TARGET_PROPERTIES(LinearMath PROPERTIES PUBLIC_HEADER "${LinearMath_HDRS}")
+		ENDIF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
+	ENDIF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES)
+ENDIF (INSTALL_LIBS)
diff --git a/Engine/lib/bullet/src/LinearMath/Jamfile b/Engine/lib/bullet/src/LinearMath/Jamfile
deleted file mode 100644
index 469784104..000000000
--- a/Engine/lib/bullet/src/LinearMath/Jamfile
+++ /dev/null
@@ -1,10 +0,0 @@
-
-SubDir TOP src LinearMath ;
-
-Description bulletmath : "Bullet Math Library" ;
-
-Library bulletmath :
-[ Wildcard *.h *.cpp ]
-	;
-
-#InstallHeader [ Wildcard *.h ] ;
diff --git a/Engine/lib/bullet/src/LinearMath/btAabbUtil2.h b/Engine/lib/bullet/src/LinearMath/btAabbUtil2.h
index 532ce1bf6..d2997b4e6 100644
--- a/Engine/lib/bullet/src/LinearMath/btAabbUtil2.h
+++ b/Engine/lib/bullet/src/LinearMath/btAabbUtil2.h
@@ -14,8 +14,8 @@ subject to the following restrictions:
 
 
 
-#ifndef AABB_UTIL2
-#define AABB_UTIL2
+#ifndef BT_AABB_UTIL2
+#define BT_AABB_UTIL2
 
 #include "btTransform.h"
 #include "btVector3.h"
@@ -184,9 +184,7 @@ SIMD_FORCE_INLINE	void btTransformAabb(const btVector3& halfExtents, btScalar ma
 	btVector3 halfExtentsWithMargin = halfExtents+btVector3(margin,margin,margin);
 	btMatrix3x3 abs_b = t.getBasis().absolute();  
 	btVector3 center = t.getOrigin();
-	btVector3 extent = btVector3(abs_b[0].dot(halfExtentsWithMargin),
-		   abs_b[1].dot(halfExtentsWithMargin),
-		  abs_b[2].dot(halfExtentsWithMargin));
+    btVector3 extent = halfExtentsWithMargin.dot3( abs_b[0], abs_b[1], abs_b[2] );
 	aabbMinOut = center - extent;
 	aabbMaxOut = center + extent;
 }
@@ -203,9 +201,7 @@ SIMD_FORCE_INLINE	void btTransformAabb(const btVector3& localAabbMin,const btVec
 		btVector3 localCenter = btScalar(0.5)*(localAabbMax+localAabbMin);
 		btMatrix3x3 abs_b = trans.getBasis().absolute();  
 		btVector3 center = trans(localCenter);
-		btVector3 extent = btVector3(abs_b[0].dot(localHalfExtents),
-			   abs_b[1].dot(localHalfExtents),
-			  abs_b[2].dot(localHalfExtents));
+        btVector3 extent = localHalfExtents.dot3( abs_b[0], abs_b[1], abs_b[2] );
 		aabbMinOut = center-extent;
 		aabbMaxOut = center+extent;
 }
@@ -231,6 +227,6 @@ SIMD_FORCE_INLINE	void btTransformAabb(const btVector3& localAabbMin,const btVec
 	}
 #endif //USE_BANCHLESS
 
-#endif
+#endif //BT_AABB_UTIL2
 
 
diff --git a/Engine/lib/bullet/src/LinearMath/btAlignedAllocator.cpp b/Engine/lib/bullet/src/LinearMath/btAlignedAllocator.cpp
index a3d790f8a..a65296c6a 100644
--- a/Engine/lib/bullet/src/LinearMath/btAlignedAllocator.cpp
+++ b/Engine/lib/bullet/src/LinearMath/btAlignedAllocator.cpp
@@ -58,16 +58,18 @@ static inline void btAlignedFreeDefault(void *ptr)
 	free(ptr);
 }
 #else
+
+
+
+
+
 static inline void *btAlignedAllocDefault(size_t size, int alignment)
 {
   void *ret;
   char *real;
-  unsigned long offset;
-
   real = (char *)sAllocFunc(size + sizeof(void *) + (alignment-1));
   if (real) {
-    offset = (alignment - (unsigned long)(real + sizeof(void *))) & (alignment-1);
-    ret = (void *)((real + sizeof(void *)) + offset);
+	ret = btAlignPointer(real + sizeof(void *),alignment);
     *((void **)(ret)-1) = (void *)(real);
   } else {
     ret = (void *)(real);
@@ -110,7 +112,6 @@ void*   btAlignedAllocInternal  (size_t size, int alignment,int line,char* filen
 {
  void *ret;
  char *real;
- unsigned long offset;
 
  gTotalBytesAlignedAllocs += size;
  gNumAlignedAllocs++;
@@ -118,9 +119,7 @@ void*   btAlignedAllocInternal  (size_t size, int alignment,int line,char* filen
  
  real = (char *)sAllocFunc(size + 2*sizeof(void *) + (alignment-1));
  if (real) {
-   offset = (alignment - (unsigned long)(real + 2*sizeof(void *))) &
-(alignment-1);
-   ret = (void *)((real + 2*sizeof(void *)) + offset);
+   ret = (void*) btAlignPointer(real + 2*sizeof(void *), alignment);
    *((void **)(ret)-1) = (void *)(real);
        *((int*)(ret)-2) = size;
 
@@ -160,22 +159,8 @@ void    btAlignedFreeInternal   (void* ptr,int line,char* filename)
 void*	btAlignedAllocInternal	(size_t size, int alignment)
 {
 	gNumAlignedAllocs++;
-  void* ptr;
-#if defined (BT_HAS_ALIGNED_ALLOCATOR) || defined(__CELLOS_LV2__)
+	void* ptr;
 	ptr = sAlignedAllocFunc(size, alignment);
-#else
-  char *real;
-  unsigned long offset;
-
-  real = (char *)sAllocFunc(size + sizeof(void *) + (alignment-1));
-  if (real) {
-    offset = (alignment - (unsigned long)(real + sizeof(void *))) & (alignment-1);
-    ptr = (void *)((real + sizeof(void *)) + offset);
-    *((void **)(ptr)-1) = (void *)(real);
-  } else {
-    ptr = (void *)(real);
-  }
-#endif  // defined (BT_HAS_ALIGNED_ALLOCATOR) || defined(__CELLOS_LV2__)
 //	printf("btAlignedAllocInternal %d, %x\n",size,ptr);
 	return ptr;
 }
@@ -189,16 +174,7 @@ void	btAlignedFreeInternal	(void* ptr)
 
 	gNumAlignedFree++;
 //	printf("btAlignedFreeInternal %x\n",ptr);
-#if defined (BT_HAS_ALIGNED_ALLOCATOR) || defined(__CELLOS_LV2__)
 	sAlignedFreeFunc(ptr);
-#else
-  void* real;
-
-  if (ptr) {
-    real = *((void **)(ptr)-1);
-    sFreeFunc(real);
-  }
-#endif  // defined (BT_HAS_ALIGNED_ALLOCATOR) || defined(__CELLOS_LV2__)
 }
 
 #endif //BT_DEBUG_MEMORY_ALLOCATIONS
diff --git a/Engine/lib/bullet/src/LinearMath/btAlignedObjectArray.h b/Engine/lib/bullet/src/LinearMath/btAlignedObjectArray.h
index 7c013d375..24e59ab65 100644
--- a/Engine/lib/bullet/src/LinearMath/btAlignedObjectArray.h
+++ b/Engine/lib/bullet/src/LinearMath/btAlignedObjectArray.h
@@ -28,6 +28,7 @@ subject to the following restrictions:
 
 #define BT_USE_PLACEMENT_NEW 1
 //#define BT_USE_MEMCPY 1 //disable, because it is cumbersome to find out for each platform where memcpy is defined. It can be in <memory.h> or <string.h> or otherwise...
+#define BT_ALLOW_ARRAY_COPY_OPERATOR // enabling this can accidentally perform deep copies of data if you are not careful
 
 #ifdef BT_USE_MEMCPY
 #include <memory.h>
@@ -53,7 +54,19 @@ class btAlignedObjectArray
 	//PCK: added this line
 	bool				m_ownsMemory;
 
-	protected:
+#ifdef BT_ALLOW_ARRAY_COPY_OPERATOR
+public:
+	SIMD_FORCE_INLINE btAlignedObjectArray<T>& operator=(const btAlignedObjectArray<T> &other)
+	{
+		copyFromArray(other);
+		return *this;
+	}
+#else//BT_ALLOW_ARRAY_COPY_OPERATOR
+private:
+		SIMD_FORCE_INLINE btAlignedObjectArray<T>& operator=(const btAlignedObjectArray<T> &other);
+#endif//BT_ALLOW_ARRAY_COPY_OPERATOR
+
+protected:
 		SIMD_FORCE_INLINE	int	allocSize(int size)
 		{
 			return (size ? size*2 : 1);
@@ -138,13 +151,31 @@ class btAlignedObjectArray
 			return m_size;
 		}
 		
+		SIMD_FORCE_INLINE const T& at(int n) const
+		{
+			btAssert(n>=0);
+			btAssert(n<size());
+			return m_data[n];
+		}
+
+		SIMD_FORCE_INLINE T& at(int n)
+		{
+			btAssert(n>=0);
+			btAssert(n<size());
+			return m_data[n];
+		}
+
 		SIMD_FORCE_INLINE const T& operator[](int n) const
 		{
+			btAssert(n>=0);
+			btAssert(n<size());
 			return m_data[n];
 		}
 
 		SIMD_FORCE_INLINE T& operator[](int n)
 		{
+			btAssert(n>=0);
+			btAssert(n<size());
 			return m_data[n];
 		}
 		
@@ -161,12 +192,31 @@ class btAlignedObjectArray
 
 		SIMD_FORCE_INLINE	void	pop_back()
 		{
+			btAssert(m_size>0);
 			m_size--;
 			m_data[m_size].~T();
 		}
 
+
 		///resize changes the number of elements in the array. If the new size is larger, the new elements will be constructed using the optional second argument.
 		///when the new number of elements is smaller, the destructor will be called, but memory will not be freed, to reduce performance overhead of run-time memory (de)allocations.
+		///Unlike resize(), resizeNoInitialize only updates the element count: added slots are not initialized and dropped slots are not destructed by this function.
+		SIMD_FORCE_INLINE	void	resizeNoInitialize(int newsize)
+		{
+			const int oldCount = size();
+
+			//growing past the old count: make room through reserve()
+			if (newsize > oldCount)
+			{
+				reserve(newsize);
+				//the slots from oldCount up to newsize are left uninitialized
+			}
+
+			//shrinking (or no change) needs no work beyond the count update,
+			//so no destructor is run for the entries that fall off the end
+			m_size = newsize;
+		}
+	
 		SIMD_FORCE_INLINE	void	resize(int newsize, const T& fillData=T())
 		{
 			int curSize = size();
@@ -194,7 +244,18 @@ class btAlignedObjectArray
 
 			m_size = newsize;
 		}
-	
+		///Appends one slot without initializing it and returns a reference to that slot.
+		SIMD_FORCE_INLINE	T&  expandNonInitializing( )
+		{
+			const int slot = size();
+			if (slot == capacity())
+			{
+				reserve(allocSize(slot));
+			}
+			++m_size;
+			return m_data[slot];
+		}
+
 
 		SIMD_FORCE_INLINE	T&  expand( const T& fillValue=T())
 		{	
@@ -269,8 +330,9 @@ class btAlignedObjectArray
 				}
 		};
 	
+
 		template <typename L>
-		void quickSortInternal(L CompareFunc,int lo, int hi)
+		void quickSortInternal(const L& CompareFunc,int lo, int hi)
 		{
 		//  lo is the lower index, hi is the upper index
 		//  of the region of array a that is to be sorted
@@ -300,7 +362,7 @@ class btAlignedObjectArray
 
 
 		template <typename L>
-		void quickSort(L CompareFunc)
+		void quickSort(const L& CompareFunc)
 		{
 			//don't sort 0 or 1 elements
 			if (size()>1)
@@ -312,7 +374,7 @@ class btAlignedObjectArray
 
 		///heap sort from http://www.csse.monash.edu.au/~lloyd/tildeAlgDS/Sort/Heap/
 		template <typename L>
-		void downHeap(T *pArr, int k, int n,L CompareFunc)
+		void downHeap(T *pArr, int k, int n, const L& CompareFunc)
 		{
 			/*  PRE: a[k+1..N] is a heap */
 			/* POST:  a[k..N]  is a heap */
@@ -358,7 +420,7 @@ class btAlignedObjectArray
 		}
 
 	template <typename L>
-	void heapSort(L CompareFunc)
+	void heapSort(const L& CompareFunc)
 	{
 		/* sort a[0..N-1],  N.B. 0 to N-1 */
 		int k;
@@ -384,7 +446,7 @@ class btAlignedObjectArray
 	int	findBinarySearch(const T& key) const
 	{
 		int first = 0;
-		int last = size();
+		int last = size()-1;
 
 		//assume sorted array
 		while (first <= last) {
@@ -437,6 +499,13 @@ class btAlignedObjectArray
 		m_capacity = capacity;
 	}
 
+	void copyFromArray(const btAlignedObjectArray& otherArray) //resizes to otherArray's size, then copies its elements into m_data
+	{
+		if (this == &otherArray) return; //self-copy guard (reachable via a = a): copy() would otherwise get m_data as both source and destination
+		resize (otherArray.size());
+		otherArray.copy(0, otherArray.size(), m_data);
+	}
+
 };
 
 #endif //BT_OBJECT_ARRAY__
diff --git a/Engine/lib/bullet/src/LinearMath/btConvexHull.cpp b/Engine/lib/bullet/src/LinearMath/btConvexHull.cpp
index 85c7c5dcc..2ae855dbc 100644
--- a/Engine/lib/bullet/src/LinearMath/btConvexHull.cpp
+++ b/Engine/lib/bullet/src/LinearMath/btConvexHull.cpp
@@ -16,19 +16,12 @@ subject to the following restrictions:
 #include <string.h>
 
 #include "btConvexHull.h"
-#include "LinearMath/btAlignedObjectArray.h"
-#include "LinearMath/btMinMax.h"
-#include "LinearMath/btVector3.h"
+#include "btAlignedObjectArray.h"
+#include "btMinMax.h"
+#include "btVector3.h"
 
 
 
-template <class T>
-void Swap(T &a,T &b)
-{
-	T tmp = a;
-	a=b;
-	b=tmp;
-}
 
 
 //----------------------------------
@@ -518,7 +511,7 @@ int4 HullLibrary::FindSimplex(btVector3 *verts,int verts_count,btAlignedObjectAr
 	if(p3==p0||p3==p1||p3==p2) 
 		return int4(-1,-1,-1,-1);
 	btAssert(!(p0==p1||p0==p2||p0==p3||p1==p2||p1==p3||p2==p3));
-	if(btDot(verts[p3]-verts[p0],btCross(verts[p1]-verts[p0],verts[p2]-verts[p0])) <0) {Swap(p2,p3);}
+	if(btDot(verts[p3]-verts[p0],btCross(verts[p1]-verts[p0],verts[p2]-verts[p0])) <0) {btSwap(p2,p3);}
 	return int4(p0,p1,p2,p3);
 }
 
@@ -570,7 +563,7 @@ int HullLibrary::calchullgen(btVector3 *verts,int verts_count, int vlimit)
 	vlimit-=4;
 	while(vlimit >0 && ((te=extrudable(epsilon)) != 0))
 	{
-		int3 ti=*te;
+		//int3 ti=*te;
 		int v=te->vmax;
 		btAssert(v != -1);
 		btAssert(!isextreme[v]);  // wtf we've already done this vertex
@@ -1011,9 +1004,9 @@ bool  HullLibrary::CleanupVertices(unsigned int svcount,
 				btScalar y = v[1];
 				btScalar z = v[2];
 
-				btScalar dx = fabsf(x - px );
-				btScalar dy = fabsf(y - py );
-				btScalar dz = fabsf(z - pz );
+				btScalar dx = btFabs(x - px );
+				btScalar dy = btFabs(y - py );
+				btScalar dz = btFabs(z - pz );
 
 				if ( dx < normalepsilon && dy < normalepsilon && dz < normalepsilon )
 				{
@@ -1156,7 +1149,7 @@ void HullLibrary::BringOutYourDead(const btVector3* verts,unsigned int vcount, b
 
 			for (int k=0;k<m_vertexIndexMapping.size();k++)
 			{
-				if (tmpIndices[k]==v)
+				if (tmpIndices[k]==int(v))
 					m_vertexIndexMapping[k]=ocount;
 			}
 
diff --git a/Engine/lib/bullet/src/LinearMath/btConvexHull.h b/Engine/lib/bullet/src/LinearMath/btConvexHull.h
index 92560bddb..69c52bc6f 100644
--- a/Engine/lib/bullet/src/LinearMath/btConvexHull.h
+++ b/Engine/lib/bullet/src/LinearMath/btConvexHull.h
@@ -16,11 +16,11 @@ subject to the following restrictions:
 
 ///includes modifications/improvements by John Ratcliff, see BringOutYourDead below.
 
-#ifndef CD_HULL_H
-#define CD_HULL_H
+#ifndef BT_CD_HULL_H
+#define BT_CD_HULL_H
 
-#include "LinearMath/btVector3.h"
-#include "LinearMath/btAlignedObjectArray.h"
+#include "btVector3.h"
+#include "btAlignedObjectArray.h"
 
 typedef btAlignedObjectArray<unsigned int> TUIntArray;
 
@@ -237,5 +237,5 @@ private:
 };
 
 
-#endif
+#endif //BT_CD_HULL_H
 
diff --git a/Engine/lib/bullet/src/LinearMath/btConvexHullComputer.cpp b/Engine/lib/bullet/src/LinearMath/btConvexHullComputer.cpp
new file mode 100644
index 000000000..c03c901c0
--- /dev/null
+++ b/Engine/lib/bullet/src/LinearMath/btConvexHullComputer.cpp
@@ -0,0 +1,2751 @@
+/*
+Copyright (c) 2011 Ole Kniemeyer, MAXON, www.maxon.net
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#include <string.h>
+
+#include "btConvexHullComputer.h"
+#include "btAlignedObjectArray.h"
+#include "btMinMax.h"
+#include "btVector3.h"
+
+#ifdef __GNUC__
+	#include <stdint.h>
+#elif defined(_MSC_VER)
+	typedef __int32 int32_t;
+	typedef __int64 int64_t;
+	typedef unsigned __int32 uint32_t;
+	typedef unsigned __int64 uint64_t;
+#else
+	typedef int int32_t;
+	typedef long long int int64_t;
+	typedef unsigned int uint32_t;
+	typedef unsigned long long int uint64_t;
+#endif
+
+
+//The definition of USE_X86_64_ASM is moved into the build system. You can enable it manually by commenting out the following lines
+//#if (defined(__GNUC__) && defined(__x86_64__) && !defined(__ICL))  // || (defined(__ICL) && defined(_M_X64))   bug in Intel compiler, disable inline assembly
+//	#define USE_X86_64_ASM
+//#endif
+
+
+//#define DEBUG_CONVEX_HULL
+//#define SHOW_ITERATIONS
+
+#if defined(DEBUG_CONVEX_HULL) || defined(SHOW_ITERATIONS)
+	#include <stdio.h>
+#endif
+
+// Convex hull implementation based on Preparata and Hong
+// Ole Kniemeyer, MAXON Computer GmbH
+class btConvexHullInternal
+{
+	public:
+		
+		class Point64
+		{
+			public:
+				int64_t x;
+				int64_t y;
+				int64_t z;
+				
+				Point64(int64_t x, int64_t y, int64_t z): x(x), y(y), z(z)
+				{
+				}
+
+				bool isZero()
+				{
+					return (x == 0) && (y == 0) && (z == 0);
+				}
+
+				int64_t dot(const Point64& b) const
+				{
+					return x * b.x + y * b.y + z * b.z;
+				}
+		};
+		
+		class Point32 // integer point with 32-bit coordinates plus an int index
+		{
+			public:
+				int32_t x;
+				int32_t y;
+				int32_t z;
+				int index;
+				// default construction leaves x, y, z and index uninitialized
+				Point32()
+				{
+				}
+				// coordinate constructor; index starts out as -1
+				Point32(int32_t x, int32_t y, int32_t z): x(x), y(y), z(z), index(-1)
+				{
+				}
+				// equality looks at the coordinates only; index is not compared
+				bool operator==(const Point32& b) const
+				{
+					return (x == b.x) && (y == b.y) && (z == b.z);
+				}
+				// inequality is the exact negation of operator==
+				bool operator!=(const Point32& b) const
+				{
+					return (x != b.x) || (y != b.y) || (z != b.z);
+				}
+				// true when all three coordinates are 0
+				bool isZero() const
+				{
+					return (x == 0) && (y == 0) && (z == 0);
+				}
+				// cross product; every factor is widened to int64_t first so the products are formed in 64-bit arithmetic instead of wrapping in 32-bit int
+				Point64 cross(const Point32& b) const
+				{
+					return Point64((int64_t) y * b.z - (int64_t) z * b.y, (int64_t) z * b.x - (int64_t) x * b.z, (int64_t) x * b.y - (int64_t) y * b.x);
+				}
+				// cross product with a 64-bit vector; b's int64_t components already make each product 64 bits wide
+				Point64 cross(const Point64& b) const
+				{
+					return Point64(y * b.z - z * b.y, z * b.x - x * b.z, x * b.y - y * b.x);
+				}
+				// dot product, with the same widening as cross(const Point32&)
+				int64_t dot(const Point32& b) const
+				{
+					return (int64_t) x * b.x + (int64_t) y * b.y + (int64_t) z * b.z;
+				}
+				// dot product with a 64-bit vector
+				int64_t dot(const Point64& b) const
+				{
+					return x * b.x + y * b.y + z * b.z;
+				}
+				// component-wise sum; the result gets index -1 from the coordinate constructor
+				Point32 operator+(const Point32& b) const
+				{
+					return Point32(x + b.x, y + b.y, z + b.z);
+				}
+				// component-wise difference; the result gets index -1 from the coordinate constructor
+				Point32 operator-(const Point32& b) const
+				{
+					return Point32(x - b.x, y - b.y, z - b.z);
+				}
+		};
+
+		class Int128
+		{
+			public:
+				uint64_t low;
+				uint64_t high;
+
+				Int128()
+				{
+				}
+
+				Int128(uint64_t low, uint64_t high): low(low), high(high)
+				{
+				}
+
+				Int128(uint64_t low): low(low), high(0)
+				{
+				}
+
+				Int128(int64_t value): low(value), high((value >= 0) ? 0 : (uint64_t) -1LL)
+				{
+				}
+
+				static Int128 mul(int64_t a, int64_t b);
+
+				static Int128 mul(uint64_t a, uint64_t b);
+
+				Int128 operator-() const
+				{
+					return Int128((uint64_t) -(int64_t)low, ~high + (low == 0));
+				}
+
+				Int128 operator+(const Int128& b) const
+				{
+#ifdef USE_X86_64_ASM
+					Int128 result;
+					__asm__ ("addq %[bl], %[rl]\n\t"
+									 "adcq %[bh], %[rh]\n\t"
+									 : [rl] "=r" (result.low), [rh] "=r" (result.high)
+									 : "0"(low), "1"(high), [bl] "g"(b.low), [bh] "g"(b.high)
+									 : "cc" );
+					return result;
+#else
+					uint64_t lo = low + b.low;
+					return Int128(lo, high + b.high + (lo < low));
+#endif
+				}
+
+				Int128 operator-(const Int128& b) const
+				{
+#ifdef USE_X86_64_ASM
+					Int128 result;
+					__asm__ ("subq %[bl], %[rl]\n\t"
+									 "sbbq %[bh], %[rh]\n\t"
+									 : [rl] "=r" (result.low), [rh] "=r" (result.high)
+									 : "0"(low), "1"(high), [bl] "g"(b.low), [bh] "g"(b.high)
+									 : "cc" );
+					return result;
+#else
+					return *this + -b;
+#endif
+				}
+
+				Int128& operator+=(const Int128& b)
+				{
+#ifdef USE_X86_64_ASM
+					__asm__ ("addq %[bl], %[rl]\n\t"
+									 "adcq %[bh], %[rh]\n\t"
+									 : [rl] "=r" (low), [rh] "=r" (high)
+									 : "0"(low), "1"(high), [bl] "g"(b.low), [bh] "g"(b.high)
+									 : "cc" );
+#else
+					uint64_t lo = low + b.low;
+					if (lo < low)
+					{
+						++high;
+					}
+					low = lo;
+					high += b.high;
+#endif
+					return *this;
+				}
+
+				Int128& operator++()
+				{
+					if (++low == 0)
+					{
+						++high;
+					}
+					return *this;
+				}
+
+				Int128 operator*(int64_t b) const;
+
+				btScalar toScalar() const
+				{
+					return ((int64_t) high >= 0) ? btScalar(high) * (btScalar(0x100000000LL) * btScalar(0x100000000LL)) + btScalar(low)
+						: -(-*this).toScalar();
+				}
+
+				int getSign() const
+				{
+					return ((int64_t) high < 0) ? -1 : (high || low) ? 1 : 0;
+				}
+
+				bool operator<(const Int128& b) const
+				{
+					return (high < b.high) || ((high == b.high) && (low < b.low));
+				}
+
+				int ucmp(const Int128&b) const
+				{
+					if (high < b.high)
+					{
+						return -1;
+					}
+					if (high > b.high)
+					{
+						return 1;
+					}
+					if (low < b.low)
+					{
+						return -1;
+					}
+					if (low > b.low)
+					{
+						return 1;
+					}
+					return 0;
+				}
+		};
+
+
+		class Rational64 // sign/magnitude fraction; a zero denominator stands for infinity or NaN (see isNegativeInfinity/isNaN)
+		{
+			private:
+				uint64_t m_numerator;
+				uint64_t m_denominator;
+				int sign;
+				// sign is -1, 0 or 1; m_numerator and m_denominator hold magnitudes
+			public:
+				Rational64(int64_t numerator, int64_t denominator)
+				{
+					if (numerator > 0)
+					{
+						sign = 1;
+						m_numerator = (uint64_t) numerator;
+					}
+					else if (numerator < 0)
+					{
+						sign = -1;
+						m_numerator = (uint64_t) 0 - (uint64_t) numerator; // magnitude in unsigned arithmetic: signed negation would overflow for INT64_MIN
+					}
+					else
+					{
+						sign = 0;
+						m_numerator = 0;
+					}
+					if (denominator > 0)
+					{
+						m_denominator = (uint64_t) denominator;
+					}
+					else if (denominator < 0)
+					{
+						sign = -sign;
+						m_denominator = (uint64_t) 0 - (uint64_t) denominator; // same unsigned negation as for the numerator
+					}
+					else
+					{
+						m_denominator = 0;
+					}
+				}
+				// negative numerator over a zero denominator
+				bool isNegativeInfinity() const
+				{
+					return (sign < 0) && (m_denominator == 0);
+				}
+				// zero numerator over a zero denominator
+				bool isNaN() const
+				{
+					return (sign == 0) && (m_denominator == 0);
+				}
+				// defined out of line
+				int compare(const Rational64& b) const;
+				// floating-point value: sign times the magnitude ratio, with SIMD_INFINITY standing in when the denominator is 0
+				btScalar toScalar() const
+				{
+					return sign * ((m_denominator == 0) ? SIMD_INFINITY : (btScalar) m_numerator / m_denominator);
+				}
+		};
+
+
+		class Rational128
+		{
+			private:
+				Int128 numerator;
+				Int128 denominator;
+				int sign;
+				bool isInt64;
+
+			public:
+				Rational128(int64_t value)
+				{
+					if (value > 0)
+					{
+						sign = 1;
+						this->numerator = value;
+					}
+					else if (value < 0)
+					{
+						sign = -1;
+						this->numerator = -value;
+					}
+					else
+					{
+						sign = 0;
+						this->numerator = (uint64_t) 0;
+					}
+					this->denominator = (uint64_t) 1;
+					isInt64 = true;
+				}
+
+				Rational128(const Int128& numerator, const Int128& denominator)
+				{
+					sign = numerator.getSign();
+					if (sign >= 0)
+					{
+						this->numerator = numerator;
+					}
+					else
+					{
+						this->numerator = -numerator;
+					}
+					int dsign = denominator.getSign();
+					if (dsign >= 0)
+					{
+						this->denominator = denominator;
+					}
+					else
+					{
+						sign = -sign;
+						this->denominator = -denominator;
+					}
+					isInt64 = false;
+				}
+
+				int compare(const Rational128& b) const;
+
+				int compare(int64_t b) const;
+
+				btScalar toScalar() const
+				{
+					return sign * ((denominator.getSign() == 0) ? SIMD_INFINITY : numerator.toScalar() / denominator.toScalar());
+				}
+		};
+
+		class PointR128
+		{
+			public:
+				Int128 x;
+				Int128 y;
+				Int128 z;
+				Int128 denominator;
+
+				PointR128()
+				{
+				}
+
+				PointR128(Int128 x, Int128 y, Int128 z, Int128 denominator): x(x), y(y), z(z), denominator(denominator)
+				{
+				}
+
+				btScalar xvalue() const
+				{
+					return x.toScalar() / denominator.toScalar();
+				}
+
+				btScalar yvalue() const
+				{
+					return y.toScalar() / denominator.toScalar();
+				}
+
+				btScalar zvalue() const
+				{
+					return z.toScalar() / denominator.toScalar();
+				}
+		};
+
+
+		class Edge;
+		class Face;
+
+		class Vertex
+		{
+			public:
+				Vertex* next;
+				Vertex* prev;
+				Edge* edges;
+				Face* firstNearbyFace;
+				Face* lastNearbyFace;
+				PointR128 point128;
+				Point32 point;
+				int copy;
+				
+				Vertex(): next(NULL), prev(NULL), edges(NULL), firstNearbyFace(NULL), lastNearbyFace(NULL), copy(-1)
+				{
+				}
+
+#ifdef DEBUG_CONVEX_HULL
+				void print()
+				{
+					printf("V%d (%d, %d, %d)", point.index, point.x, point.y, point.z);
+				}
+
+				void printGraph();
+#endif
+
+				Point32 operator-(const Vertex& b) const
+				{
+					return point - b.point;
+				}
+
+				Rational128 dot(const Point64& b) const
+				{
+					return (point.index >= 0) ? Rational128(point.dot(b))
+						: Rational128(point128.x * b.x + point128.y * b.y + point128.z * b.z, point128.denominator);
+				}
+
+				btScalar xvalue() const
+				{
+					return (point.index >= 0) ? btScalar(point.x) : point128.xvalue();
+				}
+
+				btScalar yvalue() const
+				{
+					return (point.index >= 0) ? btScalar(point.y) : point128.yvalue();
+				}
+
+				btScalar zvalue() const
+				{
+					return (point.index >= 0) ? btScalar(point.z) : point128.zvalue();
+				}
+
+				void receiveNearbyFaces(Vertex* src)
+				{
+					if (lastNearbyFace)
+					{
+						lastNearbyFace->nextWithSameNearbyVertex = src->firstNearbyFace;
+					}
+					else
+					{
+						firstNearbyFace = src->firstNearbyFace;
+					}
+					if (src->lastNearbyFace)
+					{
+						lastNearbyFace = src->lastNearbyFace;
+					}
+					for (Face* f = src->firstNearbyFace; f; f = f->nextWithSameNearbyVertex)
+					{
+						btAssert(f->nearbyVertex == src);
+						f->nearbyVertex = this;
+					}
+					src->firstNearbyFace = NULL;
+					src->lastNearbyFace = NULL;
+				}
+		};
+
+
+		class Edge
+		{
+			public:
+				Edge* next;
+				Edge* prev;
+				Edge* reverse;
+				Vertex* target;
+				Face* face;
+				int copy;
+
+				~Edge()
+				{
+					next = NULL;
+					prev = NULL;
+					reverse = NULL;
+					target = NULL;
+					face = NULL;
+				}
+
+				void link(Edge* n)
+				{
+					btAssert(reverse->target == n->reverse->target);
+					next = n;
+					n->prev = this;
+				}
+
+#ifdef DEBUG_CONVEX_HULL
+				void print()
+				{
+					printf("E%p : %d -> %d,  n=%p p=%p   (0 %d\t%d\t%d) -> (%d %d %d)", this, reverse->target->point.index, target->point.index, next, prev,
+								 reverse->target->point.x, reverse->target->point.y, reverse->target->point.z, target->point.x, target->point.y, target->point.z);
+				}
+#endif
+		};
+
+		class Face
+		{
+			public:
+				Face* next;
+				Vertex* nearbyVertex;
+				Face* nextWithSameNearbyVertex;
+				Point32 origin;
+				Point32 dir0;
+				Point32 dir1;
+
+				Face(): next(NULL), nearbyVertex(NULL), nextWithSameNearbyVertex(NULL)
+				{
+				}
+
+				void init(Vertex* a, Vertex* b, Vertex* c)
+				{
+					nearbyVertex = a;
+					origin = a->point;
+					dir0 = *b - *a;
+					dir1 = *c - *a;
+					if (a->lastNearbyFace)
+					{
+						a->lastNearbyFace->nextWithSameNearbyVertex = this;
+					}
+					else
+					{
+						a->firstNearbyFace = this;
+					}
+					a->lastNearbyFace = this;
+				}
+
+				Point64 getNormal()
+				{
+					return dir0.cross(dir1);
+				}
+		};
+
+		template<typename UWord, typename UHWord> class DMul
+		{
+			private:
+				static uint32_t high(uint64_t value)
+				{
+					return (uint32_t) (value >> 32);
+				}
+				
+				static uint32_t low(uint64_t value)
+				{
+					return (uint32_t) value;
+				}
+				
+				static uint64_t mul(uint32_t a, uint32_t b)
+				{
+					return (uint64_t) a * (uint64_t) b;
+				}
+				
+				static void shlHalf(uint64_t& value)
+				{
+					value <<= 32;
+				}
+				
+				static uint64_t high(Int128 value)
+				{
+					return value.high;
+				}
+				
+				static uint64_t low(Int128 value)
+				{
+					return value.low;
+				}
+				
+				static Int128 mul(uint64_t a, uint64_t b)
+				{
+					return Int128::mul(a, b);
+				}
+				
+				static void shlHalf(Int128& value)
+				{
+					value.high = value.low;
+					value.low = 0;
+				}
+				
+			public:
+				
+				static void mul(UWord a, UWord b, UWord& resLow, UWord& resHigh)
+				{
+					UWord p00 = mul(low(a), low(b));
+					UWord p01 = mul(low(a), high(b));
+					UWord p10 = mul(high(a), low(b));
+					UWord p11 = mul(high(a), high(b));
+					UWord p0110 = UWord(low(p01)) + UWord(low(p10));
+					p11 += high(p01);
+					p11 += high(p10);
+					p11 += high(p0110);
+					shlHalf(p0110);
+					p00 += p0110;
+					if (p00 < p0110)
+					{
+						++p11;
+					}
+					resLow = p00;
+					resHigh = p11;
+				}
+		};
+	
+	private:
+
+		class IntermediateHull
+		{
+			public:
+				Vertex* minXy;
+				Vertex* maxXy;
+				Vertex* minYx;
+				Vertex* maxYx;
+				
+				IntermediateHull(): minXy(NULL), maxXy(NULL), minYx(NULL), maxYx(NULL)
+				{
+				}
+				
+				void print();
+		};
+	
+		enum Orientation {NONE, CLOCKWISE, COUNTER_CLOCKWISE};
+
+		template <typename T> class PoolArray
+		{
+			private:
+				T* array;
+				int size;
+
+			public:
+				PoolArray<T>* next;
+
+				PoolArray(int size): size(size), next(NULL)
+				{
+					array = (T*) btAlignedAlloc(sizeof(T) * size, 16);
+				}
+
+				~PoolArray()
+				{
+					btAlignedFree(array);
+				}
+
+				T* init()
+				{
+					T* o = array;
+					for (int i = 0; i < size; i++, o++)
+					{
+						o->next = (i+1 < size) ? o + 1 : NULL;
+					}
+					return array;
+				}
+		};
+
+		template <typename T> class Pool
+		{
+			private:
+				PoolArray<T>* arrays;
+				PoolArray<T>* nextArray;
+				T* freeObjects;
+				int arraySize;
+
+			public:
+				Pool(): arrays(NULL), nextArray(NULL), freeObjects(NULL), arraySize(256)
+				{
+				}
+
+				~Pool()
+				{
+					while (arrays)
+					{
+						PoolArray<T>* p = arrays;
+						arrays = p->next;
+						p->~PoolArray<T>();
+						btAlignedFree(p);
+					}
+				}
+
+				void reset()
+				{
+					nextArray = arrays;
+					freeObjects = NULL;
+				}
+
+				void setArraySize(int arraySize)
+				{
+					this->arraySize = arraySize;
+				}
+
+				T* newObject()
+				{
+					T* o = freeObjects;
+					if (!o)
+					{
+						PoolArray<T>* p = nextArray;
+						if (p)
+						{
+							nextArray = p->next;
+						}
+						else
+						{
+							p = new(btAlignedAlloc(sizeof(PoolArray<T>), 16)) PoolArray<T>(arraySize);
+							p->next = arrays;
+							arrays = p;
+						}
+						o = p->init();
+					}
+					freeObjects = o->next;
+					return new(o) T();
+				};
+
+				void freeObject(T* object)
+				{
+					object->~T();
+					object->next = freeObjects;
+					freeObjects = object;
+				}
+		};
+
+		btVector3 scaling;
+		btVector3 center;
+		Pool<Vertex> vertexPool;
+		Pool<Edge> edgePool;
+		Pool<Face> facePool;
+		btAlignedObjectArray<Vertex*> originalVertices;
+		int mergeStamp;
+		int minAxis;
+		int medAxis;
+		int maxAxis;
+		int usedEdgePairs;
+		int maxUsedEdgePairs;
+
+		static Orientation getOrientation(const Edge* prev, const Edge* next, const Point32& s, const Point32& t);
+		Edge* findMaxAngle(bool ccw, const Vertex* start, const Point32& s, const Point64& rxs, const Point64& sxrxs, Rational64& minCot);
+		void findEdgeForCoplanarFaces(Vertex* c0, Vertex* c1, Edge*& e0, Edge*& e1, Vertex* stop0, Vertex* stop1);
+
+		Edge* newEdgePair(Vertex* from, Vertex* to);
+
+		void removeEdgePair(Edge* edge)
+		{
+			Edge* n = edge->next;
+			Edge* r = edge->reverse;
+
+			btAssert(edge->target && r->target);
+
+			if (n != edge)
+			{
+				n->prev = edge->prev;
+				edge->prev->next = n;
+				r->target->edges = n;
+			}
+			else
+			{
+				r->target->edges = NULL;
+			}
+			
+			n = r->next;
+			
+			if (n != r)
+			{
+				n->prev = r->prev;
+				r->prev->next = n;
+				edge->target->edges = n;
+			}
+			else
+			{
+				edge->target->edges = NULL;
+			}
+
+			edgePool.freeObject(edge);
+			edgePool.freeObject(r);
+			usedEdgePairs--;
+		}
+		
+		void computeInternal(int start, int end, IntermediateHull& result);	// builds the hull of originalVertices[start, end) into 'result'
+		
+		bool mergeProjection(IntermediateHull& h0, IntermediateHull& h1, Vertex*& c0, Vertex*& c1);
+		
+		void merge(IntermediateHull& h0, IntermediateHull& h1);	// merges h1 into h0
+
+		btVector3 toBtVector(const Point32& v);
+
+		btVector3 getBtNormal(Face* face);
+
+		bool shiftFace(Face* face, btScalar amount, btAlignedObjectArray<Vertex*> stack);	// NOTE(review): 'stack' is taken by value, so each call copies the array - confirm this is intended
+
+	public:
+		Vertex* vertexList;	// set by compute() to the minXy vertex of the final hull; NULL when no points were given
+
+		void compute(const void* coords, bool doubleCoords, int stride, int count);
+
+		btVector3 getCoordinates(const Vertex* v);
+
+		btScalar shrink(btScalar amount, btScalar clampAmount);
+};
+
+
+btConvexHullInternal::Int128 btConvexHullInternal::Int128::operator*(int64_t b) const
+{
+	// Work on magnitudes and restore the sign at the very end.
+	const bool selfNegative = (int64_t) high < 0;
+	const bool factorNegative = b < 0;
+	const Int128 magnitude = selfNegative ? -*this : *this;
+	const uint64_t factor = (uint64_t) (factorNegative ? -b : b);
+
+	// The low word times the factor gives a full 128-bit value; the high word only contributes to the upper half.
+	Int128 product = mul(magnitude.low, factor);
+	product.high += magnitude.high * factor;
+	return (selfNegative != factorNegative) ? -product : product;
+}
+
+btConvexHullInternal::Int128 btConvexHullInternal::Int128::mul(int64_t a, int64_t b)
+{	// Signed 64 x 64 -> 128 bit multiplication.
+	Int128 result;
+	
+#ifdef USE_X86_64_ASM
+	__asm__ ("imulq %[b]"
+					 : "=a" (result.low), "=d" (result.high) // rax receives the low word, rdx the high word
+					 : "0"(a), [b] "r"(b)
+					 : "cc" );
+	return result;
+	
+#else
+	bool negative = a < 0;	// portable path: multiply the magnitudes, then re-apply the sign
+	if (negative)
+	{
+		a = -a;
+	}
+	if (b < 0)
+	{
+		negative = !negative;
+		b = -b;
+	}
+	DMul<uint64_t, uint32_t>::mul((uint64_t) a, (uint64_t) b, result.low, result.high);
+	return negative ? -result : result;
+#endif
+}
+
+btConvexHullInternal::Int128 btConvexHullInternal::Int128::mul(uint64_t a, uint64_t b)
+{	// Unsigned 64 x 64 -> 128 bit multiplication.
+	Int128 result;
+
+#ifdef USE_X86_64_ASM
+	__asm__ ("mulq %[b]"
+					 : "=a" (result.low), "=d" (result.high) // rax receives the low word, rdx the high word
+					 : "0"(a), [b] "r"(b)
+					 : "cc" );
+
+#else
+	DMul<uint64_t, uint32_t>::mul(a, b, result.low, result.high);	// portable path; presumably built from 32-bit partial products - see DMul
+#endif
+
+	return result;
+}
+
+int btConvexHullInternal::Rational64::compare(const Rational64& b) const
+{	// Three-way comparison: the result is negative, zero or positive like (*this - b).
+	if (sign != b.sign)
+	{
+		return sign - b.sign;	// different signs already decide the order
+	}
+	else if (sign == 0)
+	{
+		return 0;	// both signs are 0
+	}
+
+	//	return (numerator * b.denominator > b.numerator * denominator) ? sign : (numerator * b.denominator < b.numerator * denominator) ? -sign : 0;
+
+#ifdef USE_X86_64_ASM
+	// Same cross-multiplication as the portable branch below, done with two 64x64->128 bit products and a 128-bit subtraction.
+	int result;
+	int64_t tmp;
+	int64_t dummy;
+	__asm__ ("mulq %[bn]\n\t"
+					 "movq %%rax, %[tmp]\n\t"
+					 "movq %%rdx, %%rbx\n\t"
+					 "movq %[tn], %%rax\n\t"
+					 "mulq %[bd]\n\t"
+					 "subq %[tmp], %%rax\n\t"
+					 "sbbq %%rbx, %%rdx\n\t" // rdx:rax contains 128-bit-difference "numerator*b.denominator - b.numerator*denominator"
+					 "setnsb %%bh\n\t" // bh=1 if difference is non-negative, bh=0 otherwise
+					 "orq %%rdx, %%rax\n\t"
+					 "setnzb %%bl\n\t" // bl=1 if difference if non-zero, bl=0 if it is zero
+					 "decb %%bh\n\t" // now bx=0x0000 if difference is zero, 0xff01 if it is negative, 0x0001 if it is positive (i.e., same sign as difference)
+					 "shll $16, %%ebx\n\t" // ebx has same sign as difference
+					 : "=&b"(result), [tmp] "=&r"(tmp), "=a"(dummy)
+					 : "a"(m_denominator), [bn] "g"(b.m_numerator), [tn] "g"(m_numerator), [bd] "g"(b.m_denominator)
+					 : "%rdx", "cc" );
+	return result ? result ^ sign // if sign is +1, only bit 0 of result is inverted, which does not change the sign of result (and cannot result in zero)
+																// if sign is -1, all bits of result are inverted, which changes the sign of result (and again cannot result in zero)
+								: 0;
+
+#else
+	// Equal non-zero signs: compare the cross products of the magnitudes and orient the outcome by the common sign.
+	return sign * Int128::mul(m_numerator, b.m_denominator).ucmp(Int128::mul(m_denominator, b.m_numerator));
+
+#endif
+}
+
+int btConvexHullInternal::Rational128::compare(const Rational128& b) const
+{	// Three-way comparison of two signed rationals; the result is negative, zero or positive.
+	if (sign != b.sign)
+	{
+		return sign - b.sign;	// different signs already decide the order
+	}
+	else if (sign == 0)
+	{
+		return 0;	// both signs are 0
+	}
+	if (isInt64)
+	{
+		return -b.compare(sign * (int64_t) numerator.low);	// delegate to the int64_t overload with the roles swapped, hence the negation
+	}
+
+	Int128 nbdLow, nbdHigh, dbnLow, dbnHigh;	// low/high halves of numerator*b.denominator and denominator*b.numerator
+	DMul<Int128, uint64_t>::mul(numerator, b.denominator, nbdLow, nbdHigh);
+	DMul<Int128, uint64_t>::mul(denominator, b.numerator, dbnLow, dbnHigh);
+
+	int cmp = nbdHigh.ucmp(dbnHigh);	// the high halves decide unless they are equal
+	if (cmp)
+	{
+		return cmp * sign;
+	}
+	return nbdLow.ucmp(dbnLow) * sign;
+}
+
+int btConvexHullInternal::Rational128::compare(int64_t b) const
+{
+	if (isInt64)
+	{
+		const int64_t self = sign * (int64_t) numerator.low;	// plain integer comparison is enough here
+		const int above = (self > b) ? 1 : 0;
+		const int below = (self < b) ? 1 : 0;
+		return above - below;
+	}
+
+	if (b == 0)
+	{
+		return sign;	// against zero only our own sign matters
+	}
+	if (b < 0)
+	{
+		if (sign >= 0)
+		{
+			return 1;	// non-negative value versus negative b
+		}
+		b = -b;	// both negative: compare magnitudes, 'sign' orients the result below
+	}
+	else if (sign <= 0)
+	{
+		return -1;	// non-positive value versus positive b
+	}
+
+	return sign * numerator.ucmp(denominator * b);
+}
+
+
+btConvexHullInternal::Edge* btConvexHullInternal::newEdgePair(Vertex* from, Vertex* to)
+{
+	btAssert(from && to);
+	Edge* forward = edgePool.newObject();	// half-edge pointing at 'to'; this is the one returned
+	Edge* backward = edgePool.newObject();	// its twin, pointing at 'from'
+
+	forward->reverse = backward;
+	forward->target = to;
+	forward->copy = mergeStamp;
+	forward->face = NULL;
+
+	backward->reverse = forward;
+	backward->target = from;
+	backward->copy = mergeStamp;
+	backward->face = NULL;
+
+	++usedEdgePairs;	// keep the high-water mark up to date
+	maxUsedEdgePairs = (usedEdgePairs > maxUsedEdgePairs) ? usedEdgePairs : maxUsedEdgePairs;
+	return forward;
+}
+
+bool btConvexHullInternal::mergeProjection(IntermediateHull& h0, IntermediateHull& h1, Vertex*& c0, Vertex*& c1)
+{	// Joins the vertex rings of h0 and h1 into h0 using only x and y, and reports one connecting vertex pair in c0/c1.
+	Vertex* v0 = h0.maxYx;
+	Vertex* v1 = h1.minYx;
+	if ((v0->point.x == v1->point.x) && (v0->point.y == v1->point.y))	// the two facing extreme vertices coincide in x and y
+	{
+		btAssert(v0->point.z < v1->point.z);
+		Vertex* v1p = v1->prev;
+		if (v1p == v1)	// h1's ring consists of v1 alone
+		{
+			c0 = v0;
+			if (v1->edges)
+			{
+				btAssert(v1->edges->next == v1->edges);
+				v1 = v1->edges->target;
+				btAssert(v1->edges->next == v1->edges);
+			}
+			c1 = v1;
+			return false;	// rings are left unjoined; the caller only receives the pair c0/c1
+		}
+		Vertex* v1n = v1->next;	// otherwise take v1 out of h1's ring ...
+		v1p->next = v1n;
+		v1n->prev = v1p;
+		if (v1 == h1.minXy)	// ... and hand its extreme-point roles to one of its former neighbours
+		{
+			if ((v1n->point.x < v1p->point.x) || ((v1n->point.x == v1p->point.x) && (v1n->point.y < v1p->point.y)))
+			{
+				h1.minXy = v1n;
+			}
+			else
+			{
+				h1.minXy = v1p;
+			}
+		}
+		if (v1 == h1.maxXy)
+		{
+			if ((v1n->point.x > v1p->point.x) || ((v1n->point.x == v1p->point.x) && (v1n->point.y > v1p->point.y)))
+			{
+				h1.maxXy = v1n;
+			}
+			else
+			{
+				h1.maxXy = v1p;
+			}
+		}
+	}
+	
+	v0 = h0.maxXy;	// pass 0 starts at the maxXy vertices
+	v1 = h1.maxXy;
+	Vertex* v00 = NULL;	// result pair of pass 0
+	Vertex* v10 = NULL;
+	int32_t sign = 1;	// +1 during pass 0, -1 during pass 1 (which starts at the minXy vertices)
+
+	for (int side = 0; side <= 1; side++)
+	{		
+		int32_t dx = (v1->point.x - v0->point.x) * sign;
+		if (dx > 0)
+		{
+			while (true)	// alternately advance v0 and v1 until neither can move any further
+			{
+				int32_t dy = v1->point.y - v0->point.y;
+
+				Vertex* w0 = side ? v0->next : v0->prev;	// walking direction depends on the pass
+				if (w0 != v0)
+				{
+					int32_t dx0 = (w0->point.x - v0->point.x) * sign;
+					int32_t dy0 = w0->point.y - v0->point.y;
+					if ((dy0 <= 0) && ((dx0 == 0) || ((dx0 < 0) && (dy0 * dx <= dy * dx0))))	// slope test written as a cross-multiplication
+					{
+						v0 = w0;
+						dx = (v1->point.x - v0->point.x) * sign;
+						continue;
+					}
+				}
+
+				Vertex* w1 = side ? v1->next : v1->prev;
+				if (w1 != v1)
+				{
+					int32_t dx1 = (w1->point.x - v1->point.x) * sign;
+					int32_t dy1 = w1->point.y - v1->point.y;
+					int32_t dxn = (w1->point.x - v0->point.x) * sign;	// what dx becomes if v1 moves to w1
+					if ((dxn > 0) && (dy1 < 0) && ((dx1 == 0) || ((dx1 < 0) && (dy1 * dx < dy * dx1))))
+					{
+						v1 = w1;
+						dx = dxn;
+						continue;
+					}
+				}
+
+				break;
+			}
+		}
+		else if (dx < 0)
+		{
+			while (true)	// counterpart of the dx > 0 loop: v1 is tried first and the walking directions are swapped
+			{
+				int32_t dy = v1->point.y - v0->point.y;
+				
+				Vertex* w1 = side ? v1->prev : v1->next;
+				if (w1 != v1)
+				{
+					int32_t dx1 = (w1->point.x - v1->point.x) * sign;
+					int32_t dy1 = w1->point.y - v1->point.y;
+					if ((dy1 >= 0) && ((dx1 == 0) || ((dx1 < 0) && (dy1 * dx <= dy * dx1))))
+					{
+						v1 = w1;
+						dx = (v1->point.x - v0->point.x) * sign;
+						continue;
+					}
+				}
+				
+				Vertex* w0 = side ? v0->prev : v0->next;
+				if (w0 != v0)
+				{
+					int32_t dx0 = (w0->point.x - v0->point.x) * sign;
+					int32_t dy0 = w0->point.y - v0->point.y;
+					int32_t dxn = (v1->point.x - w0->point.x) * sign;	// what dx becomes if v0 moves to w0
+					if ((dxn < 0) && (dy0 > 0) && ((dx0 == 0) || ((dx0 < 0) && (dy0 * dx < dy * dx0))))
+					{
+						v0 = w0;
+						dx = dxn;
+						continue;
+					}
+				}
+				
+				break;
+			}
+		}
+		else	// dx == 0: v0 and v1 have the same x
+		{
+			int32_t x = v0->point.x;
+			int32_t y0 = v0->point.y;
+			Vertex* w0 = v0;
+			Vertex* t;
+			while (((t = side ? w0->next : w0->prev) != v0) && (t->point.x == x) && (t->point.y <= y0))	// slide v0 along vertices with this x while y does not increase
+			{
+				w0 = t;
+				y0 = t->point.y;
+			}
+			v0 = w0;
+
+			int32_t y1 = v1->point.y;
+			Vertex* w1 = v1;
+			while (((t = side ? w1->prev : w1->next) != v1) && (t->point.x == x) && (t->point.y >= y1))	// slide v1 while y does not decrease
+			{
+				w1 = t;
+				y1 = t->point.y;
+			}
+			v1 = w1;
+		}
+		
+		if (side == 0)	// remember the pass-0 pair and set up pass 1
+		{
+			v00 = v0;
+			v10 = v1;
+
+			v0 = h0.minXy;
+			v1 = h1.minXy;
+			sign = -1;
+		}
+	}
+
+	v0->prev = v1;	// splice the two rings together along the pairs found by pass 1 ...
+	v1->next = v0;
+
+	v00->next = v10;	// ... and pass 0
+	v10->prev = v00;
+
+	if (h1.minXy->point.x < h0.minXy->point.x)	// h0 now describes the joined ring, so refresh its extreme vertices
+	{
+		h0.minXy = h1.minXy;
+	}
+	if (h1.maxXy->point.x >= h0.maxXy->point.x)
+	{
+		h0.maxXy = h1.maxXy;
+	}
+	
+	h0.maxYx = h1.maxYx;
+
+	c0 = v00;	// the pass-0 pair is what the caller continues with
+	c1 = v10;
+
+	return true;
+}
+
+void btConvexHullInternal::computeInternal(int start, int end, IntermediateHull& result)
+{	// Builds the hull of originalVertices[start, end) into 'result': ranges of 0, 1 or 2 vertices directly, larger ones by split and merge().
+	int n = end - start;
+	switch (n)
+	{
+		case 0:
+			result.minXy = NULL;	// empty range yields an empty hull
+			result.maxXy = NULL;
+			result.minYx = NULL;
+			result.maxYx = NULL;
+			return;
+		case 2:
+		{
+			Vertex* v = originalVertices[start];
+			Vertex* w = originalVertices[start + 1];	// second vertex of the range, looked up by index instead of assuming it sits right behind v in memory
+			if (v->point != w->point)	// two equal points are handled like a single vertex (falls through to case 1)
+			{
+				int32_t dx = v->point.x - w->point.x;
+				int32_t dy = v->point.y - w->point.y;
+
+				if ((dx == 0) && (dy == 0))	// the points differ in z only
+				{
+					if (v->point.z > w->point.z)
+					{
+						Vertex* t = w;
+						w = v;
+						v = t;
+					}
+					btAssert(v->point.z < w->point.z);
+					v->next = v;	// only the vertex with the smaller z enters the vertex ring
+					v->prev = v;
+					result.minXy = v;
+					result.maxXy = v;
+					result.minYx = v;
+					result.maxYx = v;
+				}
+				else
+				{
+					v->next = w;	// ring of two vertices
+					v->prev = w;
+					w->next = v;
+					w->prev = v;
+
+					if ((dx < 0) || ((dx == 0) && (dy < 0)))	// order by x, ties broken by y
+					{
+						result.minXy = v;
+						result.maxXy = w;
+					}
+					else
+					{
+						result.minXy = w;
+						result.maxXy = v;
+					}
+
+					if ((dy < 0) || ((dy == 0) && (dx < 0)))	// order by y, ties broken by x
+					{
+						result.minYx = v;
+						result.maxYx = w;
+					}
+					else
+					{
+						result.minYx = w;
+						result.maxYx = v;
+					}
+				}
+
+				Edge* e = newEdgePair(v, w);	// in both sub-cases the two vertices are connected by one edge pair
+				e->link(e);
+				v->edges = e;
+
+				e = e->reverse;
+				e->link(e);
+				w->edges = e;
+
+				return;
+			}
+		}
+		// lint -fallthrough
+		case 1:
+		{
+			Vertex* v = originalVertices[start];
+			v->edges = NULL;	// a lone vertex has no edges and forms a ring with itself
+			v->next = v;
+			v->prev = v;
+
+			result.minXy = v;
+			result.maxXy = v;
+			result.minYx = v;
+			result.maxYx = v;
+
+			return;
+		}
+	}
+
+	int split0 = start + n / 2;	// general case: the first half is [start, split0)
+	Point32 p = originalVertices[split0-1]->point;
+	int split1 = split0;
+	while ((split1 < end) && (originalVertices[split1]->point == p))	// skip vertices equal to the last point of the first half
+	{
+		split1++;
+	}
+	computeInternal(start, split0, result);
+	IntermediateHull hull1;
+	computeInternal(split1, end, hull1);
+#ifdef DEBUG_CONVEX_HULL
+	printf("\n\nMerge\n");
+	result.print();
+	hull1.print();
+#endif
+	merge(result, hull1);
+#ifdef DEBUG_CONVEX_HULL
+	printf("\n  Result\n");
+	result.print();
+#endif
+}
+
+#ifdef DEBUG_CONVEX_HULL
+void btConvexHullInternal::IntermediateHull::print()
+{	// Debug helper: prints every vertex of the ring starting at minXy, then the edge graph reachable from minXy.
+	printf("    Hull\n");
+	for (Vertex* v = minXy; v; )
+	{
+		printf("      ");
+		v->print();
+		if (v == maxXy)
+		{
+			printf(" maxXy");
+		}
+		if (v == minYx)
+		{
+			printf(" minYx");
+		}
+		if (v == maxYx)
+		{
+			printf(" maxYx");
+		}
+		if (v->next->prev != v)	// ring links must be mutually consistent
+		{
+			printf(" Inconsistency");
+		}
+		printf("\n");
+		v = v->next;
+		if (v == minXy)	// back at the start: the whole ring has been printed
+		{
+			break;
+		}
+	}
+	if (minXy)
+	{		
+		minXy->copy = (minXy->copy == -1) ? -2 : -1;	// pick a marker that differs from the current one; printGraph() uses it as its visited stamp
+		minXy->printGraph();
+	}
+}
+
+void btConvexHullInternal::Vertex::printGraph()
+{	// Debug helper: prints this vertex and its edges, then recurses into every target vertex not yet stamped with this vertex's 'copy' value.
+	print();
+	printf("\nEdges\n");
+	Edge* e = edges;
+	if (e)
+	{
+		do
+		{
+			e->print();
+			printf("\n");
+			e = e->next;
+		} while (e != edges);
+		do
+		{
+			Vertex* v = e->target;
+			if (v->copy != copy)	// not visited yet in this traversal
+			{
+				v->copy = copy;	// stamp before recursing so the vertex is visited only once
+				v->printGraph();
+			}
+			e = e->next;
+		} while (e != edges);
+	}
+}
+#endif
+
+btConvexHullInternal::Orientation btConvexHullInternal::getOrientation(const Edge* prev, const Edge* next, const Point32& s, const Point32& t)
+{
+	btAssert(prev->reverse->target == next->reverse->target);
+	const bool nextIsSuccessor = (prev->next == next);
+	const bool nextIsPredecessor = (prev->prev == next);
+
+	if (!nextIsSuccessor)
+	{
+		// Either 'next' directly precedes 'prev' in the ring, or the two edges are not neighbours at all.
+		return nextIsPredecessor ? CLOCKWISE : NONE;
+	}
+	if (!nextIsPredecessor)
+	{
+		return COUNTER_CLOCKWISE;
+	}
+
+	// 'next' is both successor and predecessor of 'prev', so the ring links cannot tell the
+	// direction; decide it from the sign of a dot product of two cross products instead.
+	Point64 reference = t.cross(s);
+	Point64 spanned = (*prev->target - *next->reverse->target).cross(*next->target - *next->reverse->target);
+	btAssert(!spanned.isZero());
+	int64_t alignment = reference.dot(spanned);
+	btAssert(alignment != 0);
+	return (alignment > 0) ? COUNTER_CLOCKWISE : CLOCKWISE;
+}
+
+btConvexHullInternal::Edge* btConvexHullInternal::findMaxAngle(bool ccw, const Vertex* start, const Point32& s, const Point64& rxs, const Point64& sxrxs, Rational64& minCot)
+{	// Among the edges leaving 'start' whose copy stamp is greater than mergeStamp, returns the one with the smallest cotangent (stored in minCot), or NULL.
+	Edge* minEdge = NULL;
+
+#ifdef DEBUG_CONVEX_HULL
+	printf("find max edge for %d\n", start->point.index);
+#endif
+	Edge* e = start->edges;
+	if (e)
+	{
+		do
+		{
+			if (e->copy > mergeStamp)	// edges created by newEdgePair() under the current stamp are skipped
+			{
+				Point32 t = *e->target - *start;
+				Rational64 cot(t.dot(sxrxs), t.dot(rxs));	// ratio of the edge direction's components along sxrxs and rxs
+#ifdef DEBUG_CONVEX_HULL
+				printf("      Angle is %f (%d) for ", (float) btAtan(cot.toScalar()), (int) cot.isNaN());
+				e->print();
+#endif
+				if (cot.isNaN())	// such an edge is never selected; only its direction relative to s is asserted
+				{
+					btAssert(ccw ? (t.dot(s) < 0) : (t.dot(s) > 0));
+				}
+				else
+				{
+					int cmp;
+					if (minEdge == NULL)	// first candidate
+					{
+						minCot = cot;
+						minEdge = e;
+					}
+					else if ((cmp = cot.compare(minCot)) < 0)	// strictly smaller cotangent wins
+					{
+						minCot = cot;
+						minEdge = e;
+					}
+					else if ((cmp == 0) && (ccw == (getOrientation(minEdge, e, s, t) == COUNTER_CLOCKWISE)))	// tie: the ring orientation between the two edges decides
+					{
+						minEdge = e;
+					}
+				}
+#ifdef DEBUG_CONVEX_HULL
+				printf("\n");
+#endif
+			}
+			e = e->next;
+		} while (e != start->edges);
+	}
+	return minEdge;
+}
+
+void btConvexHullInternal::findEdgeForCoplanarFaces(Vertex* c0, Vertex* c1, Edge*& e0, Edge*& e1, Vertex* stop0, Vertex* stop1)
+{	// Advances the in/out edges e0 (starting at c0) and e1 (starting at c1) within the plane through c0, c1 and the start edge's target.
+	Edge* start0 = e0;
+	Edge* start1 = e1;
+	Point32 et0 = start0 ? start0->target->point : c0->point;	// current end point on side 0 (c0 itself if there is no edge)
+	Point32 et1 = start1 ? start1->target->point : c1->point;
+	Point32 s = c1->point - c0->point;
+	Point64 normal = ((start0 ? start0 : start1)->target->point - c0->point).cross(s);	// normal of the common plane; at least one start edge must be non-NULL
+	int64_t dist = c0->point.dot(normal);	// plane offset: points of the plane satisfy point.dot(normal) == dist
+	btAssert(!start1 || (start1->target->point.dot(normal) == dist));
+	Point64 perp = s.cross(normal);	// in-plane direction perpendicular to s
+	btAssert(!perp.isZero());
+	
+#ifdef DEBUG_CONVEX_HULL
+	printf("   Advancing %d %d  (%p %p, %d %d)\n", c0->point.index, c1->point.index, start0, start1, start0 ? start0->target->point.index : -1, start1 ? start1->target->point.index : -1);
+#endif
+
+	int64_t maxDot0 = et0.dot(perp);
+	if (e0)
+	{
+		while (e0->target != stop0)	// phase 1 for side 0: follow in-plane edges while the extent along perp strictly grows
+		{
+			Edge* e = e0->reverse->prev;
+			if (e->target->point.dot(normal) < dist)	// candidate leaves the plane
+			{
+				break;
+			}
+			btAssert(e->target->point.dot(normal) == dist);
+			if (e->copy == mergeStamp)	// edge carries the current merge stamp
+			{
+				break;
+			}
+			int64_t dot = e->target->point.dot(perp);
+			if (dot <= maxDot0)
+			{
+				break;
+			}
+			maxDot0 = dot;
+			e0 = e;
+			et0 = e->target->point;
+		}
+	}
+	
+	int64_t maxDot1 = et1.dot(perp);
+	if (e1)
+	{
+		while (e1->target != stop1)	// phase 1 for side 1, walking via reverse->next instead of reverse->prev
+		{
+			Edge* e = e1->reverse->next;
+			if (e->target->point.dot(normal) < dist)
+			{
+				break;
+			}
+			btAssert(e->target->point.dot(normal) == dist);
+			if (e->copy == mergeStamp)
+			{
+				break;
+			}
+			int64_t dot = e->target->point.dot(perp);
+			if (dot <= maxDot1)
+			{
+				break;
+			}
+			maxDot1 = dot;
+			e1 = e;
+			et1 = e->target->point;
+		}
+	}
+
+#ifdef DEBUG_CONVEX_HULL
+	printf("   Starting at %d %d\n", et0.index, et1.index);
+#endif
+
+	int64_t dx = maxDot1 - maxDot0;	// phase 2 works in in-plane coordinates: dx along perp, dy along s
+	if (dx > 0)
+	{
+		while (true)	// alternately adjust e0 and e1 until neither test succeeds
+		{
+			int64_t dy = (et1 - et0).dot(s);
+			
+			if (e0 && (e0->target != stop0))
+			{
+				Edge* f0 = e0->next->reverse;
+				if (f0->copy > mergeStamp)
+				{
+					int64_t dx0 = (f0->target->point - et0).dot(perp);
+					int64_t dy0 = (f0->target->point - et0).dot(s);
+					if ((dx0 == 0) ? (dy0 < 0) : ((dx0 < 0) && (Rational64(dy0, dx0).compare(Rational64(dy, dx)) >= 0)))	// slope comparison done exactly with rationals
+					{
+						et0 = f0->target->point;
+						dx = (et1 - et0).dot(perp);
+						e0 = (e0 == start0) ? NULL : f0;	// e0 becomes NULL when the step is taken from the original start edge
+						continue;
+					}
+				}
+			}
+			
+			if (e1 && (e1->target != stop1))
+			{
+				Edge* f1 = e1->reverse->next;
+				if (f1->copy > mergeStamp)
+				{
+					Point32 d1 = f1->target->point - et1;
+					if (d1.dot(normal) == 0)	// candidate stays in the plane
+					{
+						int64_t dx1 = d1.dot(perp);
+						int64_t dy1 = d1.dot(s);
+						int64_t dxn = (f1->target->point - et0).dot(perp);	// what dx becomes if e1 advances to f1
+						if ((dxn > 0) && ((dx1 == 0) ? (dy1 < 0) : ((dx1 < 0) && (Rational64(dy1, dx1).compare(Rational64(dy, dx)) > 0))))
+						{
+							e1 = f1;
+							et1 = e1->target->point;
+							dx = dxn;
+							continue;
+						}
+					}
+					else
+					{
+						btAssert((e1 == start1) && (d1.dot(normal) < 0));
+					}
+				}
+			}
+
+			break;
+		}
+	}
+	else if (dx < 0)
+	{
+		while (true)	// counterpart of the dx > 0 loop with the roles of side 0 and side 1 exchanged
+		{
+			int64_t dy = (et1 - et0).dot(s);
+			
+			if (e1 && (e1->target != stop1))
+			{
+				Edge* f1 = e1->prev->reverse;
+				if (f1->copy > mergeStamp)
+				{
+					int64_t dx1 = (f1->target->point - et1).dot(perp);
+					int64_t dy1 = (f1->target->point - et1).dot(s);
+					if ((dx1 == 0) ? (dy1 > 0) : ((dx1 < 0) && (Rational64(dy1, dx1).compare(Rational64(dy, dx)) <= 0)))
+					{
+						et1 = f1->target->point;
+						dx = (et1 - et0).dot(perp);
+						e1 = (e1 == start1) ? NULL : f1;	// e1 becomes NULL when the step is taken from the original start edge
+						continue;
+					}
+				}
+			}
+			
+			if (e0 && (e0->target != stop0))
+			{
+				Edge* f0 = e0->reverse->prev;
+				if (f0->copy > mergeStamp)
+				{
+					Point32 d0 = f0->target->point - et0;
+					if (d0.dot(normal) == 0)	// candidate stays in the plane
+					{
+						int64_t dx0 = d0.dot(perp);
+						int64_t dy0 = d0.dot(s);
+						int64_t dxn = (et1 - f0->target->point).dot(perp);	// what dx becomes if e0 advances to f0
+						if ((dxn < 0) && ((dx0 == 0) ? (dy0 > 0) : ((dx0 < 0) && (Rational64(dy0, dx0).compare(Rational64(dy, dx)) < 0))))
+						{
+							e0 = f0;
+							et0 = e0->target->point;
+							dx = dxn;
+							continue;
+						}
+					}
+					else
+					{
+						btAssert((e0 == start0) && (d0.dot(normal) < 0));
+					}
+				}
+			}
+
+			break;
+		}
+	}
+#ifdef DEBUG_CONVEX_HULL
+	printf("   Advanced edges to %d %d\n", et0.index, et1.index);
+#endif
+}
+
+
+void btConvexHullInternal::merge(IntermediateHull& h0, IntermediateHull& h1)
+{	// Merges hull h1 into h0: joins the vertex rings, then walks around both hulls adding connecting edge pairs and removing edges in between.
+	if (!h1.maxXy)	// h1 is empty: h0 already is the result
+	{
+		return;
+	}
+	if (!h0.maxXy)	// h0 is empty: the result is h1
+	{
+		h0 = h1;
+		return;
+	}
+	
+	mergeStamp--;	// edges created from here on get the new stamp; older edges satisfy copy > mergeStamp
+
+	Vertex* c0 = NULL;	// current vertex on the h0 side
+	Edge* toPrev0 = NULL;	// reverse of the edge last followed on side 0
+	Edge* firstNew0 = NULL;	// first edge recorded on side 0; bounds the final clean-up
+	Edge* pendingHead0 = NULL;	// chain of new edges at c0 that are not yet linked into its edge ring
+	Edge* pendingTail0 = NULL;
+	Vertex* c1 = NULL;	// the same bookkeeping for the h1 side
+	Edge* toPrev1 = NULL;
+	Edge* firstNew1 = NULL;
+	Edge* pendingHead1 = NULL;
+	Edge* pendingTail1 = NULL;
+	Point32 prevPoint;
+
+	if (mergeProjection(h0, h1, c0, c1))
+	{
+		Point32 s = *c1 - *c0;
+		Point64 normal = Point32(0, 0, -1).cross(s);
+		Point64 t = s.cross(normal);
+		btAssert(!t.isZero());
+
+		Edge* e = c0->edges;
+		Edge* start0 = NULL;
+		if (e)
+		{
+			do	// look for an edge at c0 that is perpendicular to 'normal' and has a positive component along t
+			{
+				int64_t dot = (*e->target - *c0).dot(normal);
+				btAssert(dot <= 0);
+				if ((dot == 0) && ((*e->target - *c0).dot(t) > 0))
+				{
+					if (!start0 || (getOrientation(start0, e, s, Point32(0, 0, -1)) == CLOCKWISE))
+					{
+						start0 = e;
+					}
+				}
+				e = e->next;
+			} while (e != c0->edges);
+		}
+		
+		e = c1->edges;
+		Edge* start1 = NULL;
+		if (e)
+		{
+			do	// same search at c1, with the opposite orientation preference
+			{
+				int64_t dot = (*e->target - *c1).dot(normal);
+				btAssert(dot <= 0);
+				if ((dot == 0) && ((*e->target - *c1).dot(t) > 0))
+				{
+					if (!start1 || (getOrientation(start1, e, s, Point32(0, 0, -1)) == COUNTER_CLOCKWISE))
+					{
+						start1 = e;
+					}
+				}
+				e = e->next;
+			} while (e != c1->edges);
+		}
+
+		if (start0 || start1)
+		{
+			findEdgeForCoplanarFaces(c0, c1, start0, start1, NULL, NULL);
+			if (start0)	// move the starting pair to the targets of the adjusted edges
+			{
+				c0 = start0->target;
+			}
+			if (start1)
+			{
+				c1 = start1->target;
+			}
+		}
+
+		prevPoint = c1->point;
+		prevPoint.z++;	// reference point one unit above c1 in z
+	}
+	else
+	{
+		prevPoint = c1->point;
+		prevPoint.x++;	// reference point one unit beside c1 in x
+	}
+
+	Vertex* first0 = c0;	// the walk ends when both sides are back at their first vertex
+	Vertex* first1 = c1;
+	bool firstRun = true;
+
+	while (true)
+	{
+		Point32 s = *c1 - *c0;	// direction of the current connecting edge
+		Point32 r = prevPoint - c0->point;
+		Point64 rxs = r.cross(s);
+		Point64 sxrxs = s.cross(rxs);
+		
+#ifdef DEBUG_CONVEX_HULL
+		printf("\n  Checking %d %d\n", c0->point.index, c1->point.index);
+#endif
+		Rational64 minCot0(0, 0);
+		Edge* min0 = findMaxAngle(false, c0, s, rxs, sxrxs, minCot0);
+		Rational64 minCot1(0, 0);
+		Edge* min1 = findMaxAngle(true, c1, s, rxs, sxrxs, minCot1);
+		if (!min0 && !min1)	// neither vertex offers a candidate edge: connect c0 and c1 by a single edge pair and finish
+		{
+			Edge* e = newEdgePair(c0, c1);
+			e->link(e);
+			c0->edges = e;
+
+			e = e->reverse;
+			e->link(e);
+			c1->edges = e;
+			return;
+		}
+		else
+		{
+			int cmp = !min0 ? 1 : !min1 ? -1 : minCot0.compare(minCot1);	// > 0: advance side 1, < 0: advance side 0, 0: both
+#ifdef DEBUG_CONVEX_HULL
+			printf("    -> Result %d\n", cmp);
+#endif
+			if (firstRun || ((cmp >= 0) ? !minCot1.isNegativeInfinity() : !minCot0.isNegativeInfinity()))
+			{
+				Edge* e = newEdgePair(c0, c1);	// new connecting edge; both halves are queued on the pending chains
+				if (pendingTail0)
+				{
+					pendingTail0->prev = e;
+				}
+				else
+				{
+					pendingHead0 = e;
+				}
+				e->next = pendingTail0;
+				pendingTail0 = e;
+
+				e = e->reverse;
+				if (pendingTail1)
+				{
+					pendingTail1->next = e;
+				}
+				else
+				{
+					pendingHead1 = e;
+				}
+				e->prev = pendingTail1;
+				pendingTail1 = e;
+			}
+			
+			Edge* e0 = min0;
+			Edge* e1 = min1;
+
+#ifdef DEBUG_CONVEX_HULL
+			printf("   Found min edges to %d %d\n", e0 ? e0->target->point.index : -1, e1 ? e1->target->point.index : -1);
+#endif
+
+			if (cmp == 0)	// equal cotangents on both sides
+			{
+				findEdgeForCoplanarFaces(c0, c1, e0, e1, NULL, NULL);
+			}
+
+			if ((cmp >= 0) && e1)	// advance on the h1 side
+			{
+				if (toPrev1)
+				{
+					for (Edge* e = toPrev1->next, *n = NULL; e != min1; e = n)	// remove the edges between the previous step and min1
+					{
+						n = e->next;
+						removeEdgePair(e);
+					}
+				}
+
+				if (pendingTail1)	// splice the pending chain into c1's edge ring
+				{
+					if (toPrev1)
+					{
+						toPrev1->link(pendingHead1);
+					}
+					else
+					{
+						min1->prev->link(pendingHead1);
+						firstNew1 = pendingHead1;
+					}
+					pendingTail1->link(min1);
+					pendingHead1 = NULL;
+					pendingTail1 = NULL;
+				}
+				else if (!toPrev1)
+				{
+					firstNew1 = min1;
+				}
+
+				prevPoint = c1->point;
+				c1 = e1->target;
+				toPrev1 = e1->reverse;
+			}
+
+			if ((cmp <= 0) && e0)	// advance on the h0 side (same steps with next/prev exchanged)
+			{
+				if (toPrev0)
+				{
+					for (Edge* e = toPrev0->prev, *n = NULL; e != min0; e = n)
+					{
+						n = e->prev;
+						removeEdgePair(e);
+					}
+				}
+
+				if (pendingTail0)
+				{
+					if (toPrev0)
+					{
+						pendingHead0->link(toPrev0);
+					}
+					else
+					{
+						pendingHead0->link(min0->next);
+						firstNew0 = pendingHead0;
+					}
+					min0->link(pendingTail0);
+					pendingHead0 = NULL;
+					pendingTail0 = NULL;
+				}
+				else if (!toPrev0)
+				{
+					firstNew0 = min0;
+				}
+
+				prevPoint = c0->point;
+				c0 = e0->target;
+				toPrev0 = e0->reverse;
+			}
+		}
+
+		if ((c0 == first0) && (c1 == first1))	// both sides are back at the starting pair: close the rings and finish
+		{
+			if (toPrev0 == NULL)	// side 0 never advanced: its pending edges form c0's complete ring
+			{
+				pendingHead0->link(pendingTail0);
+				c0->edges = pendingTail0;
+			}
+			else
+			{
+				for (Edge* e = toPrev0->prev, *n = NULL; e != firstNew0; e = n)
+				{
+					n = e->prev;
+					removeEdgePair(e);
+				}
+				if (pendingTail0)
+				{
+					pendingHead0->link(toPrev0);
+					firstNew0->link(pendingTail0);
+				}
+			}
+
+			if (toPrev1 == NULL)	// side 1 never advanced
+			{
+				pendingTail1->link(pendingHead1);
+				c1->edges = pendingTail1;
+			}
+			else
+			{
+				for (Edge* e = toPrev1->next, *n = NULL; e != firstNew1; e = n)
+				{
+					n = e->next;
+					removeEdgePair(e);
+				}
+				if (pendingTail1)
+				{
+					toPrev1->link(pendingHead1);
+					pendingTail1->link(firstNew1);
+				}
+			}
+			
+			return;
+		}
+
+		firstRun = false;
+	}
+}
+
+
+static bool pointCmp(const btConvexHullInternal::Point32& p, const btConvexHullInternal::Point32& q)
+{	// Strict ordering by y first, then x, then z.
+	return (p.y != q.y) ? (p.y < q.y) : ((p.x != q.x) ? (p.x < q.x) : (p.z < q.z));
+}
+
+void btConvexHullInternal::compute(const void* coords, bool doubleCoords, int stride, int count)
+{	// Quantizes 'count' points (x, y, z as float or double, 'stride' bytes apart) to integers, sorts them and builds the hull; the result is reachable through vertexList.
+	btVector3 min(btScalar(1e30), btScalar(1e30), btScalar(1e30)), max(btScalar(-1e30), btScalar(-1e30), btScalar(-1e30));
+	const char* ptr = (const char*) coords;
+	if (doubleCoords)	// first pass: bounding box of the input
+	{
+		for (int i = 0; i < count; i++)
+		{
+			const double* v = (const double*) ptr;
+			btVector3 p((btScalar) v[0], (btScalar) v[1], (btScalar) v[2]);
+			ptr += stride;
+			min.setMin(p);
+			max.setMax(p);
+		}
+	}
+	else
+	{
+		for (int i = 0; i < count; i++)
+		{
+			const float* v = (const float*) ptr;
+			btVector3 p(v[0], v[1], v[2]);
+			ptr += stride;
+			min.setMin(p);
+			max.setMax(p);
+		}
+	}
+
+	btVector3 s = max - min;	// extent of the bounding box
+	maxAxis = s.maxAxis();
+	minAxis = s.minAxis();
+	if (minAxis == maxAxis)	// make sure the three axis indices are distinct
+	{
+		minAxis = (maxAxis + 1) % 3;
+	}
+	medAxis = 3 - maxAxis - minAxis;	// the remaining index, since 0 + 1 + 2 == 3
+
+	s /= btScalar(10216);	// each extent is divided into 10216 integer steps, so centred coordinates stay within +-5108
+	if (((medAxis + 1) % 3) != maxAxis)
+	{
+		s *= -1;	// presumably keeps the orientation consistent under the axis permutation used below - TODO confirm
+	}
+	scaling = s;	// integer -> input units; 's' is turned into the inverse mapping next
+
+	if (s[0] != 0)	// axes with zero extent keep a factor of 0
+	{
+		s[0] = btScalar(1) / s[0];
+	}
+	if (s[1] != 0)
+	{
+		s[1] = btScalar(1) / s[1];
+	}
+	if (s[2] != 0)
+	{
+		s[2] = btScalar(1) / s[2];
+	}
+
+	center = (min + max) * btScalar(0.5);
+
+	btAlignedObjectArray<Point32> points;
+	points.resize(count);
+	ptr = (const char*) coords;
+	if (doubleCoords)	// second pass: quantize every point; 'index' keeps its position in the input
+	{
+		for (int i = 0; i < count; i++)
+		{
+			const double* v = (const double*) ptr;
+			btVector3 p((btScalar) v[0], (btScalar) v[1], (btScalar) v[2]);
+			ptr += stride;
+			p = (p - center) * s;
+			points[i].x = (int32_t) p[medAxis];	// integer x/y/z hold the medium/maximum/minimum extent axis
+			points[i].y = (int32_t) p[maxAxis];
+			points[i].z = (int32_t) p[minAxis];
+			points[i].index = i;
+		}
+	}
+	else
+	{
+		for (int i = 0; i < count; i++)
+		{
+			const float* v = (const float*) ptr;
+			btVector3 p(v[0], v[1], v[2]);
+			ptr += stride;
+			p = (p - center) * s;
+			points[i].x = (int32_t) p[medAxis];
+			points[i].y = (int32_t) p[maxAxis];
+			points[i].z = (int32_t) p[minAxis];
+			points[i].index = i;
+		}
+	}
+	points.quickSort(pointCmp);	// order by y, then x, then z
+
+	vertexPool.reset();
+	vertexPool.setArraySize(count);
+	originalVertices.resize(count);
+	for (int i = 0; i < count; i++)	// one Vertex per sorted point
+	{
+		Vertex* v = vertexPool.newObject();
+		v->edges = NULL;
+		v->point = points[i];
+		v->copy = -1;
+		originalVertices[i] = v;
+	}
+
+	points.clear();
+
+	edgePool.reset();
+	edgePool.setArraySize(6 * count);
+
+	usedEdgePairs = 0;
+	maxUsedEdgePairs = 0;
+
+	mergeStamp = -3;	// merge() and shrink() decrement this further
+
+	IntermediateHull hull;
+	computeInternal(0, count, hull);
+	vertexList = hull.minXy;	// NULL when count is 0
+#ifdef DEBUG_CONVEX_HULL
+	printf("max. edges %d (3v = %d)", maxUsedEdgePairs, 3 * count);
+#endif
+}
+
+btVector3 btConvexHullInternal::toBtVector(const Point32& v)
+{	// Maps an integer vector back to input axes and units; 'center' is not added.
+	btVector3 unpermuted;
+	unpermuted[medAxis] = btScalar(v.x);
+	unpermuted[maxAxis] = btScalar(v.y);
+	unpermuted[minAxis] = btScalar(v.z);
+	return unpermuted * scaling;
+}
+
+btVector3 btConvexHullInternal::getBtNormal(Face* face)
+{	// Unit normal of 'face': normalized cross product of its two direction vectors after conversion with toBtVector().
+	return toBtVector(face->dir0).cross(toBtVector(face->dir1)).normalized();
+}
+
+btVector3 btConvexHullInternal::getCoordinates(const Vertex* v)
+{	// Position of a hull vertex in input axes and units: rescaled by 'scaling', then shifted by 'center'.
+	btVector3 unpermuted;
+	unpermuted[medAxis] = v->xvalue();
+	unpermuted[maxAxis] = v->yvalue();
+	unpermuted[minAxis] = v->zvalue();
+	return unpermuted * scaling + center;
+}
+
+btScalar btConvexHullInternal::shrink(btScalar amount, btScalar clampAmount)
+{	// Calls shiftFace() on every face with 'amount'. Returns the amount used, 0 if nothing was done, or -amount if shiftFace() failed.
+	if (!vertexList)
+	{
+		return 0;	// no hull
+	}
+	int stamp = --mergeStamp;	// fresh marker for the traversal below
+	btAlignedObjectArray<Vertex*> stack;
+	vertexList->copy = stamp;
+	stack.push_back(vertexList);
+	btAlignedObjectArray<Face*> faces;
+
+	Point32 ref = vertexList->point;	// common apex of the tetrahedra used for the volume and centroid sums
+	Int128 hullCenterX(0, 0);
+	Int128 hullCenterY(0, 0);
+	Int128 hullCenterZ(0, 0);
+	Int128 volume(0, 0);
+
+	while (stack.size() > 0)	// depth-first walk over all vertices reachable through edges
+	{
+		Vertex* v = stack[stack.size() - 1];
+		stack.pop_back();
+		Edge* e = v->edges;
+		if (e)
+		{
+			do
+			{
+				if (e->target->copy != stamp)	// vertex not seen yet
+				{
+					e->target->copy = stamp;
+					stack.push_back(e->target);
+				}
+				if (e->copy != stamp)	// edge not yet assigned to a face in this pass: create its face
+				{
+					Face* face = facePool.newObject();
+					face->init(e->target, e->reverse->prev->target, v);
+					faces.push_back(face);
+					Edge* f = e;
+
+					Vertex* a = NULL;
+					Vertex* b = NULL;
+					do	// walk the boundary of the face; (v, a, b) fan out from v
+					{
+						if (a && b)
+						{
+							int64_t vol = (v->point - ref).dot((a->point - ref).cross(b->point - ref));	// scalar triple product of the tetrahedron (ref, v, a, b)
+							btAssert(vol >= 0);
+							Point32 c = v->point + a->point + b->point + ref;	// sum of the four corners, i.e. 4 x the tetrahedron's centroid
+							hullCenterX += vol * c.x;
+							hullCenterY += vol * c.y;
+							hullCenterZ += vol * c.z;
+							volume += vol;
+						}
+
+						btAssert(f->copy != stamp);
+						f->copy = stamp;
+						f->face = face;
+
+						a = b;
+						b = f->target;
+
+						f = f->reverse->prev;
+					} while (f != e);
+				}
+				e = e->next;
+			} while (e != v->edges);
+		}
+	}
+
+	if (volume.getSign() <= 0)
+	{
+		return 0;	// no positive volume, nothing to shrink
+	}
+
+	btVector3 hullCenter;
+	hullCenter[medAxis] = hullCenterX.toScalar();
+	hullCenter[maxAxis] = hullCenterY.toScalar();
+	hullCenter[minAxis] = hullCenterZ.toScalar();
+	hullCenter /= 4 * volume.toScalar();	// the factor 4 undoes the four-corner sum above
+	hullCenter *= scaling;
+
+	int faceCount = faces.size();
+
+	if (clampAmount > 0)	// optionally limit 'amount' to clampAmount times the smallest distance from hullCenter to a face
+	{
+		btScalar minDist = SIMD_INFINITY;
+		for (int i = 0; i < faceCount; i++)
+		{
+			btVector3 normal = getBtNormal(faces[i]);
+			btScalar dist = normal.dot(toBtVector(faces[i]->origin) - hullCenter);
+			if (dist < minDist)
+			{
+				minDist = dist;
+			}
+		}
+		
+		if (minDist <= 0)
+		{
+			return 0;
+		}
+
+		amount = btMin(amount, minDist * clampAmount);
+	}
+
+	unsigned int seed = 243703;	// fixed seed: the face order is shuffled, but identically on every run
+	for (int i = 0; i < faceCount; i++, seed = 1664525 * seed + 1013904223)
+	{
+		btSwap(faces[i], faces[seed % faceCount]);
+	}
+
+	for (int i = 0; i < faceCount; i++)
+	{
+		if (!shiftFace(faces[i], amount, stack))	// 'stack' is empty at this point and is passed by value
+		{
+			return -amount;
+		}
+	}
+
+	return amount;
+}
+
+bool btConvexHullInternal::shiftFace(Face* face, btScalar amount, btAlignedObjectArray<Vertex*> stack)
+{
+	btVector3 origShift = getBtNormal(face) * -amount;
+	if (scaling[0] != 0)
+	{
+		origShift[0] /= scaling[0];
+	}
+	if (scaling[1] != 0)
+	{
+		origShift[1] /= scaling[1];
+	}
+	if (scaling[2] != 0)
+	{
+		origShift[2] /= scaling[2];
+	}
+	Point32 shift((int32_t) origShift[medAxis], (int32_t) origShift[maxAxis], (int32_t) origShift[minAxis]);
+	if (shift.isZero())
+	{
+		return true;
+	}
+	Point64 normal = face->getNormal();
+#ifdef DEBUG_CONVEX_HULL
+	printf("\nShrinking face (%d %d %d) (%d %d %d) (%d %d %d) by (%d %d %d)\n",
+				 face->origin.x, face->origin.y, face->origin.z, face->dir0.x, face->dir0.y, face->dir0.z, face->dir1.x, face->dir1.y, face->dir1.z, shift.x, shift.y, shift.z);
+#endif
+	int64_t origDot = face->origin.dot(normal);
+	Point32 shiftedOrigin = face->origin + shift;
+	int64_t shiftedDot = shiftedOrigin.dot(normal);
+	btAssert(shiftedDot <= origDot);
+	if (shiftedDot >= origDot)
+	{
+		return false;
+	}
+
+	Edge* intersection = NULL;
+
+	Edge* startEdge = face->nearbyVertex->edges;
+#ifdef DEBUG_CONVEX_HULL
+	printf("Start edge is ");
+	startEdge->print();
+	printf(", normal is (%lld %lld %lld), shifted dot is %lld\n", normal.x, normal.y, normal.z, shiftedDot);
+#endif
+	Rational128 optDot = face->nearbyVertex->dot(normal);
+	int cmp = optDot.compare(shiftedDot);
+#ifdef SHOW_ITERATIONS
+	int n = 0;
+#endif
+	if (cmp >= 0)
+	{
+		Edge* e = startEdge;
+		do
+		{
+#ifdef SHOW_ITERATIONS
+			n++;
+#endif
+			Rational128 dot = e->target->dot(normal);
+			btAssert(dot.compare(origDot) <= 0);
+#ifdef DEBUG_CONVEX_HULL
+			printf("Moving downwards, edge is ");
+			e->print();
+			printf(", dot is %f (%f %lld)\n", (float) dot.toScalar(), (float) optDot.toScalar(), shiftedDot);
+#endif
+			if (dot.compare(optDot) < 0)
+			{
+				int c = dot.compare(shiftedDot);
+				optDot = dot;
+				e = e->reverse;
+				startEdge = e;
+				if (c < 0)
+				{
+					intersection = e;
+					break;
+				}
+				cmp = c;
+			}
+			e = e->prev;
+		} while (e != startEdge);
+
+		if (!intersection)
+		{
+			return false;
+		}
+	}
+	else
+	{
+		Edge* e = startEdge;
+		do
+		{
+#ifdef SHOW_ITERATIONS
+			n++;
+#endif
+			Rational128 dot = e->target->dot(normal);
+			btAssert(dot.compare(origDot) <= 0);
+#ifdef DEBUG_CONVEX_HULL
+			printf("Moving upwards, edge is ");
+			e->print();
+			printf(", dot is %f (%f %lld)\n", (float) dot.toScalar(), (float) optDot.toScalar(), shiftedDot);
+#endif
+			if (dot.compare(optDot) > 0)
+			{
+				cmp = dot.compare(shiftedDot);
+				if (cmp >= 0)
+				{
+					intersection = e;
+					break;
+				}
+				optDot = dot;
+				e = e->reverse;
+				startEdge = e;
+			}
+			e = e->prev;
+		} while (e != startEdge);
+		
+		if (!intersection)
+		{
+			return true;
+		}
+	}
+
+#ifdef SHOW_ITERATIONS
+	printf("Needed %d iterations to find initial intersection\n", n);
+#endif
+
+	if (cmp == 0)
+	{
+		Edge* e = intersection->reverse->next;
+#ifdef SHOW_ITERATIONS
+		n = 0;
+#endif
+		while (e->target->dot(normal).compare(shiftedDot) <= 0)
+		{
+#ifdef SHOW_ITERATIONS
+			n++;
+#endif
+			e = e->next;
+			if (e == intersection->reverse)
+			{
+				return true;
+			}
+#ifdef DEBUG_CONVEX_HULL
+			printf("Checking for outwards edge, current edge is ");
+			e->print();
+			printf("\n");
+#endif
+		}
+#ifdef SHOW_ITERATIONS
+		printf("Needed %d iterations to check for complete containment\n", n);
+#endif
+	}
+	
+	Edge* firstIntersection = NULL;
+	Edge* faceEdge = NULL;
+	Edge* firstFaceEdge = NULL;
+
+#ifdef SHOW_ITERATIONS
+	int m = 0;
+#endif
+	while (true)
+	{
+#ifdef SHOW_ITERATIONS
+		m++;
+#endif
+#ifdef DEBUG_CONVEX_HULL
+		printf("Intersecting edge is ");
+		intersection->print();
+		printf("\n");
+#endif
+		if (cmp == 0)
+		{
+			Edge* e = intersection->reverse->next;
+			startEdge = e;
+#ifdef SHOW_ITERATIONS
+			n = 0;
+#endif
+			while (true)
+			{
+#ifdef SHOW_ITERATIONS
+				n++;
+#endif
+				if (e->target->dot(normal).compare(shiftedDot) >= 0)
+				{
+					break;
+				}
+				intersection = e->reverse;
+				e = e->next;
+				if (e == startEdge)
+				{
+					return true;
+				}
+			}
+#ifdef SHOW_ITERATIONS
+			printf("Needed %d iterations to advance intersection\n", n);
+#endif
+		}
+
+#ifdef DEBUG_CONVEX_HULL
+		printf("Advanced intersecting edge to ");
+		intersection->print();
+		printf(", cmp = %d\n", cmp);
+#endif
+
+		if (!firstIntersection)
+		{
+			firstIntersection = intersection;
+		}
+		else if (intersection == firstIntersection)
+		{
+			break;
+		}
+
+		int prevCmp = cmp;
+		Edge* prevIntersection = intersection;
+		Edge* prevFaceEdge = faceEdge;
+
+		Edge* e = intersection->reverse;
+#ifdef SHOW_ITERATIONS
+		n = 0;
+#endif
+		while (true)
+		{
+#ifdef SHOW_ITERATIONS
+			n++;
+#endif
+			e = e->reverse->prev;
+			btAssert(e != intersection->reverse);
+			cmp = e->target->dot(normal).compare(shiftedDot);
+#ifdef DEBUG_CONVEX_HULL
+			printf("Testing edge ");
+			e->print();
+			printf(" -> cmp = %d\n", cmp);
+#endif
+			if (cmp >= 0)
+			{
+				intersection = e;
+				break;
+			}
+		}
+#ifdef SHOW_ITERATIONS
+		printf("Needed %d iterations to find other intersection of face\n", n);
+#endif
+
+		if (cmp > 0)
+		{
+			Vertex* removed = intersection->target;
+			e = intersection->reverse;
+			if (e->prev == e)
+			{
+				removed->edges = NULL;
+			}
+			else
+			{
+				removed->edges = e->prev;
+				e->prev->link(e->next);
+				e->link(e);
+			}
+#ifdef DEBUG_CONVEX_HULL
+			printf("1: Removed part contains (%d %d %d)\n", removed->point.x, removed->point.y, removed->point.z);
+#endif
+			
+			Point64 n0 = intersection->face->getNormal();
+			Point64 n1 = intersection->reverse->face->getNormal();
+			int64_t m00 = face->dir0.dot(n0);
+			int64_t m01 = face->dir1.dot(n0);
+			int64_t m10 = face->dir0.dot(n1);
+			int64_t m11 = face->dir1.dot(n1);
+			int64_t r0 = (intersection->face->origin - shiftedOrigin).dot(n0);
+			int64_t r1 = (intersection->reverse->face->origin - shiftedOrigin).dot(n1);
+			Int128 det = Int128::mul(m00, m11) - Int128::mul(m01, m10);
+			btAssert(det.getSign() != 0);
+			Vertex* v = vertexPool.newObject();
+			v->point.index = -1;
+			v->copy = -1;
+			v->point128 = PointR128(Int128::mul(face->dir0.x * r0, m11) - Int128::mul(face->dir0.x * r1, m01)
+															+ Int128::mul(face->dir1.x * r1, m00) - Int128::mul(face->dir1.x * r0, m10) + det * shiftedOrigin.x,
+															Int128::mul(face->dir0.y * r0, m11) - Int128::mul(face->dir0.y * r1, m01)
+															+ Int128::mul(face->dir1.y * r1, m00) - Int128::mul(face->dir1.y * r0, m10) + det * shiftedOrigin.y,
+															Int128::mul(face->dir0.z * r0, m11) - Int128::mul(face->dir0.z * r1, m01)
+															+ Int128::mul(face->dir1.z * r1, m00) - Int128::mul(face->dir1.z * r0, m10) + det * shiftedOrigin.z,
+															det);
+			v->point.x = (int32_t) v->point128.xvalue();
+			v->point.y = (int32_t) v->point128.yvalue();
+			v->point.z = (int32_t) v->point128.zvalue();
+			intersection->target = v;
+			v->edges = e;
+
+			stack.push_back(v);
+			stack.push_back(removed);
+			stack.push_back(NULL);
+		}
+
+		if (cmp || prevCmp || (prevIntersection->reverse->next->target != intersection->target))
+		{
+			faceEdge = newEdgePair(prevIntersection->target, intersection->target);
+			if (prevCmp == 0)
+			{
+				faceEdge->link(prevIntersection->reverse->next);
+			}
+			if ((prevCmp == 0) || prevFaceEdge)
+			{
+				prevIntersection->reverse->link(faceEdge);
+			}
+			if (cmp == 0)
+			{
+				intersection->reverse->prev->link(faceEdge->reverse);
+			}
+			faceEdge->reverse->link(intersection->reverse);
+		}
+		else
+		{
+			faceEdge = prevIntersection->reverse->next;
+		}
+
+		if (prevFaceEdge)
+		{
+			if (prevCmp > 0)
+			{
+				faceEdge->link(prevFaceEdge->reverse);
+			}
+			else if (faceEdge != prevFaceEdge->reverse)
+			{
+				stack.push_back(prevFaceEdge->target);
+				while (faceEdge->next != prevFaceEdge->reverse)
+				{
+					Vertex* removed = faceEdge->next->target;
+					removeEdgePair(faceEdge->next);
+					stack.push_back(removed);
+#ifdef DEBUG_CONVEX_HULL
+					printf("2: Removed part contains (%d %d %d)\n", removed->point.x, removed->point.y, removed->point.z);
+#endif
+				}
+				stack.push_back(NULL);
+			}
+		}
+		faceEdge->face = face;
+		faceEdge->reverse->face = intersection->face;
+
+		if (!firstFaceEdge)
+		{
+			firstFaceEdge = faceEdge;
+		}
+	}
+#ifdef SHOW_ITERATIONS
+	printf("Needed %d iterations to process all intersections\n", m);
+#endif
+
+	if (cmp > 0)
+	{
+		firstFaceEdge->reverse->target = faceEdge->target;
+		firstIntersection->reverse->link(firstFaceEdge);
+		firstFaceEdge->link(faceEdge->reverse);
+	}
+	else if (firstFaceEdge != faceEdge->reverse)
+	{
+		stack.push_back(faceEdge->target);
+		while (firstFaceEdge->next != faceEdge->reverse)
+		{
+			Vertex* removed = firstFaceEdge->next->target;
+			removeEdgePair(firstFaceEdge->next);
+			stack.push_back(removed);
+#ifdef DEBUG_CONVEX_HULL
+			printf("3: Removed part contains (%d %d %d)\n", removed->point.x, removed->point.y, removed->point.z);
+#endif
+		}
+		stack.push_back(NULL);
+	}
+
+	btAssert(stack.size() > 0);
+	vertexList = stack[0];
+
+#ifdef DEBUG_CONVEX_HULL
+	printf("Removing part\n");
+#endif
+#ifdef SHOW_ITERATIONS
+	n = 0;
+#endif
+	int pos = 0;
+	while (pos < stack.size())
+	{
+		int end = stack.size();
+		while (pos < end)
+		{
+			Vertex* kept = stack[pos++];
+#ifdef DEBUG_CONVEX_HULL
+			kept->print();
+#endif
+			bool deeper = false;
+			Vertex* removed;
+			while ((removed = stack[pos++]) != NULL)
+			{
+#ifdef SHOW_ITERATIONS
+				n++;
+#endif
+				kept->receiveNearbyFaces(removed);
+				while (removed->edges)
+				{
+					if (!deeper)
+					{
+						deeper = true;
+						stack.push_back(kept);
+					}
+					stack.push_back(removed->edges->target);
+					removeEdgePair(removed->edges);
+				}
+			}
+			if (deeper)
+			{
+				stack.push_back(NULL);
+			}
+		}
+	}
+#ifdef SHOW_ITERATIONS
+	printf("Needed %d iterations to remove part\n", n);
+#endif
+
+	stack.resize(0);
+	face->origin = shiftedOrigin;
+
+	return true;
+}
+
+
+// Look up the output index recorded in vertex->copy. A negative value means "not assigned yet":
+// in that case the vertex is appended to "vertices" and its position there becomes the index.
+static int getVertexCopy(btConvexHullInternal::Vertex* vertex, btAlignedObjectArray<btConvexHullInternal::Vertex*>& vertices)
+{
+	if (vertex->copy >= 0)
+		return vertex->copy; // already has an index
+	const int slot = vertices.size(); // position the vertex is about to occupy
+	vertex->copy = slot;
+	vertices.push_back(vertex);
+#ifdef DEBUG_CONVEX_HULL
+	printf("Vertex %d gets index *%d\n", vertex->point.index, slot);
+#endif
+	return slot;
+}
+// Builds the hull of the input points, optionally shrinks it, and exports it into "vertices", "edges" and "faces"; returns the value reported by the shrink step (0 if none was requested).
+btScalar btConvexHullComputer::compute(const void* coords, bool doubleCoords, int stride, int count, btScalar shrink, btScalar shrinkClamp)
+{
+	if (count <= 0)
+	{
+		vertices.clear();
+		edges.clear();
+		faces.clear();
+		return 0;
+	}
+	// Construct the hull in the internal representation.
+	btConvexHullInternal hull;
+	hull.compute(coords, doubleCoords, stride, count);
+	// Shrink only when requested; a negative result is handed back to the caller after emptying the outputs.
+	btScalar shift = 0;
+	if ((shrink > 0) && ((shift = hull.shrink(shrink, shrinkClamp)) < 0))
+	{
+		vertices.clear();
+		edges.clear();
+		faces.clear();
+		return shift;
+	}
+	// Discard any previous output before exporting the new hull.
+	vertices.resize(0);
+	edges.resize(0);
+	faces.resize(0);
+	// Pass 1: starting at hull.vertexList, visit every vertex reached through edges, copying coordinates and creating output edge pairs.
+	btAlignedObjectArray<btConvexHullInternal::Vertex*> oldVertices;
+	getVertexCopy(hull.vertexList, oldVertices); // seeds the work list with the first vertex
+	int copied = 0;
+	while (copied < oldVertices.size()) // oldVertices keeps growing while new edge targets are discovered
+	{
+		btConvexHullInternal::Vertex* v = oldVertices[copied];
+		vertices.push_back(hull.getCoordinates(v));
+		btConvexHullInternal::Edge* firstEdge = v->edges;
+		if (firstEdge)
+		{
+			int firstCopy = -1;
+			int prevCopy = -1;
+			btConvexHullInternal::Edge* e = firstEdge;
+			do
+			{
+				if (e->copy < 0) // this edge pair has not been exported yet
+				{
+					int s = edges.size();
+					edges.push_back(Edge());
+					edges.push_back(Edge());
+					Edge* c = &edges[s];
+					Edge* r = &edges[s + 1];
+					e->copy = s;
+					e->reverse->copy = s + 1;
+					c->reverse = 1; // links are relative offsets: the reverse edge is the next array element
+					r->reverse = -1; // ...and from there the original edge is the previous element
+					c->targetVertex = getVertexCopy(e->target, oldVertices);
+					r->targetVertex = copied; // the reverse edge points back at the vertex being processed
+#ifdef DEBUG_CONVEX_HULL
+					printf("      CREATE: Vertex *%d has edge to *%d\n", copied, c->getTargetVertex());
+#endif
+				}
+				if (prevCopy >= 0)
+				{
+					edges[e->copy].next = prevCopy - e->copy; // "next" is stored as an offset relative to this edge
+				}
+				else
+				{
+					firstCopy = e->copy;
+				}
+				prevCopy = e->copy;
+				e = e->next;
+			} while (e != firstEdge);
+			edges[firstCopy].next = prevCopy - firstCopy; // close the ring: the first edge links to the last one visited
+		}
+		copied++;
+	}
+	// Pass 2: for every edge still marked (copy >= 0), record it in "faces" and clear the marks along the loop f = f->reverse->prev.
+	for (int i = 0; i < copied; i++)
+	{
+		btConvexHullInternal::Vertex* v = oldVertices[i];
+		btConvexHullInternal::Edge* firstEdge = v->edges;
+		if (firstEdge)
+		{
+			btConvexHullInternal::Edge* e = firstEdge;
+			do
+			{
+				if (e->copy >= 0)
+				{
+#ifdef DEBUG_CONVEX_HULL
+					printf("Vertex *%d has edge to *%d\n", i, edges[e->copy].getTargetVertex());
+#endif
+					faces.push_back(e->copy);
+					btConvexHullInternal::Edge* f = e;
+					do
+					{
+#ifdef DEBUG_CONVEX_HULL
+						printf("   Face *%d\n", edges[f->copy].getTargetVertex());
+#endif
+						f->copy = -1; // cleared so that this loop is recorded in "faces" only once
+						f = f->reverse->prev;
+					} while (f != e);
+				}
+				e = e->next;
+			} while (e != firstEdge);
+		}
+	}
+	// Value obtained from hull.shrink above, or the initial 0 when "shrink" was not positive.
+	return shift;
+}
+
+
+
+
+
diff --git a/Engine/lib/bullet/src/LinearMath/btConvexHullComputer.h b/Engine/lib/bullet/src/LinearMath/btConvexHullComputer.h
new file mode 100644
index 000000000..7240ac4fb
--- /dev/null
+++ b/Engine/lib/bullet/src/LinearMath/btConvexHullComputer.h
@@ -0,0 +1,103 @@
+/*
+Copyright (c) 2011 Ole Kniemeyer, MAXON, www.maxon.net
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_CONVEX_HULL_COMPUTER_H
+#define BT_CONVEX_HULL_COMPUTER_H
+
+#include "btVector3.h"
+#include "btAlignedObjectArray.h"
+
+/// Convex hull implementation based on Preparata and Hong
+/// See http://code.google.com/p/bullet/issues/detail?id=275
+/// Ole Kniemeyer, MAXON Computer GmbH
+class btConvexHullComputer
+{
+	private:
+		// Common implementation of both public overloads; "doubleCoords" is false for float input and true for double input.
+		btScalar compute(const void* coords, bool doubleCoords, int stride, int count, btScalar shrink, btScalar shrinkClamp);
+
+	public:
+
+		// Directed edge of the output hull, stored as an element of "edges".
+		// "next" and "reverse" are offsets relative to the edge's own address; "targetVertex" is a vertex index.
+		class Edge
+		{
+			private:
+				int next;
+				int reverse;
+				int targetVertex;
+
+				friend class btConvexHullComputer;
+
+			public:
+				// the oppositely directed edge
+				const Edge* getReverseEdge() const { return this + reverse; }
+
+				// the source of an edge is the target of its reverse edge
+				int getSourceVertex() const
+				{
+					const Edge* twin = getReverseEdge();
+					return twin->targetVertex;
+				}
+
+				int getTargetVertex() const { return targetVertex; }
+
+				// clockwise list of all edges of a vertex
+				const Edge* getNextEdgeOfVertex() const { return this + next; }
+
+				// counter-clockwise list of all edges of a face
+				const Edge* getNextEdgeOfFace() const { return getReverseEdge()->getNextEdgeOfVertex(); }
+		};
+
+
+		// Vertices of the output hull
+		btAlignedObjectArray<btVector3> vertices;
+
+		// Edges of the output hull
+		btAlignedObjectArray<Edge> edges;
+
+		// Faces of the convex hull. Each entry is an index into the "edges" array pointing to an edge of the face. Faces are planar n-gons
+		btAlignedObjectArray<int> faces;
+
+		/*
+		Compute the convex hull of "count" vertices stored in "coords".
+
+		coords:      address of the first vertex (single precision in this overload)
+		stride:      difference in bytes between the addresses of consecutive vertices
+		count:       number of vertices; if it is not positive the outputs are emptied and 0 is returned
+		shrink:      if positive, the convex hull is shrunk by that amount (each face is moved by "shrink"
+		             length units towards the center along its normal)
+		shrinkClamp: if positive, "shrink" is clamped to not exceed "shrinkClamp * innerRadius", where
+		             "innerRadius" is the minimum distance of a face to the center of the convex hull
+
+		The returned value is the amount by which the hull has been shrunk. If it is negative, the amount
+		was so large that the resulting convex hull is empty.
+
+		The output convex hull can be found in the member variables "vertices", "edges", "faces".
+		*/
+		btScalar compute(const float* coords, int stride, int count, btScalar shrink, btScalar shrinkClamp)
+		{
+			return compute(static_cast<const void*>(coords), false, stride, count, shrink, shrinkClamp);
+		}
+
+		// same as above, but double precision
+		btScalar compute(const double* coords, int stride, int count, btScalar shrink, btScalar shrinkClamp)
+		{
+			return compute(static_cast<const void*>(coords), true, stride, count, shrink, shrinkClamp);
+		}
+};
+
+
+#endif //BT_CONVEX_HULL_COMPUTER_H
+
diff --git a/Engine/lib/bullet/src/LinearMath/btDefaultMotionState.h b/Engine/lib/bullet/src/LinearMath/btDefaultMotionState.h
index d758f77ed..c90b74923 100644
--- a/Engine/lib/bullet/src/LinearMath/btDefaultMotionState.h
+++ b/Engine/lib/bullet/src/LinearMath/btDefaultMotionState.h
@@ -1,14 +1,18 @@
-#ifndef DEFAULT_MOTION_STATE_H
-#define DEFAULT_MOTION_STATE_H
+#ifndef BT_DEFAULT_MOTION_STATE_H
+#define BT_DEFAULT_MOTION_STATE_H
+
+#include "btMotionState.h"
 
 ///The btDefaultMotionState provides a common implementation to synchronize world transforms with offsets.
-struct	btDefaultMotionState : public btMotionState
+ATTRIBUTE_ALIGNED16(struct)	btDefaultMotionState : public btMotionState
 {
 	btTransform m_graphicsWorldTrans;
 	btTransform	m_centerOfMassOffset;
 	btTransform m_startWorldTrans;
 	void*		m_userPointer;
 
+	BT_DECLARE_ALIGNED_ALLOCATOR();
+
 	btDefaultMotionState(const btTransform& startTrans = btTransform::getIdentity(),const btTransform& centerOfMassOffset = btTransform::getIdentity())
 		: m_graphicsWorldTrans(startTrans),
 		m_centerOfMassOffset(centerOfMassOffset),
@@ -35,4 +39,4 @@ struct	btDefaultMotionState : public btMotionState
 
 };
 
-#endif //DEFAULT_MOTION_STATE_H
+#endif //BT_DEFAULT_MOTION_STATE_H
diff --git a/Engine/lib/bullet/src/LinearMath/btGrahamScan2dConvexHull.h b/Engine/lib/bullet/src/LinearMath/btGrahamScan2dConvexHull.h
new file mode 100644
index 000000000..e658c5cf0
--- /dev/null
+++ b/Engine/lib/bullet/src/LinearMath/btGrahamScan2dConvexHull.h
@@ -0,0 +1,117 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2011 Advanced Micro Devices, Inc.  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+#ifndef GRAHAM_SCAN_2D_CONVEX_HULL_H
+#define GRAHAM_SCAN_2D_CONVEX_HULL_H
+
+
+#include "btVector3.h"
+#include "btAlignedObjectArray.h"
+
+// Point fed to GrahamScanConvexHull2D: a btVector3 extended with a sort key and a caller-supplied index.
+struct GrahamVector3 : public btVector3
+{
+	GrahamVector3(const btVector3& org, int orgIndex)
+		:btVector3(org), m_angle(btScalar(0.)), m_orgIndex(orgIndex) // m_angle starts with a defined value; the scan overwrites it
+	{
+	}
+	btScalar	m_angle;	// sort key relative to the anchor point, written by GrahamScanConvexHull2D
+	int m_orgIndex;	// index passed to the constructor; final tie-breaker in btAngleCompareFunc
+};
+
+
+// Ordering used to sort the points of the Graham scan: ascending m_angle, ties broken by squared
+// distance to the anchor point and finally by m_orgIndex.
+struct btAngleCompareFunc {
+	// point the distances are measured from
+	btVector3 m_anchor;
+	btAngleCompareFunc(const btVector3& anchor) : m_anchor(anchor) {}
+
+	bool operator()(const GrahamVector3& a, const GrahamVector3& b) const {
+		// first key: the precomputed m_angle
+		if (a.m_angle != b.m_angle)
+			return a.m_angle < b.m_angle;
+
+		// second key: squared distance from the anchor
+		const btScalar distA = (a-m_anchor).length2();
+		const btScalar distB = (b-m_anchor).length2();
+		if (distA != distB)
+			return distA < distB;
+
+		// last resort: the stored index
+		return a.m_orgIndex < b.m_orgIndex;
+	}
+};
+
+// Computes the 2D convex hull (Graham scan) of "originalPoints" with respect to the plane axes derived from "normalAxis".
+// "originalPoints" is reordered in place and its m_angle fields are overwritten; the hull points are appended to "hull".
+inline void GrahamScanConvexHull2D(btAlignedObjectArray<GrahamVector3>& originalPoints, btAlignedObjectArray<GrahamVector3>& hull, const btVector3& normalAxis)
+{
+	btVector3 axis0,axis1;
+	btPlaneSpace1(normalAxis,axis0,axis1);
+
+	if (originalPoints.size()<=1)
+	{
+		// zero or one point: the hull is the input itself
+		for (int i=0;i<originalPoints.size();i++)
+			hull.push_back(originalPoints[0]);
+		return;
+	}
+	//step 1: find anchor point with smallest projection on axis0 and move it to first location
+	for (int i=0;i<originalPoints.size();i++)
+	{
+		btScalar projL = originalPoints[i].dot(axis0);
+		btScalar projR = originalPoints[0].dot(axis0);
+		if (projL < projR)
+			originalPoints.swap(0,i);
+	}
+
+	//also precompute angles; the anchor gets a key below that of every other point
+	originalPoints[0].m_angle = -1e30f;
+	for (int i=1;i<originalPoints.size();i++)
+	{
+		btVector3 ar = originalPoints[i]-originalPoints[0];
+		btScalar arLength = ar.length();
+		// a point coinciding with the anchor has zero length: dividing would give NaN and break the sort, so it gets the anchor's key
+		originalPoints[i].m_angle = (arLength > btScalar(0.)) ? btCross(axis0, ar).dot(normalAxis) / arLength : btScalar(-1e30f);
+	}
+
+	//step 2: sort all points, based on 'angle' with this anchor
+	btAngleCompareFunc comp(originalPoints[0]);
+	originalPoints.quickSortInternal(comp,1,originalPoints.size()-1);
+
+	int i;
+	for (i = 0; i<2; i++)
+		hull.push_back(originalPoints[i]);
+
+	//step 3: keep all 'convex' points and discard concave points (using back tracking)
+	for (; i != originalPoints.size(); i++)
+	{
+		// pop hull points until the last two of them and the new point make a convex turn
+		while (hull.size()>1)
+		{
+			btVector3& a = hull[hull.size()-2];
+			btVector3& b = hull[hull.size()-1];
+			if (btCross(a-b,a-originalPoints[i]).dot(normalAxis)> 0)
+				break;
+			hull.pop_back();
+		}
+		// always add the new point, also when only the anchor is left (previously the point was dropped in that case)
+		hull.push_back(originalPoints[i]);
+	}
+}
+
+#endif //GRAHAM_SCAN_2D_CONVEX_HULL_H
diff --git a/Engine/lib/bullet/src/LinearMath/btHashMap.h b/Engine/lib/bullet/src/LinearMath/btHashMap.h
index fbe07d5be..ce07db3ac 100644
--- a/Engine/lib/bullet/src/LinearMath/btHashMap.h
+++ b/Engine/lib/bullet/src/LinearMath/btHashMap.h
@@ -1,3 +1,19 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2009 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+
 #ifndef BT_HASH_MAP_H
 #define BT_HASH_MAP_H
 
@@ -18,8 +34,8 @@ struct btHashString
 		:m_string(name)
 	{
 		/* magic numbers from http://www.isthe.com/chongo/tech/comp/fnv/ */
-		static const unsigned int  InitialFNV = 2166136261;
-		static const unsigned int FNVMultiple = 16777619;
+		static const unsigned int  InitialFNV = 2166136261u;
+		static const unsigned int FNVMultiple = 16777619u;
 
 		/* Fowler / Noll / Vo (FNV) Hash */
 		unsigned int hash = InitialFNV;
@@ -47,7 +63,7 @@ struct btHashString
 			return( ret );
 	}
 
-	const bool equals(const btHashString& other) const
+	bool equals(const btHashString& other) const
 	{
 		return (m_string == other.m_string) ||
 			(0==portableStringCompare(m_string,other.m_string));
@@ -58,14 +74,123 @@ struct btHashString
 
 const int BT_HASH_NULL=0xffffffff;
 
+
+// Integer key providing the equals()/getHash() pair; the hash is Thomas Wang's integer mix of the stored uid.
+class btHashInt
+{
+	int	m_uid;
+public:
+	btHashInt(int uid)	:m_uid(uid) {}
+
+	int	getUid1() const
+	{
+		return m_uid;
+	}
+
+	void	setUid1(int uid)
+	{
+		m_uid = uid;
+	}
+
+	bool equals(const btHashInt& other) const
+	{
+		return getUid1() == other.getUid1();
+	}
+
+	// The mix runs in unsigned arithmetic: left-shifting a negative int or overflowing a signed int is undefined behaviour.
+	SIMD_FORCE_INLINE	unsigned int getHash()const
+	{
+		unsigned int key = m_uid;
+		// Thomas Wang's hash
+		key += ~(key << 15);	key ^=  (key >> 10);	key +=  (key << 3);	key ^=  (key >> 6);	key += ~(key << 11);	key ^=  (key >> 16);
+		return key;
+	}
+};
+
+
+
+// Pointer key providing the equals()/getHash() pair; the hash is Thomas Wang's integer mix of the pointer's bits.
+class btHashPtr
+{
+	// The pointer shares storage with two unsigned ints so that getHash() can read its bit pattern.
+	union
+	{
+		const void*	m_pointer;
+		unsigned int	m_hashValues[2];
+	};
+
+public:
+
+	btHashPtr(const void* ptr)
+		:m_pointer(ptr)
+	{
+	}
+
+	const void*	getPointer() const
+	{
+		return m_pointer;
+	}
+
+	bool equals(const btHashPtr& other) const
+	{
+		return getPointer() == other.getPointer();
+	}
+
+	// Unsigned arithmetic throughout: summing, shifting or overflowing signed ints here would be undefined behaviour.
+	SIMD_FORCE_INLINE	unsigned int getHash()const
+	{
+		const bool VOID_IS_8 = ((sizeof(void*)==8));
+		// with 8-byte pointers both words are folded together, otherwise only the first word is used
+		unsigned int key = VOID_IS_8? m_hashValues[0]+m_hashValues[1] : m_hashValues[0];
+
+		// Thomas Wang's hash
+		key += ~(key << 15);	key ^=  (key >> 10);	key +=  (key << 3);	key ^=  (key >> 6);	key += ~(key << 11);	key ^=  (key >> 16);
+		return key;
+	}
+
+};
+
+
+// Integer-uid key providing the equals()/getHash() pair; the "Value" template parameter is not used inside the class.
+template <class Value>
+class btHashKeyPtr
+{
+        int     m_uid;
+public:
+
+        btHashKeyPtr(int uid)    :m_uid(uid)
+        {
+        }
+
+        int     getUid1() const
+        {
+                return m_uid;
+        }
+
+        bool equals(const btHashKeyPtr<Value>& other) const
+        {
+                return getUid1() == other.getUid1();
+        }
+
+        // The mix runs in unsigned arithmetic: left-shifting a negative int or overflowing a signed int is undefined behaviour.
+        SIMD_FORCE_INLINE       unsigned int getHash()const
+        {
+                unsigned int key = m_uid;
+                // Thomas Wang's hash
+                key += ~(key << 15);	key ^=  (key >> 10);	key +=  (key << 3);	key ^=  (key >> 6);	key += ~(key << 11);	key ^=  (key >> 16);
+                return key;
+        }
+
+};
+
+
 template <class Value>
 class btHashKey
 {
 	int	m_uid;
 public:
 
-	btHashKey(int uid)
-		:m_uid(uid)
+	btHashKey(int uid)	:m_uid(uid)
 	{
 	}
 
@@ -83,70 +208,26 @@ public:
 	{
 		int key = m_uid;
 		// Thomas Wang's hash
-		key += ~(key << 15);
-		key ^=  (key >> 10);
-		key +=  (key << 3);
-		key ^=  (key >> 6);
-		key += ~(key << 11);
-		key ^=  (key >> 16);
+		key += ~(key << 15);	key ^=  (key >> 10);	key +=  (key << 3);	key ^=  (key >> 6);	key += ~(key << 11);	key ^=  (key >> 16);
 		return key;
 	}
-
-	
 };
 
 
-template <class Value>
-class btHashKeyPtr
-{
-	int	m_uid;
-public:
-
-	btHashKeyPtr(int uid)
-		:m_uid(uid)
-	{
-	}
-
-	int	getUid1() const
-	{
-		return m_uid;
-	}
-
-	bool equals(const btHashKeyPtr<Value>& other) const
-	{
-		return getUid1() == other.getUid1();
-	}
-
-	//to our success
-	SIMD_FORCE_INLINE	unsigned int getHash()const
-	{
-		int key = m_uid;
-		// Thomas Wang's hash
-		key += ~(key << 15);
-		key ^=  (key >> 10);
-		key +=  (key << 3);
-		key ^=  (key >> 6);
-		key += ~(key << 11);
-		key ^=  (key >> 16);
-		return key;
-	}
-
-	
-};
-
 ///The btHashMap template class implements a generic and lightweight hashmap.
 ///A basic sample of how to use btHashMap is located in Demos\BasicDemo\main.cpp
 template <class Key, class Value>
 class btHashMap
 {
 
+protected:
 	btAlignedObjectArray<int>		m_hashTable;
 	btAlignedObjectArray<int>		m_next;
 	
 	btAlignedObjectArray<Value>		m_valueArray;
 	btAlignedObjectArray<Key>		m_keyArray;
 
-	void	growTables(const Key& key)
+	void	growTables(const Key& /*key*/)
 	{
 		int newCapacity = m_valueArray.capacity();
 
@@ -259,7 +340,6 @@ class btHashMap
 		}
 
 		// Remove the last pair from the hash table.
-		const Value* lastValue = &m_valueArray[lastPairIndex];
 		int lastHash = m_keyArray[lastPairIndex].getHash() & (m_valueArray.capacity()-1);
 
 		index = m_hashTable[lastHash];
diff --git a/Engine/lib/bullet/src/LinearMath/btIDebugDraw.h b/Engine/lib/bullet/src/LinearMath/btIDebugDraw.h
index a0cbf1dfc..a00d7763a 100644
--- a/Engine/lib/bullet/src/LinearMath/btIDebugDraw.h
+++ b/Engine/lib/bullet/src/LinearMath/btIDebugDraw.h
@@ -14,8 +14,8 @@ subject to the following restrictions:
 */
 
 
-#ifndef IDEBUG_DRAW__H
-#define IDEBUG_DRAW__H
+#ifndef BT_IDEBUG_DRAW__H
+#define BT_IDEBUG_DRAW__H
 
 #include "btVector3.h"
 #include "btTransform.h"
@@ -24,6 +24,7 @@ subject to the following restrictions:
 ///The btIDebugDraw interface class allows hooking up a debug renderer to visually debug simulations.
 ///Typical use case: create a debug drawer object, and assign it to a btCollisionWorld or btDynamicsWorld using setDebugDrawer and call debugDrawWorld.
 ///A class that implements the btIDebugDraw interface has to implement the drawLine method at a minimum.
+///For color arguments the X,Y,Z components refer to Red, Green and Blue each in the range [0..1]
 class	btIDebugDraw
 {
 	public:
@@ -45,25 +46,54 @@ class	btIDebugDraw
 		DBG_DrawConstraints = (1 << 11),
 		DBG_DrawConstraintLimits = (1 << 12),
 		DBG_FastWireframe = (1<<13),
+        DBG_DrawNormals = (1<<14),
 		DBG_MAX_DEBUG_DRAW_MODE
 	};
 
 	virtual ~btIDebugDraw() {};
 
+	virtual void	drawLine(const btVector3& from,const btVector3& to,const btVector3& color)=0;
+		
 	virtual void    drawLine(const btVector3& from,const btVector3& to, const btVector3& fromColor, const btVector3& toColor)
 	{
+        (void) toColor;
 		drawLine (from, to, fromColor);
 	}
 
-	virtual void	drawBox (const btVector3& boxMin, const btVector3& boxMax, const btVector3& color, btScalar alpha)
+	virtual void	drawSphere(btScalar radius, const btTransform& transform, const btVector3& color)
 	{
-	}
+		btVector3 start = transform.getOrigin();
 
+		const btVector3 xoffs = transform.getBasis() * btVector3(radius,0,0);
+		const btVector3 yoffs = transform.getBasis() * btVector3(0,radius,0);
+		const btVector3 zoffs = transform.getBasis() * btVector3(0,0,radius);
+
+		// XY 
+		drawLine(start-xoffs, start+yoffs, color);
+		drawLine(start+yoffs, start+xoffs, color);
+		drawLine(start+xoffs, start-yoffs, color);
+		drawLine(start-yoffs, start-xoffs, color);
+
+		// XZ
+		drawLine(start-xoffs, start+zoffs, color);
+		drawLine(start+zoffs, start+xoffs, color);
+		drawLine(start+xoffs, start-zoffs, color);
+		drawLine(start-zoffs, start-xoffs, color);
+
+		// YZ
+		drawLine(start-yoffs, start+zoffs, color);
+		drawLine(start+zoffs, start+yoffs, color);
+		drawLine(start+yoffs, start-zoffs, color);
+		drawLine(start-zoffs, start-yoffs, color);
+	}
+	
 	virtual void	drawSphere (const btVector3& p, btScalar radius, const btVector3& color)
 	{
+		btTransform tr;
+		tr.setIdentity();
+		tr.setOrigin(p);
+		drawSphere(radius,tr,color);
 	}
-
-	virtual void	drawLine(const btVector3& from,const btVector3& to,const btVector3& color)=0;
 	
 	virtual	void	drawTriangle(const btVector3& v0,const btVector3& v1,const btVector3& v2,const btVector3& /*n0*/,const btVector3& /*n1*/,const btVector3& /*n2*/,const btVector3& color, btScalar alpha)
 	{
@@ -86,7 +116,7 @@ class	btIDebugDraw
 	
 	virtual int		getDebugMode() const = 0;
 
-	inline void drawAabb(const btVector3& from,const btVector3& to,const btVector3& color)
+	virtual void drawAabb(const btVector3& from,const btVector3& to,const btVector3& color)
 	{
 
 		btVector3 halfExtents = (to-from)* 0.5f;
@@ -115,7 +145,7 @@ class	btIDebugDraw
 				edgecoord[i]*=-1.f;
 		}
 	}
-	void drawTransform(const btTransform& transform, btScalar orthoLen)
+	virtual void drawTransform(const btTransform& transform, btScalar orthoLen)
 	{
 		btVector3 start = transform.getOrigin();
 		drawLine(start, start+transform.getBasis() * btVector3(orthoLen, 0, 0), btVector3(0.7f,0,0));
@@ -123,7 +153,7 @@ class	btIDebugDraw
 		drawLine(start, start+transform.getBasis() * btVector3(0, 0, orthoLen), btVector3(0,0,0.7f));
 	}
 
-	void drawArc(const btVector3& center, const btVector3& normal, const btVector3& axis, btScalar radiusA, btScalar radiusB, btScalar minAngle, btScalar maxAngle, 
+	virtual void drawArc(const btVector3& center, const btVector3& normal, const btVector3& axis, btScalar radiusA, btScalar radiusB, btScalar minAngle, btScalar maxAngle, 
 				const btVector3& color, bool drawSect, btScalar stepDegrees = btScalar(10.f))
 	{
 		const btVector3& vx = axis;
@@ -148,7 +178,7 @@ class	btIDebugDraw
 			drawLine(center, prev, color);
 		}
 	}
-	void drawSpherePatch(const btVector3& center, const btVector3& up, const btVector3& axis, btScalar radius, 
+	virtual void drawSpherePatch(const btVector3& center, const btVector3& up, const btVector3& axis, btScalar radius, 
 		btScalar minTh, btScalar maxTh, btScalar minPs, btScalar maxPs, const btVector3& color, btScalar stepDegrees = btScalar(10.f))
 	{
 		btVector3 vA[74];
@@ -250,7 +280,8 @@ class	btIDebugDraw
 		}
 	}
 	
-	void drawBox(const btVector3& bbMin, const btVector3& bbMax, const btVector3& color)
+  
+	virtual void drawBox(const btVector3& bbMin, const btVector3& bbMax, const btVector3& color)
 	{
 		drawLine(btVector3(bbMin[0], bbMin[1], bbMin[2]), btVector3(bbMax[0], bbMin[1], bbMin[2]), color);
 		drawLine(btVector3(bbMax[0], bbMin[1], bbMin[2]), btVector3(bbMax[0], bbMax[1], bbMin[2]), color);
@@ -265,7 +296,7 @@ class	btIDebugDraw
 		drawLine(btVector3(bbMax[0], bbMax[1], bbMax[2]), btVector3(bbMin[0], bbMax[1], bbMax[2]), color);
 		drawLine(btVector3(bbMin[0], bbMax[1], bbMax[2]), btVector3(bbMin[0], bbMin[1], bbMax[2]), color);
 	}
-	void drawBox(const btVector3& bbMin, const btVector3& bbMax, const btTransform& trans, const btVector3& color)
+	virtual void drawBox(const btVector3& bbMin, const btVector3& bbMax, const btTransform& trans, const btVector3& color)
 	{
 		drawLine(trans * btVector3(bbMin[0], bbMin[1], bbMin[2]), trans * btVector3(bbMax[0], bbMin[1], bbMin[2]), color);
 		drawLine(trans * btVector3(bbMax[0], bbMin[1], bbMin[2]), trans * btVector3(bbMax[0], bbMax[1], bbMin[2]), color);
@@ -280,8 +311,109 @@ class	btIDebugDraw
 		drawLine(trans * btVector3(bbMax[0], bbMax[1], bbMax[2]), trans * btVector3(bbMin[0], bbMax[1], bbMax[2]), color);
 		drawLine(trans * btVector3(bbMin[0], bbMax[1], bbMax[2]), trans * btVector3(bbMin[0], bbMin[1], bbMax[2]), color);
 	}
+
+	virtual void drawCapsule(btScalar radius, btScalar halfHeight, int upAxis, const btTransform& transform, const btVector3& color)
+	{	// Debug-draws a capsule: one drawSphere call per end cap plus four straight side lines between the caps.
+		btVector3 capStart(0.f,0.f,0.f);
+		capStart[upAxis] = -halfHeight;	// NOTE(review): upAxis is used as an index without a range check; presumably 0, 1 or 2 - confirm
+		// capStart / capEnd are the local-space cap centres at -halfHeight / +halfHeight along upAxis.
+		btVector3 capEnd(0.f,0.f,0.f);
+		capEnd[upAxis] = halfHeight;
+
+		// Draw the ends
+		{
+			// Copy of transform (same basis) whose origin is replaced by transform * capStart.
+			btTransform childTransform = transform;
+			childTransform.getOrigin() = transform * capStart;
+			drawSphere(radius, childTransform, color);
+		}
+
+		{
+			btTransform childTransform = transform;
+			childTransform.getOrigin() = transform * capEnd;
+			drawSphere(radius, childTransform, color);
+		}
+
+		// Draw some additional lines
+		btVector3 start = transform.getOrigin();
+		// First pair of side lines: both cap centres offset by +radius, then by -radius, along axis (upAxis+1)%3.
+		capStart[(upAxis+1)%3] = radius;
+		capEnd[(upAxis+1)%3] = radius;
+		drawLine(start+transform.getBasis() * capStart,start+transform.getBasis() * capEnd, color);
+		capStart[(upAxis+1)%3] = -radius;
+		capEnd[(upAxis+1)%3] = -radius;
+		drawLine(start+transform.getBasis() * capStart,start+transform.getBasis() * capEnd, color);
+		// Clear that offset so the second pair is displaced along axis (upAxis+2)%3 only.
+		capStart[(upAxis+1)%3] = 0.f;
+		capEnd[(upAxis+1)%3] = 0.f;
+		// Second pair of side lines: +radius, then -radius, along axis (upAxis+2)%3.
+		capStart[(upAxis+2)%3] = radius;
+		capEnd[(upAxis+2)%3] = radius;
+		drawLine(start+transform.getBasis() * capStart,start+transform.getBasis() * capEnd, color);
+		capStart[(upAxis+2)%3] = -radius;
+		capEnd[(upAxis+2)%3] = -radius;
+		drawLine(start+transform.getBasis() * capStart,start+transform.getBasis() * capEnd, color);
+	}
+
+	virtual void drawCylinder(btScalar radius, btScalar halfHeight, int upAxis, const btTransform& transform, const btVector3& color)
+	{	// Debug-draws a cylinder outline: two straight side lines plus one full arc at each end.
+		btVector3 start = transform.getOrigin();
+		btVector3	offsetHeight(0,0,0);
+		offsetHeight[upAxis] = halfHeight;	// half-extent along the cylinder axis; upAxis is used as an index without a range check
+		btVector3	offsetRadius(0,0,0);
+		offsetRadius[(upAxis+1)%3] = radius;	// radial offset along the axis that cyclically follows upAxis
+		drawLine(start+transform.getBasis() * (offsetHeight+offsetRadius),start+transform.getBasis() * (-offsetHeight+offsetRadius),color);
+		drawLine(start+transform.getBasis() * (offsetHeight-offsetRadius),start+transform.getBasis() * (-offsetHeight-offsetRadius),color);
+		// The two lines above sit at +radius and -radius along axis (upAxis+1)%3; none are drawn along (upAxis+2)%3.
+		// Drawing top and bottom caps of the cylinder
+		btVector3 yaxis(0,0,0);
+		yaxis[upAxis] = btScalar(1.0);	// passed to drawArc as the arc normal after rotation by the basis
+		btVector3 xaxis(0,0,0);
+		xaxis[(upAxis+1)%3] = btScalar(1.0);	// passed to drawArc as the arc's axis argument
+		drawArc(start-transform.getBasis()*(offsetHeight),transform.getBasis()*yaxis,transform.getBasis()*xaxis,radius,radius,0,SIMD_2_PI,color,false,btScalar(10.0));
+		drawArc(start+transform.getBasis()*(offsetHeight),transform.getBasis()*yaxis,transform.getBasis()*xaxis,radius,radius,0,SIMD_2_PI,color,false,btScalar(10.0));
+	}
+
+	virtual void drawCone(btScalar radius, btScalar height, int upAxis, const btTransform& transform, const btVector3& color)
+	{
+		// Debug-draws a cone outline: four lines from the apex to the base rim plus a full arc around the base.
+		btVector3 start = transform.getOrigin();
+		// The shape is centred on the transform origin: apex at +height/2 and base at -height/2 along upAxis.
+		btVector3	offsetHeight(0,0,0);
+		offsetHeight[upAxis] = height * btScalar(0.5);
+		btVector3	offsetRadius(0,0,0);
+		offsetRadius[(upAxis+1)%3] = radius;
+		btVector3	offset2Radius(0,0,0);
+		offset2Radius[(upAxis+2)%3] = radius;
+		// Slant lines from the apex to the base rim at +/-radius on each of the two axes other than upAxis.
+		drawLine(start+transform.getBasis() * (offsetHeight),start+transform.getBasis() * (-offsetHeight+offsetRadius),color);
+		drawLine(start+transform.getBasis() * (offsetHeight),start+transform.getBasis() * (-offsetHeight-offsetRadius),color);
+		drawLine(start+transform.getBasis() * (offsetHeight),start+transform.getBasis() * (-offsetHeight+offset2Radius),color);
+		drawLine(start+transform.getBasis() * (offsetHeight),start+transform.getBasis() * (-offsetHeight-offset2Radius),color);
+		// The arc step is wrapped in btScalar() like in drawCylinder, so no bare double literal is narrowed to btScalar.
+		// Drawing the base of the cone
+		btVector3 yaxis(0,0,0);
+		yaxis[upAxis] = btScalar(1.0);
+		btVector3 xaxis(0,0,0);
+		xaxis[(upAxis+1)%3] = btScalar(1.0);
+		drawArc(start-transform.getBasis()*(offsetHeight),transform.getBasis()*yaxis,transform.getBasis()*xaxis,radius,radius,0,SIMD_2_PI,color,false,btScalar(10.0));
+	}
+
+	virtual void drawPlane(const btVector3& planeNormal, btScalar planeConst, const btTransform& transform, const btVector3& color)
+	{	// Debug-draws a plane as two crossing line segments through the point planeNormal * planeConst.
+		btVector3 planeOrigin = planeNormal * planeConst;
+		btVector3 vec0,vec1;
+		btPlaneSpace1(planeNormal,vec0,vec1);	// presumably fills vec0/vec1 with two directions spanning the plane - confirm against btPlaneSpace1
+		btScalar vecLen = 100.f;	// fixed half-length of each drawn segment
+		btVector3 pt0 = planeOrigin + vec0*vecLen;
+		btVector3 pt1 = planeOrigin - vec0*vecLen;
+		btVector3 pt2 = planeOrigin + vec1*vecLen;
+		btVector3 pt3 = planeOrigin - vec1*vecLen;
+		drawLine(transform*pt0,transform*pt1,color);	// end points are built untransformed and mapped through transform only here
+		drawLine(transform*pt2,transform*pt3,color);
+	}
 };
 
 
-#endif //IDEBUG_DRAW__H
+#endif //BT_IDEBUG_DRAW__H
 
diff --git a/Engine/lib/bullet/src/LinearMath/btList.h b/Engine/lib/bullet/src/LinearMath/btList.h
index c87b47faf..eec80a706 100644
--- a/Engine/lib/bullet/src/LinearMath/btList.h
+++ b/Engine/lib/bullet/src/LinearMath/btList.h
@@ -14,8 +14,8 @@ subject to the following restrictions:
 
 
 
-#ifndef GEN_LIST_H
-#define GEN_LIST_H
+#ifndef BT_GEN_LIST_H
+#define BT_GEN_LIST_H
 
 class btGEN_Link {
 public:
@@ -67,7 +67,7 @@ private:
     btGEN_Link m_tail;
 };
 
-#endif
+#endif //BT_GEN_LIST_H
 
 
 
diff --git a/Engine/lib/bullet/src/LinearMath/btMatrix3x3.h b/Engine/lib/bullet/src/LinearMath/btMatrix3x3.h
index ea26bbbba..d4f5c95aa 100644
--- a/Engine/lib/bullet/src/LinearMath/btMatrix3x3.h
+++ b/Engine/lib/bullet/src/LinearMath/btMatrix3x3.h
@@ -13,162 +13,285 @@ subject to the following restrictions:
 */
 
 
-#ifndef btMatrix3x3_H
-#define btMatrix3x3_H
-
-#include "btScalar.h"
+#ifndef	BT_MATRIX3x3_H
+#define BT_MATRIX3x3_H
 
 #include "btVector3.h"
 #include "btQuaternion.h"
+#include <stdio.h>
 
+#ifdef BT_USE_SSE
+//const __m128 ATTRIBUTE_ALIGNED16(v2220) = {2.0f, 2.0f, 2.0f, 0.0f};
+const __m128 ATTRIBUTE_ALIGNED16(vMPPP) = {-0.0f, +0.0f, +0.0f, +0.0f};
+#endif
+
+#if defined(BT_USE_SSE) || defined(BT_USE_NEON)
+const btSimdFloat4 ATTRIBUTE_ALIGNED16(v1000) = {1.0f, 0.0f, 0.0f, 0.0f};
+const btSimdFloat4 ATTRIBUTE_ALIGNED16(v0100) = {0.0f, 1.0f, 0.0f, 0.0f};
+const btSimdFloat4 ATTRIBUTE_ALIGNED16(v0010) = {0.0f, 0.0f, 1.0f, 0.0f};
+#endif
+
+#ifdef BT_USE_DOUBLE_PRECISION
+#define btMatrix3x3Data	btMatrix3x3DoubleData 
+#else
+#define btMatrix3x3Data	btMatrix3x3FloatData
+#endif //BT_USE_DOUBLE_PRECISION
 
 
 /**@brief The btMatrix3x3 class implements a 3x3 rotation matrix, to perform linear algebra in combination with btQuaternion, btTransform and btVector3.
- * Make sure to only include a pure orthogonal matrix without scaling. */
-class btMatrix3x3 {
-	public:
-  /** @brief No initializaion constructor */
-		btMatrix3x3 () {}
-		
-//		explicit btMatrix3x3(const btScalar *m) { setFromOpenGLSubMatrix(m); }
-		
-  /**@brief Constructor from Quaternion */
-		explicit btMatrix3x3(const btQuaternion& q) { setRotation(q); }
-		/*
-		template <typename btScalar>
-		Matrix3x3(const btScalar& yaw, const btScalar& pitch, const btScalar& roll)
-		{ 
-			setEulerYPR(yaw, pitch, roll);
-		}
-		*/
-  /** @brief Constructor with row major formatting */
-		btMatrix3x3(const btScalar& xx, const btScalar& xy, const btScalar& xz,
-				  const btScalar& yx, const btScalar& yy, const btScalar& yz,
-				  const btScalar& zx, const btScalar& zy, const btScalar& zz)
-		{ 
-			setValue(xx, xy, xz, 
-					 yx, yy, yz, 
-					 zx, zy, zz);
-		}
-  /** @brief Copy constructor */
-		SIMD_FORCE_INLINE btMatrix3x3 (const btMatrix3x3& other)
-		{
-			m_el[0] = other.m_el[0];
-			m_el[1] = other.m_el[1];
-			m_el[2] = other.m_el[2];
-		}
-  /** @brief Assignment Operator */
-		SIMD_FORCE_INLINE btMatrix3x3& operator=(const btMatrix3x3& other)
-		{
-			m_el[0] = other.m_el[0];
-			m_el[1] = other.m_el[1];
-			m_el[2] = other.m_el[2];
-			return *this;
-		}
+* Make sure to only include a pure orthogonal matrix without scaling. */
+ATTRIBUTE_ALIGNED16(class) btMatrix3x3 {
 
-  /** @brief Get a column of the matrix as a vector 
-   *  @param i Column number 0 indexed */
-		SIMD_FORCE_INLINE btVector3 getColumn(int i) const
-		{
-			return btVector3(m_el[0][i],m_el[1][i],m_el[2][i]);
-		}
-		
+	///Data storage for the matrix, each vector is a row of the matrix
+	btVector3 m_el[3];
 
-  /** @brief Get a row of the matrix as a vector 
-   *  @param i Row number 0 indexed */
-		SIMD_FORCE_INLINE const btVector3& getRow(int i) const
-		{
-			btFullAssert(0 <= i && i < 3);
-			return m_el[i];
-		}
+public:
+	/** @brief No initialization constructor */
+	btMatrix3x3 () {}
 
-  /** @brief Get a mutable reference to a row of the matrix as a vector 
-   *  @param i Row number 0 indexed */
-		SIMD_FORCE_INLINE btVector3&  operator[](int i)
-		{ 
-			btFullAssert(0 <= i && i < 3);
-			return m_el[i]; 
-		}
+	//		explicit btMatrix3x3(const btScalar *m) { setFromOpenGLSubMatrix(m); }
+
+	/**@brief Constructor from Quaternion */
+	explicit btMatrix3x3(const btQuaternion& q) { setRotation(q); }
+	/*
+	template <typename btScalar>
+	Matrix3x3(const btScalar& yaw, const btScalar& pitch, const btScalar& roll)
+	{ 
+	setEulerYPR(yaw, pitch, roll);
+	}
+	*/
+	/** @brief Constructor with row major formatting */
+	btMatrix3x3(const btScalar& xx, const btScalar& xy, const btScalar& xz,
+		const btScalar& yx, const btScalar& yy, const btScalar& yz,
+		const btScalar& zx, const btScalar& zy, const btScalar& zz)
+	{ 
+		setValue(xx, xy, xz, 
+			yx, yy, yz, 
+			zx, zy, zz);
+	}
+
+#if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)
+	SIMD_FORCE_INLINE btMatrix3x3 (const btSimdFloat4 v0, const btSimdFloat4 v1, const btSimdFloat4 v2 ) 
+	{
+        m_el[0].mVec128 = v0;
+        m_el[1].mVec128 = v1;
+        m_el[2].mVec128 = v2;
+	}
+
+	SIMD_FORCE_INLINE btMatrix3x3 (const btVector3& v0, const btVector3& v1, const btVector3& v2 ) 
+	{
+        m_el[0] = v0;
+        m_el[1] = v1;
+        m_el[2] = v2;
+	}
+
+	// Copy constructor
+	SIMD_FORCE_INLINE btMatrix3x3(const btMatrix3x3& rhs)
+	{
+		m_el[0].mVec128 = rhs.m_el[0].mVec128;
+		m_el[1].mVec128 = rhs.m_el[1].mVec128;
+		m_el[2].mVec128 = rhs.m_el[2].mVec128;
+	}
+
+	// Assignment Operator
+	SIMD_FORCE_INLINE btMatrix3x3& operator=(const btMatrix3x3& m) 
+	{
+		m_el[0].mVec128 = m.m_el[0].mVec128;
+		m_el[1].mVec128 = m.m_el[1].mVec128;
+		m_el[2].mVec128 = m.m_el[2].mVec128;
 		
-  /** @brief Get a const reference to a row of the matrix as a vector 
-   *  @param i Row number 0 indexed */
-		SIMD_FORCE_INLINE const btVector3& operator[](int i) const
-		{
-			btFullAssert(0 <= i && i < 3);
-			return m_el[i]; 
-		}
-		
-  /** @brief Multiply by the target matrix on the right
-   *  @param m Rotation matrix to be applied 
-   * Equivilant to this = this * m */
-		btMatrix3x3& operator*=(const btMatrix3x3& m); 
-		
-  /** @brief Set from a carray of btScalars 
-   *  @param m A pointer to the beginning of an array of 9 btScalars */
+		return *this;
+	}
+
+#else
+
+	/** @brief Copy constructor */
+	SIMD_FORCE_INLINE btMatrix3x3 (const btMatrix3x3& other)
+	{
+		m_el[0] = other.m_el[0];
+		m_el[1] = other.m_el[1];
+		m_el[2] = other.m_el[2];
+	}
+    
+	/** @brief Assignment Operator */
+	SIMD_FORCE_INLINE btMatrix3x3& operator=(const btMatrix3x3& other)
+	{
+		m_el[0] = other.m_el[0];
+		m_el[1] = other.m_el[1];
+		m_el[2] = other.m_el[2];
+		return *this;
+	}
+
+#endif
+
+	/** @brief Get a column of the matrix as a vector 
+	*  @param i Column number 0 indexed */
+	SIMD_FORCE_INLINE btVector3 getColumn(int i) const
+	{
+		return btVector3(m_el[0][i],m_el[1][i],m_el[2][i]);
+	}
+
+
+	/** @brief Get a row of the matrix as a vector 
+	*  @param i Row number 0 indexed */
+	SIMD_FORCE_INLINE const btVector3& getRow(int i) const
+	{
+		btFullAssert(0 <= i && i < 3);
+		return m_el[i];
+	}
+
+	/** @brief Get a mutable reference to a row of the matrix as a vector 
+	*  @param i Row number 0 indexed */
+	SIMD_FORCE_INLINE btVector3&  operator[](int i)
+	{ 
+		btFullAssert(0 <= i && i < 3);
+		return m_el[i]; 
+	}
+
+	/** @brief Get a const reference to a row of the matrix as a vector 
+	*  @param i Row number 0 indexed */
+	SIMD_FORCE_INLINE const btVector3& operator[](int i) const
+	{
+		btFullAssert(0 <= i && i < 3);
+		return m_el[i]; 
+	}
+
+	/** @brief Multiply by the target matrix on the right
+	*  @param m Rotation matrix to be applied 
+	* Equivalent to this = this * m */
+	btMatrix3x3& operator*=(const btMatrix3x3& m); 
+
+	/** @brief Adds the target matrix on the right
+	*  @param m matrix to be applied 
+	* Equivalent to this = this + m */
+	btMatrix3x3& operator+=(const btMatrix3x3& m); 
+
+	/** @brief Subtracts the target matrix on the right
+	*  @param m matrix to be applied 
+	* Equivalent to this = this - m */
+	btMatrix3x3& operator-=(const btMatrix3x3& m); 
+
+	/** @brief Set from the rotational part of a 4x4 OpenGL matrix
+	*  @param m A pointer to the beginning of the array of scalars*/
 	void setFromOpenGLSubMatrix(const btScalar *m)
-		{
-			m_el[0].setValue(m[0],m[4],m[8]);
-			m_el[1].setValue(m[1],m[5],m[9]);
-			m_el[2].setValue(m[2],m[6],m[10]);
+	{
+		m_el[0].setValue(m[0],m[4],m[8]);
+		m_el[1].setValue(m[1],m[5],m[9]);
+		m_el[2].setValue(m[2],m[6],m[10]);
 
-		}
-  /** @brief Set the values of the matrix explicitly (row major)
-   *  @param xx Top left
-   *  @param xy Top Middle
-   *  @param xz Top Right
-   *  @param yx Middle Left
-   *  @param yy Middle Middle
-   *  @param yz Middle Right
-   *  @param zx Bottom Left
-   *  @param zy Bottom Middle
-   *  @param zz Bottom Right*/
-		void setValue(const btScalar& xx, const btScalar& xy, const btScalar& xz, 
-					  const btScalar& yx, const btScalar& yy, const btScalar& yz, 
-					  const btScalar& zx, const btScalar& zy, const btScalar& zz)
-		{
-			m_el[0].setValue(xx,xy,xz);
-			m_el[1].setValue(yx,yy,yz);
-			m_el[2].setValue(zx,zy,zz);
-		}
+	}
+	/** @brief Set the values of the matrix explicitly (row major)
+	*  @param xx Top left
+	*  @param xy Top Middle
+	*  @param xz Top Right
+	*  @param yx Middle Left
+	*  @param yy Middle Middle
+	*  @param yz Middle Right
+	*  @param zx Bottom Left
+	*  @param zy Bottom Middle
+	*  @param zz Bottom Right*/
+	void setValue(const btScalar& xx, const btScalar& xy, const btScalar& xz, 
+		const btScalar& yx, const btScalar& yy, const btScalar& yz, 
+		const btScalar& zx, const btScalar& zy, const btScalar& zz)
+	{
+		m_el[0].setValue(xx,xy,xz);
+		m_el[1].setValue(yx,yy,yz);
+		m_el[2].setValue(zx,zy,zz);
+	}
 
-  /** @brief Set the matrix from a quaternion
-   *  @param q The Quaternion to match */  
-		void setRotation(const btQuaternion& q) 
-		{
-			btScalar d = q.length2();
-			btFullAssert(d != btScalar(0.0));
-			btScalar s = btScalar(2.0) / d;
-			btScalar xs = q.x() * s,   ys = q.y() * s,   zs = q.z() * s;
-			btScalar wx = q.w() * xs,  wy = q.w() * ys,  wz = q.w() * zs;
-			btScalar xx = q.x() * xs,  xy = q.x() * ys,  xz = q.x() * zs;
-			btScalar yy = q.y() * ys,  yz = q.y() * zs,  zz = q.z() * zs;
-			setValue(btScalar(1.0) - (yy + zz), xy - wz, xz + wy,
-					 xy + wz, btScalar(1.0) - (xx + zz), yz - wx,
-					 xz - wy, yz + wx, btScalar(1.0) - (xx + yy));
-		}
-		
+	/** @brief Set the matrix from a quaternion
+	*  @param q The Quaternion to match */  
+	void setRotation(const btQuaternion& q) 
+	{
+		btScalar d = q.length2();
+		btFullAssert(d != btScalar(0.0));
+		btScalar s = btScalar(2.0) / d;
+    
+    #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
+        __m128	vs, Q = q.get128();
+		__m128i Qi = btCastfTo128i(Q);
+        __m128	Y, Z;
+        __m128	V1, V2, V3;
+        __m128	V11, V21, V31;
+        __m128	NQ = _mm_xor_ps(Q, btvMzeroMask);
+		__m128i NQi = btCastfTo128i(NQ);
+        
+        V1 = btCastiTo128f(_mm_shuffle_epi32 (Qi, BT_SHUFFLE(1,0,2,3)));	// Y X Z W
+		V2 = _mm_shuffle_ps(NQ, Q, BT_SHUFFLE(0,0,1,3));     // -X -X  Y  W
+        V3 = btCastiTo128f(_mm_shuffle_epi32 (Qi, BT_SHUFFLE(2,1,0,3)));	// Z Y X W
+        V1 = _mm_xor_ps(V1, vMPPP);	//	change the sign of the first element
+			
+        V11	= btCastiTo128f(_mm_shuffle_epi32 (Qi, BT_SHUFFLE(1,1,0,3)));	// Y Y X W
+		V21 = _mm_unpackhi_ps(Q, Q);                    //  Z  Z  W  W
+		V31 = _mm_shuffle_ps(Q, NQ, BT_SHUFFLE(0,2,0,3));	//  X  Z -X -W
 
-  /** @brief Set the matrix from euler angles using YPR around YXZ respectively
-   *  @param yaw Yaw about Y axis
-   *  @param pitch Pitch about X axis
-   *  @param roll Roll about Z axis 
-   */
-		void setEulerYPR(const btScalar& yaw, const btScalar& pitch, const btScalar& roll) 
-		{
-			setEulerZYX(roll, pitch, yaw);
-		}
+		V2 = V2 * V1;	//
+		V1 = V1 * V11;	//
+		V3 = V3 * V31;	//
+
+        V11 = _mm_shuffle_ps(NQ, Q, BT_SHUFFLE(2,3,1,3));	//	-Z -W  Y  W
+		V11 = V11 * V21;	//
+        V21 = _mm_xor_ps(V21, vMPPP);	//	change the sign of the first element
+		V31 = _mm_shuffle_ps(Q, NQ, BT_SHUFFLE(3,3,1,3));	//	 W  W -Y -W
+        V31 = _mm_xor_ps(V31, vMPPP);	//	change the sign of the first element
+		Y = btCastiTo128f(_mm_shuffle_epi32 (NQi, BT_SHUFFLE(3,2,0,3)));	// -W -Z -X -W
+		Z = btCastiTo128f(_mm_shuffle_epi32 (Qi, BT_SHUFFLE(1,0,1,3)));	//  Y  X  Y  W
+
+		vs = _mm_load_ss(&s);
+		V21 = V21 * Y;
+		V31 = V31 * Z;
+
+		V1 = V1 + V11;
+        V2 = V2 + V21;
+        V3 = V3 + V31;
+
+        vs = bt_splat3_ps(vs, 0);
+            //	s ready
+        V1 = V1 * vs;
+        V2 = V2 * vs;
+        V3 = V3 * vs;
+        
+        V1 = V1 + v1000;
+        V2 = V2 + v0100;
+        V3 = V3 + v0010;
+        
+        m_el[0] = V1; 
+        m_el[1] = V2;
+        m_el[2] = V3;
+    #else    
+		btScalar xs = q.x() * s,   ys = q.y() * s,   zs = q.z() * s;
+		btScalar wx = q.w() * xs,  wy = q.w() * ys,  wz = q.w() * zs;
+		btScalar xx = q.x() * xs,  xy = q.x() * ys,  xz = q.x() * zs;
+		btScalar yy = q.y() * ys,  yz = q.y() * zs,  zz = q.z() * zs;
+		setValue(
+            btScalar(1.0) - (yy + zz), xy - wz, xz + wy,
+			xy + wz, btScalar(1.0) - (xx + zz), yz - wx,
+			xz - wy, yz + wx, btScalar(1.0) - (xx + yy));
+	#endif
+    }
+
+
+	/** @brief Set the matrix from euler angles using YPR around YXZ respectively
+	*  @param yaw Yaw about Y axis
+	*  @param pitch Pitch about X axis
+	*  @param roll Roll about Z axis 
+	*/
+	void setEulerYPR(const btScalar& yaw, const btScalar& pitch, const btScalar& roll) 
+	{
+		setEulerZYX(roll, pitch, yaw);
+	}
 
 	/** @brief Set the matrix from euler angles YPR around ZYX axes
-	 * @param eulerX Roll about X axis
-         * @param eulerY Pitch around Y axis
-         * @param eulerZ Yaw aboud Z axis
-         * 
-	 * These angles are used to produce a rotation matrix. The euler
-	 * angles are applied in ZYX order. I.e a vector is first rotated 
-	 * about X then Y and then Z
-	 **/
+	* @param eulerX Roll about X axis
+	* @param eulerY Pitch around Y axis
+	* @param eulerZ Yaw about Z axis
+	* 
+	* These angles are used to produce a rotation matrix. The euler
+	* angles are applied in ZYX order. I.e a vector is first rotated 
+	* about X then Y and then Z
+	**/
 	void setEulerZYX(btScalar eulerX,btScalar eulerY,btScalar eulerZ) { 
-  ///@todo proposed to reverse this since it's labeled zyx but takes arguments xyz and it will match all other parts of the code
+		///@todo proposed to reverse this since it's labeled zyx but takes arguments xyz and it will match all other parts of the code
 		btScalar ci ( btCos(eulerX)); 
 		btScalar cj ( btCos(eulerY)); 
 		btScalar ch ( btCos(eulerZ)); 
@@ -179,227 +302,340 @@ class btMatrix3x3 {
 		btScalar cs = ci * sh; 
 		btScalar sc = si * ch; 
 		btScalar ss = si * sh;
-		
+
 		setValue(cj * ch, sj * sc - cs, sj * cc + ss,
-				 cj * sh, sj * ss + cc, sj * cs - sc, 
-	       			 -sj,      cj * si,      cj * ci);
+			cj * sh, sj * ss + cc, sj * cs - sc, 
+			-sj,      cj * si,      cj * ci);
 	}
 
-  /**@brief Set the matrix to the identity */
-		void setIdentity()
-		{ 
-			setValue(btScalar(1.0), btScalar(0.0), btScalar(0.0), 
-					 btScalar(0.0), btScalar(1.0), btScalar(0.0), 
-					 btScalar(0.0), btScalar(0.0), btScalar(1.0)); 
-		}
+	/**@brief Set the matrix to the identity */
+	void setIdentity()
+	{ 
+#if (defined(BT_USE_SSE_IN_API)&& defined (BT_USE_SSE)) || defined(BT_USE_NEON)
+			m_el[0] = v1000; 	// SIMD builds: rows come from the file-scope constants {1,0,0,0}, {0,1,0,0}, {0,0,1,0}
+			m_el[1] = v0100;
+			m_el[2] = v0010;
+#else
+		setValue(btScalar(1.0), btScalar(0.0), btScalar(0.0), 	// scalar builds: same matrix written element by element, row major
+			btScalar(0.0), btScalar(1.0), btScalar(0.0), 
+			btScalar(0.0), btScalar(0.0), btScalar(1.0)); 
+#endif
+	}
 
-		static const btMatrix3x3&	getIdentity()
+	static const btMatrix3x3&	getIdentity()
+	{	// Returns a const reference to a function-local static identity matrix, so callers share one instance.
+#if (defined(BT_USE_SSE_IN_API)&& defined (BT_USE_SSE)) || defined(BT_USE_NEON)
+        static const btMatrix3x3 
+        identityMatrix(v1000, v0100, v0010);	// SIMD builds: built from the three file-scope row constants
+#else
+		static const btMatrix3x3 
+        identityMatrix(
+            btScalar(1.0), btScalar(0.0), btScalar(0.0), 
+			btScalar(0.0), btScalar(1.0), btScalar(0.0), 
+			btScalar(0.0), btScalar(0.0), btScalar(1.0));
+#endif
+		return identityMatrix;
+	}
+
+	/**@brief Fill the rotational part of an OpenGL matrix and clear the shear/perspective
+	* @param m The array to be filled */
+	void getOpenGLSubMatrix(btScalar *m) const 
+	{	// Writes m[0]..m[11] only: m[4*c+r] = m_el[r][c] for r,c in 0..2, and m[4*c+3] = 0 (see the scalar branch).
+#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
+        __m128 v0 = m_el[0].mVec128;
+        __m128 v1 = m_el[1].mVec128;
+        __m128 v2 = m_el[2].mVec128;    //  x2 y2 z2 w2
+        __m128 *vm = (__m128 *)m;
+        __m128 vT;
+        // NOTE(review): the stores through vm below presumably require m to be 16-byte aligned - confirm with callers
+        v2 = _mm_and_ps(v2, btvFFF0fMask);  //  x2 y2 z2 0
+        
+        vT = _mm_unpackhi_ps(v0, v1);	//	z0 z1 * *
+        v0 = _mm_unpacklo_ps(v0, v1);	//	x0 x1 y0 y1
+
+        v1 = _mm_shuffle_ps(v0, v2, BT_SHUFFLE(2, 3, 1, 3) );	// y0 y1 y2 0
+        v0 = _mm_shuffle_ps(v0, v2, BT_SHUFFLE(0, 1, 0, 3) );	// x0 x1 x2 0
+        v2 = btCastdTo128f(_mm_move_sd(btCastfTo128d(v2), btCastfTo128d(vT)));	// z0 z1 z2 0
+        // Store the three transposed 4-lane vectors into m[0..3], m[4..7] and m[8..11].
+        vm[0] = v0;
+        vm[1] = v1;
+        vm[2] = v2;
+#elif defined(BT_USE_NEON)
+        // note: zeros the w channel. We can preserve it at the cost of two more vtrn instructions.
+        static const uint32x2_t zMask = (const uint32x2_t) {-1, 0 };
+        float32x4_t *vm = (float32x4_t *)m;
+        float32x4x2_t top = vtrnq_f32( m_el[0].mVec128, m_el[1].mVec128 );  // {x0 x1 z0 z1}, {y0 y1 w0 w1}
+        float32x2x2_t bl = vtrn_f32( vget_low_f32(m_el[2].mVec128), vdup_n_f32(0.0f) );       // {x2  0 }, {y2 0}
+        float32x4_t v0 = vcombine_f32( vget_low_f32(top.val[0]), bl.val[0] );
+        float32x4_t v1 = vcombine_f32( vget_low_f32(top.val[1]), bl.val[1] );
+        float32x2_t q = (float32x2_t) vand_u32( (uint32x2_t) vget_high_f32( m_el[2].mVec128), zMask );
+        float32x4_t v2 = vcombine_f32( vget_high_f32(top.val[0]), q );       // z0 z1 z2  0
+
+        vm[0] = v0;
+        vm[1] = v1;
+        vm[2] = v2;
+#else
+		m[0]  = btScalar(m_el[0].x()); 
+		m[1]  = btScalar(m_el[1].x());
+		m[2]  = btScalar(m_el[2].x());
+		m[3]  = btScalar(0.0); 
+		m[4]  = btScalar(m_el[0].y());
+		m[5]  = btScalar(m_el[1].y());
+		m[6]  = btScalar(m_el[2].y());
+		m[7]  = btScalar(0.0); 
+		m[8]  = btScalar(m_el[0].z()); 
+		m[9]  = btScalar(m_el[1].z());
+		m[10] = btScalar(m_el[2].z());
+		m[11] = btScalar(0.0); 
+#endif
+	}
+
+	/**@brief Get the matrix represented as a quaternion 
+	* @param q The quaternion which will be set */
+	void getRotation(btQuaternion& q) const
+	{
+#if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)
+        btScalar trace = m_el[0].x() + m_el[1].y() + m_el[2].z();
+        btScalar s, x;
+        
+        union {
+            btSimdFloat4 vec;
+            btScalar f[4];
+        } temp;
+        
+        if (trace > btScalar(0.0)) 
+        {
+            x = trace + btScalar(1.0);
+
+            temp.f[0]=m_el[2].y() - m_el[1].z();
+            temp.f[1]=m_el[0].z() - m_el[2].x();
+            temp.f[2]=m_el[1].x() - m_el[0].y();
+            temp.f[3]=x;
+            //temp.f[3]= s * btScalar(0.5);
+        } 
+        else 
+        {
+            int i, j, k;
+            if(m_el[0].x() < m_el[1].y()) 
+            { 
+                if( m_el[1].y() < m_el[2].z() )
+                    { i = 2; j = 0; k = 1; }
+                else
+                    { i = 1; j = 2; k = 0; }
+            }
+            else
+            {
+                if( m_el[0].x() < m_el[2].z())
+                    { i = 2; j = 0; k = 1; }
+                else
+                    { i = 0; j = 1; k = 2; }
+            }
+
+            x = m_el[i][i] - m_el[j][j] - m_el[k][k] + btScalar(1.0);
+
+            temp.f[3] = (m_el[k][j] - m_el[j][k]);
+            temp.f[j] = (m_el[j][i] + m_el[i][j]);
+            temp.f[k] = (m_el[k][i] + m_el[i][k]);
+            temp.f[i] = x;
+            //temp.f[i] = s * btScalar(0.5);
+        }
+
+        s = btSqrt(x);
+        q.set128(temp.vec);
+        s = btScalar(0.5) / s;
+
+        q *= s;
+#else    
+		btScalar trace = m_el[0].x() + m_el[1].y() + m_el[2].z();
+
+		btScalar temp[4];
+
+		if (trace > btScalar(0.0)) 
 		{
-			static const btMatrix3x3 identityMatrix(btScalar(1.0), btScalar(0.0), btScalar(0.0), 
-					 btScalar(0.0), btScalar(1.0), btScalar(0.0), 
-					 btScalar(0.0), btScalar(0.0), btScalar(1.0));
-			return identityMatrix;
-		}
+			btScalar s = btSqrt(trace + btScalar(1.0));
+			temp[3]=(s * btScalar(0.5));
+			s = btScalar(0.5) / s;
 
-  /**@brief Fill the values of the matrix into a 9 element array 
-   * @param m The array to be filled */
-		void getOpenGLSubMatrix(btScalar *m) const 
+			temp[0]=((m_el[2].y() - m_el[1].z()) * s);
+			temp[1]=((m_el[0].z() - m_el[2].x()) * s);
+			temp[2]=((m_el[1].x() - m_el[0].y()) * s);
+		} 
+		else 
 		{
-			m[0]  = btScalar(m_el[0].x()); 
-			m[1]  = btScalar(m_el[1].x());
-			m[2]  = btScalar(m_el[2].x());
-			m[3]  = btScalar(0.0); 
-			m[4]  = btScalar(m_el[0].y());
-			m[5]  = btScalar(m_el[1].y());
-			m[6]  = btScalar(m_el[2].y());
-			m[7]  = btScalar(0.0); 
-			m[8]  = btScalar(m_el[0].z()); 
-			m[9]  = btScalar(m_el[1].z());
-			m[10] = btScalar(m_el[2].z());
-			m[11] = btScalar(0.0); 
+			int i = m_el[0].x() < m_el[1].y() ? 
+				(m_el[1].y() < m_el[2].z() ? 2 : 1) :
+				(m_el[0].x() < m_el[2].z() ? 2 : 0); 
+			int j = (i + 1) % 3;  
+			int k = (i + 2) % 3;
+
+			btScalar s = btSqrt(m_el[i][i] - m_el[j][j] - m_el[k][k] + btScalar(1.0));
+			temp[i] = s * btScalar(0.5);
+			s = btScalar(0.5) / s;
+
+			temp[3] = (m_el[k][j] - m_el[j][k]) * s;
+			temp[j] = (m_el[j][i] + m_el[i][j]) * s;
+			temp[k] = (m_el[k][i] + m_el[i][k]) * s;
 		}
+		q.setValue(temp[0],temp[1],temp[2],temp[3]);
+#endif
+	}
 
-  /**@brief Get the matrix represented as a quaternion 
-   * @param q The quaternion which will be set */
-		void getRotation(btQuaternion& q) const
+	/**@brief Get the matrix represented as euler angles around YXZ, roundtrip with setEulerYPR
+	* @param yaw Yaw around Y axis
+	* @param pitch Pitch around X axis
+	* @param roll around Z axis */	
+	void getEulerYPR(btScalar& yaw, btScalar& pitch, btScalar& roll) const
+	{
+
+		// first use the normal calculus
+		yaw = btScalar(btAtan2(m_el[1].x(), m_el[0].x()));
+		pitch = btScalar(btAsin(-m_el[2].x()));
+		roll = btScalar(btAtan2(m_el[2].y(), m_el[2].z()));
+
+		// on pitch = +/-HalfPI
+		if (btFabs(pitch)==SIMD_HALF_PI)
 		{
-			btScalar trace = m_el[0].x() + m_el[1].y() + m_el[2].z();
-			btScalar temp[4];
-			
-			if (trace > btScalar(0.0)) 
-			{
-				btScalar s = btSqrt(trace + btScalar(1.0));
-				temp[3]=(s * btScalar(0.5));
-				s = btScalar(0.5) / s;
-				
-				temp[0]=((m_el[2].y() - m_el[1].z()) * s);
-				temp[1]=((m_el[0].z() - m_el[2].x()) * s);
-				temp[2]=((m_el[1].x() - m_el[0].y()) * s);
-			} 
-			else 
-			{
-				int i = m_el[0].x() < m_el[1].y() ? 
-					(m_el[1].y() < m_el[2].z() ? 2 : 1) :
-					(m_el[0].x() < m_el[2].z() ? 2 : 0); 
-				int j = (i + 1) % 3;  
-				int k = (i + 2) % 3;
-				
-				btScalar s = btSqrt(m_el[i][i] - m_el[j][j] - m_el[k][k] + btScalar(1.0));
-				temp[i] = s * btScalar(0.5);
-				s = btScalar(0.5) / s;
-				
-				temp[3] = (m_el[k][j] - m_el[j][k]) * s;
-				temp[j] = (m_el[j][i] + m_el[i][j]) * s;
-				temp[k] = (m_el[k][i] + m_el[i][k]) * s;
-			}
-			q.setValue(temp[0],temp[1],temp[2],temp[3]);
+			if (yaw>0)
+				yaw-=SIMD_PI;
+			else
+				yaw+=SIMD_PI;
+
+			if (roll>0)
+				roll-=SIMD_PI;
+			else
+				roll+=SIMD_PI;
 		}
+	};
 
-  /**@brief Get the matrix represented as euler angles around YXZ, roundtrip with setEulerYPR
-   * @param yaw Yaw around Y axis
-   * @param pitch Pitch around X axis
-   * @param roll around Z axis */	
-		void getEulerYPR(btScalar& yaw, btScalar& pitch, btScalar& roll) const
+
+	/**@brief Get the matrix represented as euler angles around ZYX
+	* @param yaw Yaw around Z axis
+	* @param pitch Pitch around Y axis
+	* @param roll Roll around X axis
+	* @param solution_number Selects which of the two possible solutions is returned (1 or 2) */	
+	void getEulerZYX(btScalar& yaw, btScalar& pitch, btScalar& roll, unsigned int solution_number = 1) const
+	{
+		struct Euler
 		{
-			
-			// first use the normal calculus
-			yaw = btScalar(btAtan2(m_el[1].x(), m_el[0].x()));
-			pitch = btScalar(btAsin(-m_el[2].x()));
-			roll = btScalar(btAtan2(m_el[2].y(), m_el[2].z()));
-
-			// on pitch = +/-HalfPI
-			if (btFabs(pitch)==SIMD_HALF_PI)
-			{
-				if (yaw>0)
-					yaw-=SIMD_PI;
-				else
-					yaw+=SIMD_PI;
-
-				if (roll>0)
-					roll-=SIMD_PI;
-				else
-					roll+=SIMD_PI;
-			}
+			btScalar yaw;
+			btScalar pitch;
+			btScalar roll;
 		};
 
+		Euler euler_out;
+		Euler euler_out2; //second solution
+		//get the pointer to the raw data
 
-  /**@brief Get the matrix represented as euler angles around ZYX
-   * @param yaw Yaw around X axis
-   * @param pitch Pitch around Y axis
-   * @param roll around X axis 
-   * @param solution_number Which solution of two possible solutions ( 1 or 2) are possible values*/	
-  void getEulerZYX(btScalar& yaw, btScalar& pitch, btScalar& roll, unsigned int solution_number = 1) const
-  {
-    struct Euler{btScalar yaw, pitch, roll;};
-    Euler euler_out;
-    Euler euler_out2; //second solution
-    //get the pointer to the raw data
-    
-    // Check that pitch is not at a singularity
-    if (btFabs(m_el[2].x()) >= 1)
-    {
-      euler_out.yaw = 0;
-      euler_out2.yaw = 0;
-	
-      // From difference of angles formula
-      btScalar delta = btAtan2(m_el[0].x(),m_el[0].z());
-      if (m_el[2].x() > 0)  //gimbal locked up
-      {
-        euler_out.pitch = SIMD_PI / btScalar(2.0);
-        euler_out2.pitch = SIMD_PI / btScalar(2.0);
-        euler_out.roll = euler_out.pitch + delta;
-        euler_out2.roll = euler_out.pitch + delta;
-      }
-      else // gimbal locked down
-      {
-        euler_out.pitch = -SIMD_PI / btScalar(2.0);
-        euler_out2.pitch = -SIMD_PI / btScalar(2.0);
-        euler_out.roll = -euler_out.pitch + delta;
-        euler_out2.roll = -euler_out.pitch + delta;
-      }
-    }
-    else
-    {
-      euler_out.pitch = - btAsin(m_el[2].x());
-      euler_out2.pitch = SIMD_PI - euler_out.pitch;
-	
-      euler_out.roll = btAtan2(m_el[2].y()/btCos(euler_out.pitch), 
-			       m_el[2].z()/btCos(euler_out.pitch));
-      euler_out2.roll = btAtan2(m_el[2].y()/btCos(euler_out2.pitch), 
+		// Check that pitch is not at a singularity
+		if (btFabs(m_el[2].x()) >= 1)
+		{
+			euler_out.yaw = 0;
+			euler_out2.yaw = 0;
+
+			// From difference of angles formula
+			btScalar delta = btAtan2(m_el[0].x(),m_el[0].z());
+			if (m_el[2].x() > 0)  //gimbal locked up
+			{
+				euler_out.pitch = SIMD_PI / btScalar(2.0);
+				euler_out2.pitch = SIMD_PI / btScalar(2.0);
+				euler_out.roll = euler_out.pitch + delta;
+				euler_out2.roll = euler_out.pitch + delta;
+			}
+			else // gimbal locked down
+			{
+				euler_out.pitch = -SIMD_PI / btScalar(2.0);
+				euler_out2.pitch = -SIMD_PI / btScalar(2.0);
+				euler_out.roll = -euler_out.pitch + delta;
+				euler_out2.roll = -euler_out.pitch + delta;
+			}
+		}
+		else
+		{
+			euler_out.pitch = - btAsin(m_el[2].x());
+			euler_out2.pitch = SIMD_PI - euler_out.pitch;
+
+			euler_out.roll = btAtan2(m_el[2].y()/btCos(euler_out.pitch), 
+				m_el[2].z()/btCos(euler_out.pitch));
+			euler_out2.roll = btAtan2(m_el[2].y()/btCos(euler_out2.pitch), 
 				m_el[2].z()/btCos(euler_out2.pitch));
-	
-      euler_out.yaw = btAtan2(m_el[1].x()/btCos(euler_out.pitch), 
-			      m_el[0].x()/btCos(euler_out.pitch));
-      euler_out2.yaw = btAtan2(m_el[1].x()/btCos(euler_out2.pitch), 
-                               m_el[0].x()/btCos(euler_out2.pitch));
-    }
-    
-    if (solution_number == 1)
-    { 
-		yaw = euler_out.yaw; 
-		pitch = euler_out.pitch;
-		roll = euler_out.roll;
-    }
-    else
-    { 
-		yaw = euler_out2.yaw; 
-		pitch = euler_out2.pitch;
-		roll = euler_out2.roll;
-    }
-  }
 
-  /**@brief Create a scaled copy of the matrix 
-   * @param s Scaling vector The elements of the vector will scale each column */
-		
-		btMatrix3x3 scaled(const btVector3& s) const
-		{
-			return btMatrix3x3(m_el[0].x() * s.x(), m_el[0].y() * s.y(), m_el[0].z() * s.z(),
-									 m_el[1].x() * s.x(), m_el[1].y() * s.y(), m_el[1].z() * s.z(),
-									 m_el[2].x() * s.x(), m_el[2].y() * s.y(), m_el[2].z() * s.z());
+			euler_out.yaw = btAtan2(m_el[1].x()/btCos(euler_out.pitch), 
+				m_el[0].x()/btCos(euler_out.pitch));
+			euler_out2.yaw = btAtan2(m_el[1].x()/btCos(euler_out2.pitch), 
+				m_el[0].x()/btCos(euler_out2.pitch));
 		}
 
-  /**@brief Return the determinant of the matrix */
-		btScalar            determinant() const;
-  /**@brief Return the adjoint of the matrix */
-		btMatrix3x3 adjoint() const;
-  /**@brief Return the matrix with all values non negative */
-		btMatrix3x3 absolute() const;
-  /**@brief Return the transpose of the matrix */
-		btMatrix3x3 transpose() const;
-  /**@brief Return the inverse of the matrix */
-		btMatrix3x3 inverse() const; 
-		
-		btMatrix3x3 transposeTimes(const btMatrix3x3& m) const;
-		btMatrix3x3 timesTranspose(const btMatrix3x3& m) const;
-		
-		SIMD_FORCE_INLINE btScalar tdotx(const btVector3& v) const 
-		{
-			return m_el[0].x() * v.x() + m_el[1].x() * v.y() + m_el[2].x() * v.z();
+		if (solution_number == 1)
+		{ 
+			yaw = euler_out.yaw; 
+			pitch = euler_out.pitch;
+			roll = euler_out.roll;
 		}
-		SIMD_FORCE_INLINE btScalar tdoty(const btVector3& v) const 
-		{
-			return m_el[0].y() * v.x() + m_el[1].y() * v.y() + m_el[2].y() * v.z();
+		else
+		{ 
+			yaw = euler_out2.yaw; 
+			pitch = euler_out2.pitch;
+			roll = euler_out2.roll;
 		}
-		SIMD_FORCE_INLINE btScalar tdotz(const btVector3& v) const 
-		{
-			return m_el[0].z() * v.x() + m_el[1].z() * v.y() + m_el[2].z() * v.z();
-		}
-		
+	}
 
-  /**@brief diagonalizes this matrix by the Jacobi method.
-   * @param rot stores the rotation from the coordinate system in which the matrix is diagonal to the original
-   * coordinate system, i.e., old_this = rot * new_this * rot^T. 
-   * @param threshold See iteration
-   * @param iteration The iteration stops when all off-diagonal elements are less than the threshold multiplied 
-   * by the sum of the absolute values of the diagonal, or when maxSteps have been executed. 
-   * 
-   * Note that this matrix is assumed to be symmetric. 
-   */
-		void diagonalize(btMatrix3x3& rot, btScalar threshold, int maxSteps)
+	/**@brief Create a scaled copy of the matrix 
+	* @param s Scaling vector The elements of the vector will scale each column */
+
+	btMatrix3x3 scaled(const btVector3& s) const
+	{
+#if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)
+		return btMatrix3x3(m_el[0] * s, m_el[1] * s, m_el[2] * s);
+#else		
+		return btMatrix3x3(
+            m_el[0].x() * s.x(), m_el[0].y() * s.y(), m_el[0].z() * s.z(),
+			m_el[1].x() * s.x(), m_el[1].y() * s.y(), m_el[1].z() * s.z(),
+			m_el[2].x() * s.x(), m_el[2].y() * s.y(), m_el[2].z() * s.z());
+#endif
+	}
+
+	/**@brief Return the determinant of the matrix */
+	btScalar            determinant() const;
+	/**@brief Return the adjoint of the matrix */
+	btMatrix3x3 adjoint() const;
+	/**@brief Return the matrix with all values non negative */
+	btMatrix3x3 absolute() const;
+	/**@brief Return the transpose of the matrix */
+	btMatrix3x3 transpose() const;
+	/**@brief Return the inverse of the matrix */
+	btMatrix3x3 inverse() const; 
+
+	btMatrix3x3 transposeTimes(const btMatrix3x3& m) const;
+	btMatrix3x3 timesTranspose(const btMatrix3x3& m) const;
+
+	SIMD_FORCE_INLINE btScalar tdotx(const btVector3& v) const 
+	{
+		return m_el[0].x() * v.x() + m_el[1].x() * v.y() + m_el[2].x() * v.z();
+	}
+	SIMD_FORCE_INLINE btScalar tdoty(const btVector3& v) const 
+	{
+		return m_el[0].y() * v.x() + m_el[1].y() * v.y() + m_el[2].y() * v.z();
+	}
+	SIMD_FORCE_INLINE btScalar tdotz(const btVector3& v) const 
+	{
+		return m_el[0].z() * v.x() + m_el[1].z() * v.y() + m_el[2].z() * v.z();
+	}
+
+
+	/**@brief diagonalizes this matrix by the Jacobi method.
+	* @param rot stores the rotation from the coordinate system in which the matrix is diagonal to the original
+	* coordinate system, i.e., old_this = rot * new_this * rot^T. 
+	* @param threshold See maxSteps
+	* @param maxSteps The iteration stops when all off-diagonal elements are less than the threshold multiplied 
+	* by the sum of the absolute values of the diagonal, or when maxSteps have been executed. 
+	* 
+	* Note that this matrix is assumed to be symmetric. 
+	*/
+	void diagonalize(btMatrix3x3& rot, btScalar threshold, int maxSteps)
+	{
+		rot.setIdentity();
+		for (int step = maxSteps; step > 0; step--)
 		{
-		 rot.setIdentity();
-		 for (int step = maxSteps; step > 0; step--)
-		 {
 			// find off-diagonal element [p][q] with largest magnitude
 			int p = 0;
 			int q = 1;
@@ -408,27 +644,27 @@ class btMatrix3x3 {
 			btScalar v = btFabs(m_el[0][2]);
 			if (v > max)
 			{
-			   q = 2;
-			   r = 1;
-			   max = v;
+				q = 2;
+				r = 1;
+				max = v;
 			}
 			v = btFabs(m_el[1][2]);
 			if (v > max)
 			{
-			   p = 1;
-			   q = 2;
-			   r = 0;
-			   max = v;
+				p = 1;
+				q = 2;
+				r = 0;
+				max = v;
 			}
 
 			btScalar t = threshold * (btFabs(m_el[0][0]) + btFabs(m_el[1][1]) + btFabs(m_el[2][2]));
 			if (max <= t)
 			{
-			   if (max <= SIMD_EPSILON * t)
-			   {
-				  return;
-			   }
-			   step = 1;
+				if (max <= SIMD_EPSILON * t)
+				{
+					return;
+				}
+				step = 1;
 			}
 
 			// compute Jacobi rotation J which leads to a zero for element [p][q] 
@@ -439,17 +675,17 @@ class btMatrix3x3 {
 			btScalar sin;
 			if (theta2 * theta2 < btScalar(10 / SIMD_EPSILON))
 			{
-			   t = (theta >= 0) ? 1 / (theta + btSqrt(1 + theta2))
-										: 1 / (theta - btSqrt(1 + theta2));
-			   cos = 1 / btSqrt(1 + t * t);
-			   sin = cos * t;
+				t = (theta >= 0) ? 1 / (theta + btSqrt(1 + theta2))
+					: 1 / (theta - btSqrt(1 + theta2));
+				cos = 1 / btSqrt(1 + t * t);
+				sin = cos * t;
 			}
 			else
 			{
-			   // approximation for large theta-value, i.e., a nearly diagonal matrix
-			   t = 1 / (theta * (2 + btScalar(0.5) / theta2));
-			   cos = 1 - btScalar(0.5) * t * t;
-			   sin = cos * t;
+				// approximation for large theta-value, i.e., a nearly diagonal matrix
+				t = 1 / (theta * (2 + btScalar(0.5) / theta2));
+				cos = 1 - btScalar(0.5) * t * t;
+				sin = cos * t;
 			}
 
 			// apply rotation to matrix (this = J^T * this * J)
@@ -464,155 +700,663 @@ class btMatrix3x3 {
 			// apply rotation to rot (rot = rot * J)
 			for (int i = 0; i < 3; i++)
 			{
-			   btVector3& row = rot[i];
-			   mrp = row[p];
-			   mrq = row[q];
-			   row[p] = cos * mrp - sin * mrq;
-			   row[q] = cos * mrq + sin * mrp;
+				btVector3& row = rot[i];
+				mrp = row[p];
+				mrq = row[q];
+				row[p] = cos * mrp - sin * mrq;
+				row[q] = cos * mrq + sin * mrp;
 			}
-		 }
 		}
-
-
-		
-	protected:
-  /**@brief Calculate the matrix cofactor 
-   * @param r1 The first row to use for calculating the cofactor
-   * @param c1 The first column to use for calculating the cofactor
-   * @param r1 The second row to use for calculating the cofactor
-   * @param c1 The second column to use for calculating the cofactor
-   * See http://en.wikipedia.org/wiki/Cofactor_(linear_algebra) for more details
-   */
-		btScalar cofac(int r1, int c1, int r2, int c2) const 
-		{
-			return m_el[r1][c1] * m_el[r2][c2] - m_el[r1][c2] * m_el[r2][c1];
-		}
-  ///Data storage for the matrix, each vector is a row of the matrix
-		btVector3 m_el[3];
-	};
-	
-	SIMD_FORCE_INLINE btMatrix3x3& 
-	btMatrix3x3::operator*=(const btMatrix3x3& m)
-	{
-		setValue(m.tdotx(m_el[0]), m.tdoty(m_el[0]), m.tdotz(m_el[0]),
-				 m.tdotx(m_el[1]), m.tdoty(m_el[1]), m.tdotz(m_el[1]),
-				 m.tdotx(m_el[2]), m.tdoty(m_el[2]), m.tdotz(m_el[2]));
-		return *this;
-	}
-	
-	SIMD_FORCE_INLINE btScalar 
-	btMatrix3x3::determinant() const
-	{ 
-		return btTriple((*this)[0], (*this)[1], (*this)[2]);
-	}
-	
-
-	SIMD_FORCE_INLINE btMatrix3x3 
-	btMatrix3x3::absolute() const
-	{
-		return btMatrix3x3(
-			btFabs(m_el[0].x()), btFabs(m_el[0].y()), btFabs(m_el[0].z()),
-			btFabs(m_el[1].x()), btFabs(m_el[1].y()), btFabs(m_el[1].z()),
-			btFabs(m_el[2].x()), btFabs(m_el[2].y()), btFabs(m_el[2].z()));
 	}
 
-	SIMD_FORCE_INLINE btMatrix3x3 
-	btMatrix3x3::transpose() const 
+
+
+
+	/**@brief Calculate the matrix cofactor 
+	* @param r1 The first row to use for calculating the cofactor
+	* @param c1 The first column to use for calculating the cofactor
+	* @param r2 The second row to use for calculating the cofactor
+	* @param c2 The second column to use for calculating the cofactor
+	* See http://en.wikipedia.org/wiki/Cofactor_(linear_algebra) for more details
+	*/
+	btScalar cofac(int r1, int c1, int r2, int c2) const 
 	{
-		return btMatrix3x3(m_el[0].x(), m_el[1].x(), m_el[2].x(),
-								 m_el[0].y(), m_el[1].y(), m_el[2].y(),
-								 m_el[0].z(), m_el[1].z(), m_el[2].z());
-	}
-	
-	SIMD_FORCE_INLINE btMatrix3x3 
-	btMatrix3x3::adjoint() const 
-	{
-		return btMatrix3x3(cofac(1, 1, 2, 2), cofac(0, 2, 2, 1), cofac(0, 1, 1, 2),
-								 cofac(1, 2, 2, 0), cofac(0, 0, 2, 2), cofac(0, 2, 1, 0),
-								 cofac(1, 0, 2, 1), cofac(0, 1, 2, 0), cofac(0, 0, 1, 1));
-	}
-	
-	SIMD_FORCE_INLINE btMatrix3x3 
-	btMatrix3x3::inverse() const
-	{
-		btVector3 co(cofac(1, 1, 2, 2), cofac(1, 2, 2, 0), cofac(1, 0, 2, 1));
-		btScalar det = (*this)[0].dot(co);
-		btFullAssert(det != btScalar(0.0));
-		btScalar s = btScalar(1.0) / det;
-		return btMatrix3x3(co.x() * s, cofac(0, 2, 2, 1) * s, cofac(0, 1, 1, 2) * s,
-								 co.y() * s, cofac(0, 0, 2, 2) * s, cofac(0, 2, 1, 0) * s,
-								 co.z() * s, cofac(0, 1, 2, 0) * s, cofac(0, 0, 1, 1) * s);
-	}
-	
-	SIMD_FORCE_INLINE btMatrix3x3 
-	btMatrix3x3::transposeTimes(const btMatrix3x3& m) const
-	{
-		return btMatrix3x3(
-			m_el[0].x() * m[0].x() + m_el[1].x() * m[1].x() + m_el[2].x() * m[2].x(),
-			m_el[0].x() * m[0].y() + m_el[1].x() * m[1].y() + m_el[2].x() * m[2].y(),
-			m_el[0].x() * m[0].z() + m_el[1].x() * m[1].z() + m_el[2].x() * m[2].z(),
-			m_el[0].y() * m[0].x() + m_el[1].y() * m[1].x() + m_el[2].y() * m[2].x(),
-			m_el[0].y() * m[0].y() + m_el[1].y() * m[1].y() + m_el[2].y() * m[2].y(),
-			m_el[0].y() * m[0].z() + m_el[1].y() * m[1].z() + m_el[2].y() * m[2].z(),
-			m_el[0].z() * m[0].x() + m_el[1].z() * m[1].x() + m_el[2].z() * m[2].x(),
-			m_el[0].z() * m[0].y() + m_el[1].z() * m[1].y() + m_el[2].z() * m[2].y(),
-			m_el[0].z() * m[0].z() + m_el[1].z() * m[1].z() + m_el[2].z() * m[2].z());
-	}
-	
-	SIMD_FORCE_INLINE btMatrix3x3 
-	btMatrix3x3::timesTranspose(const btMatrix3x3& m) const
-	{
-		return btMatrix3x3(
-			m_el[0].dot(m[0]), m_el[0].dot(m[1]), m_el[0].dot(m[2]),
-			m_el[1].dot(m[0]), m_el[1].dot(m[1]), m_el[1].dot(m[2]),
-			m_el[2].dot(m[0]), m_el[2].dot(m[1]), m_el[2].dot(m[2]));
-		
+		return m_el[r1][c1] * m_el[r2][c2] - m_el[r1][c2] * m_el[r2][c1];
 	}
 
-	SIMD_FORCE_INLINE btVector3 
-	operator*(const btMatrix3x3& m, const btVector3& v) 
-	{
-		return btVector3(m[0].dot(v), m[1].dot(v), m[2].dot(v));
-	}
-	
+	void	serialize(struct	btMatrix3x3Data& dataOut) const;
 
-	SIMD_FORCE_INLINE btVector3
-	operator*(const btVector3& v, const btMatrix3x3& m)
-	{
-		return btVector3(m.tdotx(v), m.tdoty(v), m.tdotz(v));
-	}
+	void	serializeFloat(struct	btMatrix3x3FloatData& dataOut) const;
 
-	SIMD_FORCE_INLINE btMatrix3x3 
-	operator*(const btMatrix3x3& m1, const btMatrix3x3& m2)
-	{
-		return btMatrix3x3(
-			m2.tdotx( m1[0]), m2.tdoty( m1[0]), m2.tdotz( m1[0]),
-			m2.tdotx( m1[1]), m2.tdoty( m1[1]), m2.tdotz( m1[1]),
-			m2.tdotx( m1[2]), m2.tdoty( m1[2]), m2.tdotz( m1[2]));
-	}
+	void	deSerialize(const struct	btMatrix3x3Data& dataIn);
+
+	void	deSerializeFloat(const struct	btMatrix3x3FloatData& dataIn);
+
+	void	deSerializeDouble(const struct	btMatrix3x3DoubleData& dataIn);
+
+};
+
+
+SIMD_FORCE_INLINE btMatrix3x3& 
+btMatrix3x3::operator*=(const btMatrix3x3& m)
+{
+#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
+    __m128 rv00, rv01, rv02;
+    __m128 rv10, rv11, rv12;
+    __m128 rv20, rv21, rv22;
+    __m128 mv0, mv1, mv2;
+
+    rv02 = m_el[0].mVec128;
+    rv12 = m_el[1].mVec128;
+    rv22 = m_el[2].mVec128;
+
+    mv0 = _mm_and_ps(m[0].mVec128, btvFFF0fMask); 
+    mv1 = _mm_and_ps(m[1].mVec128, btvFFF0fMask); 
+    mv2 = _mm_and_ps(m[2].mVec128, btvFFF0fMask); 
+    
+    // rv0
+    rv00 = bt_splat_ps(rv02, 0);
+    rv01 = bt_splat_ps(rv02, 1);
+    rv02 = bt_splat_ps(rv02, 2);
+    
+    rv00 = _mm_mul_ps(rv00, mv0);
+    rv01 = _mm_mul_ps(rv01, mv1);
+    rv02 = _mm_mul_ps(rv02, mv2);
+    
+    // rv1
+    rv10 = bt_splat_ps(rv12, 0);
+    rv11 = bt_splat_ps(rv12, 1);
+    rv12 = bt_splat_ps(rv12, 2);
+    
+    rv10 = _mm_mul_ps(rv10, mv0);
+    rv11 = _mm_mul_ps(rv11, mv1);
+    rv12 = _mm_mul_ps(rv12, mv2);
+    
+    // rv2
+    rv20 = bt_splat_ps(rv22, 0);
+    rv21 = bt_splat_ps(rv22, 1);
+    rv22 = bt_splat_ps(rv22, 2);
+    
+    rv20 = _mm_mul_ps(rv20, mv0);
+    rv21 = _mm_mul_ps(rv21, mv1);
+    rv22 = _mm_mul_ps(rv22, mv2);
+
+    rv00 = _mm_add_ps(rv00, rv01);
+    rv10 = _mm_add_ps(rv10, rv11);
+    rv20 = _mm_add_ps(rv20, rv21);
+
+    m_el[0].mVec128 = _mm_add_ps(rv00, rv02);
+    m_el[1].mVec128 = _mm_add_ps(rv10, rv12);
+    m_el[2].mVec128 = _mm_add_ps(rv20, rv22);
+
+#elif defined(BT_USE_NEON)
+
+    float32x4_t rv0, rv1, rv2;
+    float32x4_t v0, v1, v2;
+    float32x4_t mv0, mv1, mv2;
+
+    v0 = m_el[0].mVec128;
+    v1 = m_el[1].mVec128;
+    v2 = m_el[2].mVec128;
+
+    mv0 = (float32x4_t) vandq_s32((int32x4_t)m[0].mVec128, btvFFF0Mask); 
+    mv1 = (float32x4_t) vandq_s32((int32x4_t)m[1].mVec128, btvFFF0Mask); 
+    mv2 = (float32x4_t) vandq_s32((int32x4_t)m[2].mVec128, btvFFF0Mask); 
+    
+    rv0 = vmulq_lane_f32(mv0, vget_low_f32(v0), 0);
+    rv1 = vmulq_lane_f32(mv0, vget_low_f32(v1), 0);
+    rv2 = vmulq_lane_f32(mv0, vget_low_f32(v2), 0);
+    
+    rv0 = vmlaq_lane_f32(rv0, mv1, vget_low_f32(v0), 1);
+    rv1 = vmlaq_lane_f32(rv1, mv1, vget_low_f32(v1), 1);
+    rv2 = vmlaq_lane_f32(rv2, mv1, vget_low_f32(v2), 1);
+    
+    rv0 = vmlaq_lane_f32(rv0, mv2, vget_high_f32(v0), 0);
+    rv1 = vmlaq_lane_f32(rv1, mv2, vget_high_f32(v1), 0);
+    rv2 = vmlaq_lane_f32(rv2, mv2, vget_high_f32(v2), 0);
+
+    m_el[0].mVec128 = rv0;
+    m_el[1].mVec128 = rv1;
+    m_el[2].mVec128 = rv2;
+#else    
+	setValue(
+        m.tdotx(m_el[0]), m.tdoty(m_el[0]), m.tdotz(m_el[0]),
+		m.tdotx(m_el[1]), m.tdoty(m_el[1]), m.tdotz(m_el[1]),
+		m.tdotx(m_el[2]), m.tdoty(m_el[2]), m.tdotz(m_el[2]));
+#endif
+	return *this;
+}
+
+SIMD_FORCE_INLINE btMatrix3x3& 
+btMatrix3x3::operator+=(const btMatrix3x3& m)
+{
+#if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)
+    m_el[0].mVec128 = m_el[0].mVec128 + m.m_el[0].mVec128;
+    m_el[1].mVec128 = m_el[1].mVec128 + m.m_el[1].mVec128;
+    m_el[2].mVec128 = m_el[2].mVec128 + m.m_el[2].mVec128;
+#else
+	setValue(
+		m_el[0][0]+m.m_el[0][0], 
+		m_el[0][1]+m.m_el[0][1],
+		m_el[0][2]+m.m_el[0][2],
+		m_el[1][0]+m.m_el[1][0], 
+		m_el[1][1]+m.m_el[1][1],
+		m_el[1][2]+m.m_el[1][2],
+		m_el[2][0]+m.m_el[2][0], 
+		m_el[2][1]+m.m_el[2][1],
+		m_el[2][2]+m.m_el[2][2]);
+#endif
+	return *this;
+}
+
+SIMD_FORCE_INLINE btMatrix3x3
+operator*(const btMatrix3x3& m, const btScalar & k)
+{
+#if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
+    __m128 vk = bt_splat_ps(_mm_load_ss((float *)&k), 0x80);
+    return btMatrix3x3(
+                _mm_mul_ps(m[0].mVec128, vk), 
+                _mm_mul_ps(m[1].mVec128, vk), 
+                _mm_mul_ps(m[2].mVec128, vk)); 
+#elif defined(BT_USE_NEON)
+    return btMatrix3x3(
+                vmulq_n_f32(m[0].mVec128, k),
+                vmulq_n_f32(m[1].mVec128, k),
+                vmulq_n_f32(m[2].mVec128, k)); 
+#else
+	return btMatrix3x3(
+		m[0].x()*k,m[0].y()*k,m[0].z()*k,
+		m[1].x()*k,m[1].y()*k,m[1].z()*k,
+		m[2].x()*k,m[2].y()*k,m[2].z()*k);
+#endif
+}
+
+SIMD_FORCE_INLINE btMatrix3x3 
+operator+(const btMatrix3x3& m1, const btMatrix3x3& m2)
+{
+#if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)
+	return btMatrix3x3(
+        m1[0].mVec128 + m2[0].mVec128,
+        m1[1].mVec128 + m2[1].mVec128,
+        m1[2].mVec128 + m2[2].mVec128);
+#else
+	return btMatrix3x3(
+        m1[0][0]+m2[0][0], 
+        m1[0][1]+m2[0][1],
+        m1[0][2]+m2[0][2],
+        
+        m1[1][0]+m2[1][0], 
+        m1[1][1]+m2[1][1],
+        m1[1][2]+m2[1][2],
+        
+        m1[2][0]+m2[2][0], 
+        m1[2][1]+m2[2][1],
+        m1[2][2]+m2[2][2]);
+#endif    
+}
+
+SIMD_FORCE_INLINE btMatrix3x3 
+operator-(const btMatrix3x3& m1, const btMatrix3x3& m2)
+{
+#if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)
+	return btMatrix3x3(
+        m1[0].mVec128 - m2[0].mVec128,
+        m1[1].mVec128 - m2[1].mVec128,
+        m1[2].mVec128 - m2[2].mVec128);
+#else
+	return btMatrix3x3(
+        m1[0][0]-m2[0][0], 
+        m1[0][1]-m2[0][1],
+        m1[0][2]-m2[0][2],
+        
+        m1[1][0]-m2[1][0], 
+        m1[1][1]-m2[1][1],
+        m1[1][2]-m2[1][2],
+        
+        m1[2][0]-m2[2][0], 
+        m1[2][1]-m2[2][1],
+        m1[2][2]-m2[2][2]);
+#endif
+}
+
+
+SIMD_FORCE_INLINE btMatrix3x3& 
+btMatrix3x3::operator-=(const btMatrix3x3& m)
+{
+#if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)
+    m_el[0].mVec128 = m_el[0].mVec128 - m.m_el[0].mVec128;
+    m_el[1].mVec128 = m_el[1].mVec128 - m.m_el[1].mVec128;
+    m_el[2].mVec128 = m_el[2].mVec128 - m.m_el[2].mVec128;
+#else
+	setValue(
+	m_el[0][0]-m.m_el[0][0], 
+	m_el[0][1]-m.m_el[0][1],
+	m_el[0][2]-m.m_el[0][2],
+	m_el[1][0]-m.m_el[1][0], 
+	m_el[1][1]-m.m_el[1][1],
+	m_el[1][2]-m.m_el[1][2],
+	m_el[2][0]-m.m_el[2][0], 
+	m_el[2][1]-m.m_el[2][1],
+	m_el[2][2]-m.m_el[2][2]);
+#endif
+	return *this;
+}
+
+
+SIMD_FORCE_INLINE btScalar 
+btMatrix3x3::determinant() const
+{ 
+	return btTriple((*this)[0], (*this)[1], (*this)[2]);
+}
+
+
+SIMD_FORCE_INLINE btMatrix3x3 
+btMatrix3x3::absolute() const
+{
+#if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
+    return btMatrix3x3(
+            _mm_and_ps(m_el[0].mVec128, btvAbsfMask),
+            _mm_and_ps(m_el[1].mVec128, btvAbsfMask),
+            _mm_and_ps(m_el[2].mVec128, btvAbsfMask));
+#elif defined(BT_USE_NEON)
+    return btMatrix3x3(
+            (float32x4_t)vandq_s32((int32x4_t)m_el[0].mVec128, btv3AbsMask),
+            (float32x4_t)vandq_s32((int32x4_t)m_el[1].mVec128, btv3AbsMask),
+            (float32x4_t)vandq_s32((int32x4_t)m_el[2].mVec128, btv3AbsMask));
+#else	
+	return btMatrix3x3(
+            btFabs(m_el[0].x()), btFabs(m_el[0].y()), btFabs(m_el[0].z()),
+            btFabs(m_el[1].x()), btFabs(m_el[1].y()), btFabs(m_el[1].z()),
+            btFabs(m_el[2].x()), btFabs(m_el[2].y()), btFabs(m_el[2].z()));
+#endif
+}
+
+SIMD_FORCE_INLINE btMatrix3x3 
+btMatrix3x3::transpose() const 
+{
+#if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
+    __m128 v0 = m_el[0].mVec128;
+    __m128 v1 = m_el[1].mVec128;
+    __m128 v2 = m_el[2].mVec128;    //  x2 y2 z2 w2
+    __m128 vT;
+    
+    v2 = _mm_and_ps(v2, btvFFF0fMask);  //  x2 y2 z2 0
+    
+    vT = _mm_unpackhi_ps(v0, v1);	//	z0 z1 * *
+    v0 = _mm_unpacklo_ps(v0, v1);	//	x0 x1 y0 y1
+
+    v1 = _mm_shuffle_ps(v0, v2, BT_SHUFFLE(2, 3, 1, 3) );	// y0 y1 y2 0
+    v0 = _mm_shuffle_ps(v0, v2, BT_SHUFFLE(0, 1, 0, 3) );	// x0 x1 x2 0
+    v2 = btCastdTo128f(_mm_move_sd(btCastfTo128d(v2), btCastfTo128d(vT)));	// z0 z1 z2 0
+
+
+    return btMatrix3x3( v0, v1, v2 );
+#elif defined(BT_USE_NEON)
+    // note: zeros the w channel. We can preserve it at the cost of two more vtrn instructions.
+    static const uint32x2_t zMask = (const uint32x2_t) {-1, 0 };
+    float32x4x2_t top = vtrnq_f32( m_el[0].mVec128, m_el[1].mVec128 );  // {x0 x1 z0 z1}, {y0 y1 w0 w1}
+    float32x2x2_t bl = vtrn_f32( vget_low_f32(m_el[2].mVec128), vdup_n_f32(0.0f) );       // {x2  0 }, {y2 0}
+    float32x4_t v0 = vcombine_f32( vget_low_f32(top.val[0]), bl.val[0] );
+    float32x4_t v1 = vcombine_f32( vget_low_f32(top.val[1]), bl.val[1] );
+    float32x2_t q = (float32x2_t) vand_u32( (uint32x2_t) vget_high_f32( m_el[2].mVec128), zMask );
+    float32x4_t v2 = vcombine_f32( vget_high_f32(top.val[0]), q );       // z0 z1 z2  0
+    return btMatrix3x3( v0, v1, v2 ); 
+#else
+	return btMatrix3x3( m_el[0].x(), m_el[1].x(), m_el[2].x(),
+                        m_el[0].y(), m_el[1].y(), m_el[2].y(),
+                        m_el[0].z(), m_el[1].z(), m_el[2].z());
+#endif
+}
+
+SIMD_FORCE_INLINE btMatrix3x3 
+btMatrix3x3::adjoint() const 
+{
+	return btMatrix3x3(cofac(1, 1, 2, 2), cofac(0, 2, 2, 1), cofac(0, 1, 1, 2),
+		cofac(1, 2, 2, 0), cofac(0, 0, 2, 2), cofac(0, 2, 1, 0),
+		cofac(1, 0, 2, 1), cofac(0, 1, 2, 0), cofac(0, 0, 1, 1));
+}
+
+SIMD_FORCE_INLINE btMatrix3x3 
+btMatrix3x3::inverse() const
+{
+	btVector3 co(cofac(1, 1, 2, 2), cofac(1, 2, 2, 0), cofac(1, 0, 2, 1));
+	btScalar det = (*this)[0].dot(co);
+	btFullAssert(det != btScalar(0.0));
+	btScalar s = btScalar(1.0) / det;
+	return btMatrix3x3(co.x() * s, cofac(0, 2, 2, 1) * s, cofac(0, 1, 1, 2) * s,
+		co.y() * s, cofac(0, 0, 2, 2) * s, cofac(0, 2, 1, 0) * s,
+		co.z() * s, cofac(0, 1, 2, 0) * s, cofac(0, 0, 1, 1) * s);
+}
+
+SIMD_FORCE_INLINE btMatrix3x3 
+btMatrix3x3::transposeTimes(const btMatrix3x3& m) const
+{
+#if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
+    // zeros w
+//    static const __m128i xyzMask = (const __m128i){ -1ULL, 0xffffffffULL };
+    __m128 row = m_el[0].mVec128;
+    __m128 m0 = _mm_and_ps( m.getRow(0).mVec128, btvFFF0fMask );
+    __m128 m1 = _mm_and_ps( m.getRow(1).mVec128, btvFFF0fMask);
+    __m128 m2 = _mm_and_ps( m.getRow(2).mVec128, btvFFF0fMask );
+    __m128 r0 = _mm_mul_ps(m0, _mm_shuffle_ps(row, row, 0));
+    __m128 r1 = _mm_mul_ps(m0, _mm_shuffle_ps(row, row, 0x55));
+    __m128 r2 = _mm_mul_ps(m0, _mm_shuffle_ps(row, row, 0xaa));
+    row = m_el[1].mVec128;
+    r0 = _mm_add_ps( r0, _mm_mul_ps(m1, _mm_shuffle_ps(row, row, 0)));
+    r1 = _mm_add_ps( r1, _mm_mul_ps(m1, _mm_shuffle_ps(row, row, 0x55)));
+    r2 = _mm_add_ps( r2, _mm_mul_ps(m1, _mm_shuffle_ps(row, row, 0xaa)));
+    row = m_el[2].mVec128;
+    r0 = _mm_add_ps( r0, _mm_mul_ps(m2, _mm_shuffle_ps(row, row, 0)));
+    r1 = _mm_add_ps( r1, _mm_mul_ps(m2, _mm_shuffle_ps(row, row, 0x55)));
+    r2 = _mm_add_ps( r2, _mm_mul_ps(m2, _mm_shuffle_ps(row, row, 0xaa)));
+    return btMatrix3x3( r0, r1, r2 );
+
+#elif defined BT_USE_NEON
+    // zeros w
+    static const uint32x4_t xyzMask = (const uint32x4_t){ -1, -1, -1, 0 };
+    float32x4_t m0 = (float32x4_t) vandq_u32( (uint32x4_t) m.getRow(0).mVec128, xyzMask );
+    float32x4_t m1 = (float32x4_t) vandq_u32( (uint32x4_t) m.getRow(1).mVec128, xyzMask );
+    float32x4_t m2 = (float32x4_t) vandq_u32( (uint32x4_t) m.getRow(2).mVec128, xyzMask );
+    float32x4_t row = m_el[0].mVec128;
+    float32x4_t r0 = vmulq_lane_f32( m0, vget_low_f32(row), 0);
+    float32x4_t r1 = vmulq_lane_f32( m0, vget_low_f32(row), 1);
+    float32x4_t r2 = vmulq_lane_f32( m0, vget_high_f32(row), 0);
+    row = m_el[1].mVec128;
+    r0 = vmlaq_lane_f32( r0, m1, vget_low_f32(row), 0);
+    r1 = vmlaq_lane_f32( r1, m1, vget_low_f32(row), 1);
+    r2 = vmlaq_lane_f32( r2, m1, vget_high_f32(row), 0);
+    row = m_el[2].mVec128;
+    r0 = vmlaq_lane_f32( r0, m2, vget_low_f32(row), 0);
+    r1 = vmlaq_lane_f32( r1, m2, vget_low_f32(row), 1);
+    r2 = vmlaq_lane_f32( r2, m2, vget_high_f32(row), 0);
+    return btMatrix3x3( r0, r1, r2 );
+#else
+    return btMatrix3x3(
+		m_el[0].x() * m[0].x() + m_el[1].x() * m[1].x() + m_el[2].x() * m[2].x(),
+		m_el[0].x() * m[0].y() + m_el[1].x() * m[1].y() + m_el[2].x() * m[2].y(),
+		m_el[0].x() * m[0].z() + m_el[1].x() * m[1].z() + m_el[2].x() * m[2].z(),
+		m_el[0].y() * m[0].x() + m_el[1].y() * m[1].x() + m_el[2].y() * m[2].x(),
+		m_el[0].y() * m[0].y() + m_el[1].y() * m[1].y() + m_el[2].y() * m[2].y(),
+		m_el[0].y() * m[0].z() + m_el[1].y() * m[1].z() + m_el[2].y() * m[2].z(),
+		m_el[0].z() * m[0].x() + m_el[1].z() * m[1].x() + m_el[2].z() * m[2].x(),
+		m_el[0].z() * m[0].y() + m_el[1].z() * m[1].y() + m_el[2].z() * m[2].y(),
+		m_el[0].z() * m[0].z() + m_el[1].z() * m[1].z() + m_el[2].z() * m[2].z());
+#endif
+}
+
+SIMD_FORCE_INLINE btMatrix3x3 
+btMatrix3x3::timesTranspose(const btMatrix3x3& m) const
+{
+#if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
+    __m128 a0 = m_el[0].mVec128;
+    __m128 a1 = m_el[1].mVec128;
+    __m128 a2 = m_el[2].mVec128;
+    
+    btMatrix3x3 mT = m.transpose(); // we rely on transpose() zeroing w channel so that we don't have to do it here
+    __m128 mx = mT[0].mVec128;
+    __m128 my = mT[1].mVec128;
+    __m128 mz = mT[2].mVec128;
+    
+    __m128 r0 = _mm_mul_ps(mx, _mm_shuffle_ps(a0, a0, 0x00));
+    __m128 r1 = _mm_mul_ps(mx, _mm_shuffle_ps(a1, a1, 0x00));
+    __m128 r2 = _mm_mul_ps(mx, _mm_shuffle_ps(a2, a2, 0x00));
+    r0 = _mm_add_ps(r0, _mm_mul_ps(my, _mm_shuffle_ps(a0, a0, 0x55)));
+    r1 = _mm_add_ps(r1, _mm_mul_ps(my, _mm_shuffle_ps(a1, a1, 0x55)));
+    r2 = _mm_add_ps(r2, _mm_mul_ps(my, _mm_shuffle_ps(a2, a2, 0x55)));
+    r0 = _mm_add_ps(r0, _mm_mul_ps(mz, _mm_shuffle_ps(a0, a0, 0xaa)));
+    r1 = _mm_add_ps(r1, _mm_mul_ps(mz, _mm_shuffle_ps(a1, a1, 0xaa)));
+    r2 = _mm_add_ps(r2, _mm_mul_ps(mz, _mm_shuffle_ps(a2, a2, 0xaa)));
+    return btMatrix3x3( r0, r1, r2);
+            
+#elif defined BT_USE_NEON
+    float32x4_t a0 = m_el[0].mVec128;
+    float32x4_t a1 = m_el[1].mVec128;
+    float32x4_t a2 = m_el[2].mVec128;
+    
+    btMatrix3x3 mT = m.transpose(); // we rely on transpose() zeroing w channel so that we don't have to do it here
+    float32x4_t mx = mT[0].mVec128;
+    float32x4_t my = mT[1].mVec128;
+    float32x4_t mz = mT[2].mVec128;
+    
+    float32x4_t r0 = vmulq_lane_f32( mx, vget_low_f32(a0), 0);
+    float32x4_t r1 = vmulq_lane_f32( mx, vget_low_f32(a1), 0);
+    float32x4_t r2 = vmulq_lane_f32( mx, vget_low_f32(a2), 0);
+    r0 = vmlaq_lane_f32( r0, my, vget_low_f32(a0), 1);
+    r1 = vmlaq_lane_f32( r1, my, vget_low_f32(a1), 1);
+    r2 = vmlaq_lane_f32( r2, my, vget_low_f32(a2), 1);
+    r0 = vmlaq_lane_f32( r0, mz, vget_high_f32(a0), 0);
+    r1 = vmlaq_lane_f32( r1, mz, vget_high_f32(a1), 0);
+    r2 = vmlaq_lane_f32( r2, mz, vget_high_f32(a2), 0);
+    return btMatrix3x3( r0, r1, r2 );
+    
+#else
+	return btMatrix3x3(
+		m_el[0].dot(m[0]), m_el[0].dot(m[1]), m_el[0].dot(m[2]),
+		m_el[1].dot(m[0]), m_el[1].dot(m[1]), m_el[1].dot(m[2]),
+		m_el[2].dot(m[0]), m_el[2].dot(m[1]), m_el[2].dot(m[2]));
+#endif
+}
+
+SIMD_FORCE_INLINE btVector3 
+operator*(const btMatrix3x3& m, const btVector3& v) 
+{
+#if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)
+    return v.dot3(m[0], m[1], m[2]);
+#else
+	return btVector3(m[0].dot(v), m[1].dot(v), m[2].dot(v));
+#endif
+}
+
+
+SIMD_FORCE_INLINE btVector3
+operator*(const btVector3& v, const btMatrix3x3& m)
+{
+#if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
+
+    const __m128 vv = v.mVec128;
+
+    __m128 c0 = bt_splat_ps( vv, 0);
+    __m128 c1 = bt_splat_ps( vv, 1);
+    __m128 c2 = bt_splat_ps( vv, 2);
+
+    c0 = _mm_mul_ps(c0, _mm_and_ps(m[0].mVec128, btvFFF0fMask) );
+    c1 = _mm_mul_ps(c1, _mm_and_ps(m[1].mVec128, btvFFF0fMask) );
+    c0 = _mm_add_ps(c0, c1);
+    c2 = _mm_mul_ps(c2, _mm_and_ps(m[2].mVec128, btvFFF0fMask) );
+    
+    return btVector3(_mm_add_ps(c0, c2));
+#elif defined(BT_USE_NEON)
+    const float32x4_t vv = v.mVec128;
+    const float32x2_t vlo = vget_low_f32(vv);
+    const float32x2_t vhi = vget_high_f32(vv);
+
+    float32x4_t c0, c1, c2;
+
+    c0 = (float32x4_t) vandq_s32((int32x4_t)m[0].mVec128, btvFFF0Mask);
+    c1 = (float32x4_t) vandq_s32((int32x4_t)m[1].mVec128, btvFFF0Mask);
+    c2 = (float32x4_t) vandq_s32((int32x4_t)m[2].mVec128, btvFFF0Mask);
+
+    c0 = vmulq_lane_f32(c0, vlo, 0);
+    c1 = vmulq_lane_f32(c1, vlo, 1);
+    c2 = vmulq_lane_f32(c2, vhi, 0);
+    c0 = vaddq_f32(c0, c1);
+    c0 = vaddq_f32(c0, c2);
+    
+    return btVector3(c0);
+#else
+	return btVector3(m.tdotx(v), m.tdoty(v), m.tdotz(v));
+#endif
+}
+
+SIMD_FORCE_INLINE btMatrix3x3 
+operator*(const btMatrix3x3& m1, const btMatrix3x3& m2)
+{
+#if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
+
+    __m128 m10 = m1[0].mVec128;  
+    __m128 m11 = m1[1].mVec128;
+    __m128 m12 = m1[2].mVec128;
+    
+    __m128 m2v = _mm_and_ps(m2[0].mVec128, btvFFF0fMask);
+    
+    __m128 c0 = bt_splat_ps( m10, 0);
+    __m128 c1 = bt_splat_ps( m11, 0);
+    __m128 c2 = bt_splat_ps( m12, 0);
+    
+    c0 = _mm_mul_ps(c0, m2v);
+    c1 = _mm_mul_ps(c1, m2v);
+    c2 = _mm_mul_ps(c2, m2v);
+    
+    m2v = _mm_and_ps(m2[1].mVec128, btvFFF0fMask);
+    
+    __m128 c0_1 = bt_splat_ps( m10, 1);
+    __m128 c1_1 = bt_splat_ps( m11, 1);
+    __m128 c2_1 = bt_splat_ps( m12, 1);
+    
+    c0_1 = _mm_mul_ps(c0_1, m2v);
+    c1_1 = _mm_mul_ps(c1_1, m2v);
+    c2_1 = _mm_mul_ps(c2_1, m2v);
+    
+    m2v = _mm_and_ps(m2[2].mVec128, btvFFF0fMask);
+    
+    c0 = _mm_add_ps(c0, c0_1);
+    c1 = _mm_add_ps(c1, c1_1);
+    c2 = _mm_add_ps(c2, c2_1);
+    
+    m10 = bt_splat_ps( m10, 2);
+    m11 = bt_splat_ps( m11, 2);
+    m12 = bt_splat_ps( m12, 2);
+    
+    m10 = _mm_mul_ps(m10, m2v);
+    m11 = _mm_mul_ps(m11, m2v);
+    m12 = _mm_mul_ps(m12, m2v);
+    
+    c0 = _mm_add_ps(c0, m10);
+    c1 = _mm_add_ps(c1, m11);
+    c2 = _mm_add_ps(c2, m12);
+    
+    return btMatrix3x3(c0, c1, c2);
+
+#elif defined(BT_USE_NEON)
+
+    float32x4_t rv0, rv1, rv2;
+    float32x4_t v0, v1, v2;
+    float32x4_t mv0, mv1, mv2;
+
+    v0 = m1[0].mVec128;
+    v1 = m1[1].mVec128;
+    v2 = m1[2].mVec128;
+
+    mv0 = (float32x4_t) vandq_s32((int32x4_t)m2[0].mVec128, btvFFF0Mask); 
+    mv1 = (float32x4_t) vandq_s32((int32x4_t)m2[1].mVec128, btvFFF0Mask); 
+    mv2 = (float32x4_t) vandq_s32((int32x4_t)m2[2].mVec128, btvFFF0Mask); 
+    
+    rv0 = vmulq_lane_f32(mv0, vget_low_f32(v0), 0);
+    rv1 = vmulq_lane_f32(mv0, vget_low_f32(v1), 0);
+    rv2 = vmulq_lane_f32(mv0, vget_low_f32(v2), 0);
+    
+    rv0 = vmlaq_lane_f32(rv0, mv1, vget_low_f32(v0), 1);
+    rv1 = vmlaq_lane_f32(rv1, mv1, vget_low_f32(v1), 1);
+    rv2 = vmlaq_lane_f32(rv2, mv1, vget_low_f32(v2), 1);
+    
+    rv0 = vmlaq_lane_f32(rv0, mv2, vget_high_f32(v0), 0);
+    rv1 = vmlaq_lane_f32(rv1, mv2, vget_high_f32(v1), 0);
+    rv2 = vmlaq_lane_f32(rv2, mv2, vget_high_f32(v2), 0);
+
+	return btMatrix3x3(rv0, rv1, rv2);
+        
+#else	
+	return btMatrix3x3(
+		m2.tdotx( m1[0]), m2.tdoty( m1[0]), m2.tdotz( m1[0]),
+		m2.tdotx( m1[1]), m2.tdoty( m1[1]), m2.tdotz( m1[1]),
+		m2.tdotx( m1[2]), m2.tdoty( m1[2]), m2.tdotz( m1[2]));
+#endif
+}
 
 /*
-	SIMD_FORCE_INLINE btMatrix3x3 btMultTransposeLeft(const btMatrix3x3& m1, const btMatrix3x3& m2) {
-    return btMatrix3x3(
-        m1[0][0] * m2[0][0] + m1[1][0] * m2[1][0] + m1[2][0] * m2[2][0],
-        m1[0][0] * m2[0][1] + m1[1][0] * m2[1][1] + m1[2][0] * m2[2][1],
-        m1[0][0] * m2[0][2] + m1[1][0] * m2[1][2] + m1[2][0] * m2[2][2],
-        m1[0][1] * m2[0][0] + m1[1][1] * m2[1][0] + m1[2][1] * m2[2][0],
-        m1[0][1] * m2[0][1] + m1[1][1] * m2[1][1] + m1[2][1] * m2[2][1],
-        m1[0][1] * m2[0][2] + m1[1][1] * m2[1][2] + m1[2][1] * m2[2][2],
-        m1[0][2] * m2[0][0] + m1[1][2] * m2[1][0] + m1[2][2] * m2[2][0],
-        m1[0][2] * m2[0][1] + m1[1][2] * m2[1][1] + m1[2][2] * m2[2][1],
-        m1[0][2] * m2[0][2] + m1[1][2] * m2[1][2] + m1[2][2] * m2[2][2]);
+SIMD_FORCE_INLINE btMatrix3x3 btMultTransposeLeft(const btMatrix3x3& m1, const btMatrix3x3& m2) {
+return btMatrix3x3(
+m1[0][0] * m2[0][0] + m1[1][0] * m2[1][0] + m1[2][0] * m2[2][0],
+m1[0][0] * m2[0][1] + m1[1][0] * m2[1][1] + m1[2][0] * m2[2][1],
+m1[0][0] * m2[0][2] + m1[1][0] * m2[1][2] + m1[2][0] * m2[2][2],
+m1[0][1] * m2[0][0] + m1[1][1] * m2[1][0] + m1[2][1] * m2[2][0],
+m1[0][1] * m2[0][1] + m1[1][1] * m2[1][1] + m1[2][1] * m2[2][1],
+m1[0][1] * m2[0][2] + m1[1][1] * m2[1][2] + m1[2][1] * m2[2][2],
+m1[0][2] * m2[0][0] + m1[1][2] * m2[1][0] + m1[2][2] * m2[2][0],
+m1[0][2] * m2[0][1] + m1[1][2] * m2[1][1] + m1[2][2] * m2[2][1],
+m1[0][2] * m2[0][2] + m1[1][2] * m2[1][2] + m1[2][2] * m2[2][2]);
 }
 */
 
 /**@brief Equality operator between two matrices
- * It will test all elements are equal.  */
+* It will test all elements are equal.  */
 SIMD_FORCE_INLINE bool operator==(const btMatrix3x3& m1, const btMatrix3x3& m2)
 {
-   return ( m1[0][0] == m2[0][0] && m1[1][0] == m2[1][0] && m1[2][0] == m2[2][0] &&
-            m1[0][1] == m2[0][1] && m1[1][1] == m2[1][1] && m1[2][1] == m2[2][1] &&
-            m1[0][2] == m2[0][2] && m1[1][2] == m2[1][2] && m1[2][2] == m2[2][2] );
+#if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
+    // Compare the rows lane-wise; each lane that matches becomes an all-ones mask.
+    __m128 c0, c1, c2;
+
+    c0 = _mm_cmpeq_ps(m1[0].mVec128, m2[0].mVec128);
+    c1 = _mm_cmpeq_ps(m1[1].mVec128, m2[1].mVec128);
+    c2 = _mm_cmpeq_ps(m1[2].mVec128, m2[2].mVec128);
+    // A lane stays set after the ANDs only if it matched in all three rows.
+    c0 = _mm_and_ps(c0, c1);
+    c0 = _mm_and_ps(c0, c2);
+    // Only lanes 0..2 (x, y, z) are compared by the scalar path; drop the w-lane bit so it cannot affect the result.
+    return (0x7 == (_mm_movemask_ps((__m128)c0) & 0x7));
+#else 
+	return 
+    (   m1[0][0] == m2[0][0] && m1[1][0] == m2[1][0] && m1[2][0] == m2[2][0] &&
+		m1[0][1] == m2[0][1] && m1[1][1] == m2[1][1] && m1[2][1] == m2[2][1] &&
+		m1[0][2] == m2[0][2] && m1[1][2] == m2[1][2] && m1[2][2] == m2[2][2] );
+#endif
 }
 
-#endif
+///for serialization
+struct	btMatrix3x3FloatData
+{
+	btVector3FloatData m_el[3];
+};
+
+///for serialization
+struct	btMatrix3x3DoubleData
+{
+	btVector3DoubleData m_el[3];
+};
+
+
+	
+
+SIMD_FORCE_INLINE	void	btMatrix3x3::serialize(struct	btMatrix3x3Data& dataOut) const
+{
+	for (int i=0;i<3;i++)
+		m_el[i].serialize(dataOut.m_el[i]);
+}
+
+SIMD_FORCE_INLINE	void	btMatrix3x3::serializeFloat(struct	btMatrix3x3FloatData& dataOut) const
+{
+	for (int i=0;i<3;i++)
+		m_el[i].serializeFloat(dataOut.m_el[i]);
+}
+
+
+SIMD_FORCE_INLINE	void	btMatrix3x3::deSerialize(const struct	btMatrix3x3Data& dataIn)
+{
+	for (int i=0;i<3;i++)
+		m_el[i].deSerialize(dataIn.m_el[i]);
+}
+
+SIMD_FORCE_INLINE	void	btMatrix3x3::deSerializeFloat(const struct	btMatrix3x3FloatData& dataIn)
+{
+	for (int i=0;i<3;i++)
+		m_el[i].deSerializeFloat(dataIn.m_el[i]);
+}
+
+SIMD_FORCE_INLINE	void	btMatrix3x3::deSerializeDouble(const struct	btMatrix3x3DoubleData& dataIn)
+{
+	for (int i=0;i<3;i++)
+		m_el[i].deSerializeDouble(dataIn.m_el[i]);
+}
+
+#endif //BT_MATRIX3x3_H
+
diff --git a/Engine/lib/bullet/src/LinearMath/btMinMax.h b/Engine/lib/bullet/src/LinearMath/btMinMax.h
index 5e27d62a4..5b436e9ba 100644
--- a/Engine/lib/bullet/src/LinearMath/btMinMax.h
+++ b/Engine/lib/bullet/src/LinearMath/btMinMax.h
@@ -14,8 +14,10 @@ subject to the following restrictions:
 
 
 
-#ifndef GEN_MINMAX_H
-#define GEN_MINMAX_H
+#ifndef BT_GEN_MINMAX_H
+#define BT_GEN_MINMAX_H
+
+#include "btScalar.h"
 
 template <class T>
 SIMD_FORCE_INLINE const T& btMin(const T& a, const T& b) 
@@ -30,7 +32,7 @@ SIMD_FORCE_INLINE const T& btMax(const T& a, const T& b)
 }
 
 template <class T>
-SIMD_FORCE_INLINE const T& GEN_clamped(const T& a, const T& lb, const T& ub) 
+SIMD_FORCE_INLINE const T& btClamped(const T& a, const T& lb, const T& ub) 
 {
 	return a < lb ? lb : (ub < a ? ub : a); 
 }
@@ -54,7 +56,7 @@ SIMD_FORCE_INLINE void btSetMax(T& a, const T& b)
 }
 
 template <class T>
-SIMD_FORCE_INLINE void GEN_clamp(T& a, const T& lb, const T& ub) 
+SIMD_FORCE_INLINE void btClamp(T& a, const T& lb, const T& ub) 
 {
 	if (a < lb) 
 	{
@@ -66,4 +68,4 @@ SIMD_FORCE_INLINE void GEN_clamp(T& a, const T& lb, const T& ub)
 	}
 }
 
-#endif
+#endif //BT_GEN_MINMAX_H
diff --git a/Engine/lib/bullet/src/LinearMath/btPolarDecomposition.cpp b/Engine/lib/bullet/src/LinearMath/btPolarDecomposition.cpp
new file mode 100644
index 000000000..d7de20408
--- /dev/null
+++ b/Engine/lib/bullet/src/LinearMath/btPolarDecomposition.cpp
@@ -0,0 +1,99 @@
+#include "btPolarDecomposition.h"
+#include "btMinMax.h"
+
+namespace
+{
+  // Adds up the absolute values of m[0][col], m[1][col] and m[2][col].
+  btScalar abs_column_sum(const btMatrix3x3& m, int col)
+  {
+    const btScalar upper_two = btFabs(m[0][col]) + btFabs(m[1][col]);
+    return upper_two + btFabs(m[2][col]);
+  }
+  // Adds up the absolute values of m[row][0], m[row][1] and m[row][2].
+  btScalar abs_row_sum(const btMatrix3x3& m, int row)
+  {
+    const btScalar left_two = btFabs(m[row][0]) + btFabs(m[row][1]);
+    return left_two + btFabs(m[row][2]);
+  }
+
+  // Largest of the three abs_column_sum values of 'm'.
+  btScalar p1_norm(const btMatrix3x3& m)
+  {
+    const btScalar first_two = btMax(abs_column_sum(m,0), abs_column_sum(m,1));
+    return btMax(first_two, abs_column_sum(m,2));
+  }
+  // Largest of the three abs_row_sum values of 'm'.
+  btScalar pinf_norm(const btMatrix3x3& m)
+  {
+    const btScalar first_two = btMax(abs_row_sum(m,0), abs_row_sum(m,1));
+    return btMax(first_two, abs_row_sum(m,2));
+  }
+}
+
+const btScalar btPolarDecomposition::DEFAULT_TOLERANCE = btScalar(0.0001);
+const unsigned int btPolarDecomposition::DEFAULT_MAX_ITERATIONS = 16;
+
+btPolarDecomposition::btPolarDecomposition(btScalar tolerance, unsigned int maxIterations)
+: m_tolerance(tolerance)          // convergence threshold; decompose() scales it by the 1-norm of 'u'
+, m_maxIterations(maxIterations)  // upper bound on the number of loop passes in decompose()
+{
+}
+
+unsigned int btPolarDecomposition::decompose(const btMatrix3x3& a, btMatrix3x3& u, btMatrix3x3& h) const
+{
+  // Returns the zero-based index of the pass on which convergence was
+  // detected, or m_maxIterations when the loop ended without converging
+  // (iteration limit reached, or a norm product became fuzzy-zero).
+  // 'u' and 'h' double as scratch storage: inside the loop 'h' always holds
+  // the inverse of the current 'u'.
+  unsigned int result = m_maxIterations;
+  u = a;
+  h = a.inverse();
+
+  for (unsigned int i = 0; i < m_maxIterations; ++i)
+  {
+    const btScalar h_norm = p1_norm(h) * pinf_norm(h);
+    const btScalar u_1 = p1_norm(u);
+    const btScalar u_norm = u_1 * pinf_norm(u);
+
+    // The matrix is effectively singular so we cannot invert it
+    if (btFuzzyZero(h_norm) || btFuzzyZero(u_norm))
+      break;
+
+    // btScalar-typed constants keep the arithmetic in the configured precision.
+    const btScalar gamma = btPow(h_norm / u_norm, btScalar(0.25));
+    const btScalar inv_gamma = btScalar(1.0) / gamma;
+
+    // Step applied to 'u'; afterwards u == (gamma * u_old + inv_gamma * inverse(u_old)^T) / 2
+    const btMatrix3x3 delta = (u * (gamma - btScalar(2.0)) + h.transpose() * inv_gamma) * btScalar(0.5);
+
+    u += delta;
+    h = u.inverse();
+
+    // Converged once the step is small relative to the pre-update 1-norm of 'u'
+    if (p1_norm(delta) <= m_tolerance * u_1)
+    {
+      result = i;
+      break;
+    }
+  }
+
+  // Whether or not the loop converged, hand back 'h' in its final form:
+  // transpose(u) * a, averaged with its own transpose so it is symmetric.
+  h = u.transpose() * a;
+  h = (h + h.transpose()) * btScalar(0.5);
+
+  return result;
+}
+
+unsigned int btPolarDecomposition::maxIterations() const
+{
+  return m_maxIterations;  // the iteration cap supplied at construction
+}
+
+unsigned int polarDecompose(const btMatrix3x3& a, btMatrix3x3& u, btMatrix3x3& h)
+{
+  static btPolarDecomposition polar;  // default-constructed once and reused by every call
+  return polar.decompose(a, u, h);    // forwards the arguments and the return value unchanged
+}
+
diff --git a/Engine/lib/bullet/src/LinearMath/btPolarDecomposition.h b/Engine/lib/bullet/src/LinearMath/btPolarDecomposition.h
new file mode 100644
index 000000000..561566764
--- /dev/null
+++ b/Engine/lib/bullet/src/LinearMath/btPolarDecomposition.h
@@ -0,0 +1,73 @@
+#ifndef POLARDECOMPOSITION_H
+#define POLARDECOMPOSITION_H
+
+#include "btMatrix3x3.h"
+
+/**
+ * This class is used to compute the polar decomposition of a matrix. In
+ * general, the polar decomposition factorizes a matrix, A, into two parts: a
+ * unitary matrix (U) and a positive, semi-definite Hermitian matrix (H).
+ * However, in this particular implementation the original matrix, A, is
+ * required to be a square 3x3 matrix with real elements. This means that U will
+ * be an orthogonal matrix and H will be a positive-definite, symmetric matrix.
+ */
+class btPolarDecomposition
+{
+  public:
+    static const btScalar DEFAULT_TOLERANCE;           // default for the constructor's 'tolerance'
+    static const unsigned int DEFAULT_MAX_ITERATIONS;  // default for the constructor's 'maxIterations'
+
+    /**
+     * Creates an instance with optional parameters.
+     *
+     * @param tolerance     - the tolerance used to determine convergence of the
+     *                        algorithm
+     * @param maxIterations - the maximum number of iterations used to achieve
+     *                        convergence
+     */
+    btPolarDecomposition(btScalar tolerance = DEFAULT_TOLERANCE, 
+      unsigned int maxIterations = DEFAULT_MAX_ITERATIONS);
+
+    /**
+     * Decomposes a matrix into orthogonal and symmetric, positive-definite
+     * parts. If the value returned by this function is equal to the maximum
+     * number of iterations, the algorithm has failed to converge.
+     *
+     * @param a - the original matrix
+     * @param u - the resulting orthogonal matrix
+     * @param h - the resulting symmetric matrix
+     * @return the zero-based index of the iteration on which convergence was
+     *         detected, or the maximum number of iterations on failure.
+     */
+    unsigned int decompose(const btMatrix3x3& a, btMatrix3x3& u, btMatrix3x3& h) const; 
+
+    /**
+     * Returns the maximum number of iterations that this algorithm will perform
+     * to achieve convergence.
+     *
+     * @return maximum number of iterations
+     */
+    unsigned int maxIterations() const;
+
+  private:
+    btScalar m_tolerance;          // convergence threshold used by decompose()
+    unsigned int m_maxIterations;  // iteration cap used by decompose()
+};
+
+/**
+ * This function decomposes the matrix 'a' into two parts: an orthogonal matrix
+ * 'u' and a symmetric, positive-definite matrix 'h'. If the number of
+ * iterations returned by this function is equal to
+ * btPolarDecomposition::DEFAULT_MAX_ITERATIONS, the algorithm has failed to
+ * converge.
+ *
+ * @param a - the original matrix
+ * @param u - the resulting orthogonal matrix
+ * @param h - the resulting symmetric matrix
+ *
+ * @return the number of iterations performed by the algorithm.
+ */
+unsigned int polarDecompose(const btMatrix3x3& a, btMatrix3x3& u, btMatrix3x3& h); 
+
+#endif // POLARDECOMPOSITION_H
+
diff --git a/Engine/lib/bullet/src/LinearMath/btPoolAllocator.h b/Engine/lib/bullet/src/LinearMath/btPoolAllocator.h
index 39d2559c7..ef2084537 100644
--- a/Engine/lib/bullet/src/LinearMath/btPoolAllocator.h
+++ b/Engine/lib/bullet/src/LinearMath/btPoolAllocator.h
@@ -57,6 +57,16 @@ public:
 		return m_freeCount;
 	}
 
+	int getUsedCount() const	// elements currently in use: capacity minus free slots
+	{
+		return m_maxElements - m_freeCount;
+	}
+
+	int getMaxCount() const	// total number of elements the pool was sized for
+	{
+		return m_maxElements;
+	}
+
 	void*	allocate(int size)
 	{
 		// release mode fix
@@ -96,6 +106,15 @@ public:
 		return m_elemSize;
 	}
 
+	unsigned char*	getPoolAddress()	// mutable pointer to the pool's backing storage (m_pool)
+	{
+		return m_pool;
+	}
+
+	const unsigned char*	getPoolAddress() const	// read-only pointer to the pool's backing storage (m_pool)
+	{
+		return m_pool;
+	}
 
 };
 
diff --git a/Engine/lib/bullet/src/LinearMath/btQuadWord.h b/Engine/lib/bullet/src/LinearMath/btQuadWord.h
index c657afd2b..11067ef47 100644
--- a/Engine/lib/bullet/src/LinearMath/btQuadWord.h
+++ b/Engine/lib/bullet/src/LinearMath/btQuadWord.h
@@ -13,13 +13,16 @@ subject to the following restrictions:
 */
 
 
-#ifndef SIMD_QUADWORD_H
-#define SIMD_QUADWORD_H
+#ifndef BT_SIMD_QUADWORD_H
+#define BT_SIMD_QUADWORD_H
 
 #include "btScalar.h"
 #include "btMinMax.h"
 
 
+
+
+
 #if defined (__CELLOS_LV2) && defined (__SPU__)
 #include <altivec.h>
 #endif
@@ -47,11 +50,53 @@ public:
 	}
 protected:
 #else //__CELLOS_LV2__ __SPU__
+
+#if defined(BT_USE_SSE) || defined(BT_USE_NEON) 
+	union {
+		btSimdFloat4 mVec128;
+		btScalar	m_floats[4];
+	};
+public:
+	SIMD_FORCE_INLINE	btSimdFloat4	get128() const
+	{
+		return mVec128;
+	}
+	SIMD_FORCE_INLINE	void	set128(btSimdFloat4 v128)
+	{
+		mVec128 = v128;
+	}
+#else
 	btScalar	m_floats[4];
+#endif // BT_USE_SSE
+
 #endif //__CELLOS_LV2__ __SPU__
 
 	public:
   
+#if defined(BT_USE_SSE) || defined(BT_USE_NEON)
+
+	// Set Vector 
+	SIMD_FORCE_INLINE btQuadWord(const btSimdFloat4 vec)
+	{
+		mVec128 = vec;
+	}
+
+	// Copy constructor
+	SIMD_FORCE_INLINE btQuadWord(const btQuadWord& rhs)
+	{
+		mVec128 = rhs.mVec128;
+	}
+
+	// Assignment Operator
+	SIMD_FORCE_INLINE btQuadWord& 
+	operator=(const btQuadWord& v) 
+	{
+		mVec128 = v.mVec128;
+		
+		return *this;
+	}
+	
+#endif
 
   /**@brief Return the x value */
 		SIMD_FORCE_INLINE const btScalar& getX() const { return m_floats[0]; }
@@ -60,13 +105,13 @@ protected:
   /**@brief Return the z value */
 		SIMD_FORCE_INLINE const btScalar& getZ() const { return m_floats[2]; }
   /**@brief Set the x value */
-		SIMD_FORCE_INLINE void	setX(btScalar x) { m_floats[0] = x;};
+		SIMD_FORCE_INLINE void	setX(btScalar _x) { m_floats[0] = _x;};
   /**@brief Set the y value */
-		SIMD_FORCE_INLINE void	setY(btScalar y) { m_floats[1] = y;};
+		SIMD_FORCE_INLINE void	setY(btScalar _y) { m_floats[1] = _y;};
   /**@brief Set the z value */
-		SIMD_FORCE_INLINE void	setZ(btScalar z) { m_floats[2] = z;};
+		SIMD_FORCE_INLINE void	setZ(btScalar _z) { m_floats[2] = _z;};
   /**@brief Set the w value */
-		SIMD_FORCE_INLINE void	setW(btScalar w) { m_floats[3] = w;};
+		SIMD_FORCE_INLINE void	setW(btScalar _w) { m_floats[3] = _w;};
   /**@brief Return the x value */
 		SIMD_FORCE_INLINE const btScalar& x() const { return m_floats[0]; }
   /**@brief Return the y value */
@@ -84,7 +129,14 @@ protected:
 
 	SIMD_FORCE_INLINE	bool	operator==(const btQuadWord& other) const
 	{
-		return ((m_floats[3]==other.m_floats[3]) && (m_floats[2]==other.m_floats[2]) && (m_floats[1]==other.m_floats[1]) && (m_floats[0]==other.m_floats[0]));
+#ifdef BT_USE_SSE
+        return (0xf == _mm_movemask_ps((__m128)_mm_cmpeq_ps(mVec128, other.mVec128)));
+#else 
+		return ((m_floats[3]==other.m_floats[3]) && 
+                (m_floats[2]==other.m_floats[2]) && 
+                (m_floats[1]==other.m_floats[1]) && 
+                (m_floats[0]==other.m_floats[0]));
+#endif
 	}
 
 	SIMD_FORCE_INLINE	bool	operator!=(const btQuadWord& other) const
@@ -97,11 +149,11 @@ protected:
    * @param y Value of y
    * @param z Value of z
    */
-		SIMD_FORCE_INLINE void 	setValue(const btScalar& x, const btScalar& y, const btScalar& z)
+		SIMD_FORCE_INLINE void 	setValue(const btScalar& _x, const btScalar& _y, const btScalar& _z)
 		{
-			m_floats[0]=x;
-			m_floats[1]=y;
-			m_floats[2]=z;
+			m_floats[0]=_x;
+			m_floats[1]=_y;
+			m_floats[2]=_z;
 			m_floats[3] = 0.f;
 		}
 
@@ -118,12 +170,12 @@ protected:
    * @param z Value of z
    * @param w Value of w
    */
-		SIMD_FORCE_INLINE void	setValue(const btScalar& x, const btScalar& y, const btScalar& z,const btScalar& w)
+		SIMD_FORCE_INLINE void	setValue(const btScalar& _x, const btScalar& _y, const btScalar& _z,const btScalar& _w)
 		{
-			m_floats[0]=x;
-			m_floats[1]=y;
-			m_floats[2]=z;
-			m_floats[3]=w;
+			m_floats[0]=_x;
+			m_floats[1]=_y;
+			m_floats[2]=_z;
+			m_floats[3]=_w;
 		}
   /**@brief No initialization constructor */
 		SIMD_FORCE_INLINE btQuadWord()
@@ -136,9 +188,9 @@ protected:
    * @param y Value of y
    * @param z Value of z
    */
-		SIMD_FORCE_INLINE btQuadWord(const btScalar& x, const btScalar& y, const btScalar& z)		
+		SIMD_FORCE_INLINE btQuadWord(const btScalar& _x, const btScalar& _y, const btScalar& _z)		
 		{
-			m_floats[0] = x, m_floats[1] = y, m_floats[2] = z, m_floats[3] = 0.0f;
+			m_floats[0] = _x, m_floats[1] = _y, m_floats[2] = _z, m_floats[3] = 0.0f;
 		}
 
 /**@brief Initializing constructor
@@ -147,9 +199,9 @@ protected:
    * @param z Value of z
    * @param w Value of w
    */
-		SIMD_FORCE_INLINE btQuadWord(const btScalar& x, const btScalar& y, const btScalar& z,const btScalar& w) 
+		SIMD_FORCE_INLINE btQuadWord(const btScalar& _x, const btScalar& _y, const btScalar& _z,const btScalar& _w) 
 		{
-			m_floats[0] = x, m_floats[1] = y, m_floats[2] = z, m_floats[3] = w;
+			m_floats[0] = _x, m_floats[1] = _y, m_floats[2] = _z, m_floats[3] = _w;
 		}
 
   /**@brief Set each element to the max of the current values and the values of another btQuadWord
@@ -157,24 +209,36 @@ protected:
    */
 		SIMD_FORCE_INLINE void	setMax(const btQuadWord& other)
 		{
-			btSetMax(m_floats[0], other.m_floats[0]);
+        #ifdef BT_USE_SSE
+            mVec128 = _mm_max_ps(mVec128, other.mVec128);
+        #elif defined(BT_USE_NEON)
+            mVec128 = vmaxq_f32(mVec128, other.mVec128);
+        #else
+        	btSetMax(m_floats[0], other.m_floats[0]);
 			btSetMax(m_floats[1], other.m_floats[1]);
 			btSetMax(m_floats[2], other.m_floats[2]);
 			btSetMax(m_floats[3], other.m_floats[3]);
-		}
+		#endif
+        }
   /**@brief Set each element to the min of the current values and the values of another btQuadWord
    * @param other The other btQuadWord to compare with 
    */
 		SIMD_FORCE_INLINE void	setMin(const btQuadWord& other)
 		{
-			btSetMin(m_floats[0], other.m_floats[0]);
+        #ifdef BT_USE_SSE
+            mVec128 = _mm_min_ps(mVec128, other.mVec128);
+        #elif defined(BT_USE_NEON)
+            mVec128 = vminq_f32(mVec128, other.mVec128);
+        #else
+        	btSetMin(m_floats[0], other.m_floats[0]);
 			btSetMin(m_floats[1], other.m_floats[1]);
 			btSetMin(m_floats[2], other.m_floats[2]);
 			btSetMin(m_floats[3], other.m_floats[3]);
-		}
+		#endif
+        }
 
 
 
 };
 
-#endif //SIMD_QUADWORD_H
+#endif //BT_SIMD_QUADWORD_H
diff --git a/Engine/lib/bullet/src/LinearMath/btQuaternion.h b/Engine/lib/bullet/src/LinearMath/btQuaternion.h
index f530c71df..a1db819de 100644
--- a/Engine/lib/bullet/src/LinearMath/btQuaternion.h
+++ b/Engine/lib/bullet/src/LinearMath/btQuaternion.h
@@ -14,31 +14,72 @@ subject to the following restrictions:
 
 
 
-#ifndef SIMD__QUATERNION_H_
-#define SIMD__QUATERNION_H_
+#ifndef BT_SIMD__QUATERNION_H_
+#define BT_SIMD__QUATERNION_H_
 
 
 #include "btVector3.h"
 #include "btQuadWord.h"
 
+
+
+
+
+#ifdef BT_USE_SSE
+
+const __m128 ATTRIBUTE_ALIGNED16(vOnes) = {1.0f, 1.0f, 1.0f, 1.0f};
+
+#endif
+
+#if defined(BT_USE_SSE) || defined(BT_USE_NEON)
+
+const btSimdFloat4 ATTRIBUTE_ALIGNED16(vQInv) = {-0.0f, -0.0f, -0.0f, +0.0f};
+const btSimdFloat4 ATTRIBUTE_ALIGNED16(vPPPM) = {+0.0f, +0.0f, +0.0f, -0.0f};
+
+#endif
+
 /**@brief The btQuaternion implements quaternion to perform linear algebra rotations in combination with btMatrix3x3, btVector3 and btTransform. */
 class btQuaternion : public btQuadWord {
 public:
   /**@brief No initialization constructor */
 	btQuaternion() {}
 
+#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))|| defined(BT_USE_NEON) 
+	// Set Vector 
+	SIMD_FORCE_INLINE btQuaternion(const btSimdFloat4 vec)
+	{
+		mVec128 = vec;
+	}
+
+	// Copy constructor
+	SIMD_FORCE_INLINE btQuaternion(const btQuaternion& rhs)
+	{
+		mVec128 = rhs.mVec128;
+	}
+
+	// Assignment Operator
+	SIMD_FORCE_INLINE btQuaternion& 
+	operator=(const btQuaternion& v) 
+	{
+		mVec128 = v.mVec128;
+		
+		return *this;
+	}
+	
+#endif
+
 	//		template <typename btScalar>
 	//		explicit Quaternion(const btScalar *v) : Tuple4<btScalar>(v) {}
   /**@brief Constructor from scalars */
-	btQuaternion(const btScalar& x, const btScalar& y, const btScalar& z, const btScalar& w) 
-		: btQuadWord(x, y, z, w) 
+	btQuaternion(const btScalar& _x, const btScalar& _y, const btScalar& _z, const btScalar& _w) 
+		: btQuadWord(_x, _y, _z, _w) 
 	{}
   /**@brief Axis angle Constructor
    * @param axis The axis which the rotation is around
    * @param angle The magnitude of the rotation around the angle (Radians) */
-	btQuaternion(const btVector3& axis, const btScalar& angle) 
+	btQuaternion(const btVector3& _axis, const btScalar& _angle) 
 	{ 
-		setRotation(axis, angle); 
+		setRotation(_axis, _angle); 
 	}
   /**@brief Constructor from Euler angles
    * @param yaw Angle around Y unless BT_EULER_DEFAULT_ZYX defined then Z
@@ -55,13 +96,13 @@ public:
   /**@brief Set the rotation using axis angle notation 
    * @param axis The axis around which to rotate
    * @param angle The magnitude of the rotation in Radians */
-	void setRotation(const btVector3& axis, const btScalar& angle)
+	void setRotation(const btVector3& axis, const btScalar& _angle)
 	{
 		btScalar d = axis.length();
 		btAssert(d != btScalar(0.0));
-		btScalar s = btSin(angle * btScalar(0.5)) / d;
+		btScalar s = btSin(_angle * btScalar(0.5)) / d;
 		setValue(axis.x() * s, axis.y() * s, axis.z() * s, 
-			btCos(angle * btScalar(0.5)));
+			btCos(_angle * btScalar(0.5)));
 	}
   /**@brief Set the quaternion using Euler angles
    * @param yaw Angle around Y
@@ -107,7 +148,16 @@ public:
    * @param q The quaternion to add to this one */
 	SIMD_FORCE_INLINE	btQuaternion& operator+=(const btQuaternion& q)
 	{
-		m_floats[0] += q.x(); m_floats[1] += q.y(); m_floats[2] += q.z(); m_floats[3] += q.m_floats[3];
+#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
+		mVec128 = _mm_add_ps(mVec128, q.mVec128);
+#elif defined(BT_USE_NEON)
+		mVec128 = vaddq_f32(mVec128, q.mVec128);
+#else	
+		m_floats[0] += q.x(); 
+        m_floats[1] += q.y(); 
+        m_floats[2] += q.z(); 
+        m_floats[3] += q.m_floats[3];
+#endif
 		return *this;
 	}
 
@@ -115,15 +165,35 @@ public:
    * @param q The quaternion to subtract from this one */
 	btQuaternion& operator-=(const btQuaternion& q) 
 	{
-		m_floats[0] -= q.x(); m_floats[1] -= q.y(); m_floats[2] -= q.z(); m_floats[3] -= q.m_floats[3];
-		return *this;
+#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
+		mVec128 = _mm_sub_ps(mVec128, q.mVec128);
+#elif defined(BT_USE_NEON)
+		mVec128 = vsubq_f32(mVec128, q.mVec128);
+#else	
+		m_floats[0] -= q.x(); 
+        m_floats[1] -= q.y(); 
+        m_floats[2] -= q.z(); 
+        m_floats[3] -= q.m_floats[3];
+#endif
+        return *this;
 	}
 
   /**@brief Scale this quaternion
    * @param s The scalar to scale by */
 	btQuaternion& operator*=(const btScalar& s)
 	{
-		m_floats[0] *= s; m_floats[1] *= s; m_floats[2] *= s; m_floats[3] *= s;
+#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
+		__m128	vs = _mm_load_ss(&s);	//	(S 0 0 0)
+		vs = bt_pshufd_ps(vs, 0);	//	(S S S S)
+		mVec128 = _mm_mul_ps(mVec128, vs);
+#elif defined(BT_USE_NEON)
+		mVec128 = vmulq_n_f32(mVec128, s);
+#else
+		m_floats[0] *= s; 
+        m_floats[1] *= s; 
+        m_floats[2] *= s; 
+        m_floats[3] *= s;
+#endif
 		return *this;
 	}
 
@@ -132,17 +202,111 @@ public:
    * Equivilant to this = this * q */
 	btQuaternion& operator*=(const btQuaternion& q)
 	{
-		setValue(m_floats[3] * q.x() + m_floats[0] * q.m_floats[3] + m_floats[1] * q.z() - m_floats[2] * q.y(),
+#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
+		__m128 vQ2 = q.get128();
+		
+		__m128 A1 = bt_pshufd_ps(mVec128, BT_SHUFFLE(0,1,2,0));
+		__m128 B1 = bt_pshufd_ps(vQ2, BT_SHUFFLE(3,3,3,0));
+		
+		A1 = A1 * B1;
+		
+		__m128 A2 = bt_pshufd_ps(mVec128, BT_SHUFFLE(1,2,0,1));
+		__m128 B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(2,0,1,1));
+		
+		A2 = A2 * B2;
+		
+		B1 = bt_pshufd_ps(mVec128, BT_SHUFFLE(2,0,1,2));
+		B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(1,2,0,2));
+		
+		B1 = B1 * B2;	//	A3 *= B3
+		
+		mVec128 = bt_splat_ps(mVec128, 3);	//	A0
+		mVec128 = mVec128 * vQ2;	//	A0 * B0
+		
+		A1 = A1 + A2;	//	AB12
+		mVec128 = mVec128 - B1;	//	AB03 = AB0 - AB3 
+		A1 = _mm_xor_ps(A1, vPPPM);	//	change sign of the last element
+		mVec128 = mVec128+ A1;	//	AB03 + AB12
+
+#elif defined(BT_USE_NEON)     
+
+        float32x4_t vQ1 = mVec128;
+        float32x4_t vQ2 = q.get128();
+        float32x4_t A0, A1, B1, A2, B2, A3, B3;
+        float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz;
+        
+        {
+        float32x2x2_t tmp;
+        tmp = vtrn_f32( vget_high_f32(vQ1), vget_low_f32(vQ1) );       // {z x}, {w y}
+        vQ1zx = tmp.val[0];
+
+        tmp = vtrn_f32( vget_high_f32(vQ2), vget_low_f32(vQ2) );       // {z x}, {w y}
+        vQ2zx = tmp.val[0];
+        }
+        vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1); 
+
+        vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
+
+        vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
+        vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
+
+        A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx);                    // X Y  z x 
+        B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx); // W W  W X 
+
+        A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
+        B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
+
+        A3 = vcombine_f32(vQ1zx, vQ1yz);        // Z X Y Z
+        B3 = vcombine_f32(vQ2yz, vQ2xz);        // Y Z x z
+
+        A1 = vmulq_f32(A1, B1);
+        A2 = vmulq_f32(A2, B2);
+        A3 = vmulq_f32(A3, B3);	//	A3 *= B3
+        A0 = vmulq_lane_f32(vQ2, vget_high_f32(vQ1), 1); //	A0 * B0
+
+        A1 = vaddq_f32(A1, A2);	//	AB12 = AB1 + AB2
+        A0 = vsubq_f32(A0, A3);	//	AB03 = AB0 - AB3 
+        
+        //	change the sign of the last element
+        A1 = (btSimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM);	
+        A0 = vaddq_f32(A0, A1);	//	AB03 + AB12
+        
+        mVec128 = A0;
+#else
+		setValue(
+            m_floats[3] * q.x() + m_floats[0] * q.m_floats[3] + m_floats[1] * q.z() - m_floats[2] * q.y(),
 			m_floats[3] * q.y() + m_floats[1] * q.m_floats[3] + m_floats[2] * q.x() - m_floats[0] * q.z(),
 			m_floats[3] * q.z() + m_floats[2] * q.m_floats[3] + m_floats[0] * q.y() - m_floats[1] * q.x(),
 			m_floats[3] * q.m_floats[3] - m_floats[0] * q.x() - m_floats[1] * q.y() - m_floats[2] * q.z());
+#endif
 		return *this;
 	}
   /**@brief Return the dot product between this quaternion and another
    * @param q The other quaternion */
 	btScalar dot(const btQuaternion& q) const
 	{
-		return m_floats[0] * q.x() + m_floats[1] * q.y() + m_floats[2] * q.z() + m_floats[3] * q.m_floats[3];
+#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
+		__m128	vd;
+		
+		vd = _mm_mul_ps(mVec128, q.mVec128);
+		
+        __m128 t = _mm_movehl_ps(vd, vd);
+		vd = _mm_add_ps(vd, t);
+		t = _mm_shuffle_ps(vd, vd, 0x55);
+		vd = _mm_add_ss(vd, t);
+		
+        return _mm_cvtss_f32(vd);
+#elif defined(BT_USE_NEON)
+		float32x4_t vd = vmulq_f32(mVec128, q.mVec128);
+		float32x2_t x = vpadd_f32(vget_low_f32(vd), vget_high_f32(vd));  
+		x = vpadd_f32(x, x);
+		return vget_lane_f32(x, 0);
+#else    
+		return  m_floats[0] * q.x() + 
+                m_floats[1] * q.y() + 
+                m_floats[2] * q.z() + 
+                m_floats[3] * q.m_floats[3];
+#endif
 	}
 
   /**@brief Return the length squared of the quaternion */
@@ -161,7 +325,25 @@ public:
    * Such that x^2 + y^2 + z^2 +w^2 = 1 */
 	btQuaternion& normalize() 
 	{
+#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
+		__m128	vd;
+		
+		vd = _mm_mul_ps(mVec128, mVec128);
+		
+        __m128 t = _mm_movehl_ps(vd, vd);
+		vd = _mm_add_ps(vd, t);
+		t = _mm_shuffle_ps(vd, vd, 0x55);
+		vd = _mm_add_ss(vd, t);
+
+		vd = _mm_sqrt_ss(vd);
+		vd = _mm_div_ss(vOnes, vd);
+        vd = bt_pshufd_ps(vd, 0); // splat
+		mVec128 = _mm_mul_ps(mVec128, vd);
+    
+		return *this;
+#else    
 		return *this /= length();
+#endif
 	}
 
   /**@brief Return a scaled version of this quaternion
@@ -169,10 +351,18 @@ public:
 	SIMD_FORCE_INLINE btQuaternion
 	operator*(const btScalar& s) const
 	{
+#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
+		__m128	vs = _mm_load_ss(&s);	//	(S 0 0 0)
+		vs = bt_pshufd_ps(vs, 0x00);	//	(S S S S)
+		
+		return btQuaternion(_mm_mul_ps(mVec128, vs));
+#elif defined(BT_USE_NEON)
+		return btQuaternion(vmulq_n_f32(mVec128, s));
+#else
 		return btQuaternion(x() * s, y() * s, z() * s, m_floats[3] * s);
+#endif
 	}
 
-
   /**@brief Return an inversely scaled versionof this quaternion
    * @param s The inverse scale factor */
 	btQuaternion operator/(const btScalar& s) const
@@ -212,17 +402,24 @@ public:
 	/**@brief Return the axis of the rotation represented by this quaternion */
 	btVector3 getAxis() const
 	{
-		btScalar s_squared = btScalar(1.) - btPow(m_floats[3], btScalar(2.));
+		btScalar s_squared = 1.f-m_floats[3]*m_floats[3];
+		
 		if (s_squared < btScalar(10.) * SIMD_EPSILON) //Check for divide by zero
 			return btVector3(1.0, 0.0, 0.0);  // Arbitrary
-		btScalar s = btSqrt(s_squared);
-		return btVector3(m_floats[0] / s, m_floats[1] / s, m_floats[2] / s);
+		btScalar s = 1.f/btSqrt(s_squared);
+		return btVector3(m_floats[0] * s, m_floats[1] * s, m_floats[2] * s);
 	}
 
 	/**@brief Return the inverse of this quaternion */
 	btQuaternion inverse() const
 	{
+#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
+		return btQuaternion(_mm_xor_ps(mVec128, vQInv));
+#elif defined(BT_USE_NEON)
+        return btQuaternion((btSimdFloat4)veorq_s32((int32x4_t)mVec128, (int32x4_t)vQInv));
+#else	
 		return btQuaternion(-m_floats[0], -m_floats[1], -m_floats[2], m_floats[3]);
+#endif
 	}
 
   /**@brief Return the sum of this quaternion and the other 
@@ -230,8 +427,14 @@ public:
 	SIMD_FORCE_INLINE btQuaternion
 	operator+(const btQuaternion& q2) const
 	{
+#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
+		return btQuaternion(_mm_add_ps(mVec128, q2.mVec128));
+#elif defined(BT_USE_NEON)
+        return btQuaternion(vaddq_f32(mVec128, q2.mVec128));
+#else	
 		const btQuaternion& q1 = *this;
 		return btQuaternion(q1.x() + q2.x(), q1.y() + q2.y(), q1.z() + q2.z(), q1.m_floats[3] + q2.m_floats[3]);
+#endif
 	}
 
   /**@brief Return the difference between this quaternion and the other 
@@ -239,16 +442,28 @@ public:
 	SIMD_FORCE_INLINE btQuaternion
 	operator-(const btQuaternion& q2) const
 	{
+#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
+		return btQuaternion(_mm_sub_ps(mVec128, q2.mVec128));
+#elif defined(BT_USE_NEON)
+        return btQuaternion(vsubq_f32(mVec128, q2.mVec128));
+#else	
 		const btQuaternion& q1 = *this;
 		return btQuaternion(q1.x() - q2.x(), q1.y() - q2.y(), q1.z() - q2.z(), q1.m_floats[3] - q2.m_floats[3]);
+#endif
 	}
 
   /**@brief Return the negative of this quaternion 
    * This simply negates each element */
 	SIMD_FORCE_INLINE btQuaternion operator-() const
 	{
+#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
+		return btQuaternion(_mm_xor_ps(mVec128, btvMzeroMask));
+#elif defined(BT_USE_NEON)
+		return btQuaternion((btSimdFloat4)veorq_s32((int32x4_t)mVec128, (int32x4_t)btvMzeroMask) );
+#else	
 		const btQuaternion& q2 = *this;
 		return btQuaternion( - q2.x(), - q2.y(),  - q2.z(),  - q2.m_floats[3]);
+#endif
 	}
   /**@todo document this and it's use */
 	SIMD_FORCE_INLINE btQuaternion farthest( const btQuaternion& qd) const 
@@ -279,16 +494,25 @@ public:
    * Slerp interpolates assuming constant velocity.  */
 	btQuaternion slerp(const btQuaternion& q, const btScalar& t) const
 	{
-		btScalar theta = angle(q);
-		if (theta != btScalar(0.0))
+		const btScalar magnitude = btSqrt(length2() * q.length2());	// scale for the dot product of possibly non-unit inputs
+		btAssert(magnitude > btScalar(0));
+		const btScalar product = dot(q) / magnitude;
+		// '<' (not '!='): rounding may leave |product| at or above 1; every such case must take the else branch so sin(theta) below is never zero.
+		if (btFabs(product) < btScalar(1))
 		{
-			btScalar d = btScalar(1.0) / btSin(theta);
-			btScalar s0 = btSin((btScalar(1.0) - t) * theta);
-			btScalar s1 = btSin(t * theta);   
-			return btQuaternion((m_floats[0] * s0 + q.x() * s1) * d,
-				(m_floats[1] * s0 + q.y() * s1) * d,
-				(m_floats[2] * s0 + q.z() * s1) * d,
-				(m_floats[3] * s0 + q.m_floats[3] * s1) * d);
+			// Take care of long angle case see http://en.wikipedia.org/wiki/Slerp
+			const btScalar sign = (product < 0) ? btScalar(-1) : btScalar(1);
+			// theta is the angle for the sign-adjusted dot product; s0 and s1 are the blend weights before scaling by d.
+			const btScalar theta = btAcos(sign * product);
+			const btScalar s1 = btSin(sign * t * theta);
+			const btScalar d = btScalar(1.0) / btSin(theta);
+			const btScalar s0 = btSin((btScalar(1.0) - t) * theta);
+			// Component-wise blend of this quaternion and q.
+			return btQuaternion(
+				(m_floats[0] * s0 + q.x() * s1) * d,
+				(m_floats[1] * s0 + q.y() * s1) * d,
+				(m_floats[2] * s0 + q.z() * s1) * d,
+				(m_floats[3] * s0 + q.m_floats[3] * s1) * d);
 		}
 		else
 		{
@@ -308,40 +532,262 @@ public:
 };
 
 
-/**@brief Return the negative of a quaternion */
-SIMD_FORCE_INLINE btQuaternion
-operator-(const btQuaternion& q)
-{
-	return btQuaternion(-q.x(), -q.y(), -q.z(), -q.w());
-}
 
 
 
 /**@brief Return the product of two quaternions */
 SIMD_FORCE_INLINE btQuaternion
-operator*(const btQuaternion& q1, const btQuaternion& q2) {
-	return btQuaternion(q1.w() * q2.x() + q1.x() * q2.w() + q1.y() * q2.z() - q1.z() * q2.y(),
+operator*(const btQuaternion& q1, const btQuaternion& q2) 
+{
+#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
+	__m128 vQ1 = q1.get128();
+	__m128 vQ2 = q2.get128();
+	__m128 A0, A1, B1, A2, B2;
+    
+	A1 = bt_pshufd_ps(vQ1, BT_SHUFFLE(0,1,2,0)); // X Y  z x     //      vtrn
+	B1 = bt_pshufd_ps(vQ2, BT_SHUFFLE(3,3,3,0)); // W W  W X     // vdup vext
+
+	A1 = A1 * B1;
+	
+	A2 = bt_pshufd_ps(vQ1, BT_SHUFFLE(1,2,0,1)); // Y Z  X Y     // vext 
+	B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(2,0,1,1)); // z x  Y Y     // vtrn vdup
+
+	A2 = A2 * B2;
+
+	B1 = bt_pshufd_ps(vQ1, BT_SHUFFLE(2,0,1,2)); // z x Y Z      // vtrn vext
+	B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(1,2,0,2)); // Y Z x z      // vext vtrn
+	
+	B1 = B1 * B2;	//	A3 *= B3
+
+	A0 = bt_splat_ps(vQ1, 3);	//	A0
+	A0 = A0 * vQ2;	//	A0 * B0
+
+	A1 = A1 + A2;	//	AB12
+	A0 =  A0 - B1;	//	AB03 = AB0 - AB3 
+	
+    A1 = _mm_xor_ps(A1, vPPPM);	//	change sign of the last element
+	A0 = A0 + A1;	//	AB03 + AB12
+	
+	return btQuaternion(A0);
+
+#elif defined(BT_USE_NEON)     
+
+	float32x4_t vQ1 = q1.get128();
+	float32x4_t vQ2 = q2.get128();
+	float32x4_t A0, A1, B1, A2, B2, A3, B3;
+    float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz;
+    
+    {
+    float32x2x2_t tmp;
+    tmp = vtrn_f32( vget_high_f32(vQ1), vget_low_f32(vQ1) );       // {z x}, {w y}
+    vQ1zx = tmp.val[0];
+
+    tmp = vtrn_f32( vget_high_f32(vQ2), vget_low_f32(vQ2) );       // {z x}, {w y}
+    vQ2zx = tmp.val[0];
+    }
+    vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1); 
+
+    vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
+
+    vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
+    vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
+
+    A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx);                    // X Y  z x 
+    B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx); // W W  W X 
+
+	A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
+    B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
+
+    A3 = vcombine_f32(vQ1zx, vQ1yz);        // Z X Y Z
+    B3 = vcombine_f32(vQ2yz, vQ2xz);        // Y Z x z
+
+	A1 = vmulq_f32(A1, B1);
+	A2 = vmulq_f32(A2, B2);
+	A3 = vmulq_f32(A3, B3);	//	A3 *= B3
+	A0 = vmulq_lane_f32(vQ2, vget_high_f32(vQ1), 1); //	A0 * B0
+
+	A1 = vaddq_f32(A1, A2);	//	AB12 = AB1 + AB2
+	A0 = vsubq_f32(A0, A3);	//	AB03 = AB0 - AB3 
+	
+    //	change the sign of the last element
+    A1 = (btSimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM);	
+	A0 = vaddq_f32(A0, A1);	//	AB03 + AB12
+	
+	return btQuaternion(A0);
+
+#else
+	return btQuaternion(
+        q1.w() * q2.x() + q1.x() * q2.w() + q1.y() * q2.z() - q1.z() * q2.y(),
 		q1.w() * q2.y() + q1.y() * q2.w() + q1.z() * q2.x() - q1.x() * q2.z(),
 		q1.w() * q2.z() + q1.z() * q2.w() + q1.x() * q2.y() - q1.y() * q2.x(),
 		q1.w() * q2.w() - q1.x() * q2.x() - q1.y() * q2.y() - q1.z() * q2.z()); 
+#endif
 }
 
 SIMD_FORCE_INLINE btQuaternion
 operator*(const btQuaternion& q, const btVector3& w)
 {
-	return btQuaternion( q.w() * w.x() + q.y() * w.z() - q.z() * w.y(),
-		q.w() * w.y() + q.z() * w.x() - q.x() * w.z(),
-		q.w() * w.z() + q.x() * w.y() - q.y() * w.x(),
+#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
+	__m128 vQ1 = q.get128();
+	__m128 vQ2 = w.get128();
+	__m128 A1, B1, A2, B2, A3, B3;
+	
+	A1 = bt_pshufd_ps(vQ1, BT_SHUFFLE(3,3,3,0));
+	B1 = bt_pshufd_ps(vQ2, BT_SHUFFLE(0,1,2,0));
+
+	A1 = A1 * B1;
+	
+	A2 = bt_pshufd_ps(vQ1, BT_SHUFFLE(1,2,0,1));
+	B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(2,0,1,1));
+
+	A2 = A2 * B2;
+
+	A3 = bt_pshufd_ps(vQ1, BT_SHUFFLE(2,0,1,2));
+	B3 = bt_pshufd_ps(vQ2, BT_SHUFFLE(1,2,0,2));
+	
+	A3 = A3 * B3;	//	A3 *= B3
+
+	A1 = A1 + A2;	//	AB12
+	A1 = _mm_xor_ps(A1, vPPPM);	//	change sign of the last element
+    A1 = A1 - A3;	//	AB123 = AB12 - AB3 
+	
+	return btQuaternion(A1);
+    
+#elif defined(BT_USE_NEON)     
+
+	float32x4_t vQ1 = q.get128();
+	float32x4_t vQ2 = w.get128();
+	float32x4_t A1, B1, A2, B2, A3, B3;
+    float32x2_t vQ1wx, vQ2zx, vQ1yz, vQ2yz, vQ1zx, vQ2xz;
+    
+    vQ1wx = vext_f32(vget_high_f32(vQ1), vget_low_f32(vQ1), 1); 
+    {
+    float32x2x2_t tmp;
+
+    tmp = vtrn_f32( vget_high_f32(vQ2), vget_low_f32(vQ2) );       // {z x}, {w y}
+    vQ2zx = tmp.val[0];
+
+    tmp = vtrn_f32( vget_high_f32(vQ1), vget_low_f32(vQ1) );       // {z x}, {w y}
+    vQ1zx = tmp.val[0];
+    }
+
+    vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
+
+    vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
+    vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
+
+    A1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ1), 1), vQ1wx); // W W  W X 
+    B1 = vcombine_f32(vget_low_f32(vQ2), vQ2zx);                    // X Y  z x 
+
+	A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
+    B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
+
+    A3 = vcombine_f32(vQ1zx, vQ1yz);        // Z X Y Z
+    B3 = vcombine_f32(vQ2yz, vQ2xz);        // Y Z x z
+
+	A1 = vmulq_f32(A1, B1);
+	A2 = vmulq_f32(A2, B2);
+	A3 = vmulq_f32(A3, B3);	//	A3 *= B3
+
+	A1 = vaddq_f32(A1, A2);	//	AB12 = AB1 + AB2
+	
+    //	change the sign of the last element
+    A1 = (btSimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM);	
+	
+    A1 = vsubq_f32(A1, A3);	//	AB123 = AB12 - AB3
+	
+	return btQuaternion(A1);
+    
+#else
+	return btQuaternion( 
+         q.w() * w.x() + q.y() * w.z() - q.z() * w.y(),
+		 q.w() * w.y() + q.z() * w.x() - q.x() * w.z(),
+		 q.w() * w.z() + q.x() * w.y() - q.y() * w.x(),
 		-q.x() * w.x() - q.y() * w.y() - q.z() * w.z()); 
+#endif
 }
 
 SIMD_FORCE_INLINE btQuaternion
 operator*(const btVector3& w, const btQuaternion& q)
 {
-	return btQuaternion( w.x() * q.w() + w.y() * q.z() - w.z() * q.y(),
-		w.y() * q.w() + w.z() * q.x() - w.x() * q.z(),
-		w.z() * q.w() + w.x() * q.y() - w.y() * q.x(),
+#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
+	__m128 vQ1 = w.get128();
+	__m128 vQ2 = q.get128();
+	__m128 A1, B1, A2, B2, A3, B3;
+	
+	A1 = bt_pshufd_ps(vQ1, BT_SHUFFLE(0,1,2,0));  // X Y  z x
+	B1 = bt_pshufd_ps(vQ2, BT_SHUFFLE(3,3,3,0));  // W W  W X 
+
+	A1 = A1 * B1;
+	
+	A2 = bt_pshufd_ps(vQ1, BT_SHUFFLE(1,2,0,1));
+	B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(2,0,1,1));
+
+	A2 = A2 *B2;
+
+	A3 = bt_pshufd_ps(vQ1, BT_SHUFFLE(2,0,1,2));
+	B3 = bt_pshufd_ps(vQ2, BT_SHUFFLE(1,2,0,2));
+	
+	A3 = A3 * B3;	//	A3 *= B3
+
+	A1 = A1 + A2;	//	AB12
+	A1 = _mm_xor_ps(A1, vPPPM);	//	change sign of the last element
+	A1 = A1 - A3;	//	AB123 = AB12 - AB3 
+	
+	return btQuaternion(A1);
+
+#elif defined(BT_USE_NEON)     
+
+	float32x4_t vQ1 = w.get128();
+	float32x4_t vQ2 = q.get128();
+	float32x4_t  A1, B1, A2, B2, A3, B3;
+    float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz;
+    
+    {
+    float32x2x2_t tmp;
+   
+    tmp = vtrn_f32( vget_high_f32(vQ1), vget_low_f32(vQ1) );       // {z x}, {w y}
+    vQ1zx = tmp.val[0];
+
+    tmp = vtrn_f32( vget_high_f32(vQ2), vget_low_f32(vQ2) );       // {z x}, {w y}
+    vQ2zx = tmp.val[0];
+    }
+    vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1); 
+
+    vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
+
+    vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
+    vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
+
+    A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx);                    // X Y  z x 
+    B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx); // W W  W X 
+
+	A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
+    B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
+
+    A3 = vcombine_f32(vQ1zx, vQ1yz);        // Z X Y Z
+    B3 = vcombine_f32(vQ2yz, vQ2xz);        // Y Z x z
+
+	A1 = vmulq_f32(A1, B1);
+	A2 = vmulq_f32(A2, B2);
+	A3 = vmulq_f32(A3, B3);	//	A3 *= B3
+
+	A1 = vaddq_f32(A1, A2);	//	AB12 = AB1 + AB2
+	
+    //	change the sign of the last element
+    A1 = (btSimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM);	
+	
+    A1 = vsubq_f32(A1, A3);	//	AB123 = AB12 - AB3
+	
+	return btQuaternion(A1);
+    
+#else
+	return btQuaternion( 
+        +w.x() * q.w() + w.y() * q.z() - w.z() * q.y(),
+		+w.y() * q.w() + w.z() * q.x() - w.x() * q.z(),
+		+w.z() * q.w() + w.x() * q.y() - w.y() * q.x(),
 		-w.x() * q.x() - w.y() * q.y() - w.z() * q.z()); 
+#endif
 }
 
 /**@brief Calculate the dot product between two quaternions */
@@ -361,7 +807,7 @@ length(const btQuaternion& q)
 
 /**@brief Return the angle between two quaternions*/
 SIMD_FORCE_INLINE btScalar
-angle(const btQuaternion& q1, const btQuaternion& q2) 
+btAngle(const btQuaternion& q1, const btQuaternion& q2) 
 { 
 	return q1.angle(q2); 
 }
@@ -389,7 +835,13 @@ quatRotate(const btQuaternion& rotation, const btVector3& v)
 {
 	btQuaternion q = rotation * v;
 	q *= rotation.inverse();
+#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
+	return btVector3(_mm_and_ps(q.get128(), btvFFF0fMask));
+#elif defined(BT_USE_NEON)
+    return btVector3((float32x4_t)vandq_s32((int32x4_t)q.get128(), btvFFF0Mask));
+#else	
 	return btVector3(q.getX(),q.getY(),q.getZ());
+#endif
 }
 
 SIMD_FORCE_INLINE btQuaternion 
@@ -419,8 +871,7 @@ shortestArcQuatNormalize2(btVector3& v0,btVector3& v1)
 	return shortestArcQuat(v0,v1);
 }
 
-#endif
-
+#endif //BT_SIMD__QUATERNION_H_
 
 
 
diff --git a/Engine/lib/bullet/src/LinearMath/btQuickprof.cpp b/Engine/lib/bullet/src/LinearMath/btQuickprof.cpp
index fa45d02b3..544aee89d 100644
--- a/Engine/lib/bullet/src/LinearMath/btQuickprof.cpp
+++ b/Engine/lib/bullet/src/LinearMath/btQuickprof.cpp
@@ -1,6 +1,6 @@
 /*
 
-/***************************************************************************************************
+***************************************************************************************************
 **
 ** profile.cpp
 **
@@ -13,13 +13,232 @@
 // Credits: The Clock class was inspired by the Timer classes in 
 // Ogre (www.ogre3d.org).
 
-#include "LinearMath/btQuickprof.h"
+#include "btQuickprof.h"
 
+#ifndef BT_NO_PROFILE
 
-#ifdef USE_BT_CLOCK
 
 static btClock gProfileClock;
 
+
+#ifdef __CELLOS_LV2__
+#include <sys/sys_time.h>
+#include <sys/time_util.h>
+#include <stdio.h>
+#endif
+
+#if defined (SUNOS) || defined (__SUNOS__) 
+#include <stdio.h> 
+#endif
+
+#if defined(WIN32) || defined(_WIN32)
+
+#define BT_USE_WINDOWS_TIMERS
+#define WIN32_LEAN_AND_MEAN
+#define NOWINRES
+#define NOMCX
+#define NOIME 
+
+#ifdef _XBOX
+	#include <Xtl.h>
+#else //_XBOX
+	#include <windows.h>
+#endif //_XBOX
+
+#include <time.h>
+
+
+#else //_WIN32
+#include <sys/time.h>
+#endif //_WIN32
+
+#define mymin(a,b) ((a) < (b) ? (a) : (b)) // true minimum (was returning the maximum); arguments parenthesized so expression arguments are safe
+
+struct btClockData
+{
+
+#ifdef BT_USE_WINDOWS_TIMERS
+	LARGE_INTEGER mClockFrequency;
+	DWORD mStartTick;
+	LONGLONG mPrevElapsedTime;
+	LARGE_INTEGER mStartTime;
+#else
+#ifdef __CELLOS_LV2__
+	uint64_t	mStartTime;
+#else
+	struct timeval mStartTime;
+#endif
+#endif //__CELLOS_LV2__
+
+};
+
+///The btClock is a portable basic clock that measures accurate time in seconds, use for profiling.
+btClock::btClock()
+{
+	m_data = new btClockData;
+#ifdef BT_USE_WINDOWS_TIMERS
+	QueryPerformanceFrequency(&m_data->mClockFrequency);
+#endif
+	reset();
+}
+
+btClock::~btClock()
+{
+	delete m_data;
+}
+
+btClock::btClock(const btClock& other)
+{
+	m_data = new btClockData;
+	*m_data = *other.m_data;
+}
+
+btClock& btClock::operator=(const btClock& other)
+{
+	*m_data = *other.m_data;
+	return *this;
+}
+
+
+	/// Resets the initial reference time.
+void btClock::reset()
+{
+#ifdef BT_USE_WINDOWS_TIMERS
+	QueryPerformanceCounter(&m_data->mStartTime);
+	m_data->mStartTick = GetTickCount();
+	m_data->mPrevElapsedTime = 0;
+#else
+#ifdef __CELLOS_LV2__
+
+	typedef uint64_t  ClockSize;
+	ClockSize newTime;
+	//__asm __volatile__( "mftb %0" : "=r" (newTime) : : "memory");
+	SYS_TIMEBASE_GET( newTime );
+	m_data->mStartTime = newTime;
+#else
+	gettimeofday(&m_data->mStartTime, 0);
+#endif
+#endif
+}
+
+/// Returns the time in ms since the last call to reset or since 
+/// the btClock was created.
+unsigned long int btClock::getTimeMilliseconds()
+{
+#ifdef BT_USE_WINDOWS_TIMERS
+	LARGE_INTEGER currentTime;
+	QueryPerformanceCounter(&currentTime);
+	LONGLONG elapsedTime = currentTime.QuadPart - 
+		m_data->mStartTime.QuadPart;
+		// Compute the number of millisecond ticks elapsed.
+	unsigned long msecTicks = (unsigned long)(1000 * elapsedTime / 
+		m_data->mClockFrequency.QuadPart);
+		// Check for unexpected leaps in the Win32 performance counter.  
+	// (This is caused by unexpected data across the PCI to ISA 
+		// bridge, aka south bridge.  See Microsoft KB274323.)
+		unsigned long elapsedTicks = GetTickCount() - m_data->mStartTick;
+		signed long msecOff = (signed long)(msecTicks - elapsedTicks);
+		if (msecOff < -100 || msecOff > 100)
+		{
+			// Adjust the starting time forwards.
+			LONGLONG msecAdjustment = mymin(msecOff * 
+				m_data->mClockFrequency.QuadPart / 1000, elapsedTime - 
+				m_data->mPrevElapsedTime);
+			m_data->mStartTime.QuadPart += msecAdjustment;
+			elapsedTime -= msecAdjustment;
+
+			// Recompute the number of millisecond ticks elapsed.
+			msecTicks = (unsigned long)(1000 * elapsedTime / 
+				m_data->mClockFrequency.QuadPart);
+		}
+
+		// Store the current elapsed time for adjustments next time.
+		m_data->mPrevElapsedTime = elapsedTime;
+
+		return msecTicks;
+#else
+
+#ifdef __CELLOS_LV2__
+		uint64_t freq=sys_time_get_timebase_frequency();
+		double dFreq=((double) freq) / 1000.0;
+		typedef uint64_t  ClockSize;
+		ClockSize newTime;
+		SYS_TIMEBASE_GET( newTime );
+		//__asm __volatile__( "mftb %0" : "=r" (newTime) : : "memory");
+
+		return (unsigned long int)((double(newTime-m_data->mStartTime)) / dFreq);
+#else
+
+		struct timeval currentTime;
+		gettimeofday(&currentTime, 0);
+		return (currentTime.tv_sec - m_data->mStartTime.tv_sec) * 1000 + 
+			(currentTime.tv_usec - m_data->mStartTime.tv_usec) / 1000;
+#endif //__CELLOS_LV2__
+#endif
+}
+
+	/// Returns the time in us since the last call to reset or since 
+	/// the Clock was created.
+unsigned long int btClock::getTimeMicroseconds()
+{
+#ifdef BT_USE_WINDOWS_TIMERS
+		LARGE_INTEGER currentTime;
+		QueryPerformanceCounter(&currentTime);
+		LONGLONG elapsedTime = currentTime.QuadPart - 
+			m_data->mStartTime.QuadPart;
+
+		// Compute the number of millisecond ticks elapsed.
+		unsigned long msecTicks = (unsigned long)(1000 * elapsedTime / 
+			m_data->mClockFrequency.QuadPart);
+
+		// Check for unexpected leaps in the Win32 performance counter.  
+		// (This is caused by unexpected data across the PCI to ISA 
+		// bridge, aka south bridge.  See Microsoft KB274323.)
+		unsigned long elapsedTicks = GetTickCount() - m_data->mStartTick;
+		signed long msecOff = (signed long)(msecTicks - elapsedTicks);
+		if (msecOff < -100 || msecOff > 100)
+		{
+			// Adjust the starting time forwards.
+			LONGLONG msecAdjustment = mymin(msecOff * 
+				m_data->mClockFrequency.QuadPart / 1000, elapsedTime - 
+				m_data->mPrevElapsedTime);
+			m_data->mStartTime.QuadPart += msecAdjustment;
+			elapsedTime -= msecAdjustment;
+		}
+
+		// Store the current elapsed time for adjustments next time.
+		m_data->mPrevElapsedTime = elapsedTime;
+
+		// Convert to microseconds.
+		unsigned long usecTicks = (unsigned long)(1000000 * elapsedTime / 
+			m_data->mClockFrequency.QuadPart);
+
+		return usecTicks;
+#else
+
+#ifdef __CELLOS_LV2__
+		uint64_t freq=sys_time_get_timebase_frequency();
+		double dFreq=((double) freq)/ 1000000.0;
+		typedef uint64_t  ClockSize;
+		ClockSize newTime;
+		//__asm __volatile__( "mftb %0" : "=r" (newTime) : : "memory");
+		SYS_TIMEBASE_GET( newTime );
+
+		return (unsigned long int)((double(newTime-m_data->mStartTime)) / dFreq);
+#else
+
+		struct timeval currentTime;
+		gettimeofday(&currentTime, 0);
+		return (currentTime.tv_sec - m_data->mStartTime.tv_sec) * 1000000 + 
+			(currentTime.tv_usec - m_data->mStartTime.tv_usec);
+#endif//__CELLOS_LV2__
+#endif 
+}
+
+
+
+
+
 inline void Profile_Get_Ticks(unsigned long int * ticks)
 {
 	*ticks = gProfileClock.getTimeMicroseconds();
@@ -57,7 +276,8 @@ CProfileNode::CProfileNode( const char * name, CProfileNode * parent ) :
 	RecursionCounter( 0 ),
 	Parent( parent ),
 	Child( NULL ),
-	Sibling( NULL )
+	Sibling( NULL ),
+	m_userPtr(0)
 {
 	Reset();
 }
@@ -342,5 +562,5 @@ void	CProfileManager::dumpAll()
 
 
 
-#endif //USE_BT_CLOCK
 
+#endif //BT_NO_PROFILE
diff --git a/Engine/lib/bullet/src/LinearMath/btQuickprof.h b/Engine/lib/bullet/src/LinearMath/btQuickprof.h
index f8d47c368..93f3f4a60 100644
--- a/Engine/lib/bullet/src/LinearMath/btQuickprof.h
+++ b/Engine/lib/bullet/src/LinearMath/btQuickprof.h
@@ -12,222 +12,48 @@
 
 
 
-#ifndef QUICK_PROF_H
-#define QUICK_PROF_H
+#ifndef BT_QUICK_PROF_H
+#define BT_QUICK_PROF_H
 
 //To disable built-in profiling, please comment out next line
 //#define BT_NO_PROFILE 1
 #ifndef BT_NO_PROFILE
-
+#include <stdio.h>//@todo remove this, backwards compatibility
 #include "btScalar.h"
-#include "LinearMath/btAlignedAllocator.h"
+#include "btAlignedAllocator.h"
 #include <new>
 
 
 
 
-//if you don't need btClock, you can comment next line
+
 #define USE_BT_CLOCK 1
 
 #ifdef USE_BT_CLOCK
-#ifdef __CELLOS_LV2__
-#include <sys/sys_time.h>
-#include <sys/time_util.h>
-#include <stdio.h>
-#endif
-
-#if defined (SUNOS) || defined (__SUNOS__) 
-#include <stdio.h> 
-#endif
-
-#if defined(WIN32) || defined(_WIN32)
-
-#define USE_WINDOWS_TIMERS 
-#define WIN32_LEAN_AND_MEAN 
-#define NOWINRES 
-#define NOMCX 
-#define NOIME 
-#ifdef _XBOX
-#include <Xtl.h>
-#else
-#include <windows.h>
-#endif
-#include <time.h>
-
-#else
-#include <sys/time.h>
-#endif
-
-#define mymin(a,b) (a > b ? a : b)
 
 ///The btClock is a portable basic clock that measures accurate time in seconds, use for profiling.
 class btClock
 {
 public:
-	btClock()
-	{
-#ifdef USE_WINDOWS_TIMERS
-		QueryPerformanceFrequency(&mClockFrequency);
-#endif
-		reset();
-	}
+	btClock();
 
-	~btClock()
-	{
-	}
+	btClock(const btClock& other);
+	btClock& operator=(const btClock& other);
+
+	~btClock();
 
 	/// Resets the initial reference time.
-	void reset()
-	{
-#ifdef USE_WINDOWS_TIMERS
-		QueryPerformanceCounter(&mStartTime);
-		mStartTick = GetTickCount();
-		mPrevElapsedTime = 0;
-#else
-#ifdef __CELLOS_LV2__
-
-		typedef uint64_t  ClockSize;
-		ClockSize newTime;
-		//__asm __volatile__( "mftb %0" : "=r" (newTime) : : "memory");
-		SYS_TIMEBASE_GET( newTime );
-		mStartTime = newTime;
-#else
-		gettimeofday(&mStartTime, 0);
-#endif
-
-#endif
-	}
+	void reset();
 
 	/// Returns the time in ms since the last call to reset or since 
 	/// the btClock was created.
-	unsigned long int getTimeMilliseconds()
-	{
-#ifdef USE_WINDOWS_TIMERS
-		LARGE_INTEGER currentTime;
-		QueryPerformanceCounter(&currentTime);
-		LONGLONG elapsedTime = currentTime.QuadPart - 
-			mStartTime.QuadPart;
-
-		// Compute the number of millisecond ticks elapsed.
-		unsigned long msecTicks = (unsigned long)(1000 * elapsedTime / 
-			mClockFrequency.QuadPart);
-
-		// Check for unexpected leaps in the Win32 performance counter.  
-		// (This is caused by unexpected data across the PCI to ISA 
-		// bridge, aka south bridge.  See Microsoft KB274323.)
-		unsigned long elapsedTicks = GetTickCount() - mStartTick;
-		signed long msecOff = (signed long)(msecTicks - elapsedTicks);
-		if (msecOff < -100 || msecOff > 100)
-		{
-			// Adjust the starting time forwards.
-			LONGLONG msecAdjustment = mymin(msecOff * 
-				mClockFrequency.QuadPart / 1000, elapsedTime - 
-				mPrevElapsedTime);
-			mStartTime.QuadPart += msecAdjustment;
-			elapsedTime -= msecAdjustment;
-
-			// Recompute the number of millisecond ticks elapsed.
-			msecTicks = (unsigned long)(1000 * elapsedTime / 
-				mClockFrequency.QuadPart);
-		}
-
-		// Store the current elapsed time for adjustments next time.
-		mPrevElapsedTime = elapsedTime;
-
-		return msecTicks;
-#else
-
-#ifdef __CELLOS_LV2__
-		uint64_t freq=sys_time_get_timebase_frequency();
-		double dFreq=((double) freq) / 1000.0;
-		typedef uint64_t  ClockSize;
-		ClockSize newTime;
-		SYS_TIMEBASE_GET( newTime );
-		//__asm __volatile__( "mftb %0" : "=r" (newTime) : : "memory");
-
-		return (unsigned long int)((double(newTime-mStartTime)) / dFreq);
-#else
-
-		struct timeval currentTime;
-		gettimeofday(&currentTime, 0);
-		return (currentTime.tv_sec - mStartTime.tv_sec) * 1000 + 
-			(currentTime.tv_usec - mStartTime.tv_usec) / 1000;
-#endif //__CELLOS_LV2__
-#endif
-	}
+	unsigned long int getTimeMilliseconds();
 
 	/// Returns the time in us since the last call to reset or since 
 	/// the Clock was created.
-	unsigned long int getTimeMicroseconds()
-	{
-#ifdef USE_WINDOWS_TIMERS
-		LARGE_INTEGER currentTime;
-		QueryPerformanceCounter(&currentTime);
-		LONGLONG elapsedTime = currentTime.QuadPart - 
-			mStartTime.QuadPart;
-
-		// Compute the number of millisecond ticks elapsed.
-		unsigned long msecTicks = (unsigned long)(1000 * elapsedTime / 
-			mClockFrequency.QuadPart);
-
-		// Check for unexpected leaps in the Win32 performance counter.  
-		// (This is caused by unexpected data across the PCI to ISA 
-		// bridge, aka south bridge.  See Microsoft KB274323.)
-		unsigned long elapsedTicks = GetTickCount() - mStartTick;
-		signed long msecOff = (signed long)(msecTicks - elapsedTicks);
-		if (msecOff < -100 || msecOff > 100)
-		{
-			// Adjust the starting time forwards.
-			LONGLONG msecAdjustment = mymin(msecOff * 
-				mClockFrequency.QuadPart / 1000, elapsedTime - 
-				mPrevElapsedTime);
-			mStartTime.QuadPart += msecAdjustment;
-			elapsedTime -= msecAdjustment;
-		}
-
-		// Store the current elapsed time for adjustments next time.
-		mPrevElapsedTime = elapsedTime;
-
-		// Convert to microseconds.
-		unsigned long usecTicks = (unsigned long)(1000000 * elapsedTime / 
-			mClockFrequency.QuadPart);
-
-		return usecTicks;
-#else
-
-#ifdef __CELLOS_LV2__
-		uint64_t freq=sys_time_get_timebase_frequency();
-		double dFreq=((double) freq)/ 1000000.0;
-		typedef uint64_t  ClockSize;
-		ClockSize newTime;
-		//__asm __volatile__( "mftb %0" : "=r" (newTime) : : "memory");
-		SYS_TIMEBASE_GET( newTime );
-
-		return (unsigned long int)((double(newTime-mStartTime)) / dFreq);
-#else
-
-		struct timeval currentTime;
-		gettimeofday(&currentTime, 0);
-		return (currentTime.tv_sec - mStartTime.tv_sec) * 1000000 + 
-			(currentTime.tv_usec - mStartTime.tv_usec);
-#endif//__CELLOS_LV2__
-#endif 
-	}
-
+	unsigned long int getTimeMicroseconds();
 private:
-#ifdef USE_WINDOWS_TIMERS
-	LARGE_INTEGER mClockFrequency;
-	DWORD mStartTick;
-	LONGLONG mPrevElapsedTime;
-	LARGE_INTEGER mStartTime;
-#else
-#ifdef __CELLOS_LV2__
-	uint64_t	mStartTime;
-#else
-	struct timeval mStartTime;
-#endif
-#endif //__CELLOS_LV2__
-
+	struct btClockData* m_data;
 };
 
 #endif //USE_BT_CLOCK
@@ -256,7 +82,8 @@ public:
 	const char *	Get_Name( void )				{ return Name; }
 	int				Get_Total_Calls( void )		{ return TotalCalls; }
 	float				Get_Total_Time( void )		{ return TotalTime; }
-
+	void*			GetUserPointer() const {return m_userPtr;}
+	void			SetUserPointer(void* ptr) { m_userPtr = ptr;}
 protected:
 
 	const char *	Name;
@@ -268,6 +95,7 @@ protected:
 	CProfileNode *	Parent;
 	CProfileNode *	Child;
 	CProfileNode *	Sibling;
+	void*	m_userPtr;
 };
 
 ///An iterator to navigate through the tree
@@ -289,15 +117,20 @@ public:
 	int				Get_Current_Total_Calls( void )	{ return CurrentChild->Get_Total_Calls(); }
 	float				Get_Current_Total_Time( void )	{ return CurrentChild->Get_Total_Time(); }
 
+	void*	Get_Current_UserPointer( void )			{ return CurrentChild->GetUserPointer(); }
+	void	Set_Current_UserPointer(void* ptr) {CurrentChild->SetUserPointer(ptr);}
 	// Access the current parent
 	const char *	Get_Current_Parent_Name( void )			{ return CurrentParent->Get_Name(); }
 	int				Get_Current_Parent_Total_Calls( void )	{ return CurrentParent->Get_Total_Calls(); }
 	float				Get_Current_Parent_Total_Time( void )	{ return CurrentParent->Get_Total_Time(); }
 
+	
+
 protected:
 
 	CProfileNode *	CurrentParent;
 	CProfileNode *	CurrentChild;
+	
 
 	CProfileIterator( CProfileNode * start );
 	friend	class		CProfileManager;
@@ -365,6 +198,6 @@ public:
 
 
 
-#endif //QUICK_PROF_H
+#endif //BT_QUICK_PROF_H
 
 
diff --git a/Engine/lib/bullet/src/LinearMath/btRandom.h b/Engine/lib/bullet/src/LinearMath/btRandom.h
index fdf65e01c..4cbfc6bfe 100644
--- a/Engine/lib/bullet/src/LinearMath/btRandom.h
+++ b/Engine/lib/bullet/src/LinearMath/btRandom.h
@@ -14,8 +14,8 @@ subject to the following restrictions:
 
 
 
-#ifndef GEN_RANDOM_H
-#define GEN_RANDOM_H
+#ifndef BT_GEN_RANDOM_H
+#define BT_GEN_RANDOM_H
 
 #ifdef MT19937
 
@@ -38,5 +38,5 @@ SIMD_FORCE_INLINE unsigned int GEN_rand()                   { return rand(); }
 
 #endif
 
-#endif
+#endif //BT_GEN_RANDOM_H
 
diff --git a/Engine/lib/bullet/src/LinearMath/btScalar.h b/Engine/lib/bullet/src/LinearMath/btScalar.h
index 67dce56e9..7154b8f0b 100644
--- a/Engine/lib/bullet/src/LinearMath/btScalar.h
+++ b/Engine/lib/bullet/src/LinearMath/btScalar.h
@@ -14,17 +14,21 @@ subject to the following restrictions:
 
 
 
-#ifndef SIMD___SCALAR_H
-#define SIMD___SCALAR_H
+#ifndef BT_SCALAR_H
+#define BT_SCALAR_H
+
+#ifdef BT_MANAGED_CODE
+//Aligned data types not supported in managed code
+#pragma unmanaged
+#endif
+
 
 #include <math.h>
 #include <stdlib.h>//size_t for MSVC 6.0
-#include <cstdlib>
-#include <cfloat>
 #include <float.h>
 
 /* SVN $Revision$ on $Date$ from http://bullet.googlecode.com*/
-#define BT_BULLET_VERSION 275
+#define BT_BULLET_VERSION 281
 
 inline int	btGetVersion()
 {
@@ -36,12 +40,13 @@ inline int	btGetVersion()
 #endif
 
 
-#ifdef WIN32
+#ifdef _WIN32
 
 		#if defined(__MINGW32__) || defined(__CYGWIN__) || (defined (_MSC_VER) && _MSC_VER < 1300)
 
 			#define SIMD_FORCE_INLINE inline
 			#define ATTRIBUTE_ALIGNED16(a) a
+			#define ATTRIBUTE_ALIGNED64(a) a
 			#define ATTRIBUTE_ALIGNED128(a) a
 		#else
 			//#define BT_HAS_ALIGNED_ALLOCATOR
@@ -52,6 +57,7 @@ inline int	btGetVersion()
 
 			#define SIMD_FORCE_INLINE __forceinline
 			#define ATTRIBUTE_ALIGNED16(a) __declspec(align(16)) a
+			#define ATTRIBUTE_ALIGNED64(a) __declspec(align(64)) a
 			#define ATTRIBUTE_ALIGNED128(a) __declspec (align(128)) a
 		#ifdef _XBOX
 			#define BT_USE_VMX128
@@ -61,8 +67,17 @@ inline int	btGetVersion()
  			#define btFsel(a,b,c) __fsel((a),(b),(c))
 		#else
 
-#if (defined (WIN32) && (_MSC_VER) && _MSC_VER >= 1400) && (!defined (BT_USE_DOUBLE_PRECISION))
+#if (defined (_WIN32) && (_MSC_VER) && _MSC_VER >= 1400) && (!defined (BT_USE_DOUBLE_PRECISION))
 			#define BT_USE_SSE
+			#ifdef BT_USE_SSE
+			//BT_USE_SSE_IN_API is disabled under Windows by default, because
+			//it makes it harder to integrate Bullet into your application under Windows
+			//(structures embedding Bullet structs/classes need to be 16-byte aligned)
+			//with relatively little performance gain.
+			//If you are not embedding Bullet data in your classes, or you make sure that those classes are aligned on 16-byte boundaries,
+			//you can manually enable this line or set it in the build system for a bit of performance gain (a few percent, dependent on usage)
+			//#define BT_USE_SSE_IN_API
+			#endif //BT_USE_SSE
 			#include <emmintrin.h>
 #endif
 
@@ -70,9 +85,14 @@ inline int	btGetVersion()
 
 		#endif //__MINGW32__
 
-		#include <assert.h>
 #ifdef BT_DEBUG
+	#ifdef _MSC_VER
+		#include <stdio.h>
+		#define btAssert(x) { if(!(x)){printf("Assert %s:%u (%s)\n", __FILE__, (unsigned int)__LINE__, #x);__debugbreak();	}} /* file and expression are passed as %s arguments so a '%' in either cannot corrupt the format string */
+	#else//_MSC_VER
+		#include <assert.h>
 		#define btAssert assert
+	#endif//_MSC_VER
 #else
 		#define btAssert(x)
 #endif
@@ -85,14 +105,22 @@ inline int	btGetVersion()
 #else
 	
 #if defined	(__CELLOS_LV2__)
-		#define SIMD_FORCE_INLINE inline
+		#define SIMD_FORCE_INLINE inline __attribute__((always_inline))
 		#define ATTRIBUTE_ALIGNED16(a) a __attribute__ ((aligned (16)))
+		#define ATTRIBUTE_ALIGNED64(a) a __attribute__ ((aligned (64)))
 		#define ATTRIBUTE_ALIGNED128(a) a __attribute__ ((aligned (128)))
 		#ifndef assert
 		#include <assert.h>
 		#endif
 #ifdef BT_DEBUG
-		#define btAssert assert
+#ifdef __SPU__
+#include <spu_printf.h>
+#define printf spu_printf
+	#define btAssert(x) {if(!(x)){printf("Assert %s:%u (%s)\n", __FILE__, (unsigned int)__LINE__, #x);spu_hcmpeq(0,0);}} /* file and expression are passed as %s arguments so a '%' in either cannot corrupt the format string */
+#else
+	#define btAssert assert
+#endif
+	
 #else
 		#define btAssert(x)
 #endif
@@ -108,6 +136,7 @@ inline int	btGetVersion()
 
 		#define SIMD_FORCE_INLINE __inline
 		#define ATTRIBUTE_ALIGNED16(a) a __attribute__ ((aligned (16)))
+		#define ATTRIBUTE_ALIGNED64(a) a __attribute__ ((aligned (64)))
 		#define ATTRIBUTE_ALIGNED128(a) a __attribute__ ((aligned (128)))
 		#ifndef assert
 		#include <assert.h>
@@ -128,23 +157,61 @@ inline int	btGetVersion()
 #else
 	//non-windows systems
 
-#if (defined (__APPLE__) && defined (__i386__) && (!defined (BT_USE_DOUBLE_PRECISION)))
-	#define BT_USE_SSE
-	#include <emmintrin.h>
+#if (defined (__APPLE__) && (!defined (BT_USE_DOUBLE_PRECISION)))
+    #if defined (__i386__) || defined (__x86_64__)
+        #define BT_USE_SSE
+		//BT_USE_SSE_IN_API is enabled on Mac OSX by default, because memory is automatically aligned on 16-byte boundaries
+		//if apps run into issues, we will disable the next line
+		#define BT_USE_SSE_IN_API
+        #ifdef BT_USE_SSE
+            // include appropriate SSE level
+            #if defined (__SSE4_1__)
+                #include <smmintrin.h>
+            #elif defined (__SSSE3__)
+                #include <tmmintrin.h>
+            #elif defined (__SSE3__)
+                #include <pmmintrin.h>
+            #else
+                #include <emmintrin.h>
+            #endif
+        #endif //BT_USE_SSE
+    #elif defined( __armv7__ )
+        #ifdef __clang__
+            #define BT_USE_NEON 1
 
-	#define SIMD_FORCE_INLINE inline
+            #if defined BT_USE_NEON && defined (__clang__)
+                #include <arm_neon.h>
+            #endif//BT_USE_NEON
+       #endif //__clang__
+    #endif//__arm__
+
+	#define SIMD_FORCE_INLINE inline __attribute__ ((always_inline))
 ///@todo: check out alignment methods for other platforms/compilers
 	#define ATTRIBUTE_ALIGNED16(a) a __attribute__ ((aligned (16)))
+	#define ATTRIBUTE_ALIGNED64(a) a __attribute__ ((aligned (64)))
 	#define ATTRIBUTE_ALIGNED128(a) a __attribute__ ((aligned (128)))
 	#ifndef assert
 	#include <assert.h>
 	#endif
 
 	#if defined(DEBUG) || defined (_DEBUG)
+	 #if defined (__i386__) || defined (__x86_64__)
+	#include <stdio.h>
+	 #define btAssert(x)\
+	{\
+	if(!(x))\
+	{\
+		printf("Assert %s in line %d, file %s\n",#x, __LINE__, __FILE__);\
+		asm volatile ("int3");\
+	}\
+	}
+	#else//defined (__i386__) || defined (__x86_64__)
 		#define btAssert assert
-	#else
-		#define btAssert(x)
+	//end of the defined (__i386__) || defined (__x86_64__) branches; the #endif on the next line closes that inner #if
 	#endif
+	#else//defined(DEBUG) || defined (_DEBUG)
+		#define btAssert(x)
+	#endif//defined(DEBUG) || defined (_DEBUG)
 
 	//btFullAssert is optional, slows down a lot
 	#define btFullAssert(x)
@@ -156,8 +223,10 @@ inline int	btGetVersion()
 		#define SIMD_FORCE_INLINE inline
 		///@todo: check out alignment methods for other platforms/compilers
 		///#define ATTRIBUTE_ALIGNED16(a) a __attribute__ ((aligned (16)))
+		///#define ATTRIBUTE_ALIGNED64(a) a __attribute__ ((aligned (64)))
 		///#define ATTRIBUTE_ALIGNED128(a) a __attribute__ ((aligned (128)))
 		#define ATTRIBUTE_ALIGNED16(a) a
+		#define ATTRIBUTE_ALIGNED64(a) a
 		#define ATTRIBUTE_ALIGNED128(a) a
 		#ifndef assert
 		#include <assert.h>
@@ -192,6 +261,69 @@ typedef float btScalar;
 #define BT_LARGE_FLOAT 1e18f
 #endif
 
+#ifdef BT_USE_SSE
+typedef __m128 btSimdFloat4;	//SIMD vector type used when SSE is enabled
+#endif//BT_USE_SSE
+//The SSE helpers below are compiled only when both BT_USE_SSE_IN_API and BT_USE_SSE are defined.
+#if defined BT_USE_SSE_IN_API && defined (BT_USE_SSE)
+#ifdef _WIN32
+//BT_NAN reads the bits 0x7F800001 (all-ones exponent, non-zero mantissa) back as a float.
+#ifndef BT_NAN
+static int btNanMask = 0x7F800001;
+#define BT_NAN (*(float*)&btNanMask)
+#endif
+//BT_INFINITY reads the bits 0x7F800000 (all-ones exponent, zero mantissa) back as a float.
+#ifndef BT_INFINITY
+static  int btInfinityMask = 0x7F800000;
+#define BT_INFINITY (*(float*)&btInfinityMask)
+#endif
+//Arithmetic operators for __m128, each forwarding to the matching packed-float intrinsic.
+inline __m128 operator + (const __m128 A, const __m128 B)
+{
+    return _mm_add_ps(A, B);
+}
+
+inline __m128 operator - (const __m128 A, const __m128 B)
+{
+    return _mm_sub_ps(A, B);
+}
+
+inline __m128 operator * (const __m128 A, const __m128 B)
+{
+    return _mm_mul_ps(A, B);
+}
+//Conversions between the 128-bit float/double/integer vector types via the cast intrinsics.
+#define btCastfTo128i(a) (_mm_castps_si128(a))
+#define btCastfTo128d(a) (_mm_castps_pd(a))
+#define btCastiTo128f(a) (_mm_castsi128_ps(a))
+#define btCastdTo128f(a) (_mm_castpd_ps(a))
+#define btCastdTo128i(a) (_mm_castpd_si128(a))
+#define btAssign128(r0,r1,r2,r3) _mm_setr_ps(r0,r1,r2,r3)
+
+#else//_WIN32
+//Elsewhere the vector types are converted with plain casts and BT_INFINITY/BT_NAN map to INFINITY/NAN.
+#define btCastfTo128i(a) ((__m128i)(a))
+#define btCastfTo128d(a) ((__m128d)(a))
+#define btCastiTo128f(a)  ((__m128) (a))
+#define btCastdTo128f(a) ((__m128) (a))
+#define btCastdTo128i(a) ((__m128i)(a))
+#define btAssign128(r0,r1,r2,r3) (__m128){r0,r1,r2,r3}
+#define BT_INFINITY INFINITY
+#define BT_NAN NAN
+#endif//_WIN32
+#endif //BT_USE_SSE_IN_API
+
+#ifdef BT_USE_NEON
+#include <arm_neon.h>
+//NEON build: btSimdFloat4 is float32x4_t and btAssign128 builds one from four values.
+typedef float32x4_t btSimdFloat4;
+#define BT_INFINITY INFINITY
+#define BT_NAN NAN
+#define btAssign128(r0,r1,r2,r3) (float32x4_t){r0,r1,r2,r3}
+#endif
+
+
+
 
 
 #define BT_DECLARE_ALIGNED_ALLOCATOR() \
@@ -213,8 +345,8 @@ SIMD_FORCE_INLINE btScalar btFabs(btScalar x) { return fabs(x); }
 SIMD_FORCE_INLINE btScalar btCos(btScalar x) { return cos(x); }
 SIMD_FORCE_INLINE btScalar btSin(btScalar x) { return sin(x); }
 SIMD_FORCE_INLINE btScalar btTan(btScalar x) { return tan(x); }
-SIMD_FORCE_INLINE btScalar btAcos(btScalar x) { return acos(x); }
-SIMD_FORCE_INLINE btScalar btAsin(btScalar x) { return asin(x); }
+SIMD_FORCE_INLINE btScalar btAcos(btScalar x) { /*clamp x to [-1,1] before calling acos*/ const btScalar lo=btScalar(-1), hi=btScalar(1); return acos((x<lo) ? lo : ((x>hi) ? hi : x)); }
+SIMD_FORCE_INLINE btScalar btAsin(btScalar x) { /*clamp x to [-1,1] before calling asin*/ const btScalar lo=btScalar(-1), hi=btScalar(1); return asin((x<lo) ? lo : ((x>hi) ? hi : x)); }
 SIMD_FORCE_INLINE btScalar btAtan(btScalar x) { return atan(x); }
 SIMD_FORCE_INLINE btScalar btAtan2(btScalar x, btScalar y) { return atan2(x, y); }
 SIMD_FORCE_INLINE btScalar btExp(btScalar x) { return exp(x); }
@@ -233,7 +365,7 @@ SIMD_FORCE_INLINE btScalar btSqrt(btScalar y)
 	tempf = y;
 	*tfptr = (0xbfcdd90a - *tfptr)>>1; /* estimate of 1/sqrt(y) */
 	x =  tempf;
-	z =  y*btScalar(0.5);                        /* hoist out the �/2�    */
+	z =  y*btScalar(0.5);
 	x = (btScalar(1.5)*x)-(x*x)*(x*z);         /* iteration formula     */
 	x = (btScalar(1.5)*x)-(x*x)*(x*z);
 	x = (btScalar(1.5)*x)-(x*x)*(x*z);
@@ -249,10 +381,19 @@ SIMD_FORCE_INLINE btScalar btCos(btScalar x) { return cosf(x); }
 SIMD_FORCE_INLINE btScalar btSin(btScalar x) { return sinf(x); }
 SIMD_FORCE_INLINE btScalar btTan(btScalar x) { return tanf(x); }
 SIMD_FORCE_INLINE btScalar btAcos(btScalar x) { 
-	btAssert(x <= btScalar(1.));
+	//clamp the argument to [-1,1] before handing it to acosf
+	const btScalar lowerBound = btScalar(-1);
+	const btScalar upperBound = btScalar(1);
+	x = (x<lowerBound) ? lowerBound : ((x>upperBound) ? upperBound : x);
 	return acosf(x); 
 }
-SIMD_FORCE_INLINE btScalar btAsin(btScalar x) { return asinf(x); }
+SIMD_FORCE_INLINE btScalar btAsin(btScalar x) {
+	//clamp the argument to [-1,1] before handing it to asinf
+	const btScalar lowerBound = btScalar(-1);
+	const btScalar upperBound = btScalar(1);
+	if (x<lowerBound) return asinf(lowerBound);
+	return asinf((x>upperBound) ? upperBound : x);
+}
 SIMD_FORCE_INLINE btScalar btAtan(btScalar x) { return atanf(x); }
 SIMD_FORCE_INLINE btScalar btAtan2(btScalar x, btScalar y) { return atan2f(x, y); }
 SIMD_FORCE_INLINE btScalar btExp(btScalar x) { return expf(x); }
@@ -492,4 +633,29 @@ struct btTypedObject
 		return m_objectType;
 	}
 };
-#endif //SIMD___SCALAR_H
+
+
+  
+///Rounds unalignedPtr up to the next address that is a multiple of alignment;
+///a pointer that is already aligned is returned unchanged.
+///The rounding uses the bit mask ~(alignment-1), which only produces a multiple
+///of alignment when alignment is a power of two.
+template <typename T>T* btAlignPointer(T* unalignedPtr, size_t alignment)
+{
+	//overlay the pointer with a size_t so that its address bits can be edited
+	struct btConvertPointerSizeT
+	{
+		union
+		{
+			T* ptr;
+			size_t integer;
+		};
+	};
+	btConvertPointerSizeT address;
+	address.ptr = unalignedPtr;
+	//adding alignment-1 and then clearing the low bits rounds upwards
+	address.integer = (address.integer + (alignment - 1)) & ~(alignment - 1);
+	return address.ptr;
+}
+
+#endif //BT_SCALAR_H
diff --git a/Engine/lib/bullet/src/LinearMath/btSerializer.cpp b/Engine/lib/bullet/src/LinearMath/btSerializer.cpp
new file mode 100644
index 000000000..d6b2b3a5a
--- /dev/null
+++ b/Engine/lib/bullet/src/LinearMath/btSerializer.cpp
@@ -0,0 +1,908 @@
+char sBulletDNAstr[]= {
+char(83),char(68),char(78),char(65),char(78),char(65),char(77),char(69),char(63),char(1),char(0),char(0),char(109),char(95),char(115),char(105),char(122),char(101),char(0),char(109),
+char(95),char(99),char(97),char(112),char(97),char(99),char(105),char(116),char(121),char(0),char(42),char(109),char(95),char(100),char(97),char(116),char(97),char(0),char(109),char(95),
+char(99),char(111),char(108),char(108),char(105),char(115),char(105),char(111),char(110),char(83),char(104),char(97),char(112),char(101),char(115),char(0),char(109),char(95),char(99),char(111),
+char(108),char(108),char(105),char(115),char(105),char(111),char(110),char(79),char(98),char(106),char(101),char(99),char(116),char(115),char(0),char(109),char(95),char(99),char(111),char(110),
+char(115),char(116),char(114),char(97),char(105),char(110),char(116),char(115),char(0),char(42),char(102),char(105),char(114),char(115),char(116),char(0),char(42),char(108),char(97),char(115),
+char(116),char(0),char(109),char(95),char(102),char(108),char(111),char(97),char(116),char(115),char(91),char(52),char(93),char(0),char(109),char(95),char(101),char(108),char(91),char(51),
+char(93),char(0),char(109),char(95),char(98),char(97),char(115),char(105),char(115),char(0),char(109),char(95),char(111),char(114),char(105),char(103),char(105),char(110),char(0),char(109),
+char(95),char(114),char(111),char(111),char(116),char(78),char(111),char(100),char(101),char(73),char(110),char(100),char(101),char(120),char(0),char(109),char(95),char(115),char(117),char(98),
+char(116),char(114),char(101),char(101),char(83),char(105),char(122),char(101),char(0),char(109),char(95),char(113),char(117),char(97),char(110),char(116),char(105),char(122),char(101),char(100),
+char(65),char(97),char(98),char(98),char(77),char(105),char(110),char(91),char(51),char(93),char(0),char(109),char(95),char(113),char(117),char(97),char(110),char(116),char(105),char(122),
+char(101),char(100),char(65),char(97),char(98),char(98),char(77),char(97),char(120),char(91),char(51),char(93),char(0),char(109),char(95),char(97),char(97),char(98),char(98),char(77),
+char(105),char(110),char(79),char(114),char(103),char(0),char(109),char(95),char(97),char(97),char(98),char(98),char(77),char(97),char(120),char(79),char(114),char(103),char(0),char(109),
+char(95),char(101),char(115),char(99),char(97),char(112),char(101),char(73),char(110),char(100),char(101),char(120),char(0),char(109),char(95),char(115),char(117),char(98),char(80),char(97),
+char(114),char(116),char(0),char(109),char(95),char(116),char(114),char(105),char(97),char(110),char(103),char(108),char(101),char(73),char(110),char(100),char(101),char(120),char(0),char(109),
+char(95),char(112),char(97),char(100),char(91),char(52),char(93),char(0),char(109),char(95),char(101),char(115),char(99),char(97),char(112),char(101),char(73),char(110),char(100),char(101),
+char(120),char(79),char(114),char(84),char(114),char(105),char(97),char(110),char(103),char(108),char(101),char(73),char(110),char(100),char(101),char(120),char(0),char(109),char(95),char(98),
+char(118),char(104),char(65),char(97),char(98),char(98),char(77),char(105),char(110),char(0),char(109),char(95),char(98),char(118),char(104),char(65),char(97),char(98),char(98),char(77),
+char(97),char(120),char(0),char(109),char(95),char(98),char(118),char(104),char(81),char(117),char(97),char(110),char(116),char(105),char(122),char(97),char(116),char(105),char(111),char(110),
+char(0),char(109),char(95),char(99),char(117),char(114),char(78),char(111),char(100),char(101),char(73),char(110),char(100),char(101),char(120),char(0),char(109),char(95),char(117),char(115),
+char(101),char(81),char(117),char(97),char(110),char(116),char(105),char(122),char(97),char(116),char(105),char(111),char(110),char(0),char(109),char(95),char(110),char(117),char(109),char(67),
+char(111),char(110),char(116),char(105),char(103),char(117),char(111),char(117),char(115),char(76),char(101),char(97),char(102),char(78),char(111),char(100),char(101),char(115),char(0),char(109),
+char(95),char(110),char(117),char(109),char(81),char(117),char(97),char(110),char(116),char(105),char(122),char(101),char(100),char(67),char(111),char(110),char(116),char(105),char(103),char(117),
+char(111),char(117),char(115),char(78),char(111),char(100),char(101),char(115),char(0),char(42),char(109),char(95),char(99),char(111),char(110),char(116),char(105),char(103),char(117),char(111),
+char(117),char(115),char(78),char(111),char(100),char(101),char(115),char(80),char(116),char(114),char(0),char(42),char(109),char(95),char(113),char(117),char(97),char(110),char(116),char(105),
+char(122),char(101),char(100),char(67),char(111),char(110),char(116),char(105),char(103),char(117),char(111),char(117),char(115),char(78),char(111),char(100),char(101),char(115),char(80),char(116),
+char(114),char(0),char(42),char(109),char(95),char(115),char(117),char(98),char(84),char(114),char(101),char(101),char(73),char(110),char(102),char(111),char(80),char(116),char(114),char(0),
+char(109),char(95),char(116),char(114),char(97),char(118),char(101),char(114),char(115),char(97),char(108),char(77),char(111),char(100),char(101),char(0),char(109),char(95),char(110),char(117),
+char(109),char(83),char(117),char(98),char(116),char(114),char(101),char(101),char(72),char(101),char(97),char(100),char(101),char(114),char(115),char(0),char(42),char(109),char(95),char(110),
+char(97),char(109),char(101),char(0),char(109),char(95),char(115),char(104),char(97),char(112),char(101),char(84),char(121),char(112),char(101),char(0),char(109),char(95),char(112),char(97),
+char(100),char(100),char(105),char(110),char(103),char(91),char(52),char(93),char(0),char(109),char(95),char(99),char(111),char(108),char(108),char(105),char(115),char(105),char(111),char(110),
+char(83),char(104),char(97),char(112),char(101),char(68),char(97),char(116),char(97),char(0),char(109),char(95),char(108),char(111),char(99),char(97),char(108),char(83),char(99),char(97),
+char(108),char(105),char(110),char(103),char(0),char(109),char(95),char(112),char(108),char(97),char(110),char(101),char(78),char(111),char(114),char(109),char(97),char(108),char(0),char(109),
+char(95),char(112),char(108),char(97),char(110),char(101),char(67),char(111),char(110),char(115),char(116),char(97),char(110),char(116),char(0),char(109),char(95),char(105),char(109),char(112),
+char(108),char(105),char(99),char(105),char(116),char(83),char(104),char(97),char(112),char(101),char(68),char(105),char(109),char(101),char(110),char(115),char(105),char(111),char(110),char(115),
+char(0),char(109),char(95),char(99),char(111),char(108),char(108),char(105),char(115),char(105),char(111),char(110),char(77),char(97),char(114),char(103),char(105),char(110),char(0),char(109),
+char(95),char(112),char(97),char(100),char(100),char(105),char(110),char(103),char(0),char(109),char(95),char(112),char(111),char(115),char(0),char(109),char(95),char(114),char(97),char(100),
+char(105),char(117),char(115),char(0),char(109),char(95),char(99),char(111),char(110),char(118),char(101),char(120),char(73),char(110),char(116),char(101),char(114),char(110),char(97),char(108),
+char(83),char(104),char(97),char(112),char(101),char(68),char(97),char(116),char(97),char(0),char(42),char(109),char(95),char(108),char(111),char(99),char(97),char(108),char(80),char(111),
+char(115),char(105),char(116),char(105),char(111),char(110),char(65),char(114),char(114),char(97),char(121),char(80),char(116),char(114),char(0),char(109),char(95),char(108),char(111),char(99),
+char(97),char(108),char(80),char(111),char(115),char(105),char(116),char(105),char(111),char(110),char(65),char(114),char(114),char(97),char(121),char(83),char(105),char(122),char(101),char(0),
+char(109),char(95),char(118),char(97),char(108),char(117),char(101),char(0),char(109),char(95),char(112),char(97),char(100),char(91),char(50),char(93),char(0),char(109),char(95),char(118),
+char(97),char(108),char(117),char(101),char(115),char(91),char(51),char(93),char(0),char(109),char(95),char(112),char(97),char(100),char(0),char(42),char(109),char(95),char(118),char(101),
+char(114),char(116),char(105),char(99),char(101),char(115),char(51),char(102),char(0),char(42),char(109),char(95),char(118),char(101),char(114),char(116),char(105),char(99),char(101),char(115),
+char(51),char(100),char(0),char(42),char(109),char(95),char(105),char(110),char(100),char(105),char(99),char(101),char(115),char(51),char(50),char(0),char(42),char(109),char(95),char(51),
+char(105),char(110),char(100),char(105),char(99),char(101),char(115),char(49),char(54),char(0),char(42),char(109),char(95),char(51),char(105),char(110),char(100),char(105),char(99),char(101),
+char(115),char(56),char(0),char(42),char(109),char(95),char(105),char(110),char(100),char(105),char(99),char(101),char(115),char(49),char(54),char(0),char(109),char(95),char(110),char(117),
+char(109),char(84),char(114),char(105),char(97),char(110),char(103),char(108),char(101),char(115),char(0),char(109),char(95),char(110),char(117),char(109),char(86),char(101),char(114),char(116),
+char(105),char(99),char(101),char(115),char(0),char(42),char(109),char(95),char(109),char(101),char(115),char(104),char(80),char(97),char(114),char(116),char(115),char(80),char(116),char(114),
+char(0),char(109),char(95),char(115),char(99),char(97),char(108),char(105),char(110),char(103),char(0),char(109),char(95),char(110),char(117),char(109),char(77),char(101),char(115),char(104),
+char(80),char(97),char(114),char(116),char(115),char(0),char(109),char(95),char(109),char(101),char(115),char(104),char(73),char(110),char(116),char(101),char(114),char(102),char(97),char(99),
+char(101),char(0),char(42),char(109),char(95),char(113),char(117),char(97),char(110),char(116),char(105),char(122),char(101),char(100),char(70),char(108),char(111),char(97),char(116),char(66),
+char(118),char(104),char(0),char(42),char(109),char(95),char(113),char(117),char(97),char(110),char(116),char(105),char(122),char(101),char(100),char(68),char(111),char(117),char(98),char(108),
+char(101),char(66),char(118),char(104),char(0),char(42),char(109),char(95),char(116),char(114),char(105),char(97),char(110),char(103),char(108),char(101),char(73),char(110),char(102),char(111),
+char(77),char(97),char(112),char(0),char(109),char(95),char(112),char(97),char(100),char(51),char(91),char(52),char(93),char(0),char(109),char(95),char(116),char(114),char(105),char(109),
+char(101),char(115),char(104),char(83),char(104),char(97),char(112),char(101),char(68),char(97),char(116),char(97),char(0),char(109),char(95),char(116),char(114),char(97),char(110),char(115),
+char(102),char(111),char(114),char(109),char(0),char(42),char(109),char(95),char(99),char(104),char(105),char(108),char(100),char(83),char(104),char(97),char(112),char(101),char(0),char(109),
+char(95),char(99),char(104),char(105),char(108),char(100),char(83),char(104),char(97),char(112),char(101),char(84),char(121),char(112),char(101),char(0),char(109),char(95),char(99),char(104),
+char(105),char(108),char(100),char(77),char(97),char(114),char(103),char(105),char(110),char(0),char(42),char(109),char(95),char(99),char(104),char(105),char(108),char(100),char(83),char(104),
+char(97),char(112),char(101),char(80),char(116),char(114),char(0),char(109),char(95),char(110),char(117),char(109),char(67),char(104),char(105),char(108),char(100),char(83),char(104),char(97),
+char(112),char(101),char(115),char(0),char(109),char(95),char(117),char(112),char(65),char(120),char(105),char(115),char(0),char(109),char(95),char(102),char(108),char(97),char(103),char(115),
+char(0),char(109),char(95),char(101),char(100),char(103),char(101),char(86),char(48),char(86),char(49),char(65),char(110),char(103),char(108),char(101),char(0),char(109),char(95),char(101),
+char(100),char(103),char(101),char(86),char(49),char(86),char(50),char(65),char(110),char(103),char(108),char(101),char(0),char(109),char(95),char(101),char(100),char(103),char(101),char(86),
+char(50),char(86),char(48),char(65),char(110),char(103),char(108),char(101),char(0),char(42),char(109),char(95),char(104),char(97),char(115),char(104),char(84),char(97),char(98),char(108),
+char(101),char(80),char(116),char(114),char(0),char(42),char(109),char(95),char(110),char(101),char(120),char(116),char(80),char(116),char(114),char(0),char(42),char(109),char(95),char(118),
+char(97),char(108),char(117),char(101),char(65),char(114),char(114),char(97),char(121),char(80),char(116),char(114),char(0),char(42),char(109),char(95),char(107),char(101),char(121),char(65),
+char(114),char(114),char(97),char(121),char(80),char(116),char(114),char(0),char(109),char(95),char(99),char(111),char(110),char(118),char(101),char(120),char(69),char(112),char(115),char(105),
+char(108),char(111),char(110),char(0),char(109),char(95),char(112),char(108),char(97),char(110),char(97),char(114),char(69),char(112),char(115),char(105),char(108),char(111),char(110),char(0),
+char(109),char(95),char(101),char(113),char(117),char(97),char(108),char(86),char(101),char(114),char(116),char(101),char(120),char(84),char(104),char(114),char(101),char(115),char(104),char(111),
+char(108),char(100),char(0),char(109),char(95),char(101),char(100),char(103),char(101),char(68),char(105),char(115),char(116),char(97),char(110),char(99),char(101),char(84),char(104),char(114),
+char(101),char(115),char(104),char(111),char(108),char(100),char(0),char(109),char(95),char(122),char(101),char(114),char(111),char(65),char(114),char(101),char(97),char(84),char(104),char(114),
+char(101),char(115),char(104),char(111),char(108),char(100),char(0),char(109),char(95),char(110),char(101),char(120),char(116),char(83),char(105),char(122),char(101),char(0),char(109),char(95),
+char(104),char(97),char(115),char(104),char(84),char(97),char(98),char(108),char(101),char(83),char(105),char(122),char(101),char(0),char(109),char(95),char(110),char(117),char(109),char(86),
+char(97),char(108),char(117),char(101),char(115),char(0),char(109),char(95),char(110),char(117),char(109),char(75),char(101),char(121),char(115),char(0),char(109),char(95),char(103),char(105),
+char(109),char(112),char(97),char(99),char(116),char(83),char(117),char(98),char(84),char(121),char(112),char(101),char(0),char(42),char(109),char(95),char(117),char(110),char(115),char(99),
+char(97),char(108),char(101),char(100),char(80),char(111),char(105),char(110),char(116),char(115),char(70),char(108),char(111),char(97),char(116),char(80),char(116),char(114),char(0),char(42),
+char(109),char(95),char(117),char(110),char(115),char(99),char(97),char(108),char(101),char(100),char(80),char(111),char(105),char(110),char(116),char(115),char(68),char(111),char(117),char(98),
+char(108),char(101),char(80),char(116),char(114),char(0),char(109),char(95),char(110),char(117),char(109),char(85),char(110),char(115),char(99),char(97),char(108),char(101),char(100),char(80),
+char(111),char(105),char(110),char(116),char(115),char(0),char(109),char(95),char(112),char(97),char(100),char(100),char(105),char(110),char(103),char(51),char(91),char(52),char(93),char(0),
+char(42),char(109),char(95),char(98),char(114),char(111),char(97),char(100),char(112),char(104),char(97),char(115),char(101),char(72),char(97),char(110),char(100),char(108),char(101),char(0),
+char(42),char(109),char(95),char(99),char(111),char(108),char(108),char(105),char(115),char(105),char(111),char(110),char(83),char(104),char(97),char(112),char(101),char(0),char(42),char(109),
+char(95),char(114),char(111),char(111),char(116),char(67),char(111),char(108),char(108),char(105),char(115),char(105),char(111),char(110),char(83),char(104),char(97),char(112),char(101),char(0),
+char(109),char(95),char(119),char(111),char(114),char(108),char(100),char(84),char(114),char(97),char(110),char(115),char(102),char(111),char(114),char(109),char(0),char(109),char(95),char(105),
+char(110),char(116),char(101),char(114),char(112),char(111),char(108),char(97),char(116),char(105),char(111),char(110),char(87),char(111),char(114),char(108),char(100),char(84),char(114),char(97),
+char(110),char(115),char(102),char(111),char(114),char(109),char(0),char(109),char(95),char(105),char(110),char(116),char(101),char(114),char(112),char(111),char(108),char(97),char(116),char(105),
+char(111),char(110),char(76),char(105),char(110),char(101),char(97),char(114),char(86),char(101),char(108),char(111),char(99),char(105),char(116),char(121),char(0),char(109),char(95),char(105),
+char(110),char(116),char(101),char(114),char(112),char(111),char(108),char(97),char(116),char(105),char(111),char(110),char(65),char(110),char(103),char(117),char(108),char(97),char(114),char(86),
+char(101),char(108),char(111),char(99),char(105),char(116),char(121),char(0),char(109),char(95),char(97),char(110),char(105),char(115),char(111),char(116),char(114),char(111),char(112),char(105),
+char(99),char(70),char(114),char(105),char(99),char(116),char(105),char(111),char(110),char(0),char(109),char(95),char(99),char(111),char(110),char(116),char(97),char(99),char(116),char(80),
+char(114),char(111),char(99),char(101),char(115),char(115),char(105),char(110),char(103),char(84),char(104),char(114),char(101),char(115),char(104),char(111),char(108),char(100),char(0),char(109),
+char(95),char(100),char(101),char(97),char(99),char(116),char(105),char(118),char(97),char(116),char(105),char(111),char(110),char(84),char(105),char(109),char(101),char(0),char(109),char(95),
+char(102),char(114),char(105),char(99),char(116),char(105),char(111),char(110),char(0),char(109),char(95),char(114),char(111),char(108),char(108),char(105),char(110),char(103),char(70),char(114),
+char(105),char(99),char(116),char(105),char(111),char(110),char(0),char(109),char(95),char(114),char(101),char(115),char(116),char(105),char(116),char(117),char(116),char(105),char(111),char(110),
+char(0),char(109),char(95),char(104),char(105),char(116),char(70),char(114),char(97),char(99),char(116),char(105),char(111),char(110),char(0),char(109),char(95),char(99),char(99),char(100),
+char(83),char(119),char(101),char(112),char(116),char(83),char(112),char(104),char(101),char(114),char(101),char(82),char(97),char(100),char(105),char(117),char(115),char(0),char(109),char(95),
+char(99),char(99),char(100),char(77),char(111),char(116),char(105),char(111),char(110),char(84),char(104),char(114),char(101),char(115),char(104),char(111),char(108),char(100),char(0),char(109),
+char(95),char(104),char(97),char(115),char(65),char(110),char(105),char(115),char(111),char(116),char(114),char(111),char(112),char(105),char(99),char(70),char(114),char(105),char(99),char(116),
+char(105),char(111),char(110),char(0),char(109),char(95),char(99),char(111),char(108),char(108),char(105),char(115),char(105),char(111),char(110),char(70),char(108),char(97),char(103),char(115),
+char(0),char(109),char(95),char(105),char(115),char(108),char(97),char(110),char(100),char(84),char(97),char(103),char(49),char(0),char(109),char(95),char(99),char(111),char(109),char(112),
+char(97),char(110),char(105),char(111),char(110),char(73),char(100),char(0),char(109),char(95),char(97),char(99),char(116),char(105),char(118),char(97),char(116),char(105),char(111),char(110),
+char(83),char(116),char(97),char(116),char(101),char(49),char(0),char(109),char(95),char(105),char(110),char(116),char(101),char(114),char(110),char(97),char(108),char(84),char(121),char(112),
+char(101),char(0),char(109),char(95),char(99),char(104),char(101),char(99),char(107),char(67),char(111),char(108),char(108),char(105),char(100),char(101),char(87),char(105),char(116),char(104),
+char(0),char(109),char(95),char(115),char(111),char(108),char(118),char(101),char(114),char(73),char(110),char(102),char(111),char(0),char(109),char(95),char(103),char(114),char(97),char(118),
+char(105),char(116),char(121),char(0),char(109),char(95),char(99),char(111),char(108),char(108),char(105),char(115),char(105),char(111),char(110),char(79),char(98),char(106),char(101),char(99),
+char(116),char(68),char(97),char(116),char(97),char(0),char(109),char(95),char(105),char(110),char(118),char(73),char(110),char(101),char(114),char(116),char(105),char(97),char(84),char(101),
+char(110),char(115),char(111),char(114),char(87),char(111),char(114),char(108),char(100),char(0),char(109),char(95),char(108),char(105),char(110),char(101),char(97),char(114),char(86),char(101),
+char(108),char(111),char(99),char(105),char(116),char(121),char(0),char(109),char(95),char(97),char(110),char(103),char(117),char(108),char(97),char(114),char(86),char(101),char(108),char(111),
+char(99),char(105),char(116),char(121),char(0),char(109),char(95),char(97),char(110),char(103),char(117),char(108),char(97),char(114),char(70),char(97),char(99),char(116),char(111),char(114),
+char(0),char(109),char(95),char(108),char(105),char(110),char(101),char(97),char(114),char(70),char(97),char(99),char(116),char(111),char(114),char(0),char(109),char(95),char(103),char(114),
+char(97),char(118),char(105),char(116),char(121),char(95),char(97),char(99),char(99),char(101),char(108),char(101),char(114),char(97),char(116),char(105),char(111),char(110),char(0),char(109),
+char(95),char(105),char(110),char(118),char(73),char(110),char(101),char(114),char(116),char(105),char(97),char(76),char(111),char(99),char(97),char(108),char(0),char(109),char(95),char(116),
+char(111),char(116),char(97),char(108),char(70),char(111),char(114),char(99),char(101),char(0),char(109),char(95),char(116),char(111),char(116),char(97),char(108),char(84),char(111),char(114),
+char(113),char(117),char(101),char(0),char(109),char(95),char(105),char(110),char(118),char(101),char(114),char(115),char(101),char(77),char(97),char(115),char(115),char(0),char(109),char(95),
+char(108),char(105),char(110),char(101),char(97),char(114),char(68),char(97),char(109),char(112),char(105),char(110),char(103),char(0),char(109),char(95),char(97),char(110),char(103),char(117),
+char(108),char(97),char(114),char(68),char(97),char(109),char(112),char(105),char(110),char(103),char(0),char(109),char(95),char(97),char(100),char(100),char(105),char(116),char(105),char(111),
+char(110),char(97),char(108),char(68),char(97),char(109),char(112),char(105),char(110),char(103),char(70),char(97),char(99),char(116),char(111),char(114),char(0),char(109),char(95),char(97),
+char(100),char(100),char(105),char(116),char(105),char(111),char(110),char(97),char(108),char(76),char(105),char(110),char(101),char(97),char(114),char(68),char(97),char(109),char(112),char(105),
+char(110),char(103),char(84),char(104),char(114),char(101),char(115),char(104),char(111),char(108),char(100),char(83),char(113),char(114),char(0),char(109),char(95),char(97),char(100),char(100),
+char(105),char(116),char(105),char(111),char(110),char(97),char(108),char(65),char(110),char(103),char(117),char(108),char(97),char(114),char(68),char(97),char(109),char(112),char(105),char(110),
+char(103),char(84),char(104),char(114),char(101),char(115),char(104),char(111),char(108),char(100),char(83),char(113),char(114),char(0),char(109),char(95),char(97),char(100),char(100),char(105),
+char(116),char(105),char(111),char(110),char(97),char(108),char(65),char(110),char(103),char(117),char(108),char(97),char(114),char(68),char(97),char(109),char(112),char(105),char(110),char(103),
+char(70),char(97),char(99),char(116),char(111),char(114),char(0),char(109),char(95),char(108),char(105),char(110),char(101),char(97),char(114),char(83),char(108),char(101),char(101),char(112),
+char(105),char(110),char(103),char(84),char(104),char(114),char(101),char(115),char(104),char(111),char(108),char(100),char(0),char(109),char(95),char(97),char(110),char(103),char(117),char(108),
+char(97),char(114),char(83),char(108),char(101),char(101),char(112),char(105),char(110),char(103),char(84),char(104),char(114),char(101),char(115),char(104),char(111),char(108),char(100),char(0),
+char(109),char(95),char(97),char(100),char(100),char(105),char(116),char(105),char(111),char(110),char(97),char(108),char(68),char(97),char(109),char(112),char(105),char(110),char(103),char(0),
+char(109),char(95),char(110),char(117),char(109),char(67),char(111),char(110),char(115),char(116),char(114),char(97),char(105),char(110),char(116),char(82),char(111),char(119),char(115),char(0),
+char(110),char(117),char(98),char(0),char(42),char(109),char(95),char(114),char(98),char(65),char(0),char(42),char(109),char(95),char(114),char(98),char(66),char(0),char(109),char(95),
+char(111),char(98),char(106),char(101),char(99),char(116),char(84),char(121),char(112),char(101),char(0),char(109),char(95),char(117),char(115),char(101),char(114),char(67),char(111),char(110),
+char(115),char(116),char(114),char(97),char(105),char(110),char(116),char(84),char(121),char(112),char(101),char(0),char(109),char(95),char(117),char(115),char(101),char(114),char(67),char(111),
+char(110),char(115),char(116),char(114),char(97),char(105),char(110),char(116),char(73),char(100),char(0),char(109),char(95),char(110),char(101),char(101),char(100),char(115),char(70),char(101),
+char(101),char(100),char(98),char(97),char(99),char(107),char(0),char(109),char(95),char(97),char(112),char(112),char(108),char(105),char(101),char(100),char(73),char(109),char(112),char(117),
+char(108),char(115),char(101),char(0),char(109),char(95),char(100),char(98),char(103),char(68),char(114),char(97),char(119),char(83),char(105),char(122),char(101),char(0),char(109),char(95),
+char(100),char(105),char(115),char(97),char(98),char(108),char(101),char(67),char(111),char(108),char(108),char(105),char(115),char(105),char(111),char(110),char(115),char(66),char(101),char(116),
+char(119),char(101),char(101),char(110),char(76),char(105),char(110),char(107),char(101),char(100),char(66),char(111),char(100),char(105),char(101),char(115),char(0),char(109),char(95),char(111),
+char(118),char(101),char(114),char(114),char(105),char(100),char(101),char(78),char(117),char(109),char(83),char(111),char(108),char(118),char(101),char(114),char(73),char(116),char(101),char(114),
+char(97),char(116),char(105),char(111),char(110),char(115),char(0),char(109),char(95),char(98),char(114),char(101),char(97),char(107),char(105),char(110),char(103),char(73),char(109),char(112),
+char(117),char(108),char(115),char(101),char(84),char(104),char(114),char(101),char(115),char(104),char(111),char(108),char(100),char(0),char(109),char(95),char(105),char(115),char(69),char(110),
+char(97),char(98),char(108),char(101),char(100),char(0),char(109),char(95),char(116),char(121),char(112),char(101),char(67),char(111),char(110),char(115),char(116),char(114),char(97),char(105),
+char(110),char(116),char(68),char(97),char(116),char(97),char(0),char(109),char(95),char(112),char(105),char(118),char(111),char(116),char(73),char(110),char(65),char(0),char(109),char(95),
+char(112),char(105),char(118),char(111),char(116),char(73),char(110),char(66),char(0),char(109),char(95),char(114),char(98),char(65),char(70),char(114),char(97),char(109),char(101),char(0),
+char(109),char(95),char(114),char(98),char(66),char(70),char(114),char(97),char(109),char(101),char(0),char(109),char(95),char(117),char(115),char(101),char(82),char(101),char(102),char(101),
+char(114),char(101),char(110),char(99),char(101),char(70),char(114),char(97),char(109),char(101),char(65),char(0),char(109),char(95),char(97),char(110),char(103),char(117),char(108),char(97),
+char(114),char(79),char(110),char(108),char(121),char(0),char(109),char(95),char(101),char(110),char(97),char(98),char(108),char(101),char(65),char(110),char(103),char(117),char(108),char(97),
+char(114),char(77),char(111),char(116),char(111),char(114),char(0),char(109),char(95),char(109),char(111),char(116),char(111),char(114),char(84),char(97),char(114),char(103),char(101),char(116),
+char(86),char(101),char(108),char(111),char(99),char(105),char(116),char(121),char(0),char(109),char(95),char(109),char(97),char(120),char(77),char(111),char(116),char(111),char(114),char(73),
+char(109),char(112),char(117),char(108),char(115),char(101),char(0),char(109),char(95),char(108),char(111),char(119),char(101),char(114),char(76),char(105),char(109),char(105),char(116),char(0),
+char(109),char(95),char(117),char(112),char(112),char(101),char(114),char(76),char(105),char(109),char(105),char(116),char(0),char(109),char(95),char(108),char(105),char(109),char(105),char(116),
+char(83),char(111),char(102),char(116),char(110),char(101),char(115),char(115),char(0),char(109),char(95),char(98),char(105),char(97),char(115),char(70),char(97),char(99),char(116),char(111),
+char(114),char(0),char(109),char(95),char(114),char(101),char(108),char(97),char(120),char(97),char(116),char(105),char(111),char(110),char(70),char(97),char(99),char(116),char(111),char(114),
+char(0),char(109),char(95),char(115),char(119),char(105),char(110),char(103),char(83),char(112),char(97),char(110),char(49),char(0),char(109),char(95),char(115),char(119),char(105),char(110),
+char(103),char(83),char(112),char(97),char(110),char(50),char(0),char(109),char(95),char(116),char(119),char(105),char(115),char(116),char(83),char(112),char(97),char(110),char(0),char(109),
+char(95),char(100),char(97),char(109),char(112),char(105),char(110),char(103),char(0),char(109),char(95),char(108),char(105),char(110),char(101),char(97),char(114),char(85),char(112),char(112),
+char(101),char(114),char(76),char(105),char(109),char(105),char(116),char(0),char(109),char(95),char(108),char(105),char(110),char(101),char(97),char(114),char(76),char(111),char(119),char(101),
+char(114),char(76),char(105),char(109),char(105),char(116),char(0),char(109),char(95),char(97),char(110),char(103),char(117),char(108),char(97),char(114),char(85),char(112),char(112),char(101),
+char(114),char(76),char(105),char(109),char(105),char(116),char(0),char(109),char(95),char(97),char(110),char(103),char(117),char(108),char(97),char(114),char(76),char(111),char(119),char(101),
+char(114),char(76),char(105),char(109),char(105),char(116),char(0),char(109),char(95),char(117),char(115),char(101),char(76),char(105),char(110),char(101),char(97),char(114),char(82),char(101),
+char(102),char(101),char(114),char(101),char(110),char(99),char(101),char(70),char(114),char(97),char(109),char(101),char(65),char(0),char(109),char(95),char(117),char(115),char(101),char(79),
+char(102),char(102),char(115),char(101),char(116),char(70),char(111),char(114),char(67),char(111),char(110),char(115),char(116),char(114),char(97),char(105),char(110),char(116),char(70),char(114),
+char(97),char(109),char(101),char(0),char(109),char(95),char(54),char(100),char(111),char(102),char(68),char(97),char(116),char(97),char(0),char(109),char(95),char(115),char(112),char(114),
+char(105),char(110),char(103),char(69),char(110),char(97),char(98),char(108),char(101),char(100),char(91),char(54),char(93),char(0),char(109),char(95),char(101),char(113),char(117),char(105),
+char(108),char(105),char(98),char(114),char(105),char(117),char(109),char(80),char(111),char(105),char(110),char(116),char(91),char(54),char(93),char(0),char(109),char(95),char(115),char(112),
+char(114),char(105),char(110),char(103),char(83),char(116),char(105),char(102),char(102),char(110),char(101),char(115),char(115),char(91),char(54),char(93),char(0),char(109),char(95),char(115),
+char(112),char(114),char(105),char(110),char(103),char(68),char(97),char(109),char(112),char(105),char(110),char(103),char(91),char(54),char(93),char(0),char(109),char(95),char(116),char(97),
+char(117),char(0),char(109),char(95),char(116),char(105),char(109),char(101),char(83),char(116),char(101),char(112),char(0),char(109),char(95),char(109),char(97),char(120),char(69),char(114),
+char(114),char(111),char(114),char(82),char(101),char(100),char(117),char(99),char(116),char(105),char(111),char(110),char(0),char(109),char(95),char(115),char(111),char(114),char(0),char(109),
+char(95),char(101),char(114),char(112),char(0),char(109),char(95),char(101),char(114),char(112),char(50),char(0),char(109),char(95),char(103),char(108),char(111),char(98),char(97),char(108),
+char(67),char(102),char(109),char(0),char(109),char(95),char(115),char(112),char(108),char(105),char(116),char(73),char(109),char(112),char(117),char(108),char(115),char(101),char(80),char(101),
+char(110),char(101),char(116),char(114),char(97),char(116),char(105),char(111),char(110),char(84),char(104),char(114),char(101),char(115),char(104),char(111),char(108),char(100),char(0),char(109),
+char(95),char(115),char(112),char(108),char(105),char(116),char(73),char(109),char(112),char(117),char(108),char(115),char(101),char(84),char(117),char(114),char(110),char(69),char(114),char(112),
+char(0),char(109),char(95),char(108),char(105),char(110),char(101),char(97),char(114),char(83),char(108),char(111),char(112),char(0),char(109),char(95),char(119),char(97),char(114),char(109),
+char(115),char(116),char(97),char(114),char(116),char(105),char(110),char(103),char(70),char(97),char(99),char(116),char(111),char(114),char(0),char(109),char(95),char(109),char(97),char(120),
+char(71),char(121),char(114),char(111),char(115),char(99),char(111),char(112),char(105),char(99),char(70),char(111),char(114),char(99),char(101),char(0),char(109),char(95),char(115),char(105),
+char(110),char(103),char(108),char(101),char(65),char(120),char(105),char(115),char(82),char(111),char(108),char(108),char(105),char(110),char(103),char(70),char(114),char(105),char(99),char(116),
+char(105),char(111),char(110),char(84),char(104),char(114),char(101),char(115),char(104),char(111),char(108),char(100),char(0),char(109),char(95),char(110),char(117),char(109),char(73),char(116),
+char(101),char(114),char(97),char(116),char(105),char(111),char(110),char(115),char(0),char(109),char(95),char(115),char(111),char(108),char(118),char(101),char(114),char(77),char(111),char(100),
+char(101),char(0),char(109),char(95),char(114),char(101),char(115),char(116),char(105),char(110),char(103),char(67),char(111),char(110),char(116),char(97),char(99),char(116),char(82),char(101),
+char(115),char(116),char(105),char(116),char(117),char(116),char(105),char(111),char(110),char(84),char(104),char(114),char(101),char(115),char(104),char(111),char(108),char(100),char(0),char(109),
+char(95),char(109),char(105),char(110),char(105),char(109),char(117),char(109),char(83),char(111),char(108),char(118),char(101),char(114),char(66),char(97),char(116),char(99),char(104),char(83),
+char(105),char(122),char(101),char(0),char(109),char(95),char(115),char(112),char(108),char(105),char(116),char(73),char(109),char(112),char(117),char(108),char(115),char(101),char(0),char(109),
+char(95),char(108),char(105),char(110),char(101),char(97),char(114),char(83),char(116),char(105),char(102),char(102),char(110),char(101),char(115),char(115),char(0),char(109),char(95),char(97),
+char(110),char(103),char(117),char(108),char(97),char(114),char(83),char(116),char(105),char(102),char(102),char(110),char(101),char(115),char(115),char(0),char(109),char(95),char(118),char(111),
+char(108),char(117),char(109),char(101),char(83),char(116),char(105),char(102),char(102),char(110),char(101),char(115),char(115),char(0),char(42),char(109),char(95),char(109),char(97),char(116),
+char(101),char(114),char(105),char(97),char(108),char(0),char(109),char(95),char(112),char(111),char(115),char(105),char(116),char(105),char(111),char(110),char(0),char(109),char(95),char(112),
+char(114),char(101),char(118),char(105),char(111),char(117),char(115),char(80),char(111),char(115),char(105),char(116),char(105),char(111),char(110),char(0),char(109),char(95),char(118),char(101),
+char(108),char(111),char(99),char(105),char(116),char(121),char(0),char(109),char(95),char(97),char(99),char(99),char(117),char(109),char(117),char(108),char(97),char(116),char(101),char(100),
+char(70),char(111),char(114),char(99),char(101),char(0),char(109),char(95),char(110),char(111),char(114),char(109),char(97),char(108),char(0),char(109),char(95),char(97),char(114),char(101),
+char(97),char(0),char(109),char(95),char(97),char(116),char(116),char(97),char(99),char(104),char(0),char(109),char(95),char(110),char(111),char(100),char(101),char(73),char(110),char(100),
+char(105),char(99),char(101),char(115),char(91),char(50),char(93),char(0),char(109),char(95),char(114),char(101),char(115),char(116),char(76),char(101),char(110),char(103),char(116),char(104),
+char(0),char(109),char(95),char(98),char(98),char(101),char(110),char(100),char(105),char(110),char(103),char(0),char(109),char(95),char(110),char(111),char(100),char(101),char(73),char(110),
+char(100),char(105),char(99),char(101),char(115),char(91),char(51),char(93),char(0),char(109),char(95),char(114),char(101),char(115),char(116),char(65),char(114),char(101),char(97),char(0),
+char(109),char(95),char(99),char(48),char(91),char(52),char(93),char(0),char(109),char(95),char(110),char(111),char(100),char(101),char(73),char(110),char(100),char(105),char(99),char(101),
+char(115),char(91),char(52),char(93),char(0),char(109),char(95),char(114),char(101),char(115),char(116),char(86),char(111),char(108),char(117),char(109),char(101),char(0),char(109),char(95),
+char(99),char(49),char(0),char(109),char(95),char(99),char(50),char(0),char(109),char(95),char(99),char(48),char(0),char(109),char(95),char(108),char(111),char(99),char(97),char(108),
+char(70),char(114),char(97),char(109),char(101),char(0),char(42),char(109),char(95),char(114),char(105),char(103),char(105),char(100),char(66),char(111),char(100),char(121),char(0),char(109),
+char(95),char(110),char(111),char(100),char(101),char(73),char(110),char(100),char(101),char(120),char(0),char(109),char(95),char(97),char(101),char(114),char(111),char(77),char(111),char(100),
+char(101),char(108),char(0),char(109),char(95),char(98),char(97),char(117),char(109),char(103),char(97),char(114),char(116),char(101),char(0),char(109),char(95),char(100),char(114),char(97),
+char(103),char(0),char(109),char(95),char(108),char(105),char(102),char(116),char(0),char(109),char(95),char(112),char(114),char(101),char(115),char(115),char(117),char(114),char(101),char(0),
+char(109),char(95),char(118),char(111),char(108),char(117),char(109),char(101),char(0),char(109),char(95),char(100),char(121),char(110),char(97),char(109),char(105),char(99),char(70),char(114),
+char(105),char(99),char(116),char(105),char(111),char(110),char(0),char(109),char(95),char(112),char(111),char(115),char(101),char(77),char(97),char(116),char(99),char(104),char(0),char(109),
+char(95),char(114),char(105),char(103),char(105),char(100),char(67),char(111),char(110),char(116),char(97),char(99),char(116),char(72),char(97),char(114),char(100),char(110),char(101),char(115),
+char(115),char(0),char(109),char(95),char(107),char(105),char(110),char(101),char(116),char(105),char(99),char(67),char(111),char(110),char(116),char(97),char(99),char(116),char(72),char(97),
+char(114),char(100),char(110),char(101),char(115),char(115),char(0),char(109),char(95),char(115),char(111),char(102),char(116),char(67),char(111),char(110),char(116),char(97),char(99),char(116),
+char(72),char(97),char(114),char(100),char(110),char(101),char(115),char(115),char(0),char(109),char(95),char(97),char(110),char(99),char(104),char(111),char(114),char(72),char(97),char(114),
+char(100),char(110),char(101),char(115),char(115),char(0),char(109),char(95),char(115),char(111),char(102),char(116),char(82),char(105),char(103),char(105),char(100),char(67),char(108),char(117),
+char(115),char(116),char(101),char(114),char(72),char(97),char(114),char(100),char(110),char(101),char(115),char(115),char(0),char(109),char(95),char(115),char(111),char(102),char(116),char(75),
+char(105),char(110),char(101),char(116),char(105),char(99),char(67),char(108),char(117),char(115),char(116),char(101),char(114),char(72),char(97),char(114),char(100),char(110),char(101),char(115),
+char(115),char(0),char(109),char(95),char(115),char(111),char(102),char(116),char(83),char(111),char(102),char(116),char(67),char(108),char(117),char(115),char(116),char(101),char(114),char(72),
+char(97),char(114),char(100),char(110),char(101),char(115),char(115),char(0),char(109),char(95),char(115),char(111),char(102),char(116),char(82),char(105),char(103),char(105),char(100),char(67),
+char(108),char(117),char(115),char(116),char(101),char(114),char(73),char(109),char(112),char(117),char(108),char(115),char(101),char(83),char(112),char(108),char(105),char(116),char(0),char(109),
+char(95),char(115),char(111),char(102),char(116),char(75),char(105),char(110),char(101),char(116),char(105),char(99),char(67),char(108),char(117),char(115),char(116),char(101),char(114),char(73),
+char(109),char(112),char(117),char(108),char(115),char(101),char(83),char(112),char(108),char(105),char(116),char(0),char(109),char(95),char(115),char(111),char(102),char(116),char(83),char(111),
+char(102),char(116),char(67),char(108),char(117),char(115),char(116),char(101),char(114),char(73),char(109),char(112),char(117),char(108),char(115),char(101),char(83),char(112),char(108),char(105),
+char(116),char(0),char(109),char(95),char(109),char(97),char(120),char(86),char(111),char(108),char(117),char(109),char(101),char(0),char(109),char(95),char(116),char(105),char(109),char(101),
+char(83),char(99),char(97),char(108),char(101),char(0),char(109),char(95),char(118),char(101),char(108),char(111),char(99),char(105),char(116),char(121),char(73),char(116),char(101),char(114),
+char(97),char(116),char(105),char(111),char(110),char(115),char(0),char(109),char(95),char(112),char(111),char(115),char(105),char(116),char(105),char(111),char(110),char(73),char(116),char(101),
+char(114),char(97),char(116),char(105),char(111),char(110),char(115),char(0),char(109),char(95),char(100),char(114),char(105),char(102),char(116),char(73),char(116),char(101),char(114),char(97),
+char(116),char(105),char(111),char(110),char(115),char(0),char(109),char(95),char(99),char(108),char(117),char(115),char(116),char(101),char(114),char(73),char(116),char(101),char(114),char(97),
+char(116),char(105),char(111),char(110),char(115),char(0),char(109),char(95),char(114),char(111),char(116),char(0),char(109),char(95),char(115),char(99),char(97),char(108),char(101),char(0),
+char(109),char(95),char(97),char(113),char(113),char(0),char(109),char(95),char(99),char(111),char(109),char(0),char(42),char(109),char(95),char(112),char(111),char(115),char(105),char(116),
+char(105),char(111),char(110),char(115),char(0),char(42),char(109),char(95),char(119),char(101),char(105),char(103),char(104),char(116),char(115),char(0),char(109),char(95),char(110),char(117),
+char(109),char(80),char(111),char(115),char(105),char(116),char(105),char(111),char(110),char(115),char(0),char(109),char(95),char(110),char(117),char(109),char(87),char(101),char(105),char(103),
+char(116),char(115),char(0),char(109),char(95),char(98),char(118),char(111),char(108),char(117),char(109),char(101),char(0),char(109),char(95),char(98),char(102),char(114),char(97),char(109),
+char(101),char(0),char(109),char(95),char(102),char(114),char(97),char(109),char(101),char(120),char(102),char(111),char(114),char(109),char(0),char(109),char(95),char(108),char(111),char(99),
+char(105),char(105),char(0),char(109),char(95),char(105),char(110),char(118),char(119),char(105),char(0),char(109),char(95),char(118),char(105),char(109),char(112),char(117),char(108),char(115),
+char(101),char(115),char(91),char(50),char(93),char(0),char(109),char(95),char(100),char(105),char(109),char(112),char(117),char(108),char(115),char(101),char(115),char(91),char(50),char(93),
+char(0),char(109),char(95),char(108),char(118),char(0),char(109),char(95),char(97),char(118),char(0),char(42),char(109),char(95),char(102),char(114),char(97),char(109),char(101),char(114),
+char(101),char(102),char(115),char(0),char(42),char(109),char(95),char(110),char(111),char(100),char(101),char(73),char(110),char(100),char(105),char(99),char(101),char(115),char(0),char(42),
+char(109),char(95),char(109),char(97),char(115),char(115),char(101),char(115),char(0),char(109),char(95),char(110),char(117),char(109),char(70),char(114),char(97),char(109),char(101),char(82),
+char(101),char(102),char(115),char(0),char(109),char(95),char(110),char(117),char(109),char(78),char(111),char(100),char(101),char(115),char(0),char(109),char(95),char(110),char(117),char(109),
+char(77),char(97),char(115),char(115),char(101),char(115),char(0),char(109),char(95),char(105),char(100),char(109),char(97),char(115),char(115),char(0),char(109),char(95),char(105),char(109),
+char(97),char(115),char(115),char(0),char(109),char(95),char(110),char(118),char(105),char(109),char(112),char(117),char(108),char(115),char(101),char(115),char(0),char(109),char(95),char(110),
+char(100),char(105),char(109),char(112),char(117),char(108),char(115),char(101),char(115),char(0),char(109),char(95),char(110),char(100),char(97),char(109),char(112),char(105),char(110),char(103),
+char(0),char(109),char(95),char(108),char(100),char(97),char(109),char(112),char(105),char(110),char(103),char(0),char(109),char(95),char(97),char(100),char(97),char(109),char(112),char(105),
+char(110),char(103),char(0),char(109),char(95),char(109),char(97),char(116),char(99),char(104),char(105),char(110),char(103),char(0),char(109),char(95),char(109),char(97),char(120),char(83),
+char(101),char(108),char(102),char(67),char(111),char(108),char(108),char(105),char(115),char(105),char(111),char(110),char(73),char(109),char(112),char(117),char(108),char(115),char(101),char(0),
+char(109),char(95),char(115),char(101),char(108),char(102),char(67),char(111),char(108),char(108),char(105),char(115),char(105),char(111),char(110),char(73),char(109),char(112),char(117),char(108),
+char(115),char(101),char(70),char(97),char(99),char(116),char(111),char(114),char(0),char(109),char(95),char(99),char(111),char(110),char(116),char(97),char(105),char(110),char(115),char(65),
+char(110),char(99),char(104),char(111),char(114),char(0),char(109),char(95),char(99),char(111),char(108),char(108),char(105),char(100),char(101),char(0),char(109),char(95),char(99),char(108),
+char(117),char(115),char(116),char(101),char(114),char(73),char(110),char(100),char(101),char(120),char(0),char(42),char(109),char(95),char(98),char(111),char(100),char(121),char(65),char(0),
+char(42),char(109),char(95),char(98),char(111),char(100),char(121),char(66),char(0),char(109),char(95),char(114),char(101),char(102),char(115),char(91),char(50),char(93),char(0),char(109),
+char(95),char(99),char(102),char(109),char(0),char(109),char(95),char(115),char(112),char(108),char(105),char(116),char(0),char(109),char(95),char(100),char(101),char(108),char(101),char(116),
+char(101),char(0),char(109),char(95),char(114),char(101),char(108),char(80),char(111),char(115),char(105),char(116),char(105),char(111),char(110),char(91),char(50),char(93),char(0),char(109),
+char(95),char(98),char(111),char(100),char(121),char(65),char(116),char(121),char(112),char(101),char(0),char(109),char(95),char(98),char(111),char(100),char(121),char(66),char(116),char(121),
+char(112),char(101),char(0),char(109),char(95),char(106),char(111),char(105),char(110),char(116),char(84),char(121),char(112),char(101),char(0),char(42),char(109),char(95),char(112),char(111),
+char(115),char(101),char(0),char(42),char(42),char(109),char(95),char(109),char(97),char(116),char(101),char(114),char(105),char(97),char(108),char(115),char(0),char(42),char(109),char(95),
+char(110),char(111),char(100),char(101),char(115),char(0),char(42),char(109),char(95),char(108),char(105),char(110),char(107),char(115),char(0),char(42),char(109),char(95),char(102),char(97),
+char(99),char(101),char(115),char(0),char(42),char(109),char(95),char(116),char(101),char(116),char(114),char(97),char(104),char(101),char(100),char(114),char(97),char(0),char(42),char(109),
+char(95),char(97),char(110),char(99),char(104),char(111),char(114),char(115),char(0),char(42),char(109),char(95),char(99),char(108),char(117),char(115),char(116),char(101),char(114),char(115),
+char(0),char(42),char(109),char(95),char(106),char(111),char(105),char(110),char(116),char(115),char(0),char(109),char(95),char(110),char(117),char(109),char(77),char(97),char(116),char(101),
+char(114),char(105),char(97),char(108),char(115),char(0),char(109),char(95),char(110),char(117),char(109),char(76),char(105),char(110),char(107),char(115),char(0),char(109),char(95),char(110),
+char(117),char(109),char(70),char(97),char(99),char(101),char(115),char(0),char(109),char(95),char(110),char(117),char(109),char(84),char(101),char(116),char(114),char(97),char(104),char(101),
+char(100),char(114),char(97),char(0),char(109),char(95),char(110),char(117),char(109),char(65),char(110),char(99),char(104),char(111),char(114),char(115),char(0),char(109),char(95),char(110),
+char(117),char(109),char(67),char(108),char(117),char(115),char(116),char(101),char(114),char(115),char(0),char(109),char(95),char(110),char(117),char(109),char(74),char(111),char(105),char(110),
+char(116),char(115),char(0),char(109),char(95),char(99),char(111),char(110),char(102),char(105),char(103),char(0),char(84),char(89),char(80),char(69),char(76),char(0),char(0),char(0),
+char(99),char(104),char(97),char(114),char(0),char(117),char(99),char(104),char(97),char(114),char(0),char(115),char(104),char(111),char(114),char(116),char(0),char(117),char(115),char(104),
+char(111),char(114),char(116),char(0),char(105),char(110),char(116),char(0),char(108),char(111),char(110),char(103),char(0),char(117),char(108),char(111),char(110),char(103),char(0),char(102),
+char(108),char(111),char(97),char(116),char(0),char(100),char(111),char(117),char(98),char(108),char(101),char(0),char(118),char(111),char(105),char(100),char(0),char(80),char(111),char(105),
+char(110),char(116),char(101),char(114),char(65),char(114),char(114),char(97),char(121),char(0),char(98),char(116),char(80),char(104),char(121),char(115),char(105),char(99),char(115),char(83),
+char(121),char(115),char(116),char(101),char(109),char(0),char(76),char(105),char(115),char(116),char(66),char(97),char(115),char(101),char(0),char(98),char(116),char(86),char(101),char(99),
+char(116),char(111),char(114),char(51),char(70),char(108),char(111),char(97),char(116),char(68),char(97),char(116),char(97),char(0),char(98),char(116),char(86),char(101),char(99),char(116),
+char(111),char(114),char(51),char(68),char(111),char(117),char(98),char(108),char(101),char(68),char(97),char(116),char(97),char(0),char(98),char(116),char(77),char(97),char(116),char(114),
+char(105),char(120),char(51),char(120),char(51),char(70),char(108),char(111),char(97),char(116),char(68),char(97),char(116),char(97),char(0),char(98),char(116),char(77),char(97),char(116),
+char(114),char(105),char(120),char(51),char(120),char(51),char(68),char(111),char(117),char(98),char(108),char(101),char(68),char(97),char(116),char(97),char(0),char(98),char(116),char(84),
+char(114),char(97),char(110),char(115),char(102),char(111),char(114),char(109),char(70),char(108),char(111),char(97),char(116),char(68),char(97),char(116),char(97),char(0),char(98),char(116),
+char(84),char(114),char(97),char(110),char(115),char(102),char(111),char(114),char(109),char(68),char(111),char(117),char(98),char(108),char(101),char(68),char(97),char(116),char(97),char(0),
+char(98),char(116),char(66),char(118),char(104),char(83),char(117),char(98),char(116),char(114),char(101),char(101),char(73),char(110),char(102),char(111),char(68),char(97),char(116),char(97),
+char(0),char(98),char(116),char(79),char(112),char(116),char(105),char(109),char(105),char(122),char(101),char(100),char(66),char(118),char(104),char(78),char(111),char(100),char(101),char(70),
+char(108),char(111),char(97),char(116),char(68),char(97),char(116),char(97),char(0),char(98),char(116),char(79),char(112),char(116),char(105),char(109),char(105),char(122),char(101),char(100),
+char(66),char(118),char(104),char(78),char(111),char(100),char(101),char(68),char(111),char(117),char(98),char(108),char(101),char(68),char(97),char(116),char(97),char(0),char(98),char(116),
+char(81),char(117),char(97),char(110),char(116),char(105),char(122),char(101),char(100),char(66),char(118),char(104),char(78),char(111),char(100),char(101),char(68),char(97),char(116),char(97),
+char(0),char(98),char(116),char(81),char(117),char(97),char(110),char(116),char(105),char(122),char(101),char(100),char(66),char(118),char(104),char(70),char(108),char(111),char(97),char(116),
+char(68),char(97),char(116),char(97),char(0),char(98),char(116),char(81),char(117),char(97),char(110),char(116),char(105),char(122),char(101),char(100),char(66),char(118),char(104),char(68),
+char(111),char(117),char(98),char(108),char(101),char(68),char(97),char(116),char(97),char(0),char(98),char(116),char(67),char(111),char(108),char(108),char(105),char(115),char(105),char(111),
+char(110),char(83),char(104),char(97),char(112),char(101),char(68),char(97),char(116),char(97),char(0),char(98),char(116),char(83),char(116),char(97),char(116),char(105),char(99),char(80),
+char(108),char(97),char(110),char(101),char(83),char(104),char(97),char(112),char(101),char(68),char(97),char(116),char(97),char(0),char(98),char(116),char(67),char(111),char(110),char(118),
+char(101),char(120),char(73),char(110),char(116),char(101),char(114),char(110),char(97),char(108),char(83),char(104),char(97),char(112),char(101),char(68),char(97),char(116),char(97),char(0),
+char(98),char(116),char(80),char(111),char(115),char(105),char(116),char(105),char(111),char(110),char(65),char(110),char(100),char(82),char(97),char(100),char(105),char(117),char(115),char(0),
+char(98),char(116),char(77),char(117),char(108),char(116),char(105),char(83),char(112),char(104),char(101),char(114),char(101),char(83),char(104),char(97),char(112),char(101),char(68),char(97),
+char(116),char(97),char(0),char(98),char(116),char(73),char(110),char(116),char(73),char(110),char(100),char(101),char(120),char(68),char(97),char(116),char(97),char(0),char(98),char(116),
+char(83),char(104),char(111),char(114),char(116),char(73),char(110),char(116),char(73),char(110),char(100),char(101),char(120),char(68),char(97),char(116),char(97),char(0),char(98),char(116),
+char(83),char(104),char(111),char(114),char(116),char(73),char(110),char(116),char(73),char(110),char(100),char(101),char(120),char(84),char(114),char(105),char(112),char(108),char(101),char(116),
+char(68),char(97),char(116),char(97),char(0),char(98),char(116),char(67),char(104),char(97),char(114),char(73),char(110),char(100),char(101),char(120),char(84),char(114),char(105),char(112),
+char(108),char(101),char(116),char(68),char(97),char(116),char(97),char(0),char(98),char(116),char(77),char(101),char(115),char(104),char(80),char(97),char(114),char(116),char(68),char(97),
+char(116),char(97),char(0),char(98),char(116),char(83),char(116),char(114),char(105),char(100),char(105),char(110),char(103),char(77),char(101),char(115),char(104),char(73),char(110),char(116),
+char(101),char(114),char(102),char(97),char(99),char(101),char(68),char(97),char(116),char(97),char(0),char(98),char(116),char(84),char(114),char(105),char(97),char(110),char(103),char(108),
+char(101),char(77),char(101),char(115),char(104),char(83),char(104),char(97),char(112),char(101),char(68),char(97),char(116),char(97),char(0),char(98),char(116),char(84),char(114),char(105),
+char(97),char(110),char(103),char(108),char(101),char(73),char(110),char(102),char(111),char(77),char(97),char(112),char(68),char(97),char(116),char(97),char(0),char(98),char(116),char(83),
+char(99),char(97),char(108),char(101),char(100),char(84),char(114),char(105),char(97),char(110),char(103),char(108),char(101),char(77),char(101),char(115),char(104),char(83),char(104),char(97),
+char(112),char(101),char(68),char(97),char(116),char(97),char(0),char(98),char(116),char(67),char(111),char(109),char(112),char(111),char(117),char(110),char(100),char(83),char(104),char(97),
+char(112),char(101),char(67),char(104),char(105),char(108),char(100),char(68),char(97),char(116),char(97),char(0),char(98),char(116),char(67),char(111),char(109),char(112),char(111),char(117),
+char(110),char(100),char(83),char(104),char(97),char(112),char(101),char(68),char(97),char(116),char(97),char(0),char(98),char(116),char(67),char(121),char(108),char(105),char(110),char(100),
+char(101),char(114),char(83),char(104),char(97),char(112),char(101),char(68),char(97),char(116),char(97),char(0),char(98),char(116),char(67),char(97),char(112),char(115),char(117),char(108),
+char(101),char(83),char(104),char(97),char(112),char(101),char(68),char(97),char(116),char(97),char(0),char(98),char(116),char(84),char(114),char(105),char(97),char(110),char(103),char(108),
+char(101),char(73),char(110),char(102),char(111),char(68),char(97),char(116),char(97),char(0),char(98),char(116),char(71),char(73),char(109),char(112),char(97),char(99),char(116),char(77),
+char(101),char(115),char(104),char(83),char(104),char(97),char(112),char(101),char(68),char(97),char(116),char(97),char(0),char(98),char(116),char(67),char(111),char(110),char(118),char(101),
+char(120),char(72),char(117),char(108),char(108),char(83),char(104),char(97),char(112),char(101),char(68),char(97),char(116),char(97),char(0),char(98),char(116),char(67),char(111),char(108),
+char(108),char(105),char(115),char(105),char(111),char(110),char(79),char(98),char(106),char(101),char(99),char(116),char(68),char(111),char(117),char(98),char(108),char(101),char(68),char(97),
+char(116),char(97),char(0),char(98),char(116),char(67),char(111),char(108),char(108),char(105),char(115),char(105),char(111),char(110),char(79),char(98),char(106),char(101),char(99),char(116),
+char(70),char(108),char(111),char(97),char(116),char(68),char(97),char(116),char(97),char(0),char(98),char(116),char(68),char(121),char(110),char(97),char(109),char(105),char(99),char(115),
+char(87),char(111),char(114),char(108),char(100),char(68),char(111),char(117),char(98),char(108),char(101),char(68),char(97),char(116),char(97),char(0),char(98),char(116),char(67),char(111),
+char(110),char(116),char(97),char(99),char(116),char(83),char(111),char(108),char(118),char(101),char(114),char(73),char(110),char(102),char(111),char(68),char(111),char(117),char(98),char(108),
+char(101),char(68),char(97),char(116),char(97),char(0),char(98),char(116),char(68),char(121),char(110),char(97),char(109),char(105),char(99),char(115),char(87),char(111),char(114),char(108),
+char(100),char(70),char(108),char(111),char(97),char(116),char(68),char(97),char(116),char(97),char(0),char(98),char(116),char(67),char(111),char(110),char(116),char(97),char(99),char(116),
+char(83),char(111),char(108),char(118),char(101),char(114),char(73),char(110),char(102),char(111),char(70),char(108),char(111),char(97),char(116),char(68),char(97),char(116),char(97),char(0),
+char(98),char(116),char(82),char(105),char(103),char(105),char(100),char(66),char(111),char(100),char(121),char(70),char(108),char(111),char(97),char(116),char(68),char(97),char(116),char(97),
+char(0),char(98),char(116),char(82),char(105),char(103),char(105),char(100),char(66),char(111),char(100),char(121),char(68),char(111),char(117),char(98),char(108),char(101),char(68),char(97),
+char(116),char(97),char(0),char(98),char(116),char(67),char(111),char(110),char(115),char(116),char(114),char(97),char(105),char(110),char(116),char(73),char(110),char(102),char(111),char(49),
+char(0),char(98),char(116),char(84),char(121),char(112),char(101),char(100),char(67),char(111),char(110),char(115),char(116),char(114),char(97),char(105),char(110),char(116),char(68),char(97),
+char(116),char(97),char(0),char(98),char(116),char(82),char(105),char(103),char(105),char(100),char(66),char(111),char(100),char(121),char(68),char(97),char(116),char(97),char(0),char(98),
+char(116),char(80),char(111),char(105),char(110),char(116),char(50),char(80),char(111),char(105),char(110),char(116),char(67),char(111),char(110),char(115),char(116),char(114),char(97),char(105),
+char(110),char(116),char(70),char(108),char(111),char(97),char(116),char(68),char(97),char(116),char(97),char(0),char(98),char(116),char(80),char(111),char(105),char(110),char(116),char(50),
+char(80),char(111),char(105),char(110),char(116),char(67),char(111),char(110),char(115),char(116),char(114),char(97),char(105),char(110),char(116),char(68),char(111),char(117),char(98),char(108),
+char(101),char(68),char(97),char(116),char(97),char(0),char(98),char(116),char(72),char(105),char(110),char(103),char(101),char(67),char(111),char(110),char(115),char(116),char(114),char(97),
+char(105),char(110),char(116),char(68),char(111),char(117),char(98),char(108),char(101),char(68),char(97),char(116),char(97),char(0),char(98),char(116),char(72),char(105),char(110),char(103),
+char(101),char(67),char(111),char(110),char(115),char(116),char(114),char(97),char(105),char(110),char(116),char(70),char(108),char(111),char(97),char(116),char(68),char(97),char(116),char(97),
+char(0),char(98),char(116),char(67),char(111),char(110),char(101),char(84),char(119),char(105),char(115),char(116),char(67),char(111),char(110),char(115),char(116),char(114),char(97),char(105),
+char(110),char(116),char(68),char(97),char(116),char(97),char(0),char(98),char(116),char(71),char(101),char(110),char(101),char(114),char(105),char(99),char(54),char(68),char(111),char(102),
+char(67),char(111),char(110),char(115),char(116),char(114),char(97),char(105),char(110),char(116),char(68),char(97),char(116),char(97),char(0),char(98),char(116),char(71),char(101),char(110),
+char(101),char(114),char(105),char(99),char(54),char(68),char(111),char(102),char(83),char(112),char(114),char(105),char(110),char(103),char(67),char(111),char(110),char(115),char(116),char(114),
+char(97),char(105),char(110),char(116),char(68),char(97),char(116),char(97),char(0),char(98),char(116),char(83),char(108),char(105),char(100),char(101),char(114),char(67),char(111),char(110),
+char(115),char(116),char(114),char(97),char(105),char(110),char(116),char(68),char(97),char(116),char(97),char(0),char(83),char(111),char(102),char(116),char(66),char(111),char(100),char(121),
+char(77),char(97),char(116),char(101),char(114),char(105),char(97),char(108),char(68),char(97),char(116),char(97),char(0),char(83),char(111),char(102),char(116),char(66),char(111),char(100),
+char(121),char(78),char(111),char(100),char(101),char(68),char(97),char(116),char(97),char(0),char(83),char(111),char(102),char(116),char(66),char(111),char(100),char(121),char(76),char(105),
+char(110),char(107),char(68),char(97),char(116),char(97),char(0),char(83),char(111),char(102),char(116),char(66),char(111),char(100),char(121),char(70),char(97),char(99),char(101),char(68),
+char(97),char(116),char(97),char(0),char(83),char(111),char(102),char(116),char(66),char(111),char(100),char(121),char(84),char(101),char(116),char(114),char(97),char(68),char(97),char(116),
+char(97),char(0),char(83),char(111),char(102),char(116),char(82),char(105),char(103),char(105),char(100),char(65),char(110),char(99),char(104),char(111),char(114),char(68),char(97),char(116),
+char(97),char(0),char(83),char(111),char(102),char(116),char(66),char(111),char(100),char(121),char(67),char(111),char(110),char(102),char(105),char(103),char(68),char(97),char(116),char(97),
+char(0),char(83),char(111),char(102),char(116),char(66),char(111),char(100),char(121),char(80),char(111),char(115),char(101),char(68),char(97),char(116),char(97),char(0),char(83),char(111),
+char(102),char(116),char(66),char(111),char(100),char(121),char(67),char(108),char(117),char(115),char(116),char(101),char(114),char(68),char(97),char(116),char(97),char(0),char(98),char(116),
+char(83),char(111),char(102),char(116),char(66),char(111),char(100),char(121),char(74),char(111),char(105),char(110),char(116),char(68),char(97),char(116),char(97),char(0),char(98),char(116),
+char(83),char(111),char(102),char(116),char(66),char(111),char(100),char(121),char(70),char(108),char(111),char(97),char(116),char(68),char(97),char(116),char(97),char(0),char(0),char(0),
+char(84),char(76),char(69),char(78),char(1),char(0),char(1),char(0),char(2),char(0),char(2),char(0),char(4),char(0),char(4),char(0),char(4),char(0),char(4),char(0),
+char(8),char(0),char(0),char(0),char(12),char(0),char(36),char(0),char(8),char(0),char(16),char(0),char(32),char(0),char(48),char(0),char(96),char(0),char(64),char(0),
+char(-128),char(0),char(20),char(0),char(48),char(0),char(80),char(0),char(16),char(0),char(84),char(0),char(-124),char(0),char(12),char(0),char(52),char(0),char(52),char(0),
+char(20),char(0),char(64),char(0),char(4),char(0),char(4),char(0),char(8),char(0),char(4),char(0),char(32),char(0),char(28),char(0),char(60),char(0),char(56),char(0),
+char(76),char(0),char(76),char(0),char(24),char(0),char(60),char(0),char(60),char(0),char(16),char(0),char(64),char(0),char(68),char(0),char(-48),char(1),char(0),char(1),
+char(-72),char(0),char(-104),char(0),char(104),char(0),char(88),char(0),char(-24),char(1),char(-96),char(3),char(8),char(0),char(52),char(0),char(0),char(0),char(84),char(0),
+char(116),char(0),char(92),char(1),char(-36),char(0),char(-44),char(0),char(-4),char(0),char(92),char(1),char(-52),char(0),char(16),char(0),char(100),char(0),char(20),char(0),
+char(36),char(0),char(100),char(0),char(92),char(0),char(104),char(0),char(-64),char(0),char(92),char(1),char(104),char(0),char(-84),char(1),char(83),char(84),char(82),char(67),
+char(65),char(0),char(0),char(0),char(10),char(0),char(3),char(0),char(4),char(0),char(0),char(0),char(4),char(0),char(1),char(0),char(9),char(0),char(2),char(0),
+char(11),char(0),char(3),char(0),char(10),char(0),char(3),char(0),char(10),char(0),char(4),char(0),char(10),char(0),char(5),char(0),char(12),char(0),char(2),char(0),
+char(9),char(0),char(6),char(0),char(9),char(0),char(7),char(0),char(13),char(0),char(1),char(0),char(7),char(0),char(8),char(0),char(14),char(0),char(1),char(0),
+char(8),char(0),char(8),char(0),char(15),char(0),char(1),char(0),char(13),char(0),char(9),char(0),char(16),char(0),char(1),char(0),char(14),char(0),char(9),char(0),
+char(17),char(0),char(2),char(0),char(15),char(0),char(10),char(0),char(13),char(0),char(11),char(0),char(18),char(0),char(2),char(0),char(16),char(0),char(10),char(0),
+char(14),char(0),char(11),char(0),char(19),char(0),char(4),char(0),char(4),char(0),char(12),char(0),char(4),char(0),char(13),char(0),char(2),char(0),char(14),char(0),
+char(2),char(0),char(15),char(0),char(20),char(0),char(6),char(0),char(13),char(0),char(16),char(0),char(13),char(0),char(17),char(0),char(4),char(0),char(18),char(0),
+char(4),char(0),char(19),char(0),char(4),char(0),char(20),char(0),char(0),char(0),char(21),char(0),char(21),char(0),char(6),char(0),char(14),char(0),char(16),char(0),
+char(14),char(0),char(17),char(0),char(4),char(0),char(18),char(0),char(4),char(0),char(19),char(0),char(4),char(0),char(20),char(0),char(0),char(0),char(21),char(0),
+char(22),char(0),char(3),char(0),char(2),char(0),char(14),char(0),char(2),char(0),char(15),char(0),char(4),char(0),char(22),char(0),char(23),char(0),char(12),char(0),
+char(13),char(0),char(23),char(0),char(13),char(0),char(24),char(0),char(13),char(0),char(25),char(0),char(4),char(0),char(26),char(0),char(4),char(0),char(27),char(0),
+char(4),char(0),char(28),char(0),char(4),char(0),char(29),char(0),char(20),char(0),char(30),char(0),char(22),char(0),char(31),char(0),char(19),char(0),char(32),char(0),
+char(4),char(0),char(33),char(0),char(4),char(0),char(34),char(0),char(24),char(0),char(12),char(0),char(14),char(0),char(23),char(0),char(14),char(0),char(24),char(0),
+char(14),char(0),char(25),char(0),char(4),char(0),char(26),char(0),char(4),char(0),char(27),char(0),char(4),char(0),char(28),char(0),char(4),char(0),char(29),char(0),
+char(21),char(0),char(30),char(0),char(22),char(0),char(31),char(0),char(4),char(0),char(33),char(0),char(4),char(0),char(34),char(0),char(19),char(0),char(32),char(0),
+char(25),char(0),char(3),char(0),char(0),char(0),char(35),char(0),char(4),char(0),char(36),char(0),char(0),char(0),char(37),char(0),char(26),char(0),char(5),char(0),
+char(25),char(0),char(38),char(0),char(13),char(0),char(39),char(0),char(13),char(0),char(40),char(0),char(7),char(0),char(41),char(0),char(0),char(0),char(21),char(0),
+char(27),char(0),char(5),char(0),char(25),char(0),char(38),char(0),char(13),char(0),char(39),char(0),char(13),char(0),char(42),char(0),char(7),char(0),char(43),char(0),
+char(4),char(0),char(44),char(0),char(28),char(0),char(2),char(0),char(13),char(0),char(45),char(0),char(7),char(0),char(46),char(0),char(29),char(0),char(4),char(0),
+char(27),char(0),char(47),char(0),char(28),char(0),char(48),char(0),char(4),char(0),char(49),char(0),char(0),char(0),char(37),char(0),char(30),char(0),char(1),char(0),
+char(4),char(0),char(50),char(0),char(31),char(0),char(2),char(0),char(2),char(0),char(50),char(0),char(0),char(0),char(51),char(0),char(32),char(0),char(2),char(0),
+char(2),char(0),char(52),char(0),char(0),char(0),char(51),char(0),char(33),char(0),char(2),char(0),char(0),char(0),char(52),char(0),char(0),char(0),char(53),char(0),
+char(34),char(0),char(8),char(0),char(13),char(0),char(54),char(0),char(14),char(0),char(55),char(0),char(30),char(0),char(56),char(0),char(32),char(0),char(57),char(0),
+char(33),char(0),char(58),char(0),char(31),char(0),char(59),char(0),char(4),char(0),char(60),char(0),char(4),char(0),char(61),char(0),char(35),char(0),char(4),char(0),
+char(34),char(0),char(62),char(0),char(13),char(0),char(63),char(0),char(4),char(0),char(64),char(0),char(0),char(0),char(37),char(0),char(36),char(0),char(7),char(0),
+char(25),char(0),char(38),char(0),char(35),char(0),char(65),char(0),char(23),char(0),char(66),char(0),char(24),char(0),char(67),char(0),char(37),char(0),char(68),char(0),
+char(7),char(0),char(43),char(0),char(0),char(0),char(69),char(0),char(38),char(0),char(2),char(0),char(36),char(0),char(70),char(0),char(13),char(0),char(39),char(0),
+char(39),char(0),char(4),char(0),char(17),char(0),char(71),char(0),char(25),char(0),char(72),char(0),char(4),char(0),char(73),char(0),char(7),char(0),char(74),char(0),
+char(40),char(0),char(4),char(0),char(25),char(0),char(38),char(0),char(39),char(0),char(75),char(0),char(4),char(0),char(76),char(0),char(7),char(0),char(43),char(0),
+char(41),char(0),char(3),char(0),char(27),char(0),char(47),char(0),char(4),char(0),char(77),char(0),char(0),char(0),char(37),char(0),char(42),char(0),char(3),char(0),
+char(27),char(0),char(47),char(0),char(4),char(0),char(77),char(0),char(0),char(0),char(37),char(0),char(43),char(0),char(4),char(0),char(4),char(0),char(78),char(0),
+char(7),char(0),char(79),char(0),char(7),char(0),char(80),char(0),char(7),char(0),char(81),char(0),char(37),char(0),char(14),char(0),char(4),char(0),char(82),char(0),
+char(4),char(0),char(83),char(0),char(43),char(0),char(84),char(0),char(4),char(0),char(85),char(0),char(7),char(0),char(86),char(0),char(7),char(0),char(87),char(0),
+char(7),char(0),char(88),char(0),char(7),char(0),char(89),char(0),char(7),char(0),char(90),char(0),char(4),char(0),char(91),char(0),char(4),char(0),char(92),char(0),
+char(4),char(0),char(93),char(0),char(4),char(0),char(94),char(0),char(0),char(0),char(37),char(0),char(44),char(0),char(5),char(0),char(25),char(0),char(38),char(0),
+char(35),char(0),char(65),char(0),char(13),char(0),char(39),char(0),char(7),char(0),char(43),char(0),char(4),char(0),char(95),char(0),char(45),char(0),char(5),char(0),
+char(27),char(0),char(47),char(0),char(13),char(0),char(96),char(0),char(14),char(0),char(97),char(0),char(4),char(0),char(98),char(0),char(0),char(0),char(99),char(0),
+char(46),char(0),char(25),char(0),char(9),char(0),char(100),char(0),char(9),char(0),char(101),char(0),char(25),char(0),char(102),char(0),char(0),char(0),char(35),char(0),
+char(18),char(0),char(103),char(0),char(18),char(0),char(104),char(0),char(14),char(0),char(105),char(0),char(14),char(0),char(106),char(0),char(14),char(0),char(107),char(0),
+char(8),char(0),char(108),char(0),char(8),char(0),char(109),char(0),char(8),char(0),char(110),char(0),char(8),char(0),char(111),char(0),char(8),char(0),char(112),char(0),
+char(8),char(0),char(113),char(0),char(8),char(0),char(114),char(0),char(8),char(0),char(115),char(0),char(4),char(0),char(116),char(0),char(4),char(0),char(117),char(0),
+char(4),char(0),char(118),char(0),char(4),char(0),char(119),char(0),char(4),char(0),char(120),char(0),char(4),char(0),char(121),char(0),char(4),char(0),char(122),char(0),
+char(0),char(0),char(37),char(0),char(47),char(0),char(25),char(0),char(9),char(0),char(100),char(0),char(9),char(0),char(101),char(0),char(25),char(0),char(102),char(0),
+char(0),char(0),char(35),char(0),char(17),char(0),char(103),char(0),char(17),char(0),char(104),char(0),char(13),char(0),char(105),char(0),char(13),char(0),char(106),char(0),
+char(13),char(0),char(107),char(0),char(7),char(0),char(108),char(0),char(7),char(0),char(109),char(0),char(7),char(0),char(110),char(0),char(7),char(0),char(111),char(0),
+char(7),char(0),char(112),char(0),char(7),char(0),char(113),char(0),char(7),char(0),char(114),char(0),char(7),char(0),char(115),char(0),char(4),char(0),char(116),char(0),
+char(4),char(0),char(117),char(0),char(4),char(0),char(118),char(0),char(4),char(0),char(119),char(0),char(4),char(0),char(120),char(0),char(4),char(0),char(121),char(0),
+char(4),char(0),char(122),char(0),char(0),char(0),char(37),char(0),char(48),char(0),char(2),char(0),char(49),char(0),char(123),char(0),char(14),char(0),char(124),char(0),
+char(50),char(0),char(2),char(0),char(51),char(0),char(123),char(0),char(13),char(0),char(124),char(0),char(52),char(0),char(21),char(0),char(47),char(0),char(125),char(0),
+char(15),char(0),char(126),char(0),char(13),char(0),char(127),char(0),char(13),char(0),char(-128),char(0),char(13),char(0),char(-127),char(0),char(13),char(0),char(-126),char(0),
+char(13),char(0),char(124),char(0),char(13),char(0),char(-125),char(0),char(13),char(0),char(-124),char(0),char(13),char(0),char(-123),char(0),char(13),char(0),char(-122),char(0),
+char(7),char(0),char(-121),char(0),char(7),char(0),char(-120),char(0),char(7),char(0),char(-119),char(0),char(7),char(0),char(-118),char(0),char(7),char(0),char(-117),char(0),
+char(7),char(0),char(-116),char(0),char(7),char(0),char(-115),char(0),char(7),char(0),char(-114),char(0),char(7),char(0),char(-113),char(0),char(4),char(0),char(-112),char(0),
+char(53),char(0),char(22),char(0),char(46),char(0),char(125),char(0),char(16),char(0),char(126),char(0),char(14),char(0),char(127),char(0),char(14),char(0),char(-128),char(0),
+char(14),char(0),char(-127),char(0),char(14),char(0),char(-126),char(0),char(14),char(0),char(124),char(0),char(14),char(0),char(-125),char(0),char(14),char(0),char(-124),char(0),
+char(14),char(0),char(-123),char(0),char(14),char(0),char(-122),char(0),char(8),char(0),char(-121),char(0),char(8),char(0),char(-120),char(0),char(8),char(0),char(-119),char(0),
+char(8),char(0),char(-118),char(0),char(8),char(0),char(-117),char(0),char(8),char(0),char(-116),char(0),char(8),char(0),char(-115),char(0),char(8),char(0),char(-114),char(0),
+char(8),char(0),char(-113),char(0),char(4),char(0),char(-112),char(0),char(0),char(0),char(37),char(0),char(54),char(0),char(2),char(0),char(4),char(0),char(-111),char(0),
+char(4),char(0),char(-110),char(0),char(55),char(0),char(13),char(0),char(56),char(0),char(-109),char(0),char(56),char(0),char(-108),char(0),char(0),char(0),char(35),char(0),
+char(4),char(0),char(-107),char(0),char(4),char(0),char(-106),char(0),char(4),char(0),char(-105),char(0),char(4),char(0),char(-104),char(0),char(7),char(0),char(-103),char(0),
+char(7),char(0),char(-102),char(0),char(4),char(0),char(-101),char(0),char(4),char(0),char(-100),char(0),char(7),char(0),char(-99),char(0),char(4),char(0),char(-98),char(0),
+char(57),char(0),char(3),char(0),char(55),char(0),char(-97),char(0),char(13),char(0),char(-96),char(0),char(13),char(0),char(-95),char(0),char(58),char(0),char(3),char(0),
+char(55),char(0),char(-97),char(0),char(14),char(0),char(-96),char(0),char(14),char(0),char(-95),char(0),char(59),char(0),char(13),char(0),char(55),char(0),char(-97),char(0),
+char(18),char(0),char(-94),char(0),char(18),char(0),char(-93),char(0),char(4),char(0),char(-92),char(0),char(4),char(0),char(-91),char(0),char(4),char(0),char(-90),char(0),
+char(7),char(0),char(-89),char(0),char(7),char(0),char(-88),char(0),char(7),char(0),char(-87),char(0),char(7),char(0),char(-86),char(0),char(7),char(0),char(-85),char(0),
+char(7),char(0),char(-84),char(0),char(7),char(0),char(-83),char(0),char(60),char(0),char(13),char(0),char(55),char(0),char(-97),char(0),char(17),char(0),char(-94),char(0),
+char(17),char(0),char(-93),char(0),char(4),char(0),char(-92),char(0),char(4),char(0),char(-91),char(0),char(4),char(0),char(-90),char(0),char(7),char(0),char(-89),char(0),
+char(7),char(0),char(-88),char(0),char(7),char(0),char(-87),char(0),char(7),char(0),char(-86),char(0),char(7),char(0),char(-85),char(0),char(7),char(0),char(-84),char(0),
+char(7),char(0),char(-83),char(0),char(61),char(0),char(11),char(0),char(55),char(0),char(-97),char(0),char(17),char(0),char(-94),char(0),char(17),char(0),char(-93),char(0),
+char(7),char(0),char(-82),char(0),char(7),char(0),char(-81),char(0),char(7),char(0),char(-80),char(0),char(7),char(0),char(-85),char(0),char(7),char(0),char(-84),char(0),
+char(7),char(0),char(-83),char(0),char(7),char(0),char(-79),char(0),char(0),char(0),char(21),char(0),char(62),char(0),char(9),char(0),char(55),char(0),char(-97),char(0),
+char(17),char(0),char(-94),char(0),char(17),char(0),char(-93),char(0),char(13),char(0),char(-78),char(0),char(13),char(0),char(-77),char(0),char(13),char(0),char(-76),char(0),
+char(13),char(0),char(-75),char(0),char(4),char(0),char(-74),char(0),char(4),char(0),char(-73),char(0),char(63),char(0),char(5),char(0),char(62),char(0),char(-72),char(0),
+char(4),char(0),char(-71),char(0),char(7),char(0),char(-70),char(0),char(7),char(0),char(-69),char(0),char(7),char(0),char(-68),char(0),char(64),char(0),char(9),char(0),
+char(55),char(0),char(-97),char(0),char(17),char(0),char(-94),char(0),char(17),char(0),char(-93),char(0),char(7),char(0),char(-78),char(0),char(7),char(0),char(-77),char(0),
+char(7),char(0),char(-76),char(0),char(7),char(0),char(-75),char(0),char(4),char(0),char(-74),char(0),char(4),char(0),char(-73),char(0),char(49),char(0),char(22),char(0),
+char(8),char(0),char(-67),char(0),char(8),char(0),char(-79),char(0),char(8),char(0),char(110),char(0),char(8),char(0),char(-66),char(0),char(8),char(0),char(112),char(0),
+char(8),char(0),char(-65),char(0),char(8),char(0),char(-64),char(0),char(8),char(0),char(-63),char(0),char(8),char(0),char(-62),char(0),char(8),char(0),char(-61),char(0),
+char(8),char(0),char(-60),char(0),char(8),char(0),char(-59),char(0),char(8),char(0),char(-58),char(0),char(8),char(0),char(-57),char(0),char(8),char(0),char(-56),char(0),
+char(8),char(0),char(-55),char(0),char(4),char(0),char(-54),char(0),char(4),char(0),char(-53),char(0),char(4),char(0),char(-52),char(0),char(4),char(0),char(-51),char(0),
+char(4),char(0),char(-50),char(0),char(0),char(0),char(37),char(0),char(51),char(0),char(22),char(0),char(7),char(0),char(-67),char(0),char(7),char(0),char(-79),char(0),
+char(7),char(0),char(110),char(0),char(7),char(0),char(-66),char(0),char(7),char(0),char(112),char(0),char(7),char(0),char(-65),char(0),char(7),char(0),char(-64),char(0),
+char(7),char(0),char(-63),char(0),char(7),char(0),char(-62),char(0),char(7),char(0),char(-61),char(0),char(7),char(0),char(-60),char(0),char(7),char(0),char(-59),char(0),
+char(7),char(0),char(-58),char(0),char(7),char(0),char(-57),char(0),char(7),char(0),char(-56),char(0),char(7),char(0),char(-55),char(0),char(4),char(0),char(-54),char(0),
+char(4),char(0),char(-53),char(0),char(4),char(0),char(-52),char(0),char(4),char(0),char(-51),char(0),char(4),char(0),char(-50),char(0),char(0),char(0),char(37),char(0),
+char(65),char(0),char(4),char(0),char(7),char(0),char(-49),char(0),char(7),char(0),char(-48),char(0),char(7),char(0),char(-47),char(0),char(4),char(0),char(78),char(0),
+char(66),char(0),char(10),char(0),char(65),char(0),char(-46),char(0),char(13),char(0),char(-45),char(0),char(13),char(0),char(-44),char(0),char(13),char(0),char(-43),char(0),
+char(13),char(0),char(-42),char(0),char(13),char(0),char(-41),char(0),char(7),char(0),char(-121),char(0),char(7),char(0),char(-40),char(0),char(4),char(0),char(-39),char(0),
+char(4),char(0),char(53),char(0),char(67),char(0),char(4),char(0),char(65),char(0),char(-46),char(0),char(4),char(0),char(-38),char(0),char(7),char(0),char(-37),char(0),
+char(4),char(0),char(-36),char(0),char(68),char(0),char(4),char(0),char(13),char(0),char(-41),char(0),char(65),char(0),char(-46),char(0),char(4),char(0),char(-35),char(0),
+char(7),char(0),char(-34),char(0),char(69),char(0),char(7),char(0),char(13),char(0),char(-33),char(0),char(65),char(0),char(-46),char(0),char(4),char(0),char(-32),char(0),
+char(7),char(0),char(-31),char(0),char(7),char(0),char(-30),char(0),char(7),char(0),char(-29),char(0),char(4),char(0),char(53),char(0),char(70),char(0),char(6),char(0),
+char(15),char(0),char(-28),char(0),char(13),char(0),char(-30),char(0),char(13),char(0),char(-27),char(0),char(56),char(0),char(-26),char(0),char(4),char(0),char(-25),char(0),
+char(7),char(0),char(-29),char(0),char(71),char(0),char(26),char(0),char(4),char(0),char(-24),char(0),char(7),char(0),char(-23),char(0),char(7),char(0),char(-79),char(0),
+char(7),char(0),char(-22),char(0),char(7),char(0),char(-21),char(0),char(7),char(0),char(-20),char(0),char(7),char(0),char(-19),char(0),char(7),char(0),char(-18),char(0),
+char(7),char(0),char(-17),char(0),char(7),char(0),char(-16),char(0),char(7),char(0),char(-15),char(0),char(7),char(0),char(-14),char(0),char(7),char(0),char(-13),char(0),
+char(7),char(0),char(-12),char(0),char(7),char(0),char(-11),char(0),char(7),char(0),char(-10),char(0),char(7),char(0),char(-9),char(0),char(7),char(0),char(-8),char(0),
+char(7),char(0),char(-7),char(0),char(7),char(0),char(-6),char(0),char(7),char(0),char(-5),char(0),char(4),char(0),char(-4),char(0),char(4),char(0),char(-3),char(0),
+char(4),char(0),char(-2),char(0),char(4),char(0),char(-1),char(0),char(4),char(0),char(117),char(0),char(72),char(0),char(12),char(0),char(15),char(0),char(0),char(1),
+char(15),char(0),char(1),char(1),char(15),char(0),char(2),char(1),char(13),char(0),char(3),char(1),char(13),char(0),char(4),char(1),char(7),char(0),char(5),char(1),
+char(4),char(0),char(6),char(1),char(4),char(0),char(7),char(1),char(4),char(0),char(8),char(1),char(4),char(0),char(9),char(1),char(7),char(0),char(-31),char(0),
+char(4),char(0),char(53),char(0),char(73),char(0),char(27),char(0),char(17),char(0),char(10),char(1),char(15),char(0),char(11),char(1),char(15),char(0),char(12),char(1),
+char(13),char(0),char(3),char(1),char(13),char(0),char(13),char(1),char(13),char(0),char(14),char(1),char(13),char(0),char(15),char(1),char(13),char(0),char(16),char(1),
+char(13),char(0),char(17),char(1),char(4),char(0),char(18),char(1),char(7),char(0),char(19),char(1),char(4),char(0),char(20),char(1),char(4),char(0),char(21),char(1),
+char(4),char(0),char(22),char(1),char(7),char(0),char(23),char(1),char(7),char(0),char(24),char(1),char(4),char(0),char(25),char(1),char(4),char(0),char(26),char(1),
+char(7),char(0),char(27),char(1),char(7),char(0),char(28),char(1),char(7),char(0),char(29),char(1),char(7),char(0),char(30),char(1),char(7),char(0),char(31),char(1),
+char(7),char(0),char(32),char(1),char(4),char(0),char(33),char(1),char(4),char(0),char(34),char(1),char(4),char(0),char(35),char(1),char(74),char(0),char(12),char(0),
+char(9),char(0),char(36),char(1),char(9),char(0),char(37),char(1),char(13),char(0),char(38),char(1),char(7),char(0),char(39),char(1),char(7),char(0),char(-63),char(0),
+char(7),char(0),char(40),char(1),char(4),char(0),char(41),char(1),char(13),char(0),char(42),char(1),char(4),char(0),char(43),char(1),char(4),char(0),char(44),char(1),
+char(4),char(0),char(45),char(1),char(4),char(0),char(53),char(0),char(75),char(0),char(19),char(0),char(47),char(0),char(125),char(0),char(72),char(0),char(46),char(1),
+char(65),char(0),char(47),char(1),char(66),char(0),char(48),char(1),char(67),char(0),char(49),char(1),char(68),char(0),char(50),char(1),char(69),char(0),char(51),char(1),
+char(70),char(0),char(52),char(1),char(73),char(0),char(53),char(1),char(74),char(0),char(54),char(1),char(4),char(0),char(55),char(1),char(4),char(0),char(21),char(1),
+char(4),char(0),char(56),char(1),char(4),char(0),char(57),char(1),char(4),char(0),char(58),char(1),char(4),char(0),char(59),char(1),char(4),char(0),char(60),char(1),
+char(4),char(0),char(61),char(1),char(71),char(0),char(62),char(1),};
+int sBulletDNAlen= sizeof(sBulletDNAstr);
+char sBulletDNAstr64[]= {
+char(83),char(68),char(78),char(65),char(78),char(65),char(77),char(69),char(63),char(1),char(0),char(0),char(109),char(95),char(115),char(105),char(122),char(101),char(0),char(109),
+char(95),char(99),char(97),char(112),char(97),char(99),char(105),char(116),char(121),char(0),char(42),char(109),char(95),char(100),char(97),char(116),char(97),char(0),char(109),char(95),
+char(99),char(111),char(108),char(108),char(105),char(115),char(105),char(111),char(110),char(83),char(104),char(97),char(112),char(101),char(115),char(0),char(109),char(95),char(99),char(111),
+char(108),char(108),char(105),char(115),char(105),char(111),char(110),char(79),char(98),char(106),char(101),char(99),char(116),char(115),char(0),char(109),char(95),char(99),char(111),char(110),
+char(115),char(116),char(114),char(97),char(105),char(110),char(116),char(115),char(0),char(42),char(102),char(105),char(114),char(115),char(116),char(0),char(42),char(108),char(97),char(115),
+char(116),char(0),char(109),char(95),char(102),char(108),char(111),char(97),char(116),char(115),char(91),char(52),char(93),char(0),char(109),char(95),char(101),char(108),char(91),char(51),
+char(93),char(0),char(109),char(95),char(98),char(97),char(115),char(105),char(115),char(0),char(109),char(95),char(111),char(114),char(105),char(103),char(105),char(110),char(0),char(109),
+char(95),char(114),char(111),char(111),char(116),char(78),char(111),char(100),char(101),char(73),char(110),char(100),char(101),char(120),char(0),char(109),char(95),char(115),char(117),char(98),
+char(116),char(114),char(101),char(101),char(83),char(105),char(122),char(101),char(0),char(109),char(95),char(113),char(117),char(97),char(110),char(116),char(105),char(122),char(101),char(100),
+char(65),char(97),char(98),char(98),char(77),char(105),char(110),char(91),char(51),char(93),char(0),char(109),char(95),char(113),char(117),char(97),char(110),char(116),char(105),char(122),
+char(101),char(100),char(65),char(97),char(98),char(98),char(77),char(97),char(120),char(91),char(51),char(93),char(0),char(109),char(95),char(97),char(97),char(98),char(98),char(77),
+char(105),char(110),char(79),char(114),char(103),char(0),char(109),char(95),char(97),char(97),char(98),char(98),char(77),char(97),char(120),char(79),char(114),char(103),char(0),char(109),
+char(95),char(101),char(115),char(99),char(97),char(112),char(101),char(73),char(110),char(100),char(101),char(120),char(0),char(109),char(95),char(115),char(117),char(98),char(80),char(97),
+char(114),char(116),char(0),char(109),char(95),char(116),char(114),char(105),char(97),char(110),char(103),char(108),char(101),char(73),char(110),char(100),char(101),char(120),char(0),char(109),
+char(95),char(112),char(97),char(100),char(91),char(52),char(93),char(0),char(109),char(95),char(101),char(115),char(99),char(97),char(112),char(101),char(73),char(110),char(100),char(101),
+char(120),char(79),char(114),char(84),char(114),char(105),char(97),char(110),char(103),char(108),char(101),char(73),char(110),char(100),char(101),char(120),char(0),char(109),char(95),char(98),
+char(118),char(104),char(65),char(97),char(98),char(98),char(77),char(105),char(110),char(0),char(109),char(95),char(98),char(118),char(104),char(65),char(97),char(98),char(98),char(77),
+char(97),char(120),char(0),char(109),char(95),char(98),char(118),char(104),char(81),char(117),char(97),char(110),char(116),char(105),char(122),char(97),char(116),char(105),char(111),char(110),
+char(0),char(109),char(95),char(99),char(117),char(114),char(78),char(111),char(100),char(101),char(73),char(110),char(100),char(101),char(120),char(0),char(109),char(95),char(117),char(115),
+char(101),char(81),char(117),char(97),char(110),char(116),char(105),char(122),char(97),char(116),char(105),char(111),char(110),char(0),char(109),char(95),char(110),char(117),char(109),char(67),
+char(111),char(110),char(116),char(105),char(103),char(117),char(111),char(117),char(115),char(76),char(101),char(97),char(102),char(78),char(111),char(100),char(101),char(115),char(0),char(109),
+char(95),char(110),char(117),char(109),char(81),char(117),char(97),char(110),char(116),char(105),char(122),char(101),char(100),char(67),char(111),char(110),char(116),char(105),char(103),char(117),
+char(111),char(117),char(115),char(78),char(111),char(100),char(101),char(115),char(0),char(42),char(109),char(95),char(99),char(111),char(110),char(116),char(105),char(103),char(117),char(111),
+char(117),char(115),char(78),char(111),char(100),char(101),char(115),char(80),char(116),char(114),char(0),char(42),char(109),char(95),char(113),char(117),char(97),char(110),char(116),char(105),
+char(122),char(101),char(100),char(67),char(111),char(110),char(116),char(105),char(103),char(117),char(111),char(117),char(115),char(78),char(111),char(100),char(101),char(115),char(80),char(116),
+char(114),char(0),char(42),char(109),char(95),char(115),char(117),char(98),char(84),char(114),char(101),char(101),char(73),char(110),char(102),char(111),char(80),char(116),char(114),char(0),
+char(109),char(95),char(116),char(114),char(97),char(118),char(101),char(114),char(115),char(97),char(108),char(77),char(111),char(100),char(101),char(0),char(109),char(95),char(110),char(117),
+char(109),char(83),char(117),char(98),char(116),char(114),char(101),char(101),char(72),char(101),char(97),char(100),char(101),char(114),char(115),char(0),char(42),char(109),char(95),char(110),
+char(97),char(109),char(101),char(0),char(109),char(95),char(115),char(104),char(97),char(112),char(101),char(84),char(121),char(112),char(101),char(0),char(109),char(95),char(112),char(97),
+char(100),char(100),char(105),char(110),char(103),char(91),char(52),char(93),char(0),char(109),char(95),char(99),char(111),char(108),char(108),char(105),char(115),char(105),char(111),char(110),
+char(83),char(104),char(97),char(112),char(101),char(68),char(97),char(116),char(97),char(0),char(109),char(95),char(108),char(111),char(99),char(97),char(108),char(83),char(99),char(97),
+char(108),char(105),char(110),char(103),char(0),char(109),char(95),char(112),char(108),char(97),char(110),char(101),char(78),char(111),char(114),char(109),char(97),char(108),char(0),char(109),
+char(95),char(112),char(108),char(97),char(110),char(101),char(67),char(111),char(110),char(115),char(116),char(97),char(110),char(116),char(0),char(109),char(95),char(105),char(109),char(112),
+char(108),char(105),char(99),char(105),char(116),char(83),char(104),char(97),char(112),char(101),char(68),char(105),char(109),char(101),char(110),char(115),char(105),char(111),char(110),char(115),
+char(0),char(109),char(95),char(99),char(111),char(108),char(108),char(105),char(115),char(105),char(111),char(110),char(77),char(97),char(114),char(103),char(105),char(110),char(0),char(109),
+char(95),char(112),char(97),char(100),char(100),char(105),char(110),char(103),char(0),char(109),char(95),char(112),char(111),char(115),char(0),char(109),char(95),char(114),char(97),char(100),
+char(105),char(117),char(115),char(0),char(109),char(95),char(99),char(111),char(110),char(118),char(101),char(120),char(73),char(110),char(116),char(101),char(114),char(110),char(97),char(108),
+char(83),char(104),char(97),char(112),char(101),char(68),char(97),char(116),char(97),char(0),char(42),char(109),char(95),char(108),char(111),char(99),char(97),char(108),char(80),char(111),
+char(115),char(105),char(116),char(105),char(111),char(110),char(65),char(114),char(114),char(97),char(121),char(80),char(116),char(114),char(0),char(109),char(95),char(108),char(111),char(99),
+char(97),char(108),char(80),char(111),char(115),char(105),char(116),char(105),char(111),char(110),char(65),char(114),char(114),char(97),char(121),char(83),char(105),char(122),char(101),char(0),
+char(109),char(95),char(118),char(97),char(108),char(117),char(101),char(0),char(109),char(95),char(112),char(97),char(100),char(91),char(50),char(93),char(0),char(109),char(95),char(118),
+char(97),char(108),char(117),char(101),char(115),char(91),char(51),char(93),char(0),char(109),char(95),char(112),char(97),char(100),char(0),char(42),char(109),char(95),char(118),char(101),
+char(114),char(116),char(105),char(99),char(101),char(115),char(51),char(102),char(0),char(42),char(109),char(95),char(118),char(101),char(114),char(116),char(105),char(99),char(101),char(115),
+char(51),char(100),char(0),char(42),char(109),char(95),char(105),char(110),char(100),char(105),char(99),char(101),char(115),char(51),char(50),char(0),char(42),char(109),char(95),char(51),
+char(105),char(110),char(100),char(105),char(99),char(101),char(115),char(49),char(54),char(0),char(42),char(109),char(95),char(51),char(105),char(110),char(100),char(105),char(99),char(101),
+char(115),char(56),char(0),char(42),char(109),char(95),char(105),char(110),char(100),char(105),char(99),char(101),char(115),char(49),char(54),char(0),char(109),char(95),char(110),char(117),
+char(109),char(84),char(114),char(105),char(97),char(110),char(103),char(108),char(101),char(115),char(0),char(109),char(95),char(110),char(117),char(109),char(86),char(101),char(114),char(116),
+char(105),char(99),char(101),char(115),char(0),char(42),char(109),char(95),char(109),char(101),char(115),char(104),char(80),char(97),char(114),char(116),char(115),char(80),char(116),char(114),
+char(0),char(109),char(95),char(115),char(99),char(97),char(108),char(105),char(110),char(103),char(0),char(109),char(95),char(110),char(117),char(109),char(77),char(101),char(115),char(104),
+char(80),char(97),char(114),char(116),char(115),char(0),char(109),char(95),char(109),char(101),char(115),char(104),char(73),char(110),char(116),char(101),char(114),char(102),char(97),char(99),
+char(101),char(0),char(42),char(109),char(95),char(113),char(117),char(97),char(110),char(116),char(105),char(122),char(101),char(100),char(70),char(108),char(111),char(97),char(116),char(66),
+char(118),char(104),char(0),char(42),char(109),char(95),char(113),char(117),char(97),char(110),char(116),char(105),char(122),char(101),char(100),char(68),char(111),char(117),char(98),char(108),
+char(101),char(66),char(118),char(104),char(0),char(42),char(109),char(95),char(116),char(114),char(105),char(97),char(110),char(103),char(108),char(101),char(73),char(110),char(102),char(111),
+char(77),char(97),char(112),char(0),char(109),char(95),char(112),char(97),char(100),char(51),char(91),char(52),char(93),char(0),char(109),char(95),char(116),char(114),char(105),char(109),
+char(101),char(115),char(104),char(83),char(104),char(97),char(112),char(101),char(68),char(97),char(116),char(97),char(0),char(109),char(95),char(116),char(114),char(97),char(110),char(115),
+char(102),char(111),char(114),char(109),char(0),char(42),char(109),char(95),char(99),char(104),char(105),char(108),char(100),char(83),char(104),char(97),char(112),char(101),char(0),char(109),
+char(95),char(99),char(104),char(105),char(108),char(100),char(83),char(104),char(97),char(112),char(101),char(84),char(121),char(112),char(101),char(0),char(109),char(95),char(99),char(104),
+char(105),char(108),char(100),char(77),char(97),char(114),char(103),char(105),char(110),char(0),char(42),char(109),char(95),char(99),char(104),char(105),char(108),char(100),char(83),char(104),
+char(97),char(112),char(101),char(80),char(116),char(114),char(0),char(109),char(95),char(110),char(117),char(109),char(67),char(104),char(105),char(108),char(100),char(83),char(104),char(97),
+char(112),char(101),char(115),char(0),char(109),char(95),char(117),char(112),char(65),char(120),char(105),char(115),char(0),char(109),char(95),char(102),char(108),char(97),char(103),char(115),
+char(0),char(109),char(95),char(101),char(100),char(103),char(101),char(86),char(48),char(86),char(49),char(65),char(110),char(103),char(108),char(101),char(0),char(109),char(95),char(101),
+char(100),char(103),char(101),char(86),char(49),char(86),char(50),char(65),char(110),char(103),char(108),char(101),char(0),char(109),char(95),char(101),char(100),char(103),char(101),char(86),
+char(50),char(86),char(48),char(65),char(110),char(103),char(108),char(101),char(0),char(42),char(109),char(95),char(104),char(97),char(115),char(104),char(84),char(97),char(98),char(108),
+char(101),char(80),char(116),char(114),char(0),char(42),char(109),char(95),char(110),char(101),char(120),char(116),char(80),char(116),char(114),char(0),char(42),char(109),char(95),char(118),
+char(97),char(108),char(117),char(101),char(65),char(114),char(114),char(97),char(121),char(80),char(116),char(114),char(0),char(42),char(109),char(95),char(107),char(101),char(121),char(65),
+char(114),char(114),char(97),char(121),char(80),char(116),char(114),char(0),char(109),char(95),char(99),char(111),char(110),char(118),char(101),char(120),char(69),char(112),char(115),char(105),
+char(108),char(111),char(110),char(0),char(109),char(95),char(112),char(108),char(97),char(110),char(97),char(114),char(69),char(112),char(115),char(105),char(108),char(111),char(110),char(0),
+char(109),char(95),char(101),char(113),char(117),char(97),char(108),char(86),char(101),char(114),char(116),char(101),char(120),char(84),char(104),char(114),char(101),char(115),char(104),char(111),
+char(108),char(100),char(0),char(109),char(95),char(101),char(100),char(103),char(101),char(68),char(105),char(115),char(116),char(97),char(110),char(99),char(101),char(84),char(104),char(114),
+char(101),char(115),char(104),char(111),char(108),char(100),char(0),char(109),char(95),char(122),char(101),char(114),char(111),char(65),char(114),char(101),char(97),char(84),char(104),char(114),
+char(101),char(115),char(104),char(111),char(108),char(100),char(0),char(109),char(95),char(110),char(101),char(120),char(116),char(83),char(105),char(122),char(101),char(0),char(109),char(95),
+char(104),char(97),char(115),char(104),char(84),char(97),char(98),char(108),char(101),char(83),char(105),char(122),char(101),char(0),char(109),char(95),char(110),char(117),char(109),char(86),
+char(97),char(108),char(117),char(101),char(115),char(0),char(109),char(95),char(110),char(117),char(109),char(75),char(101),char(121),char(115),char(0),char(109),char(95),char(103),char(105),
+char(109),char(112),char(97),char(99),char(116),char(83),char(117),char(98),char(84),char(121),char(112),char(101),char(0),char(42),char(109),char(95),char(117),char(110),char(115),char(99),
+char(97),char(108),char(101),char(100),char(80),char(111),char(105),char(110),char(116),char(115),char(70),char(108),char(111),char(97),char(116),char(80),char(116),char(114),char(0),char(42),
+char(109),char(95),char(117),char(110),char(115),char(99),char(97),char(108),char(101),char(100),char(80),char(111),char(105),char(110),char(116),char(115),char(68),char(111),char(117),char(98),
+char(108),char(101),char(80),char(116),char(114),char(0),char(109),char(95),char(110),char(117),char(109),char(85),char(110),char(115),char(99),char(97),char(108),char(101),char(100),char(80),
+char(111),char(105),char(110),char(116),char(115),char(0),char(109),char(95),char(112),char(97),char(100),char(100),char(105),char(110),char(103),char(51),char(91),char(52),char(93),char(0),
+char(42),char(109),char(95),char(98),char(114),char(111),char(97),char(100),char(112),char(104),char(97),char(115),char(101),char(72),char(97),char(110),char(100),char(108),char(101),char(0),
+char(42),char(109),char(95),char(99),char(111),char(108),char(108),char(105),char(115),char(105),char(111),char(110),char(83),char(104),char(97),char(112),char(101),char(0),char(42),char(109),
+char(95),char(114),char(111),char(111),char(116),char(67),char(111),char(108),char(108),char(105),char(115),char(105),char(111),char(110),char(83),char(104),char(97),char(112),char(101),char(0),
+char(109),char(95),char(119),char(111),char(114),char(108),char(100),char(84),char(114),char(97),char(110),char(115),char(102),char(111),char(114),char(109),char(0),char(109),char(95),char(105),
+char(110),char(116),char(101),char(114),char(112),char(111),char(108),char(97),char(116),char(105),char(111),char(110),char(87),char(111),char(114),char(108),char(100),char(84),char(114),char(97),
+char(110),char(115),char(102),char(111),char(114),char(109),char(0),char(109),char(95),char(105),char(110),char(116),char(101),char(114),char(112),char(111),char(108),char(97),char(116),char(105),
+char(111),char(110),char(76),char(105),char(110),char(101),char(97),char(114),char(86),char(101),char(108),char(111),char(99),char(105),char(116),char(121),char(0),char(109),char(95),char(105),
+char(110),char(116),char(101),char(114),char(112),char(111),char(108),char(97),char(116),char(105),char(111),char(110),char(65),char(110),char(103),char(117),char(108),char(97),char(114),char(86),
+char(101),char(108),char(111),char(99),char(105),char(116),char(121),char(0),char(109),char(95),char(97),char(110),char(105),char(115),char(111),char(116),char(114),char(111),char(112),char(105),
+char(99),char(70),char(114),char(105),char(99),char(116),char(105),char(111),char(110),char(0),char(109),char(95),char(99),char(111),char(110),char(116),char(97),char(99),char(116),char(80),
+char(114),char(111),char(99),char(101),char(115),char(115),char(105),char(110),char(103),char(84),char(104),char(114),char(101),char(115),char(104),char(111),char(108),char(100),char(0),char(109),
+char(95),char(100),char(101),char(97),char(99),char(116),char(105),char(118),char(97),char(116),char(105),char(111),char(110),char(84),char(105),char(109),char(101),char(0),char(109),char(95),
+char(102),char(114),char(105),char(99),char(116),char(105),char(111),char(110),char(0),char(109),char(95),char(114),char(111),char(108),char(108),char(105),char(110),char(103),char(70),char(114),
+char(105),char(99),char(116),char(105),char(111),char(110),char(0),char(109),char(95),char(114),char(101),char(115),char(116),char(105),char(116),char(117),char(116),char(105),char(111),char(110),
+char(0),char(109),char(95),char(104),char(105),char(116),char(70),char(114),char(97),char(99),char(116),char(105),char(111),char(110),char(0),char(109),char(95),char(99),char(99),char(100),
+char(83),char(119),char(101),char(112),char(116),char(83),char(112),char(104),char(101),char(114),char(101),char(82),char(97),char(100),char(105),char(117),char(115),char(0),char(109),char(95),
+char(99),char(99),char(100),char(77),char(111),char(116),char(105),char(111),char(110),char(84),char(104),char(114),char(101),char(115),char(104),char(111),char(108),char(100),char(0),char(109),
+char(95),char(104),char(97),char(115),char(65),char(110),char(105),char(115),char(111),char(116),char(114),char(111),char(112),char(105),char(99),char(70),char(114),char(105),char(99),char(116),
+char(105),char(111),char(110),char(0),char(109),char(95),char(99),char(111),char(108),char(108),char(105),char(115),char(105),char(111),char(110),char(70),char(108),char(97),char(103),char(115),
+char(0),char(109),char(95),char(105),char(115),char(108),char(97),char(110),char(100),char(84),char(97),char(103),char(49),char(0),char(109),char(95),char(99),char(111),char(109),char(112),
+char(97),char(110),char(105),char(111),char(110),char(73),char(100),char(0),char(109),char(95),char(97),char(99),char(116),char(105),char(118),char(97),char(116),char(105),char(111),char(110),
+char(83),char(116),char(97),char(116),char(101),char(49),char(0),char(109),char(95),char(105),char(110),char(116),char(101),char(114),char(110),char(97),char(108),char(84),char(121),char(112),
+char(101),char(0),char(109),char(95),char(99),char(104),char(101),char(99),char(107),char(67),char(111),char(108),char(108),char(105),char(100),char(101),char(87),char(105),char(116),char(104),
+char(0),char(109),char(95),char(115),char(111),char(108),char(118),char(101),char(114),char(73),char(110),char(102),char(111),char(0),char(109),char(95),char(103),char(114),char(97),char(118),
+char(105),char(116),char(121),char(0),char(109),char(95),char(99),char(111),char(108),char(108),char(105),char(115),char(105),char(111),char(110),char(79),char(98),char(106),char(101),char(99),
+char(116),char(68),char(97),char(116),char(97),char(0),char(109),char(95),char(105),char(110),char(118),char(73),char(110),char(101),char(114),char(116),char(105),char(97),char(84),char(101),
+char(110),char(115),char(111),char(114),char(87),char(111),char(114),char(108),char(100),char(0),char(109),char(95),char(108),char(105),char(110),char(101),char(97),char(114),char(86),char(101),
+char(108),char(111),char(99),char(105),char(116),char(121),char(0),char(109),char(95),char(97),char(110),char(103),char(117),char(108),char(97),char(114),char(86),char(101),char(108),char(111),
+char(99),char(105),char(116),char(121),char(0),char(109),char(95),char(97),char(110),char(103),char(117),char(108),char(97),char(114),char(70),char(97),char(99),char(116),char(111),char(114),
+char(0),char(109),char(95),char(108),char(105),char(110),char(101),char(97),char(114),char(70),char(97),char(99),char(116),char(111),char(114),char(0),char(109),char(95),char(103),char(114),
+char(97),char(118),char(105),char(116),char(121),char(95),char(97),char(99),char(99),char(101),char(108),char(101),char(114),char(97),char(116),char(105),char(111),char(110),char(0),char(109),
+char(95),char(105),char(110),char(118),char(73),char(110),char(101),char(114),char(116),char(105),char(97),char(76),char(111),char(99),char(97),char(108),char(0),char(109),char(95),char(116),
+char(111),char(116),char(97),char(108),char(70),char(111),char(114),char(99),char(101),char(0),char(109),char(95),char(116),char(111),char(116),char(97),char(108),char(84),char(111),char(114),
+char(113),char(117),char(101),char(0),char(109),char(95),char(105),char(110),char(118),char(101),char(114),char(115),char(101),char(77),char(97),char(115),char(115),char(0),char(109),char(95),
+char(108),char(105),char(110),char(101),char(97),char(114),char(68),char(97),char(109),char(112),char(105),char(110),char(103),char(0),char(109),char(95),char(97),char(110),char(103),char(117),
+char(108),char(97),char(114),char(68),char(97),char(109),char(112),char(105),char(110),char(103),char(0),char(109),char(95),char(97),char(100),char(100),char(105),char(116),char(105),char(111),
+char(110),char(97),char(108),char(68),char(97),char(109),char(112),char(105),char(110),char(103),char(70),char(97),char(99),char(116),char(111),char(114),char(0),char(109),char(95),char(97),
+char(100),char(100),char(105),char(116),char(105),char(111),char(110),char(97),char(108),char(76),char(105),char(110),char(101),char(97),char(114),char(68),char(97),char(109),char(112),char(105),
+char(110),char(103),char(84),char(104),char(114),char(101),char(115),char(104),char(111),char(108),char(100),char(83),char(113),char(114),char(0),char(109),char(95),char(97),char(100),char(100),
+char(105),char(116),char(105),char(111),char(110),char(97),char(108),char(65),char(110),char(103),char(117),char(108),char(97),char(114),char(68),char(97),char(109),char(112),char(105),char(110),
+char(103),char(84),char(104),char(114),char(101),char(115),char(104),char(111),char(108),char(100),char(83),char(113),char(114),char(0),char(109),char(95),char(97),char(100),char(100),char(105),
+char(116),char(105),char(111),char(110),char(97),char(108),char(65),char(110),char(103),char(117),char(108),char(97),char(114),char(68),char(97),char(109),char(112),char(105),char(110),char(103),
+char(70),char(97),char(99),char(116),char(111),char(114),char(0),char(109),char(95),char(108),char(105),char(110),char(101),char(97),char(114),char(83),char(108),char(101),char(101),char(112),
+char(105),char(110),char(103),char(84),char(104),char(114),char(101),char(115),char(104),char(111),char(108),char(100),char(0),char(109),char(95),char(97),char(110),char(103),char(117),char(108),
+char(97),char(114),char(83),char(108),char(101),char(101),char(112),char(105),char(110),char(103),char(84),char(104),char(114),char(101),char(115),char(104),char(111),char(108),char(100),char(0),
+char(109),char(95),char(97),char(100),char(100),char(105),char(116),char(105),char(111),char(110),char(97),char(108),char(68),char(97),char(109),char(112),char(105),char(110),char(103),char(0),
+char(109),char(95),char(110),char(117),char(109),char(67),char(111),char(110),char(115),char(116),char(114),char(97),char(105),char(110),char(116),char(82),char(111),char(119),char(115),char(0),
+char(110),char(117),char(98),char(0),char(42),char(109),char(95),char(114),char(98),char(65),char(0),char(42),char(109),char(95),char(114),char(98),char(66),char(0),char(109),char(95),
+char(111),char(98),char(106),char(101),char(99),char(116),char(84),char(121),char(112),char(101),char(0),char(109),char(95),char(117),char(115),char(101),char(114),char(67),char(111),char(110),
+char(115),char(116),char(114),char(97),char(105),char(110),char(116),char(84),char(121),char(112),char(101),char(0),char(109),char(95),char(117),char(115),char(101),char(114),char(67),char(111),
+char(110),char(115),char(116),char(114),char(97),char(105),char(110),char(116),char(73),char(100),char(0),char(109),char(95),char(110),char(101),char(101),char(100),char(115),char(70),char(101),
+char(101),char(100),char(98),char(97),char(99),char(107),char(0),char(109),char(95),char(97),char(112),char(112),char(108),char(105),char(101),char(100),char(73),char(109),char(112),char(117),
+char(108),char(115),char(101),char(0),char(109),char(95),char(100),char(98),char(103),char(68),char(114),char(97),char(119),char(83),char(105),char(122),char(101),char(0),char(109),char(95),
+char(100),char(105),char(115),char(97),char(98),char(108),char(101),char(67),char(111),char(108),char(108),char(105),char(115),char(105),char(111),char(110),char(115),char(66),char(101),char(116),
+char(119),char(101),char(101),char(110),char(76),char(105),char(110),char(107),char(101),char(100),char(66),char(111),char(100),char(105),char(101),char(115),char(0),char(109),char(95),char(111),
+char(118),char(101),char(114),char(114),char(105),char(100),char(101),char(78),char(117),char(109),char(83),char(111),char(108),char(118),char(101),char(114),char(73),char(116),char(101),char(114),
+char(97),char(116),char(105),char(111),char(110),char(115),char(0),char(109),char(95),char(98),char(114),char(101),char(97),char(107),char(105),char(110),char(103),char(73),char(109),char(112),
+char(117),char(108),char(115),char(101),char(84),char(104),char(114),char(101),char(115),char(104),char(111),char(108),char(100),char(0),char(109),char(95),char(105),char(115),char(69),char(110),
+char(97),char(98),char(108),char(101),char(100),char(0),char(109),char(95),char(116),char(121),char(112),char(101),char(67),char(111),char(110),char(115),char(116),char(114),char(97),char(105),
+char(110),char(116),char(68),char(97),char(116),char(97),char(0),char(109),char(95),char(112),char(105),char(118),char(111),char(116),char(73),char(110),char(65),char(0),char(109),char(95),
+char(112),char(105),char(118),char(111),char(116),char(73),char(110),char(66),char(0),char(109),char(95),char(114),char(98),char(65),char(70),char(114),char(97),char(109),char(101),char(0),
+char(109),char(95),char(114),char(98),char(66),char(70),char(114),char(97),char(109),char(101),char(0),char(109),char(95),char(117),char(115),char(101),char(82),char(101),char(102),char(101),
+char(114),char(101),char(110),char(99),char(101),char(70),char(114),char(97),char(109),char(101),char(65),char(0),char(109),char(95),char(97),char(110),char(103),char(117),char(108),char(97),
+char(114),char(79),char(110),char(108),char(121),char(0),char(109),char(95),char(101),char(110),char(97),char(98),char(108),char(101),char(65),char(110),char(103),char(117),char(108),char(97),
+char(114),char(77),char(111),char(116),char(111),char(114),char(0),char(109),char(95),char(109),char(111),char(116),char(111),char(114),char(84),char(97),char(114),char(103),char(101),char(116),
+char(86),char(101),char(108),char(111),char(99),char(105),char(116),char(121),char(0),char(109),char(95),char(109),char(97),char(120),char(77),char(111),char(116),char(111),char(114),char(73),
+char(109),char(112),char(117),char(108),char(115),char(101),char(0),char(109),char(95),char(108),char(111),char(119),char(101),char(114),char(76),char(105),char(109),char(105),char(116),char(0),
+char(109),char(95),char(117),char(112),char(112),char(101),char(114),char(76),char(105),char(109),char(105),char(116),char(0),char(109),char(95),char(108),char(105),char(109),char(105),char(116),
+char(83),char(111),char(102),char(116),char(110),char(101),char(115),char(115),char(0),char(109),char(95),char(98),char(105),char(97),char(115),char(70),char(97),char(99),char(116),char(111),
+char(114),char(0),char(109),char(95),char(114),char(101),char(108),char(97),char(120),char(97),char(116),char(105),char(111),char(110),char(70),char(97),char(99),char(116),char(111),char(114),
+char(0),char(109),char(95),char(115),char(119),char(105),char(110),char(103),char(83),char(112),char(97),char(110),char(49),char(0),char(109),char(95),char(115),char(119),char(105),char(110),
+char(103),char(83),char(112),char(97),char(110),char(50),char(0),char(109),char(95),char(116),char(119),char(105),char(115),char(116),char(83),char(112),char(97),char(110),char(0),char(109),
+char(95),char(100),char(97),char(109),char(112),char(105),char(110),char(103),char(0),char(109),char(95),char(108),char(105),char(110),char(101),char(97),char(114),char(85),char(112),char(112),
+char(101),char(114),char(76),char(105),char(109),char(105),char(116),char(0),char(109),char(95),char(108),char(105),char(110),char(101),char(97),char(114),char(76),char(111),char(119),char(101),
+char(114),char(76),char(105),char(109),char(105),char(116),char(0),char(109),char(95),char(97),char(110),char(103),char(117),char(108),char(97),char(114),char(85),char(112),char(112),char(101),
+char(114),char(76),char(105),char(109),char(105),char(116),char(0),char(109),char(95),char(97),char(110),char(103),char(117),char(108),char(97),char(114),char(76),char(111),char(119),char(101),
+char(114),char(76),char(105),char(109),char(105),char(116),char(0),char(109),char(95),char(117),char(115),char(101),char(76),char(105),char(110),char(101),char(97),char(114),char(82),char(101),
+char(102),char(101),char(114),char(101),char(110),char(99),char(101),char(70),char(114),char(97),char(109),char(101),char(65),char(0),char(109),char(95),char(117),char(115),char(101),char(79),
+char(102),char(102),char(115),char(101),char(116),char(70),char(111),char(114),char(67),char(111),char(110),char(115),char(116),char(114),char(97),char(105),char(110),char(116),char(70),char(114),
+char(97),char(109),char(101),char(0),char(109),char(95),char(54),char(100),char(111),char(102),char(68),char(97),char(116),char(97),char(0),char(109),char(95),char(115),char(112),char(114),
+char(105),char(110),char(103),char(69),char(110),char(97),char(98),char(108),char(101),char(100),char(91),char(54),char(93),char(0),char(109),char(95),char(101),char(113),char(117),char(105),
+char(108),char(105),char(98),char(114),char(105),char(117),char(109),char(80),char(111),char(105),char(110),char(116),char(91),char(54),char(93),char(0),char(109),char(95),char(115),char(112),
+char(114),char(105),char(110),char(103),char(83),char(116),char(105),char(102),char(102),char(110),char(101),char(115),char(115),char(91),char(54),char(93),char(0),char(109),char(95),char(115),
+char(112),char(114),char(105),char(110),char(103),char(68),char(97),char(109),char(112),char(105),char(110),char(103),char(91),char(54),char(93),char(0),char(109),char(95),char(116),char(97),
+char(117),char(0),char(109),char(95),char(116),char(105),char(109),char(101),char(83),char(116),char(101),char(112),char(0),char(109),char(95),char(109),char(97),char(120),char(69),char(114),
+char(114),char(111),char(114),char(82),char(101),char(100),char(117),char(99),char(116),char(105),char(111),char(110),char(0),char(109),char(95),char(115),char(111),char(114),char(0),char(109),
+char(95),char(101),char(114),char(112),char(0),char(109),char(95),char(101),char(114),char(112),char(50),char(0),char(109),char(95),char(103),char(108),char(111),char(98),char(97),char(108),
+char(67),char(102),char(109),char(0),char(109),char(95),char(115),char(112),char(108),char(105),char(116),char(73),char(109),char(112),char(117),char(108),char(115),char(101),char(80),char(101),
+char(110),char(101),char(116),char(114),char(97),char(116),char(105),char(111),char(110),char(84),char(104),char(114),char(101),char(115),char(104),char(111),char(108),char(100),char(0),char(109),
+char(95),char(115),char(112),char(108),char(105),char(116),char(73),char(109),char(112),char(117),char(108),char(115),char(101),char(84),char(117),char(114),char(110),char(69),char(114),char(112),
+char(0),char(109),char(95),char(108),char(105),char(110),char(101),char(97),char(114),char(83),char(108),char(111),char(112),char(0),char(109),char(95),char(119),char(97),char(114),char(109),
+char(115),char(116),char(97),char(114),char(116),char(105),char(110),char(103),char(70),char(97),char(99),char(116),char(111),char(114),char(0),char(109),char(95),char(109),char(97),char(120),
+char(71),char(121),char(114),char(111),char(115),char(99),char(111),char(112),char(105),char(99),char(70),char(111),char(114),char(99),char(101),char(0),char(109),char(95),char(115),char(105),
+char(110),char(103),char(108),char(101),char(65),char(120),char(105),char(115),char(82),char(111),char(108),char(108),char(105),char(110),char(103),char(70),char(114),char(105),char(99),char(116),
+char(105),char(111),char(110),char(84),char(104),char(114),char(101),char(115),char(104),char(111),char(108),char(100),char(0),char(109),char(95),char(110),char(117),char(109),char(73),char(116),
+char(101),char(114),char(97),char(116),char(105),char(111),char(110),char(115),char(0),char(109),char(95),char(115),char(111),char(108),char(118),char(101),char(114),char(77),char(111),char(100),
+char(101),char(0),char(109),char(95),char(114),char(101),char(115),char(116),char(105),char(110),char(103),char(67),char(111),char(110),char(116),char(97),char(99),char(116),char(82),char(101),
+char(115),char(116),char(105),char(116),char(117),char(116),char(105),char(111),char(110),char(84),char(104),char(114),char(101),char(115),char(104),char(111),char(108),char(100),char(0),char(109),
+char(95),char(109),char(105),char(110),char(105),char(109),char(117),char(109),char(83),char(111),char(108),char(118),char(101),char(114),char(66),char(97),char(116),char(99),char(104),char(83),
+char(105),char(122),char(101),char(0),char(109),char(95),char(115),char(112),char(108),char(105),char(116),char(73),char(109),char(112),char(117),char(108),char(115),char(101),char(0),char(109),
+char(95),char(108),char(105),char(110),char(101),char(97),char(114),char(83),char(116),char(105),char(102),char(102),char(110),char(101),char(115),char(115),char(0),char(109),char(95),char(97),
+char(110),char(103),char(117),char(108),char(97),char(114),char(83),char(116),char(105),char(102),char(102),char(110),char(101),char(115),char(115),char(0),char(109),char(95),char(118),char(111),
+char(108),char(117),char(109),char(101),char(83),char(116),char(105),char(102),char(102),char(110),char(101),char(115),char(115),char(0),char(42),char(109),char(95),char(109),char(97),char(116),
+char(101),char(114),char(105),char(97),char(108),char(0),char(109),char(95),char(112),char(111),char(115),char(105),char(116),char(105),char(111),char(110),char(0),char(109),char(95),char(112),
+char(114),char(101),char(118),char(105),char(111),char(117),char(115),char(80),char(111),char(115),char(105),char(116),char(105),char(111),char(110),char(0),char(109),char(95),char(118),char(101),
+char(108),char(111),char(99),char(105),char(116),char(121),char(0),char(109),char(95),char(97),char(99),char(99),char(117),char(109),char(117),char(108),char(97),char(116),char(101),char(100),
+char(70),char(111),char(114),char(99),char(101),char(0),char(109),char(95),char(110),char(111),char(114),char(109),char(97),char(108),char(0),char(109),char(95),char(97),char(114),char(101),
+char(97),char(0),char(109),char(95),char(97),char(116),char(116),char(97),char(99),char(104),char(0),char(109),char(95),char(110),char(111),char(100),char(101),char(73),char(110),char(100),
+char(105),char(99),char(101),char(115),char(91),char(50),char(93),char(0),char(109),char(95),char(114),char(101),char(115),char(116),char(76),char(101),char(110),char(103),char(116),char(104),
+char(0),char(109),char(95),char(98),char(98),char(101),char(110),char(100),char(105),char(110),char(103),char(0),char(109),char(95),char(110),char(111),char(100),char(101),char(73),char(110),
+char(100),char(105),char(99),char(101),char(115),char(91),char(51),char(93),char(0),char(109),char(95),char(114),char(101),char(115),char(116),char(65),char(114),char(101),char(97),char(0),
+char(109),char(95),char(99),char(48),char(91),char(52),char(93),char(0),char(109),char(95),char(110),char(111),char(100),char(101),char(73),char(110),char(100),char(105),char(99),char(101),
+char(115),char(91),char(52),char(93),char(0),char(109),char(95),char(114),char(101),char(115),char(116),char(86),char(111),char(108),char(117),char(109),char(101),char(0),char(109),char(95),
+char(99),char(49),char(0),char(109),char(95),char(99),char(50),char(0),char(109),char(95),char(99),char(48),char(0),char(109),char(95),char(108),char(111),char(99),char(97),char(108),
+char(70),char(114),char(97),char(109),char(101),char(0),char(42),char(109),char(95),char(114),char(105),char(103),char(105),char(100),char(66),char(111),char(100),char(121),char(0),char(109),
+char(95),char(110),char(111),char(100),char(101),char(73),char(110),char(100),char(101),char(120),char(0),char(109),char(95),char(97),char(101),char(114),char(111),char(77),char(111),char(100),
+char(101),char(108),char(0),char(109),char(95),char(98),char(97),char(117),char(109),char(103),char(97),char(114),char(116),char(101),char(0),char(109),char(95),char(100),char(114),char(97),
+char(103),char(0),char(109),char(95),char(108),char(105),char(102),char(116),char(0),char(109),char(95),char(112),char(114),char(101),char(115),char(115),char(117),char(114),char(101),char(0),
+char(109),char(95),char(118),char(111),char(108),char(117),char(109),char(101),char(0),char(109),char(95),char(100),char(121),char(110),char(97),char(109),char(105),char(99),char(70),char(114),
+char(105),char(99),char(116),char(105),char(111),char(110),char(0),char(109),char(95),char(112),char(111),char(115),char(101),char(77),char(97),char(116),char(99),char(104),char(0),char(109),
+char(95),char(114),char(105),char(103),char(105),char(100),char(67),char(111),char(110),char(116),char(97),char(99),char(116),char(72),char(97),char(114),char(100),char(110),char(101),char(115),
+char(115),char(0),char(109),char(95),char(107),char(105),char(110),char(101),char(116),char(105),char(99),char(67),char(111),char(110),char(116),char(97),char(99),char(116),char(72),char(97),
+char(114),char(100),char(110),char(101),char(115),char(115),char(0),char(109),char(95),char(115),char(111),char(102),char(116),char(67),char(111),char(110),char(116),char(97),char(99),char(116),
+char(72),char(97),char(114),char(100),char(110),char(101),char(115),char(115),char(0),char(109),char(95),char(97),char(110),char(99),char(104),char(111),char(114),char(72),char(97),char(114),
+char(100),char(110),char(101),char(115),char(115),char(0),char(109),char(95),char(115),char(111),char(102),char(116),char(82),char(105),char(103),char(105),char(100),char(67),char(108),char(117),
+char(115),char(116),char(101),char(114),char(72),char(97),char(114),char(100),char(110),char(101),char(115),char(115),char(0),char(109),char(95),char(115),char(111),char(102),char(116),char(75),
+char(105),char(110),char(101),char(116),char(105),char(99),char(67),char(108),char(117),char(115),char(116),char(101),char(114),char(72),char(97),char(114),char(100),char(110),char(101),char(115),
+char(115),char(0),char(109),char(95),char(115),char(111),char(102),char(116),char(83),char(111),char(102),char(116),char(67),char(108),char(117),char(115),char(116),char(101),char(114),char(72),
+char(97),char(114),char(100),char(110),char(101),char(115),char(115),char(0),char(109),char(95),char(115),char(111),char(102),char(116),char(82),char(105),char(103),char(105),char(100),char(67),
+char(108),char(117),char(115),char(116),char(101),char(114),char(73),char(109),char(112),char(117),char(108),char(115),char(101),char(83),char(112),char(108),char(105),char(116),char(0),char(109),
+char(95),char(115),char(111),char(102),char(116),char(75),char(105),char(110),char(101),char(116),char(105),char(99),char(67),char(108),char(117),char(115),char(116),char(101),char(114),char(73),
+char(109),char(112),char(117),char(108),char(115),char(101),char(83),char(112),char(108),char(105),char(116),char(0),char(109),char(95),char(115),char(111),char(102),char(116),char(83),char(111),
+char(102),char(116),char(67),char(108),char(117),char(115),char(116),char(101),char(114),char(73),char(109),char(112),char(117),char(108),char(115),char(101),char(83),char(112),char(108),char(105),
+char(116),char(0),char(109),char(95),char(109),char(97),char(120),char(86),char(111),char(108),char(117),char(109),char(101),char(0),char(109),char(95),char(116),char(105),char(109),char(101),
+char(83),char(99),char(97),char(108),char(101),char(0),char(109),char(95),char(118),char(101),char(108),char(111),char(99),char(105),char(116),char(121),char(73),char(116),char(101),char(114),
+char(97),char(116),char(105),char(111),char(110),char(115),char(0),char(109),char(95),char(112),char(111),char(115),char(105),char(116),char(105),char(111),char(110),char(73),char(116),char(101),
+char(114),char(97),char(116),char(105),char(111),char(110),char(115),char(0),char(109),char(95),char(100),char(114),char(105),char(102),char(116),char(73),char(116),char(101),char(114),char(97),
+char(116),char(105),char(111),char(110),char(115),char(0),char(109),char(95),char(99),char(108),char(117),char(115),char(116),char(101),char(114),char(73),char(116),char(101),char(114),char(97),
+char(116),char(105),char(111),char(110),char(115),char(0),char(109),char(95),char(114),char(111),char(116),char(0),char(109),char(95),char(115),char(99),char(97),char(108),char(101),char(0),
+char(109),char(95),char(97),char(113),char(113),char(0),char(109),char(95),char(99),char(111),char(109),char(0),char(42),char(109),char(95),char(112),char(111),char(115),char(105),char(116),
+char(105),char(111),char(110),char(115),char(0),char(42),char(109),char(95),char(119),char(101),char(105),char(103),char(104),char(116),char(115),char(0),char(109),char(95),char(110),char(117),
+char(109),char(80),char(111),char(115),char(105),char(116),char(105),char(111),char(110),char(115),char(0),char(109),char(95),char(110),char(117),char(109),char(87),char(101),char(105),char(103),
+char(116),char(115),char(0),char(109),char(95),char(98),char(118),char(111),char(108),char(117),char(109),char(101),char(0),char(109),char(95),char(98),char(102),char(114),char(97),char(109),
+char(101),char(0),char(109),char(95),char(102),char(114),char(97),char(109),char(101),char(120),char(102),char(111),char(114),char(109),char(0),char(109),char(95),char(108),char(111),char(99),
+char(105),char(105),char(0),char(109),char(95),char(105),char(110),char(118),char(119),char(105),char(0),char(109),char(95),char(118),char(105),char(109),char(112),char(117),char(108),char(115),
+char(101),char(115),char(91),char(50),char(93),char(0),char(109),char(95),char(100),char(105),char(109),char(112),char(117),char(108),char(115),char(101),char(115),char(91),char(50),char(93),
+char(0),char(109),char(95),char(108),char(118),char(0),char(109),char(95),char(97),char(118),char(0),char(42),char(109),char(95),char(102),char(114),char(97),char(109),char(101),char(114),
+char(101),char(102),char(115),char(0),char(42),char(109),char(95),char(110),char(111),char(100),char(101),char(73),char(110),char(100),char(105),char(99),char(101),char(115),char(0),char(42),
+char(109),char(95),char(109),char(97),char(115),char(115),char(101),char(115),char(0),char(109),char(95),char(110),char(117),char(109),char(70),char(114),char(97),char(109),char(101),char(82),
+char(101),char(102),char(115),char(0),char(109),char(95),char(110),char(117),char(109),char(78),char(111),char(100),char(101),char(115),char(0),char(109),char(95),char(110),char(117),char(109),
+char(77),char(97),char(115),char(115),char(101),char(115),char(0),char(109),char(95),char(105),char(100),char(109),char(97),char(115),char(115),char(0),char(109),char(95),char(105),char(109),
+char(97),char(115),char(115),char(0),char(109),char(95),char(110),char(118),char(105),char(109),char(112),char(117),char(108),char(115),char(101),char(115),char(0),char(109),char(95),char(110),
+char(100),char(105),char(109),char(112),char(117),char(108),char(115),char(101),char(115),char(0),char(109),char(95),char(110),char(100),char(97),char(109),char(112),char(105),char(110),char(103),
+char(0),char(109),char(95),char(108),char(100),char(97),char(109),char(112),char(105),char(110),char(103),char(0),char(109),char(95),char(97),char(100),char(97),char(109),char(112),char(105),
+char(110),char(103),char(0),char(109),char(95),char(109),char(97),char(116),char(99),char(104),char(105),char(110),char(103),char(0),char(109),char(95),char(109),char(97),char(120),char(83),
+char(101),char(108),char(102),char(67),char(111),char(108),char(108),char(105),char(115),char(105),char(111),char(110),char(73),char(109),char(112),char(117),char(108),char(115),char(101),char(0),
+char(109),char(95),char(115),char(101),char(108),char(102),char(67),char(111),char(108),char(108),char(105),char(115),char(105),char(111),char(110),char(73),char(109),char(112),char(117),char(108),
+char(115),char(101),char(70),char(97),char(99),char(116),char(111),char(114),char(0),char(109),char(95),char(99),char(111),char(110),char(116),char(97),char(105),char(110),char(115),char(65),
+char(110),char(99),char(104),char(111),char(114),char(0),char(109),char(95),char(99),char(111),char(108),char(108),char(105),char(100),char(101),char(0),char(109),char(95),char(99),char(108),
+char(117),char(115),char(116),char(101),char(114),char(73),char(110),char(100),char(101),char(120),char(0),char(42),char(109),char(95),char(98),char(111),char(100),char(121),char(65),char(0),
+char(42),char(109),char(95),char(98),char(111),char(100),char(121),char(66),char(0),char(109),char(95),char(114),char(101),char(102),char(115),char(91),char(50),char(93),char(0),char(109),
+char(95),char(99),char(102),char(109),char(0),char(109),char(95),char(115),char(112),char(108),char(105),char(116),char(0),char(109),char(95),char(100),char(101),char(108),char(101),char(116),
+char(101),char(0),char(109),char(95),char(114),char(101),char(108),char(80),char(111),char(115),char(105),char(116),char(105),char(111),char(110),char(91),char(50),char(93),char(0),char(109),
+char(95),char(98),char(111),char(100),char(121),char(65),char(116),char(121),char(112),char(101),char(0),char(109),char(95),char(98),char(111),char(100),char(121),char(66),char(116),char(121),
+char(112),char(101),char(0),char(109),char(95),char(106),char(111),char(105),char(110),char(116),char(84),char(121),char(112),char(101),char(0),char(42),char(109),char(95),char(112),char(111),
+char(115),char(101),char(0),char(42),char(42),char(109),char(95),char(109),char(97),char(116),char(101),char(114),char(105),char(97),char(108),char(115),char(0),char(42),char(109),char(95),
+char(110),char(111),char(100),char(101),char(115),char(0),char(42),char(109),char(95),char(108),char(105),char(110),char(107),char(115),char(0),char(42),char(109),char(95),char(102),char(97),
+char(99),char(101),char(115),char(0),char(42),char(109),char(95),char(116),char(101),char(116),char(114),char(97),char(104),char(101),char(100),char(114),char(97),char(0),char(42),char(109),
+char(95),char(97),char(110),char(99),char(104),char(111),char(114),char(115),char(0),char(42),char(109),char(95),char(99),char(108),char(117),char(115),char(116),char(101),char(114),char(115),
+char(0),char(42),char(109),char(95),char(106),char(111),char(105),char(110),char(116),char(115),char(0),char(109),char(95),char(110),char(117),char(109),char(77),char(97),char(116),char(101),
+char(114),char(105),char(97),char(108),char(115),char(0),char(109),char(95),char(110),char(117),char(109),char(76),char(105),char(110),char(107),char(115),char(0),char(109),char(95),char(110),
+char(117),char(109),char(70),char(97),char(99),char(101),char(115),char(0),char(109),char(95),char(110),char(117),char(109),char(84),char(101),char(116),char(114),char(97),char(104),char(101),
+char(100),char(114),char(97),char(0),char(109),char(95),char(110),char(117),char(109),char(65),char(110),char(99),char(104),char(111),char(114),char(115),char(0),char(109),char(95),char(110),
+char(117),char(109),char(67),char(108),char(117),char(115),char(116),char(101),char(114),char(115),char(0),char(109),char(95),char(110),char(117),char(109),char(74),char(111),char(105),char(110),
+char(116),char(115),char(0),char(109),char(95),char(99),char(111),char(110),char(102),char(105),char(103),char(0),char(84),char(89),char(80),char(69),char(76),char(0),char(0),char(0),
+char(99),char(104),char(97),char(114),char(0),char(117),char(99),char(104),char(97),char(114),char(0),char(115),char(104),char(111),char(114),char(116),char(0),char(117),char(115),char(104),
+char(111),char(114),char(116),char(0),char(105),char(110),char(116),char(0),char(108),char(111),char(110),char(103),char(0),char(117),char(108),char(111),char(110),char(103),char(0),char(102),
+char(108),char(111),char(97),char(116),char(0),char(100),char(111),char(117),char(98),char(108),char(101),char(0),char(118),char(111),char(105),char(100),char(0),char(80),char(111),char(105),
+char(110),char(116),char(101),char(114),char(65),char(114),char(114),char(97),char(121),char(0),char(98),char(116),char(80),char(104),char(121),char(115),char(105),char(99),char(115),char(83),
+char(121),char(115),char(116),char(101),char(109),char(0),char(76),char(105),char(115),char(116),char(66),char(97),char(115),char(101),char(0),char(98),char(116),char(86),char(101),char(99),
+char(116),char(111),char(114),char(51),char(70),char(108),char(111),char(97),char(116),char(68),char(97),char(116),char(97),char(0),char(98),char(116),char(86),char(101),char(99),char(116),
+char(111),char(114),char(51),char(68),char(111),char(117),char(98),char(108),char(101),char(68),char(97),char(116),char(97),char(0),char(98),char(116),char(77),char(97),char(116),char(114),
+char(105),char(120),char(51),char(120),char(51),char(70),char(108),char(111),char(97),char(116),char(68),char(97),char(116),char(97),char(0),char(98),char(116),char(77),char(97),char(116),
+char(114),char(105),char(120),char(51),char(120),char(51),char(68),char(111),char(117),char(98),char(108),char(101),char(68),char(97),char(116),char(97),char(0),char(98),char(116),char(84),
+char(114),char(97),char(110),char(115),char(102),char(111),char(114),char(109),char(70),char(108),char(111),char(97),char(116),char(68),char(97),char(116),char(97),char(0),char(98),char(116),
+char(84),char(114),char(97),char(110),char(115),char(102),char(111),char(114),char(109),char(68),char(111),char(117),char(98),char(108),char(101),char(68),char(97),char(116),char(97),char(0),
+char(98),char(116),char(66),char(118),char(104),char(83),char(117),char(98),char(116),char(114),char(101),char(101),char(73),char(110),char(102),char(111),char(68),char(97),char(116),char(97),
+char(0),char(98),char(116),char(79),char(112),char(116),char(105),char(109),char(105),char(122),char(101),char(100),char(66),char(118),char(104),char(78),char(111),char(100),char(101),char(70),
+char(108),char(111),char(97),char(116),char(68),char(97),char(116),char(97),char(0),char(98),char(116),char(79),char(112),char(116),char(105),char(109),char(105),char(122),char(101),char(100),
+char(66),char(118),char(104),char(78),char(111),char(100),char(101),char(68),char(111),char(117),char(98),char(108),char(101),char(68),char(97),char(116),char(97),char(0),char(98),char(116),
+char(81),char(117),char(97),char(110),char(116),char(105),char(122),char(101),char(100),char(66),char(118),char(104),char(78),char(111),char(100),char(101),char(68),char(97),char(116),char(97),
+char(0),char(98),char(116),char(81),char(117),char(97),char(110),char(116),char(105),char(122),char(101),char(100),char(66),char(118),char(104),char(70),char(108),char(111),char(97),char(116),
+char(68),char(97),char(116),char(97),char(0),char(98),char(116),char(81),char(117),char(97),char(110),char(116),char(105),char(122),char(101),char(100),char(66),char(118),char(104),char(68),
+char(111),char(117),char(98),char(108),char(101),char(68),char(97),char(116),char(97),char(0),char(98),char(116),char(67),char(111),char(108),char(108),char(105),char(115),char(105),char(111),
+char(110),char(83),char(104),char(97),char(112),char(101),char(68),char(97),char(116),char(97),char(0),char(98),char(116),char(83),char(116),char(97),char(116),char(105),char(99),char(80),
+char(108),char(97),char(110),char(101),char(83),char(104),char(97),char(112),char(101),char(68),char(97),char(116),char(97),char(0),char(98),char(116),char(67),char(111),char(110),char(118),
+char(101),char(120),char(73),char(110),char(116),char(101),char(114),char(110),char(97),char(108),char(83),char(104),char(97),char(112),char(101),char(68),char(97),char(116),char(97),char(0),
+char(98),char(116),char(80),char(111),char(115),char(105),char(116),char(105),char(111),char(110),char(65),char(110),char(100),char(82),char(97),char(100),char(105),char(117),char(115),char(0),
+char(98),char(116),char(77),char(117),char(108),char(116),char(105),char(83),char(112),char(104),char(101),char(114),char(101),char(83),char(104),char(97),char(112),char(101),char(68),char(97),
+char(116),char(97),char(0),char(98),char(116),char(73),char(110),char(116),char(73),char(110),char(100),char(101),char(120),char(68),char(97),char(116),char(97),char(0),char(98),char(116),
+char(83),char(104),char(111),char(114),char(116),char(73),char(110),char(116),char(73),char(110),char(100),char(101),char(120),char(68),char(97),char(116),char(97),char(0),char(98),char(116),
+char(83),char(104),char(111),char(114),char(116),char(73),char(110),char(116),char(73),char(110),char(100),char(101),char(120),char(84),char(114),char(105),char(112),char(108),char(101),char(116),
+char(68),char(97),char(116),char(97),char(0),char(98),char(116),char(67),char(104),char(97),char(114),char(73),char(110),char(100),char(101),char(120),char(84),char(114),char(105),char(112),
+char(108),char(101),char(116),char(68),char(97),char(116),char(97),char(0),char(98),char(116),char(77),char(101),char(115),char(104),char(80),char(97),char(114),char(116),char(68),char(97),
+char(116),char(97),char(0),char(98),char(116),char(83),char(116),char(114),char(105),char(100),char(105),char(110),char(103),char(77),char(101),char(115),char(104),char(73),char(110),char(116),
+char(101),char(114),char(102),char(97),char(99),char(101),char(68),char(97),char(116),char(97),char(0),char(98),char(116),char(84),char(114),char(105),char(97),char(110),char(103),char(108),
+char(101),char(77),char(101),char(115),char(104),char(83),char(104),char(97),char(112),char(101),char(68),char(97),char(116),char(97),char(0),char(98),char(116),char(84),char(114),char(105),
+char(97),char(110),char(103),char(108),char(101),char(73),char(110),char(102),char(111),char(77),char(97),char(112),char(68),char(97),char(116),char(97),char(0),char(98),char(116),char(83),
+char(99),char(97),char(108),char(101),char(100),char(84),char(114),char(105),char(97),char(110),char(103),char(108),char(101),char(77),char(101),char(115),char(104),char(83),char(104),char(97),
+char(112),char(101),char(68),char(97),char(116),char(97),char(0),char(98),char(116),char(67),char(111),char(109),char(112),char(111),char(117),char(110),char(100),char(83),char(104),char(97),
+char(112),char(101),char(67),char(104),char(105),char(108),char(100),char(68),char(97),char(116),char(97),char(0),char(98),char(116),char(67),char(111),char(109),char(112),char(111),char(117),
+char(110),char(100),char(83),char(104),char(97),char(112),char(101),char(68),char(97),char(116),char(97),char(0),char(98),char(116),char(67),char(121),char(108),char(105),char(110),char(100),
+char(101),char(114),char(83),char(104),char(97),char(112),char(101),char(68),char(97),char(116),char(97),char(0),char(98),char(116),char(67),char(97),char(112),char(115),char(117),char(108),
+char(101),char(83),char(104),char(97),char(112),char(101),char(68),char(97),char(116),char(97),char(0),char(98),char(116),char(84),char(114),char(105),char(97),char(110),char(103),char(108),
+char(101),char(73),char(110),char(102),char(111),char(68),char(97),char(116),char(97),char(0),char(98),char(116),char(71),char(73),char(109),char(112),char(97),char(99),char(116),char(77),
+char(101),char(115),char(104),char(83),char(104),char(97),char(112),char(101),char(68),char(97),char(116),char(97),char(0),char(98),char(116),char(67),char(111),char(110),char(118),char(101),
+char(120),char(72),char(117),char(108),char(108),char(83),char(104),char(97),char(112),char(101),char(68),char(97),char(116),char(97),char(0),char(98),char(116),char(67),char(111),char(108),
+char(108),char(105),char(115),char(105),char(111),char(110),char(79),char(98),char(106),char(101),char(99),char(116),char(68),char(111),char(117),char(98),char(108),char(101),char(68),char(97),
+char(116),char(97),char(0),char(98),char(116),char(67),char(111),char(108),char(108),char(105),char(115),char(105),char(111),char(110),char(79),char(98),char(106),char(101),char(99),char(116),
+char(70),char(108),char(111),char(97),char(116),char(68),char(97),char(116),char(97),char(0),char(98),char(116),char(68),char(121),char(110),char(97),char(109),char(105),char(99),char(115),
+char(87),char(111),char(114),char(108),char(100),char(68),char(111),char(117),char(98),char(108),char(101),char(68),char(97),char(116),char(97),char(0),char(98),char(116),char(67),char(111),
+char(110),char(116),char(97),char(99),char(116),char(83),char(111),char(108),char(118),char(101),char(114),char(73),char(110),char(102),char(111),char(68),char(111),char(117),char(98),char(108),
+char(101),char(68),char(97),char(116),char(97),char(0),char(98),char(116),char(68),char(121),char(110),char(97),char(109),char(105),char(99),char(115),char(87),char(111),char(114),char(108),
+char(100),char(70),char(108),char(111),char(97),char(116),char(68),char(97),char(116),char(97),char(0),char(98),char(116),char(67),char(111),char(110),char(116),char(97),char(99),char(116),
+char(83),char(111),char(108),char(118),char(101),char(114),char(73),char(110),char(102),char(111),char(70),char(108),char(111),char(97),char(116),char(68),char(97),char(116),char(97),char(0),
+char(98),char(116),char(82),char(105),char(103),char(105),char(100),char(66),char(111),char(100),char(121),char(70),char(108),char(111),char(97),char(116),char(68),char(97),char(116),char(97),
+char(0),char(98),char(116),char(82),char(105),char(103),char(105),char(100),char(66),char(111),char(100),char(121),char(68),char(111),char(117),char(98),char(108),char(101),char(68),char(97),
+char(116),char(97),char(0),char(98),char(116),char(67),char(111),char(110),char(115),char(116),char(114),char(97),char(105),char(110),char(116),char(73),char(110),char(102),char(111),char(49),
+char(0),char(98),char(116),char(84),char(121),char(112),char(101),char(100),char(67),char(111),char(110),char(115),char(116),char(114),char(97),char(105),char(110),char(116),char(68),char(97),
+char(116),char(97),char(0),char(98),char(116),char(82),char(105),char(103),char(105),char(100),char(66),char(111),char(100),char(121),char(68),char(97),char(116),char(97),char(0),char(98),
+char(116),char(80),char(111),char(105),char(110),char(116),char(50),char(80),char(111),char(105),char(110),char(116),char(67),char(111),char(110),char(115),char(116),char(114),char(97),char(105),
+char(110),char(116),char(70),char(108),char(111),char(97),char(116),char(68),char(97),char(116),char(97),char(0),char(98),char(116),char(80),char(111),char(105),char(110),char(116),char(50),
+char(80),char(111),char(105),char(110),char(116),char(67),char(111),char(110),char(115),char(116),char(114),char(97),char(105),char(110),char(116),char(68),char(111),char(117),char(98),char(108),
+char(101),char(68),char(97),char(116),char(97),char(0),char(98),char(116),char(72),char(105),char(110),char(103),char(101),char(67),char(111),char(110),char(115),char(116),char(114),char(97),
+char(105),char(110),char(116),char(68),char(111),char(117),char(98),char(108),char(101),char(68),char(97),char(116),char(97),char(0),char(98),char(116),char(72),char(105),char(110),char(103),
+char(101),char(67),char(111),char(110),char(115),char(116),char(114),char(97),char(105),char(110),char(116),char(70),char(108),char(111),char(97),char(116),char(68),char(97),char(116),char(97),
+char(0),char(98),char(116),char(67),char(111),char(110),char(101),char(84),char(119),char(105),char(115),char(116),char(67),char(111),char(110),char(115),char(116),char(114),char(97),char(105),
+char(110),char(116),char(68),char(97),char(116),char(97),char(0),char(98),char(116),char(71),char(101),char(110),char(101),char(114),char(105),char(99),char(54),char(68),char(111),char(102),
+char(67),char(111),char(110),char(115),char(116),char(114),char(97),char(105),char(110),char(116),char(68),char(97),char(116),char(97),char(0),char(98),char(116),char(71),char(101),char(110),
+char(101),char(114),char(105),char(99),char(54),char(68),char(111),char(102),char(83),char(112),char(114),char(105),char(110),char(103),char(67),char(111),char(110),char(115),char(116),char(114),
+char(97),char(105),char(110),char(116),char(68),char(97),char(116),char(97),char(0),char(98),char(116),char(83),char(108),char(105),char(100),char(101),char(114),char(67),char(111),char(110),
+char(115),char(116),char(114),char(97),char(105),char(110),char(116),char(68),char(97),char(116),char(97),char(0),char(83),char(111),char(102),char(116),char(66),char(111),char(100),char(121),
+char(77),char(97),char(116),char(101),char(114),char(105),char(97),char(108),char(68),char(97),char(116),char(97),char(0),char(83),char(111),char(102),char(116),char(66),char(111),char(100),
+char(121),char(78),char(111),char(100),char(101),char(68),char(97),char(116),char(97),char(0),char(83),char(111),char(102),char(116),char(66),char(111),char(100),char(121),char(76),char(105),
+char(110),char(107),char(68),char(97),char(116),char(97),char(0),char(83),char(111),char(102),char(116),char(66),char(111),char(100),char(121),char(70),char(97),char(99),char(101),char(68),
+char(97),char(116),char(97),char(0),char(83),char(111),char(102),char(116),char(66),char(111),char(100),char(121),char(84),char(101),char(116),char(114),char(97),char(68),char(97),char(116),
+char(97),char(0),char(83),char(111),char(102),char(116),char(82),char(105),char(103),char(105),char(100),char(65),char(110),char(99),char(104),char(111),char(114),char(68),char(97),char(116),
+char(97),char(0),char(83),char(111),char(102),char(116),char(66),char(111),char(100),char(121),char(67),char(111),char(110),char(102),char(105),char(103),char(68),char(97),char(116),char(97),
+char(0),char(83),char(111),char(102),char(116),char(66),char(111),char(100),char(121),char(80),char(111),char(115),char(101),char(68),char(97),char(116),char(97),char(0),char(83),char(111),
+char(102),char(116),char(66),char(111),char(100),char(121),char(67),char(108),char(117),char(115),char(116),char(101),char(114),char(68),char(97),char(116),char(97),char(0),char(98),char(116),
+char(83),char(111),char(102),char(116),char(66),char(111),char(100),char(121),char(74),char(111),char(105),char(110),char(116),char(68),char(97),char(116),char(97),char(0),char(98),char(116),
+char(83),char(111),char(102),char(116),char(66),char(111),char(100),char(121),char(70),char(108),char(111),char(97),char(116),char(68),char(97),char(116),char(97),char(0),char(0),char(0),
+char(84),char(76),char(69),char(78),char(1),char(0),char(1),char(0),char(2),char(0),char(2),char(0),char(4),char(0),char(4),char(0),char(4),char(0),char(4),char(0),
+char(8),char(0),char(0),char(0),char(16),char(0),char(48),char(0),char(16),char(0),char(16),char(0),char(32),char(0),char(48),char(0),char(96),char(0),char(64),char(0),
+char(-128),char(0),char(20),char(0),char(48),char(0),char(80),char(0),char(16),char(0),char(96),char(0),char(-112),char(0),char(16),char(0),char(56),char(0),char(56),char(0),
+char(20),char(0),char(72),char(0),char(4),char(0),char(4),char(0),char(8),char(0),char(4),char(0),char(56),char(0),char(32),char(0),char(80),char(0),char(72),char(0),
+char(96),char(0),char(80),char(0),char(32),char(0),char(64),char(0),char(64),char(0),char(16),char(0),char(72),char(0),char(80),char(0),char(-32),char(1),char(16),char(1),
+char(-72),char(0),char(-104),char(0),char(104),char(0),char(88),char(0),char(-8),char(1),char(-80),char(3),char(8),char(0),char(64),char(0),char(0),char(0),char(96),char(0),
+char(-128),char(0),char(104),char(1),char(-24),char(0),char(-32),char(0),char(8),char(1),char(104),char(1),char(-40),char(0),char(16),char(0),char(104),char(0),char(24),char(0),
+char(40),char(0),char(104),char(0),char(96),char(0),char(104),char(0),char(-56),char(0),char(104),char(1),char(112),char(0),char(-32),char(1),char(83),char(84),char(82),char(67),
+char(65),char(0),char(0),char(0),char(10),char(0),char(3),char(0),char(4),char(0),char(0),char(0),char(4),char(0),char(1),char(0),char(9),char(0),char(2),char(0),
+char(11),char(0),char(3),char(0),char(10),char(0),char(3),char(0),char(10),char(0),char(4),char(0),char(10),char(0),char(5),char(0),char(12),char(0),char(2),char(0),
+char(9),char(0),char(6),char(0),char(9),char(0),char(7),char(0),char(13),char(0),char(1),char(0),char(7),char(0),char(8),char(0),char(14),char(0),char(1),char(0),
+char(8),char(0),char(8),char(0),char(15),char(0),char(1),char(0),char(13),char(0),char(9),char(0),char(16),char(0),char(1),char(0),char(14),char(0),char(9),char(0),
+char(17),char(0),char(2),char(0),char(15),char(0),char(10),char(0),char(13),char(0),char(11),char(0),char(18),char(0),char(2),char(0),char(16),char(0),char(10),char(0),
+char(14),char(0),char(11),char(0),char(19),char(0),char(4),char(0),char(4),char(0),char(12),char(0),char(4),char(0),char(13),char(0),char(2),char(0),char(14),char(0),
+char(2),char(0),char(15),char(0),char(20),char(0),char(6),char(0),char(13),char(0),char(16),char(0),char(13),char(0),char(17),char(0),char(4),char(0),char(18),char(0),
+char(4),char(0),char(19),char(0),char(4),char(0),char(20),char(0),char(0),char(0),char(21),char(0),char(21),char(0),char(6),char(0),char(14),char(0),char(16),char(0),
+char(14),char(0),char(17),char(0),char(4),char(0),char(18),char(0),char(4),char(0),char(19),char(0),char(4),char(0),char(20),char(0),char(0),char(0),char(21),char(0),
+char(22),char(0),char(3),char(0),char(2),char(0),char(14),char(0),char(2),char(0),char(15),char(0),char(4),char(0),char(22),char(0),char(23),char(0),char(12),char(0),
+char(13),char(0),char(23),char(0),char(13),char(0),char(24),char(0),char(13),char(0),char(25),char(0),char(4),char(0),char(26),char(0),char(4),char(0),char(27),char(0),
+char(4),char(0),char(28),char(0),char(4),char(0),char(29),char(0),char(20),char(0),char(30),char(0),char(22),char(0),char(31),char(0),char(19),char(0),char(32),char(0),
+char(4),char(0),char(33),char(0),char(4),char(0),char(34),char(0),char(24),char(0),char(12),char(0),char(14),char(0),char(23),char(0),char(14),char(0),char(24),char(0),
+char(14),char(0),char(25),char(0),char(4),char(0),char(26),char(0),char(4),char(0),char(27),char(0),char(4),char(0),char(28),char(0),char(4),char(0),char(29),char(0),
+char(21),char(0),char(30),char(0),char(22),char(0),char(31),char(0),char(4),char(0),char(33),char(0),char(4),char(0),char(34),char(0),char(19),char(0),char(32),char(0),
+char(25),char(0),char(3),char(0),char(0),char(0),char(35),char(0),char(4),char(0),char(36),char(0),char(0),char(0),char(37),char(0),char(26),char(0),char(5),char(0),
+char(25),char(0),char(38),char(0),char(13),char(0),char(39),char(0),char(13),char(0),char(40),char(0),char(7),char(0),char(41),char(0),char(0),char(0),char(21),char(0),
+char(27),char(0),char(5),char(0),char(25),char(0),char(38),char(0),char(13),char(0),char(39),char(0),char(13),char(0),char(42),char(0),char(7),char(0),char(43),char(0),
+char(4),char(0),char(44),char(0),char(28),char(0),char(2),char(0),char(13),char(0),char(45),char(0),char(7),char(0),char(46),char(0),char(29),char(0),char(4),char(0),
+char(27),char(0),char(47),char(0),char(28),char(0),char(48),char(0),char(4),char(0),char(49),char(0),char(0),char(0),char(37),char(0),char(30),char(0),char(1),char(0),
+char(4),char(0),char(50),char(0),char(31),char(0),char(2),char(0),char(2),char(0),char(50),char(0),char(0),char(0),char(51),char(0),char(32),char(0),char(2),char(0),
+char(2),char(0),char(52),char(0),char(0),char(0),char(51),char(0),char(33),char(0),char(2),char(0),char(0),char(0),char(52),char(0),char(0),char(0),char(53),char(0),
+char(34),char(0),char(8),char(0),char(13),char(0),char(54),char(0),char(14),char(0),char(55),char(0),char(30),char(0),char(56),char(0),char(32),char(0),char(57),char(0),
+char(33),char(0),char(58),char(0),char(31),char(0),char(59),char(0),char(4),char(0),char(60),char(0),char(4),char(0),char(61),char(0),char(35),char(0),char(4),char(0),
+char(34),char(0),char(62),char(0),char(13),char(0),char(63),char(0),char(4),char(0),char(64),char(0),char(0),char(0),char(37),char(0),char(36),char(0),char(7),char(0),
+char(25),char(0),char(38),char(0),char(35),char(0),char(65),char(0),char(23),char(0),char(66),char(0),char(24),char(0),char(67),char(0),char(37),char(0),char(68),char(0),
+char(7),char(0),char(43),char(0),char(0),char(0),char(69),char(0),char(38),char(0),char(2),char(0),char(36),char(0),char(70),char(0),char(13),char(0),char(39),char(0),
+char(39),char(0),char(4),char(0),char(17),char(0),char(71),char(0),char(25),char(0),char(72),char(0),char(4),char(0),char(73),char(0),char(7),char(0),char(74),char(0),
+char(40),char(0),char(4),char(0),char(25),char(0),char(38),char(0),char(39),char(0),char(75),char(0),char(4),char(0),char(76),char(0),char(7),char(0),char(43),char(0),
+char(41),char(0),char(3),char(0),char(27),char(0),char(47),char(0),char(4),char(0),char(77),char(0),char(0),char(0),char(37),char(0),char(42),char(0),char(3),char(0),
+char(27),char(0),char(47),char(0),char(4),char(0),char(77),char(0),char(0),char(0),char(37),char(0),char(43),char(0),char(4),char(0),char(4),char(0),char(78),char(0),
+char(7),char(0),char(79),char(0),char(7),char(0),char(80),char(0),char(7),char(0),char(81),char(0),char(37),char(0),char(14),char(0),char(4),char(0),char(82),char(0),
+char(4),char(0),char(83),char(0),char(43),char(0),char(84),char(0),char(4),char(0),char(85),char(0),char(7),char(0),char(86),char(0),char(7),char(0),char(87),char(0),
+char(7),char(0),char(88),char(0),char(7),char(0),char(89),char(0),char(7),char(0),char(90),char(0),char(4),char(0),char(91),char(0),char(4),char(0),char(92),char(0),
+char(4),char(0),char(93),char(0),char(4),char(0),char(94),char(0),char(0),char(0),char(37),char(0),char(44),char(0),char(5),char(0),char(25),char(0),char(38),char(0),
+char(35),char(0),char(65),char(0),char(13),char(0),char(39),char(0),char(7),char(0),char(43),char(0),char(4),char(0),char(95),char(0),char(45),char(0),char(5),char(0),
+char(27),char(0),char(47),char(0),char(13),char(0),char(96),char(0),char(14),char(0),char(97),char(0),char(4),char(0),char(98),char(0),char(0),char(0),char(99),char(0),
+char(46),char(0),char(25),char(0),char(9),char(0),char(100),char(0),char(9),char(0),char(101),char(0),char(25),char(0),char(102),char(0),char(0),char(0),char(35),char(0),
+char(18),char(0),char(103),char(0),char(18),char(0),char(104),char(0),char(14),char(0),char(105),char(0),char(14),char(0),char(106),char(0),char(14),char(0),char(107),char(0),
+char(8),char(0),char(108),char(0),char(8),char(0),char(109),char(0),char(8),char(0),char(110),char(0),char(8),char(0),char(111),char(0),char(8),char(0),char(112),char(0),
+char(8),char(0),char(113),char(0),char(8),char(0),char(114),char(0),char(8),char(0),char(115),char(0),char(4),char(0),char(116),char(0),char(4),char(0),char(117),char(0),
+char(4),char(0),char(118),char(0),char(4),char(0),char(119),char(0),char(4),char(0),char(120),char(0),char(4),char(0),char(121),char(0),char(4),char(0),char(122),char(0),
+char(0),char(0),char(37),char(0),char(47),char(0),char(25),char(0),char(9),char(0),char(100),char(0),char(9),char(0),char(101),char(0),char(25),char(0),char(102),char(0),
+char(0),char(0),char(35),char(0),char(17),char(0),char(103),char(0),char(17),char(0),char(104),char(0),char(13),char(0),char(105),char(0),char(13),char(0),char(106),char(0),
+char(13),char(0),char(107),char(0),char(7),char(0),char(108),char(0),char(7),char(0),char(109),char(0),char(7),char(0),char(110),char(0),char(7),char(0),char(111),char(0),
+char(7),char(0),char(112),char(0),char(7),char(0),char(113),char(0),char(7),char(0),char(114),char(0),char(7),char(0),char(115),char(0),char(4),char(0),char(116),char(0),
+char(4),char(0),char(117),char(0),char(4),char(0),char(118),char(0),char(4),char(0),char(119),char(0),char(4),char(0),char(120),char(0),char(4),char(0),char(121),char(0),
+char(4),char(0),char(122),char(0),char(0),char(0),char(37),char(0),char(48),char(0),char(2),char(0),char(49),char(0),char(123),char(0),char(14),char(0),char(124),char(0),
+char(50),char(0),char(2),char(0),char(51),char(0),char(123),char(0),char(13),char(0),char(124),char(0),char(52),char(0),char(21),char(0),char(47),char(0),char(125),char(0),
+char(15),char(0),char(126),char(0),char(13),char(0),char(127),char(0),char(13),char(0),char(-128),char(0),char(13),char(0),char(-127),char(0),char(13),char(0),char(-126),char(0),
+char(13),char(0),char(124),char(0),char(13),char(0),char(-125),char(0),char(13),char(0),char(-124),char(0),char(13),char(0),char(-123),char(0),char(13),char(0),char(-122),char(0),
+char(7),char(0),char(-121),char(0),char(7),char(0),char(-120),char(0),char(7),char(0),char(-119),char(0),char(7),char(0),char(-118),char(0),char(7),char(0),char(-117),char(0),
+char(7),char(0),char(-116),char(0),char(7),char(0),char(-115),char(0),char(7),char(0),char(-114),char(0),char(7),char(0),char(-113),char(0),char(4),char(0),char(-112),char(0),
+char(53),char(0),char(22),char(0),char(46),char(0),char(125),char(0),char(16),char(0),char(126),char(0),char(14),char(0),char(127),char(0),char(14),char(0),char(-128),char(0),
+char(14),char(0),char(-127),char(0),char(14),char(0),char(-126),char(0),char(14),char(0),char(124),char(0),char(14),char(0),char(-125),char(0),char(14),char(0),char(-124),char(0),
+char(14),char(0),char(-123),char(0),char(14),char(0),char(-122),char(0),char(8),char(0),char(-121),char(0),char(8),char(0),char(-120),char(0),char(8),char(0),char(-119),char(0),
+char(8),char(0),char(-118),char(0),char(8),char(0),char(-117),char(0),char(8),char(0),char(-116),char(0),char(8),char(0),char(-115),char(0),char(8),char(0),char(-114),char(0),
+char(8),char(0),char(-113),char(0),char(4),char(0),char(-112),char(0),char(0),char(0),char(37),char(0),char(54),char(0),char(2),char(0),char(4),char(0),char(-111),char(0),
+char(4),char(0),char(-110),char(0),char(55),char(0),char(13),char(0),char(56),char(0),char(-109),char(0),char(56),char(0),char(-108),char(0),char(0),char(0),char(35),char(0),
+char(4),char(0),char(-107),char(0),char(4),char(0),char(-106),char(0),char(4),char(0),char(-105),char(0),char(4),char(0),char(-104),char(0),char(7),char(0),char(-103),char(0),
+char(7),char(0),char(-102),char(0),char(4),char(0),char(-101),char(0),char(4),char(0),char(-100),char(0),char(7),char(0),char(-99),char(0),char(4),char(0),char(-98),char(0),
+char(57),char(0),char(3),char(0),char(55),char(0),char(-97),char(0),char(13),char(0),char(-96),char(0),char(13),char(0),char(-95),char(0),char(58),char(0),char(3),char(0),
+char(55),char(0),char(-97),char(0),char(14),char(0),char(-96),char(0),char(14),char(0),char(-95),char(0),char(59),char(0),char(13),char(0),char(55),char(0),char(-97),char(0),
+char(18),char(0),char(-94),char(0),char(18),char(0),char(-93),char(0),char(4),char(0),char(-92),char(0),char(4),char(0),char(-91),char(0),char(4),char(0),char(-90),char(0),
+char(7),char(0),char(-89),char(0),char(7),char(0),char(-88),char(0),char(7),char(0),char(-87),char(0),char(7),char(0),char(-86),char(0),char(7),char(0),char(-85),char(0),
+char(7),char(0),char(-84),char(0),char(7),char(0),char(-83),char(0),char(60),char(0),char(13),char(0),char(55),char(0),char(-97),char(0),char(17),char(0),char(-94),char(0),
+char(17),char(0),char(-93),char(0),char(4),char(0),char(-92),char(0),char(4),char(0),char(-91),char(0),char(4),char(0),char(-90),char(0),char(7),char(0),char(-89),char(0),
+char(7),char(0),char(-88),char(0),char(7),char(0),char(-87),char(0),char(7),char(0),char(-86),char(0),char(7),char(0),char(-85),char(0),char(7),char(0),char(-84),char(0),
+char(7),char(0),char(-83),char(0),char(61),char(0),char(11),char(0),char(55),char(0),char(-97),char(0),char(17),char(0),char(-94),char(0),char(17),char(0),char(-93),char(0),
+char(7),char(0),char(-82),char(0),char(7),char(0),char(-81),char(0),char(7),char(0),char(-80),char(0),char(7),char(0),char(-85),char(0),char(7),char(0),char(-84),char(0),
+char(7),char(0),char(-83),char(0),char(7),char(0),char(-79),char(0),char(0),char(0),char(21),char(0),char(62),char(0),char(9),char(0),char(55),char(0),char(-97),char(0),
+char(17),char(0),char(-94),char(0),char(17),char(0),char(-93),char(0),char(13),char(0),char(-78),char(0),char(13),char(0),char(-77),char(0),char(13),char(0),char(-76),char(0),
+char(13),char(0),char(-75),char(0),char(4),char(0),char(-74),char(0),char(4),char(0),char(-73),char(0),char(63),char(0),char(5),char(0),char(62),char(0),char(-72),char(0),
+char(4),char(0),char(-71),char(0),char(7),char(0),char(-70),char(0),char(7),char(0),char(-69),char(0),char(7),char(0),char(-68),char(0),char(64),char(0),char(9),char(0),
+char(55),char(0),char(-97),char(0),char(17),char(0),char(-94),char(0),char(17),char(0),char(-93),char(0),char(7),char(0),char(-78),char(0),char(7),char(0),char(-77),char(0),
+char(7),char(0),char(-76),char(0),char(7),char(0),char(-75),char(0),char(4),char(0),char(-74),char(0),char(4),char(0),char(-73),char(0),char(49),char(0),char(22),char(0),
+char(8),char(0),char(-67),char(0),char(8),char(0),char(-79),char(0),char(8),char(0),char(110),char(0),char(8),char(0),char(-66),char(0),char(8),char(0),char(112),char(0),
+char(8),char(0),char(-65),char(0),char(8),char(0),char(-64),char(0),char(8),char(0),char(-63),char(0),char(8),char(0),char(-62),char(0),char(8),char(0),char(-61),char(0),
+char(8),char(0),char(-60),char(0),char(8),char(0),char(-59),char(0),char(8),char(0),char(-58),char(0),char(8),char(0),char(-57),char(0),char(8),char(0),char(-56),char(0),
+char(8),char(0),char(-55),char(0),char(4),char(0),char(-54),char(0),char(4),char(0),char(-53),char(0),char(4),char(0),char(-52),char(0),char(4),char(0),char(-51),char(0),
+char(4),char(0),char(-50),char(0),char(0),char(0),char(37),char(0),char(51),char(0),char(22),char(0),char(7),char(0),char(-67),char(0),char(7),char(0),char(-79),char(0),
+char(7),char(0),char(110),char(0),char(7),char(0),char(-66),char(0),char(7),char(0),char(112),char(0),char(7),char(0),char(-65),char(0),char(7),char(0),char(-64),char(0),
+char(7),char(0),char(-63),char(0),char(7),char(0),char(-62),char(0),char(7),char(0),char(-61),char(0),char(7),char(0),char(-60),char(0),char(7),char(0),char(-59),char(0),
+char(7),char(0),char(-58),char(0),char(7),char(0),char(-57),char(0),char(7),char(0),char(-56),char(0),char(7),char(0),char(-55),char(0),char(4),char(0),char(-54),char(0),
+char(4),char(0),char(-53),char(0),char(4),char(0),char(-52),char(0),char(4),char(0),char(-51),char(0),char(4),char(0),char(-50),char(0),char(0),char(0),char(37),char(0),
+char(65),char(0),char(4),char(0),char(7),char(0),char(-49),char(0),char(7),char(0),char(-48),char(0),char(7),char(0),char(-47),char(0),char(4),char(0),char(78),char(0),
+char(66),char(0),char(10),char(0),char(65),char(0),char(-46),char(0),char(13),char(0),char(-45),char(0),char(13),char(0),char(-44),char(0),char(13),char(0),char(-43),char(0),
+char(13),char(0),char(-42),char(0),char(13),char(0),char(-41),char(0),char(7),char(0),char(-121),char(0),char(7),char(0),char(-40),char(0),char(4),char(0),char(-39),char(0),
+char(4),char(0),char(53),char(0),char(67),char(0),char(4),char(0),char(65),char(0),char(-46),char(0),char(4),char(0),char(-38),char(0),char(7),char(0),char(-37),char(0),
+char(4),char(0),char(-36),char(0),char(68),char(0),char(4),char(0),char(13),char(0),char(-41),char(0),char(65),char(0),char(-46),char(0),char(4),char(0),char(-35),char(0),
+char(7),char(0),char(-34),char(0),char(69),char(0),char(7),char(0),char(13),char(0),char(-33),char(0),char(65),char(0),char(-46),char(0),char(4),char(0),char(-32),char(0),
+char(7),char(0),char(-31),char(0),char(7),char(0),char(-30),char(0),char(7),char(0),char(-29),char(0),char(4),char(0),char(53),char(0),char(70),char(0),char(6),char(0),
+char(15),char(0),char(-28),char(0),char(13),char(0),char(-30),char(0),char(13),char(0),char(-27),char(0),char(56),char(0),char(-26),char(0),char(4),char(0),char(-25),char(0),
+char(7),char(0),char(-29),char(0),char(71),char(0),char(26),char(0),char(4),char(0),char(-24),char(0),char(7),char(0),char(-23),char(0),char(7),char(0),char(-79),char(0),
+char(7),char(0),char(-22),char(0),char(7),char(0),char(-21),char(0),char(7),char(0),char(-20),char(0),char(7),char(0),char(-19),char(0),char(7),char(0),char(-18),char(0),
+char(7),char(0),char(-17),char(0),char(7),char(0),char(-16),char(0),char(7),char(0),char(-15),char(0),char(7),char(0),char(-14),char(0),char(7),char(0),char(-13),char(0),
+char(7),char(0),char(-12),char(0),char(7),char(0),char(-11),char(0),char(7),char(0),char(-10),char(0),char(7),char(0),char(-9),char(0),char(7),char(0),char(-8),char(0),
+char(7),char(0),char(-7),char(0),char(7),char(0),char(-6),char(0),char(7),char(0),char(-5),char(0),char(4),char(0),char(-4),char(0),char(4),char(0),char(-3),char(0),
+char(4),char(0),char(-2),char(0),char(4),char(0),char(-1),char(0),char(4),char(0),char(117),char(0),char(72),char(0),char(12),char(0),char(15),char(0),char(0),char(1),
+char(15),char(0),char(1),char(1),char(15),char(0),char(2),char(1),char(13),char(0),char(3),char(1),char(13),char(0),char(4),char(1),char(7),char(0),char(5),char(1),
+char(4),char(0),char(6),char(1),char(4),char(0),char(7),char(1),char(4),char(0),char(8),char(1),char(4),char(0),char(9),char(1),char(7),char(0),char(-31),char(0),
+char(4),char(0),char(53),char(0),char(73),char(0),char(27),char(0),char(17),char(0),char(10),char(1),char(15),char(0),char(11),char(1),char(15),char(0),char(12),char(1),
+char(13),char(0),char(3),char(1),char(13),char(0),char(13),char(1),char(13),char(0),char(14),char(1),char(13),char(0),char(15),char(1),char(13),char(0),char(16),char(1),
+char(13),char(0),char(17),char(1),char(4),char(0),char(18),char(1),char(7),char(0),char(19),char(1),char(4),char(0),char(20),char(1),char(4),char(0),char(21),char(1),
+char(4),char(0),char(22),char(1),char(7),char(0),char(23),char(1),char(7),char(0),char(24),char(1),char(4),char(0),char(25),char(1),char(4),char(0),char(26),char(1),
+char(7),char(0),char(27),char(1),char(7),char(0),char(28),char(1),char(7),char(0),char(29),char(1),char(7),char(0),char(30),char(1),char(7),char(0),char(31),char(1),
+char(7),char(0),char(32),char(1),char(4),char(0),char(33),char(1),char(4),char(0),char(34),char(1),char(4),char(0),char(35),char(1),char(74),char(0),char(12),char(0),
+char(9),char(0),char(36),char(1),char(9),char(0),char(37),char(1),char(13),char(0),char(38),char(1),char(7),char(0),char(39),char(1),char(7),char(0),char(-63),char(0),
+char(7),char(0),char(40),char(1),char(4),char(0),char(41),char(1),char(13),char(0),char(42),char(1),char(4),char(0),char(43),char(1),char(4),char(0),char(44),char(1),
+char(4),char(0),char(45),char(1),char(4),char(0),char(53),char(0),char(75),char(0),char(19),char(0),char(47),char(0),char(125),char(0),char(72),char(0),char(46),char(1),
+char(65),char(0),char(47),char(1),char(66),char(0),char(48),char(1),char(67),char(0),char(49),char(1),char(68),char(0),char(50),char(1),char(69),char(0),char(51),char(1),
+char(70),char(0),char(52),char(1),char(73),char(0),char(53),char(1),char(74),char(0),char(54),char(1),char(4),char(0),char(55),char(1),char(4),char(0),char(21),char(1),
+char(4),char(0),char(56),char(1),char(4),char(0),char(57),char(1),char(4),char(0),char(58),char(1),char(4),char(0),char(59),char(1),char(4),char(0),char(60),char(1),
+char(4),char(0),char(61),char(1),char(71),char(0),char(62),char(1),};
+int sBulletDNAlen64= sizeof(sBulletDNAstr64);
diff --git a/Engine/lib/bullet/src/LinearMath/btSerializer.h b/Engine/lib/bullet/src/LinearMath/btSerializer.h
new file mode 100644
index 000000000..c5bc96b78
--- /dev/null
+++ b/Engine/lib/bullet/src/LinearMath/btSerializer.h
@@ -0,0 +1,640 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2009 Erwin Coumans  http://bulletphysics.org
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef BT_SERIALIZER_H
+#define BT_SERIALIZER_H
+
+#include "btScalar.h" // has definitions like SIMD_FORCE_INLINE
+#include "btStackAlloc.h"
+#include "btHashMap.h"
+
+#if !defined( __CELLOS_LV2__) && !defined(__MWERKS__)
+#include <memory.h>
+#endif
+#include <string.h>
+
+
+
+///serialization DNA blobs: sBulletDNAstr is used with 32bit pointers, sBulletDNAstr64 with 64bit pointers
+extern char sBulletDNAstr[];
+extern int sBulletDNAlen;
+extern char sBulletDNAstr64[];
+extern int sBulletDNAlen64;
+
+SIMD_FORCE_INLINE	int btStrLen(const char* str) 
+{
+	//counts the characters in front of the terminating zero;
+	//a null pointer is treated like an empty string
+	int count = 0;
+	if (str)
+	{
+		for (const char* cursor = str; *cursor; ++cursor)
+		{
+			++count;
+		}
+	}
+	return count;
+}
+
+
+class btChunk	//header that btDefaultSerializer::allocate places in front of the payload of each chunk
+{
+public:
+	int		m_chunkCode;	//BT_*_CODE identifier, set by finalizeChunk (0 until then)
+	int		m_length;	//payload size in bytes: element size * m_number
+	void	*m_oldPtr;	//payload address until finalizeChunk replaces it with the unique pointer of the source object
+	int		m_dna_nr;	//struct index of the payload type in the DNA, -1 when the type name is unknown; set by finalizeChunk
+	int		m_number;	//number of elements in the payload
+};
+
+enum	btSerializationFlags	//bit flags for btSerializer::setSerializationFlags
+{
+	BT_SERIALIZE_NO_BVH = 1,	//presumably skips serializing BVH data; not evaluated in this header -- confirm against callers
+	BT_SERIALIZE_NO_TRIANGLEINFOMAP = 2,	//presumably skips serializing triangle info maps; not evaluated in this header -- confirm
+	BT_SERIALIZE_NO_DUPLICATE_ASSERT = 4	//btDefaultSerializer::finalizeChunk skips its duplicate-pointer assert when this is set
+};
+
+class	btSerializer
+{
+///Interface for writing objects as chunks into one buffer; btDefaultSerializer further down in this header implements it.
+public:
+	///virtual, so that implementations can be destroyed through a btSerializer pointer
+	virtual ~btSerializer() {}
+	///start of the output buffer
+	virtual	const unsigned char*		getBufferPointer() const = 0;
+	///size of the output in bytes
+	virtual	int		getCurrentBufferSize() const = 0;
+	///reserves a chunk with room for numElements elements of size bytes each
+	virtual	btChunk*	allocate(size_t size, int numElements) = 0;
+	///completes a chunk from allocate(): takes the struct type name, the chunk code and the pointer of the serialized object
+	virtual	void	finalizeChunk(btChunk* chunk, const char* structType, int chunkCode,void* oldPtr)= 0;
+	///looks up oldPtr among the finalized chunks; btDefaultSerializer returns 0 when it is not known
+	virtual	 void*	findPointer(void* oldPtr)  = 0;
+	///value that stands in for oldPtr in the output; btDefaultSerializer hands out generated ids
+	virtual	void*	getUniquePointer(void*oldPtr) = 0;
+	///call before serializing objects; btDefaultSerializer writes the file header here when it has a preallocated buffer
+	virtual	void	startSerialization() = 0;
+	///call after the last object; btDefaultSerializer appends the DNA chunk here
+	virtual	void	finishSerialization() = 0;
+	///name registered for ptr; btDefaultSerializer returns 0 when there is none
+	virtual	const char*	findNameForPointer(const void* ptr) const = 0;
+	///associates name with ptr for later findNameForPointer calls
+	virtual	void	registerNameForPointer(const void* ptr, const char* name) = 0;
+	///writes the given string; btDefaultSerializer stores it as a chunk of its own
+	virtual void	serializeName(const char* ptr) = 0;
+	///current combination of btSerializationFlags bits
+	virtual int		getSerializationFlags() const = 0;
+	///replaces the combination of btSerializationFlags bits
+	virtual void	setSerializationFlags(int flags) = 0;
+
+
+};
+
+
+
+#define BT_HEADER_LENGTH 12 //size in bytes of the file header produced by btDefaultSerializer::writeHeader
+#if defined(__sgi) || defined (__sparc) || defined (__sparc__) || defined (__PPC__) || defined (__ppc__) || defined (__BIG_ENDIAN__)
+#	define BT_MAKE_ID(a,b,c,d) ( (int)(a)<<24 | (int)(b)<<16 | (c)<<8 | (d) ) //targets listed above: first character goes into the most significant byte
+#else
+#	define BT_MAKE_ID(a,b,c,d) ( (int)(d)<<24 | (int)(c)<<16 | (b)<<8 | (a) ) //all other targets: first character goes into the least significant byte
+#endif
+
+#define BT_SOFTBODY_CODE		BT_MAKE_ID('S','B','D','Y')
+#define BT_COLLISIONOBJECT_CODE BT_MAKE_ID('C','O','B','J')
+#define BT_RIGIDBODY_CODE		BT_MAKE_ID('R','B','D','Y')
+#define BT_CONSTRAINT_CODE		BT_MAKE_ID('C','O','N','S')
+#define BT_BOXSHAPE_CODE		BT_MAKE_ID('B','O','X','S')
+#define BT_QUANTIZED_BVH_CODE	BT_MAKE_ID('Q','B','V','H')
+#define BT_TRIANLGE_INFO_MAP	BT_MAKE_ID('T','M','A','P') //NOTE(review): "TRIANLGE" is misspelled, but renaming would break code that uses this macro
+#define BT_SHAPE_CODE			BT_MAKE_ID('S','H','A','P')
+#define BT_ARRAY_CODE			BT_MAKE_ID('A','R','A','Y') //used by btDefaultSerializer::serializeName for name strings
+#define BT_SBMATERIAL_CODE		BT_MAKE_ID('S','B','M','T')
+#define BT_SBNODE_CODE			BT_MAKE_ID('S','B','N','D')
+#define BT_DYNAMICSWORLD_CODE	BT_MAKE_ID('D','W','L','D')
+#define BT_DNA_CODE				BT_MAKE_ID('D','N','A','1') //used by btDefaultSerializer::writeDNA for the DNA chunk
+
+
+struct	btPointerUid	//lets the id generated by btDefaultSerializer::getUniquePointer be read back as a pointer value
+{
+	union
+	{
+		void*	m_ptr;	//pointer view, this is what getUniquePointer returns
+		int		m_uniqueIds[2];	//integer view, getUniquePointer stores the same id in both elements
+	};
+};
+
+///The btDefaultSerializer is the main Bullet serialization class.
+///The constructor takes an optional argument for backwards compatibility, it is recommended to leave this empty/zero.
+class btDefaultSerializer	:	public btSerializer
+{
+
+	//the next five members are filled by initDNA from the DNA blob
+	btAlignedObjectArray<char*>			mTypes;	//type names, pointing into m_dna
+	btAlignedObjectArray<short*>			mStructs;	//struct descriptions, pointing into m_dna
+	btAlignedObjectArray<short>			mTlens;	//one length per entry of mTypes
+	btHashMap<btHashInt, int>			mStructReverse;	//type index -> struct index
+	btHashMap<btHashString,int>	mTypeLookup;	//type name -> struct index
+
+	//pointer of a finalized object -> unique pointer written for it
+	btHashMap<btHashPtr,void*>	m_chunkP;
+	//pointer -> name given to registerNameForPointer
+	btHashMap<btHashPtr,const char*>	m_nameMap;
+
+	btHashMap<btHashPtr,btPointerUid>	m_uniquePointers;	//pointer -> id handed out by getUniquePointer
+	int	m_uniqueIdGenerator;	//last id handed out, startSerialization resets it to 1
+
+	int					m_totalSize;	//size of the preallocated buffer, 0 when chunks are allocated one by one
+	unsigned char*		m_buffer;	//output buffer
+	int					m_currentSize;	//bytes accounted for so far
+	void*				m_dna;	//private copy of the DNA blob, made by initDNA
+	int					m_dnaLength;	//size of m_dna in bytes
+
+	int					m_serializationFlags;	//combination of btSerializationFlags bits
+
+	//every chunk handed out by allocate, in order
+	btAlignedObjectArray<btChunk*>	m_chunkPtrs;
+	
+protected:
+
+	virtual	void*	findPointer(void* oldPtr) 
+	{
+		//m_chunkP is filled by finalizeChunk; an entry that is missing
+		//or holds a null value is reported as 0
+		void** entry = m_chunkP.find(oldPtr);
+		return (entry && *entry) ? *entry : 0;
+	}
+
+	
+
+
+
+		void	writeDNA()
+		{	//stores the whole DNA blob as one chunk; copy first, because finalizeChunk replaces m_oldPtr
+			btChunk* const chunk = allocate(m_dnaLength,1);
+			memcpy(chunk->m_oldPtr,m_dna,m_dnaLength);
+			finalizeChunk(chunk,"DNA1",BT_DNA_CODE, m_dna);
+		}
+
+		int getReverseType(const char *type) const
+		{
+			//mTypeLookup maps a type name to its struct index (filled by initDNA); unknown names yield -1
+			const int* structIndex = mTypeLookup.find(btHashString(type));
+			if (!structIndex)
+			{
+				return -1;
+			}
+			return *structIndex;
+		}
+
+		void initDNA(const char* bdnaOrg,int dnalen)
+		{
+			///Parses the DNA blob bdnaOrg (dnalen bytes) into mTypes, mTlens, mStructs and the lookup maps. Does nothing when the DNA was already initialized.
+			if (m_dna)
+				return;
+			//littleEndian ends up as 1 on little-endian hosts and as 0 on big-endian hosts
+			int littleEndian= 1;
+			littleEndian= ((char*)&littleEndian)[0];
+			
+			//parse a private 16-byte aligned copy: on big-endian hosts the values are byte swapped in place
+			m_dna = btAlignedAlloc(dnalen,16);
+			memcpy(m_dna,bdnaOrg,dnalen);
+			m_dnaLength = dnalen;
+
+			int *intPtr=0;
+			short *shtPtr=0;
+			char *cp = 0;int dataLen =0;
+			intPtr = (int*)m_dna;
+
+			/*
+				SDNA (4 bytes) (magic number)
+				NAME (4 bytes)
+				<nr> (4 bytes) amount of names (int)
+				<string>
+				<string>
+			*/
+
+			if (strncmp((const char*)m_dna, "SDNA", 4)==0)
+			{
+				// skip the "SDNA" and "NAME" tags
+				intPtr++; intPtr++;
+			}
+
+			// Parse names
+			if (!littleEndian)
+				*intPtr = btSwapEndian(*intPtr);
+			//dataLen is the number of names here
+			dataLen = *intPtr;
+			
+			intPtr++;
+
+			cp = (char*)intPtr;
+			int i;
+			for ( i=0; i<dataLen; i++)
+			{
+				//the names are only skipped, they are not stored
+				while (*cp)cp++;
+				cp++;
+			}
+			cp = btAlignPointer(cp,4);
+
+			/*
+				TYPE (4 bytes)
+				<nr> amount of types (int)
+				<string>
+				<string>
+			*/
+
+			intPtr = (int*)cp;
+			btAssert(strncmp(cp, "TYPE", 4)==0); intPtr++;
+
+			if (!littleEndian)
+				*intPtr =  btSwapEndian(*intPtr);
+			
+			dataLen = *intPtr;
+			intPtr++;
+
+			//the type names stay inside m_dna, mTypes only stores pointers to them
+			cp = (char*)intPtr;
+			for (i=0; i<dataLen; i++)
+			{
+				mTypes.push_back(cp);
+				while (*cp)cp++;
+				cp++;
+			}
+
+			cp = btAlignPointer(cp,4);
+
+
+			/*
+				TLEN (4 bytes)
+				<len> (short) the lengths of types
+				<len>
+			*/
+
+			// Parse type lens
+			intPtr = (int*)cp;
+			btAssert(strncmp(cp, "TLEN", 4)==0); intPtr++;
+			//this section has no count of its own: one length is read per entry of mTypes
+			dataLen = (int)mTypes.size();
+
+			shtPtr = (short*)intPtr;
+			for (i=0; i<dataLen; i++, shtPtr++)
+			{
+				if (!littleEndian)
+					shtPtr[0] = btSwapEndian(shtPtr[0]);
+				mTlens.push_back(shtPtr[0]);
+			}
+			//after an odd number of lengths one more short is skipped -- presumably padding that keeps the next tag 4-byte aligned
+			if (dataLen & 1) shtPtr++;
+
+			/*
+				STRC (4 bytes)
+				<nr> amount of structs (int)
+				<typenr>
+				<nr_of_elems>
+				<typenr>
+				<namenr>
+				<typenr>
+				<namenr>
+			*/
+
+			intPtr = (int*)shtPtr;
+			cp = (char*)intPtr;
+			btAssert(strncmp(cp, "STRC", 4)==0); intPtr++;
+
+			if (!littleEndian)
+				*intPtr = btSwapEndian(*intPtr);
+			dataLen = *intPtr ; 
+			intPtr++;
+
+			//every struct is <typenr> <nr_of_elems>, followed by nr_of_elems pairs of <typenr> <namenr>
+			shtPtr = (short*)intPtr;
+			for (i=0; i<dataLen; i++)
+			{
+				mStructs.push_back (shtPtr);
+				//on big-endian hosts each short is swapped while stepping over the struct, otherwise the struct is skipped in one step
+				if (!littleEndian)
+				{
+					shtPtr[0]= btSwapEndian(shtPtr[0]);
+					shtPtr[1]= btSwapEndian(shtPtr[1]);
+
+					int len = shtPtr[1];
+					shtPtr+= 2;
+
+					for (int a=0; a<len; a++, shtPtr+=2)
+					{
+							shtPtr[0]= btSwapEndian(shtPtr[0]);
+							shtPtr[1]= btSwapEndian(shtPtr[1]);
+					}
+
+				} else
+				{
+					shtPtr+= (2*shtPtr[1])+2;
+				}
+			}
+
+			// build reverse lookups: type index -> struct index and type name -> struct index
+			for (i=0; i<(int)mStructs.size(); i++)
+			{
+				short *strc = mStructs.at(i);
+				mStructReverse.insert(strc[0], i);
+				mTypeLookup.insert(btHashString(mTypes[strc[0]]),i);
+			}
+		}
+
+public:	
+	
+
+	
+
+		btDefaultSerializer(int totalSize=0)
+			:m_uniqueIdGenerator(0),	//defined even when getUniquePointer is used before startSerialization
+			m_totalSize(totalSize),
+			m_currentSize(0),
+			m_dna(0),
+			m_dnaLength(0),
+			m_serializationFlags(0)
+		{
+			//Allocates the output buffer when totalSize is not 0 (0 means chunks are allocated one by one) and loads the DNA.
+			m_buffer = m_totalSize?(unsigned char*)btAlignedAlloc(totalSize,16):0;
+			//the DNA has to match the pointer size of this build
+			const bool VOID_IS_8 = ((sizeof(void*)==8));
+#ifdef BT_INTERNAL_UPDATE_SERIALIZATION_STRUCTURES
+			//NOTE(review): this variant asserts when the pointer size disagrees with _WIN64 -- presumably only used while regenerating the DNA, confirm
+			if (VOID_IS_8)
+			{
+#if _WIN64
+				initDNA((const char*)sBulletDNAstr64,sBulletDNAlen64);
+#else
+				btAssert(0);
+#endif
+			} else
+			{
+#ifndef _WIN64
+				initDNA((const char*)sBulletDNAstr,sBulletDNAlen);
+#else
+				btAssert(0);
+#endif
+			}
+#else //BT_INTERNAL_UPDATE_SERIALIZATION_STRUCTURES
+			if (VOID_IS_8)
+			{
+				initDNA((const char*)sBulletDNAstr64,sBulletDNAlen64);
+			} else
+			{
+				initDNA((const char*)sBulletDNAstr,sBulletDNAlen);
+			}
+#endif //BT_INTERNAL_UPDATE_SERIALIZATION_STRUCTURES
+		}
+
+		virtual ~btDefaultSerializer() 
+		{
+			//without a preallocated buffer every chunk is its own allocation; finishSerialization frees them and clears the list, so anything still listed here would leak
+			for (int i=0;!m_totalSize && i<m_chunkPtrs.size();i++) btAlignedFree(m_chunkPtrs[i]);
+			if (m_buffer) btAlignedFree(m_buffer);
+			if (m_dna) btAlignedFree(m_dna);
+		}
+
+		void	writeHeader(unsigned char* buffer) const
+		{
+			//Fills the BT_HEADER_LENGTH (12) bytes at buffer:
+			//  bytes 0-6   "BULLETd" for double precision builds, "BULLETf" otherwise
+			//  byte  7     '-' when pointers are 8 bytes wide, '_' otherwise
+			//  byte  8     'v' on little-endian hosts, 'V' on big-endian hosts
+			//  bytes 9-11  the version digits '2','8','1'
+			//It is called by startSerialization when a buffer was preallocated
+			//and by finishSerialization otherwise.
+
+			//magic string, its last character tells the precision
+#ifdef  BT_USE_DOUBLE_PRECISION
+			const char* const magic = "BULLETd";
+#else
+			const char* const magic = "BULLETf";
+#endif //BT_USE_DOUBLE_PRECISION
+			memcpy(buffer, magic, 7);
+
+			//pointer width marker
+			const bool pointersAre8Bytes = (sizeof(void*)==8);
+			buffer[7] = pointersAre8Bytes ? '-' : '_';
+
+			//endianness marker: the first byte of an int holding 1
+			//is non-zero only on little-endian hosts
+			int probe = 1;
+			const bool hostIsLittleEndian = ((char*)&probe)[0] != 0;
+			buffer[8] = hostIsLittleEndian ? 'v' : 'V';
+
+			//version digits
+			const char versionDigits[3] = {'2','8','1'};
+			for (int i=0;i<3;i++)
+			{
+				buffer[9+i] = versionDigits[i];
+			}
+		}
+
+		virtual	void	startSerialization()
+		{
+			//restart the ids handed out by getUniquePointer
+			m_uniqueIdGenerator= 1;
+			//without a preallocated buffer the header is written by finishSerialization instead
+			if (!m_totalSize)
+				return;
+			//reserve room for the header in the buffer and fill it in
+			writeHeader(internalAlloc(BT_HEADER_LENGTH));
+		}
+
+		virtual	void	finishSerialization()
+		{
+			//Completes the output: appends the DNA chunk and, without a preallocated buffer, merges header and chunks into m_buffer.
+			writeDNA();
+
+			//if we didn't pre-allocate a buffer, we need to create a contiguous buffer now
+			if (!m_totalSize)
+			{
+				if (m_buffer)
+					btAlignedFree(m_buffer);
+
+				m_currentSize += BT_HEADER_LENGTH;
+				m_buffer = (unsigned char*)btAlignedAlloc(m_currentSize,16);
+
+				unsigned char* currentPtr = m_buffer;
+				writeHeader(m_buffer);
+				currentPtr += BT_HEADER_LENGTH;
+				for (int i=0;i<	m_chunkPtrs.size();i++)
+				{
+					//copy the chunk header together with its payload, then release the separate allocation of the chunk
+					int curLength = sizeof(btChunk)+m_chunkPtrs[i]->m_length;
+					memcpy(currentPtr,m_chunkPtrs[i], curLength);
+					btAlignedFree(m_chunkPtrs[i]);
+					currentPtr+=curLength;
+				}
+			}
+
+			//NOTE(review): the DNA tables are cleared while m_dna stays set, so initDNA will not rebuild them; looks like a serializer is meant for a single use -- confirm
+			mTypes.clear();
+			mStructs.clear();
+			mTlens.clear();
+			mStructReverse.clear();
+			mTypeLookup.clear();
+			m_chunkP.clear();
+			m_nameMap.clear();
+			m_uniquePointers.clear();
+			m_chunkPtrs.clear();
+		}
+
+		virtual	void*	getUniquePointer(void*oldPtr)
+		{
+			//a null pointer stays null, every other pointer gets an id that is returned again on repeated requests
+			if (!oldPtr)
+				return 0;
+
+			const btPointerUid* known = m_uniquePointers.find(oldPtr);
+			if (known)
+				return known->m_ptr;
+
+			//first request for this pointer: take the next id and store it in both ints of the union
+			const int freshId = ++m_uniqueIdGenerator;
+			btPointerUid fresh;
+			fresh.m_uniqueIds[0] = freshId;
+			fresh.m_uniqueIds[1] = freshId;
+			m_uniquePointers.insert(oldPtr,fresh);
+			//the id is read back through the pointer member of the union
+			return fresh.m_ptr;
+		}
+
+		virtual	const unsigned char*		getBufferPointer() const
+		{
+			return m_buffer;	//0 while no buffer exists: nothing was preallocated and finishSerialization has not run yet
+		}
+
+		virtual	int					getCurrentBufferSize() const
+		{
+			return	m_currentSize;	//bytes reserved through internalAlloc, plus the header length once finishSerialization has merged the chunks
+		}
+
+		virtual	void	finalizeChunk(btChunk* chunk, const char* structType, int chunkCode,void* oldPtr)
+		{
+			//Fills in the header of a chunk obtained from allocate():
+			//structType is looked up in the DNA, chunkCode is stored as is and
+			//oldPtr is replaced by its unique pointer.
+
+			//an object is expected to be finalized only once, unless the check is switched off
+			btAssert((m_serializationFlags&BT_SERIALIZE_NO_DUPLICATE_ASSERT) || !findPointer(oldPtr));
+
+			void* const writtenPtr = getUniquePointer(oldPtr);
+			chunk->m_chunkCode = chunkCode;
+			chunk->m_dna_nr = getReverseType(structType);
+			chunk->m_oldPtr = writtenPtr;
+
+			//remembered so that findPointer(oldPtr) succeeds from now on
+			m_chunkP.insert(oldPtr,writtenPtr);
+		}
+
+		
+		virtual unsigned char* internalAlloc(size_t size)
+		{
+			//Returns storage for size bytes and adds size to m_currentSize.
+			unsigned char* ptr = 0;
+			if (m_totalSize)
+			{	//preallocated buffer: hand out the next free bytes; filling the buffer exactly is valid, only exceeding it is an error
+				ptr = m_buffer+m_currentSize;
+				m_currentSize += int(size);
+				btAssert(m_currentSize<=m_totalSize);
+			} else
+			{	//no preallocated buffer: every request is a 16-byte aligned allocation of its own
+				ptr = (unsigned char*)btAlignedAlloc(size,16);
+				m_currentSize += int(size);
+			}
+			return ptr;
+		}
+
+		
+
+		virtual	btChunk*	allocate(size_t size, int numElements)
+		{
+			//Reserves one block that holds the chunk header followed by the payload
+			//(numElements elements of size bytes each) and returns the header.
+			//m_dna_nr is not set here, finalizeChunk fills it in.
+			const int payloadLength = int(size)*numElements;
+			unsigned char* const block = internalAlloc(payloadLength+sizeof(btChunk));
+
+			btChunk* const header = (btChunk*)block;
+			header->m_number = numElements;
+			header->m_length = payloadLength;
+			header->m_chunkCode = 0;
+			//until finalizeChunk runs, m_oldPtr tells the caller where to write the payload
+			header->m_oldPtr = block + sizeof(btChunk);
+
+			m_chunkPtrs.push_back(header);
+			return header;
+		}
+
+		virtual	const char*	findNameForPointer(const void* ptr) const
+		{
+			//names are stored by registerNameForPointer; a pointer without an
+			//entry yields 0
+			const char*const * entry = m_nameMap.find(ptr);
+			//a null name needs no special case, it already is the 0 to return
+			return entry ? *entry : 0;
+		}
+
+		virtual	void	registerNameForPointer(const void* ptr, const char* name)
+		{
+			m_nameMap.insert(ptr,name);	//only the pointer to the name is stored, the characters are not copied
+		}
+
+		virtual void	serializeName(const char* name)
+		{
+			//Writes name as a BT_ARRAY_CODE chunk of chars, at most once per pointer.
+			//Null and empty names are not written.
+			if (name)
+			{
+				//don't serialize name twice
+				if (findPointer((void*)name))
+					return;
+
+				int len = btStrLen(name);
+				if (len)
+				{
+					//room for the terminating zero, rounded up to a multiple of 4 bytes
+					int newLen = len+1;
+					int padding = ((newLen+3)&~3)-newLen;
+					newLen += padding;
+
+					//serialize name string now
+					btChunk* chunk = allocate(sizeof(char),newLen);
+					char* destinationName = (char*)chunk->m_oldPtr;
+					memcpy(destinationName,name,len);
+					//zero the terminator and the padding, so that no uninitialized bytes end up in the output
+					memset(destinationName+len,0,newLen-len);
+					finalizeChunk(chunk,"char",BT_ARRAY_CODE,(void*)name);
+				}
+			}
+		}
+
+		virtual int		getSerializationFlags() const
+		{
+			return m_serializationFlags;	//combination of btSerializationFlags bits, 0 after construction
+		}
+
+		virtual void	setSerializationFlags(int flags)
+		{
+			m_serializationFlags = flags;	//replaces all flags at once; this class itself only evaluates BT_SERIALIZE_NO_DUPLICATE_ASSERT
+		}
+
+};
+
+
+#endif //BT_SERIALIZER_H
+
diff --git a/Engine/lib/bullet/src/LinearMath/btTransform.h b/Engine/lib/bullet/src/LinearMath/btTransform.h
index c4fe33eec..907627379 100644
--- a/Engine/lib/bullet/src/LinearMath/btTransform.h
+++ b/Engine/lib/bullet/src/LinearMath/btTransform.h
@@ -14,17 +14,29 @@ subject to the following restrictions:
 
 
 
-#ifndef btTransform_H
-#define btTransform_H
+#ifndef BT_TRANSFORM_H
+#define BT_TRANSFORM_H
+
 
-#include "btVector3.h"
 #include "btMatrix3x3.h"
 
+#ifdef BT_USE_DOUBLE_PRECISION
+#define btTransformData btTransformDoubleData
+#else
+#define btTransformData btTransformFloatData
+#endif
+
+
+
 
 /**@brief The btTransform class supports rigid transforms with only translation and rotation and no scaling/shear.
  *It can be used in combination with btVector3, btQuaternion and btMatrix3x3 linear algebra classes. */
-class btTransform {
+ATTRIBUTE_ALIGNED16(class) btTransform {
 	
+  ///Storage for the rotation
+	btMatrix3x3 m_basis;
+  ///Storage for the translation
+	btVector3   m_origin;
 
 public:
 	
@@ -81,9 +93,7 @@ public:
 /**@brief Return the transform of the vector */
 	SIMD_FORCE_INLINE btVector3 operator()(const btVector3& x) const
 	{
-		return btVector3(m_basis[0].dot(x) + m_origin.x(), 
-			m_basis[1].dot(x) + m_origin.y(), 
-			m_basis[2].dot(x) + m_origin.z());
+        return x.dot3(m_basis[0], m_basis[1], m_basis[2]) + m_origin;
 	}
 
   /**@brief Return the transform of the vector */
@@ -195,12 +205,17 @@ public:
 		static const btTransform identityTransform(btMatrix3x3::getIdentity());
 		return identityTransform;
 	}
-	
-private:
-  ///Storage for the rotation
-	btMatrix3x3 m_basis;
-  ///Storage for the translation
-	btVector3   m_origin;
+
+	void	serialize(struct	btTransformData& dataOut) const;
+
+	void	serializeFloat(struct	btTransformFloatData& dataOut) const;
+
+	void	deSerialize(const struct	btTransformData& dataIn);
+
+	void	deSerializeDouble(const struct	btTransformDoubleData& dataIn);
+
+	void	deSerializeFloat(const struct	btTransformFloatData& dataIn);
+
 };
 
 
@@ -234,7 +249,54 @@ SIMD_FORCE_INLINE bool operator==(const btTransform& t1, const btTransform& t2)
 }
 
 
-#endif
+///for serialization: this is what btTransformData names unless BT_USE_DOUBLE_PRECISION is defined
+struct	btTransformFloatData
+{
+	btMatrix3x3FloatData	m_basis;	//rotation part, filled from btTransform::m_basis
+	btVector3FloatData	m_origin;	//translation part, filled from btTransform::m_origin
+};
+
+struct	btTransformDoubleData	//this is what btTransformData names when BT_USE_DOUBLE_PRECISION is defined
+{
+	btMatrix3x3DoubleData	m_basis;	//rotation part, read into btTransform::m_basis by deSerializeDouble
+	btVector3DoubleData	m_origin;	//translation part, read into btTransform::m_origin by deSerializeDouble
+};
+
+
+
+SIMD_FORCE_INLINE	void	btTransform::serialize(btTransformData& dataOut) const
+{	//writes rotation and translation; btTransformData is the double variant when BT_USE_DOUBLE_PRECISION is defined, the float variant otherwise
+	m_basis.serialize(dataOut.m_basis);
+	m_origin.serialize(dataOut.m_origin);
+}
+
+SIMD_FORCE_INLINE	void	btTransform::serializeFloat(btTransformFloatData& dataOut) const
+{	//always fills a btTransformFloatData, independent of BT_USE_DOUBLE_PRECISION
+	m_basis.serializeFloat(dataOut.m_basis);
+	m_origin.serializeFloat(dataOut.m_origin);
+}
+
+
+SIMD_FORCE_INLINE	void	btTransform::deSerialize(const btTransformData& dataIn)
+{	//counterpart of serialize: reads rotation and translation from the variant selected by BT_USE_DOUBLE_PRECISION
+	m_basis.deSerialize(dataIn.m_basis);
+	m_origin.deSerialize(dataIn.m_origin);
+}
+
+SIMD_FORCE_INLINE	void	btTransform::deSerializeFloat(const btTransformFloatData& dataIn)
+{	//always reads a btTransformFloatData, independent of BT_USE_DOUBLE_PRECISION
+	m_basis.deSerializeFloat(dataIn.m_basis);
+	m_origin.deSerializeFloat(dataIn.m_origin);
+}
+
+SIMD_FORCE_INLINE	void	btTransform::deSerializeDouble(const btTransformDoubleData& dataIn)
+{	//always reads a btTransformDoubleData, independent of BT_USE_DOUBLE_PRECISION
+	m_basis.deSerializeDouble(dataIn.m_basis);
+	m_origin.deSerializeDouble(dataIn.m_origin);
+}
+
+
+#endif //BT_TRANSFORM_H
 
 
 
diff --git a/Engine/lib/bullet/src/LinearMath/btTransformUtil.h b/Engine/lib/bullet/src/LinearMath/btTransformUtil.h
index 1603d3e05..2303c2742 100644
--- a/Engine/lib/bullet/src/LinearMath/btTransformUtil.h
+++ b/Engine/lib/bullet/src/LinearMath/btTransformUtil.h
@@ -13,8 +13,8 @@ subject to the following restrictions:
 */
 
 
-#ifndef SIMD_TRANSFORM_UTIL_H
-#define SIMD_TRANSFORM_UTIL_H
+#ifndef BT_TRANSFORM_UTIL_H
+#define BT_TRANSFORM_UTIL_H
 
 #include "btTransform.h"
 #define ANGULAR_MOTION_THRESHOLD btScalar(0.5)*SIMD_HALF_PI
@@ -98,8 +98,6 @@ public:
 	{
 		btQuaternion orn1 = orn0.nearest(orn1a);
 		btQuaternion dorn = orn1 * orn0.inverse();
-		///floating point inaccuracy can lead to w component > 1..., which breaks 
-		dorn.normalize();
 		angle = dorn.getAngle();
 		axis = btVector3(dorn.x(),dorn.y(),dorn.z());
 		axis[3] = btScalar(0.);
@@ -188,7 +186,7 @@ public:
 			btTransformUtil::calculateVelocityQuaternion(m_posB,toPosB,m_ornB,toOrnB,btScalar(1.),linVelB,angVelB);
 			btScalar maxAngularProjectedVelocity = angVelA.length() * m_boundingRadiusA + angVelB.length() * m_boundingRadiusB;
 			btVector3 relLinVel = (linVelB-linVelA);
-			btScalar relLinVelocLength = (linVelB-linVelA).dot(m_separatingNormal);
+			btScalar relLinVelocLength = relLinVel.dot(m_separatingNormal);
 			if (relLinVelocLength<0.f)
 			{
 				relLinVelocLength = 0.f;
@@ -226,5 +224,5 @@ public:
 };
 
 
-#endif //SIMD_TRANSFORM_UTIL_H
+#endif //BT_TRANSFORM_UTIL_H
 
diff --git a/Engine/lib/bullet/src/LinearMath/btVector3.cpp b/Engine/lib/bullet/src/LinearMath/btVector3.cpp
new file mode 100644
index 000000000..24bd521a9
--- /dev/null
+++ b/Engine/lib/bullet/src/LinearMath/btVector3.cpp
@@ -0,0 +1,1631 @@
+/*
+ Copyright (c) 2011 Apple Inc.
+ http://continuousphysics.com/Bullet/
+ 
+ This software is provided 'as-is', without any express or implied warranty.
+ In no event will the authors be held liable for any damages arising from the use of this software.
+ Permission is granted to anyone to use this software for any purpose, 
+ including commercial applications, and to alter it and redistribute it freely, 
+ subject to the following restrictions:
+ 
+ 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+ 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+ 3. This notice may not be removed or altered from any source distribution.
+ 
+ This source version has been altered.
+ */
+
+#if defined (_WIN32) || defined (__i386__)   // defined ahead of the btVector3.h include below
+#define BT_USE_SSE_IN_API                    // NOTE(review): presumably consumed by the Bullet headers to expose SSE types in the API -- confirm
+#endif
+
+#include "btVector3.h"
+
+#if defined (BT_USE_SSE) || defined (BT_USE_NEON)   // everything below is compiled only for SIMD builds
+
+#ifdef __APPLE__
+#include <stdint.h>
+typedef  float float4 __attribute__ ((vector_size(16)));   // 16-byte vector of floats using the compiler's vector_size extension
+#else
+#define float4 __m128                                       // on other platforms float4 is simply another name for __m128
+#endif
+//typedef  uint32_t uint4 __attribute__ ((vector_size(16)));
+
+
+#if defined BT_USE_SSE || defined _WIN32   // start of the SSE implementation
+
+#define LOG2_ARRAY_SIZE     6
+#define STACK_ARRAY_COUNT   (1UL << LOG2_ARRAY_SIZE)   // 64 float4 slots; sizes the on-stack scratch array of the functions below
+
+#include <emmintrin.h>
+
+long _maxdot_large( const float *vv, const float *vec, unsigned long count, float *dotResult );   // prototype; the definition follows directly below
+long _maxdot_large( const float *vv, const float *vec, unsigned long count, float *dotResult )
+{   // Scans 'count' vertices at vv (read as float4 elements, w lane ignored) for the largest x,y,z dot product with vec; stores that dot in *dotResult and returns the vertex index (-1 if none was recorded).
+    const float4 *vertices = (const float4*) vv;
+    static const unsigned char indexTable[16] = {(unsigned char)-1, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0 };  // 4-bit compare mask -> lane of its lowest set bit; entry 0 is never read. The explicit cast avoids a C++11 narrowing error.
+    float4 dotMax = btAssign128( -BT_INFINITY,  -BT_INFINITY,  -BT_INFINITY,  -BT_INFINITY );   // running maximum, replicated in all lanes
+    float4 vvec = _mm_loadu_ps( vec );   // unaligned load of four floats from vec
+    float4 vHi = btCastiTo128f(_mm_shuffle_epi32( btCastfTo128i( vvec), 0xaa ));          /// zzzz
+    float4 vLo = _mm_movelh_ps( vvec, vvec );                               /// xyxy
+    
+    long maxIndex = -1L;
+    
+    size_t segment = 0;
+    float4 stack_array[ STACK_ARRAY_COUNT ];   // dot products of the current tile, kept so the index of the max can be located afterwards
+    
+#if DEBUG
+    memset( stack_array, -1, STACK_ARRAY_COUNT * sizeof(stack_array[0]) );   // all-ones bytes (a NaN pattern). NOTE(review): memset needs <string.h>, not included directly here -- confirm a header provides it
+#endif
+    
+    size_t index;
+    float4 max;
+    // Faster loop without cleanup code for full tiles (one tile = STACK_ARRAY_COUNT*4 vertices)
+    for ( segment = 0; segment + STACK_ARRAY_COUNT*4 <= count; segment += STACK_ARRAY_COUNT*4 ) 
+    {
+        max = dotMax;
+        
+        for( index = 0; index < STACK_ARRAY_COUNT; index+= 4 )   
+        { // do four dot products at a time. Carefully avoid touching the w element.
+            float4 v0 = vertices[0];
+            float4 v1 = vertices[1];
+            float4 v2 = vertices[2];
+            float4 v3 = vertices[3];            vertices += 4;
+            
+            float4 lo0 = _mm_movelh_ps( v0, v1);    // x0y0x1y1
+            float4 hi0 = _mm_movehl_ps( v1, v0);    // z0?0z1?1
+            float4 lo1 = _mm_movelh_ps( v2, v3);    // x2y2x3y3
+            float4 hi1 = _mm_movehl_ps( v3, v2);    // z2?2z3?3
+            
+            lo0 = lo0*vLo;
+            lo1 = lo1*vLo;
+            float4 z = _mm_shuffle_ps(hi0, hi1, 0x88);
+            float4 x = _mm_shuffle_ps(lo0, lo1, 0x88);
+            float4 y = _mm_shuffle_ps(lo0, lo1, 0xdd);
+            z = z*vHi;
+            x = x+y;
+            x = x+z;
+            stack_array[index] = x;
+            max = _mm_max_ps( x, max );         // control the order here so that max is never NaN even if x is nan
+            
+            v0 = vertices[0];
+            v1 = vertices[1];
+            v2 = vertices[2];
+            v3 = vertices[3];            vertices += 4;
+            
+            lo0 = _mm_movelh_ps( v0, v1);    // x0y0x1y1
+            hi0 = _mm_movehl_ps( v1, v0);    // z0?0z1?1
+            lo1 = _mm_movelh_ps( v2, v3);    // x2y2x3y3
+            hi1 = _mm_movehl_ps( v3, v2);    // z2?2z3?3
+            
+            lo0 = lo0*vLo;
+            lo1 = lo1*vLo;
+            z = _mm_shuffle_ps(hi0, hi1, 0x88);
+            x = _mm_shuffle_ps(lo0, lo1, 0x88);
+            y = _mm_shuffle_ps(lo0, lo1, 0xdd);
+            z = z*vHi;
+            x = x+y;
+            x = x+z;
+            stack_array[index+1] = x;
+            max = _mm_max_ps( x, max );         // control the order here so that max is never NaN even if x is nan
+            
+            v0 = vertices[0];
+            v1 = vertices[1];
+            v2 = vertices[2];
+            v3 = vertices[3];            vertices += 4;
+            
+            lo0 = _mm_movelh_ps( v0, v1);    // x0y0x1y1
+            hi0 = _mm_movehl_ps( v1, v0);    // z0?0z1?1
+            lo1 = _mm_movelh_ps( v2, v3);    // x2y2x3y3
+            hi1 = _mm_movehl_ps( v3, v2);    // z2?2z3?3
+            
+            lo0 = lo0*vLo;
+            lo1 = lo1*vLo;
+            z = _mm_shuffle_ps(hi0, hi1, 0x88);
+            x = _mm_shuffle_ps(lo0, lo1, 0x88);
+            y = _mm_shuffle_ps(lo0, lo1, 0xdd);
+            z = z*vHi;
+            x = x+y;
+            x = x+z;
+            stack_array[index+2] = x;
+            max = _mm_max_ps( x, max );         // control the order here so that max is never NaN even if x is nan
+            
+            v0 = vertices[0];
+            v1 = vertices[1];
+            v2 = vertices[2];
+            v3 = vertices[3];            vertices += 4;
+            
+            lo0 = _mm_movelh_ps( v0, v1);    // x0y0x1y1
+            hi0 = _mm_movehl_ps( v1, v0);    // z0?0z1?1
+            lo1 = _mm_movelh_ps( v2, v3);    // x2y2x3y3
+            hi1 = _mm_movehl_ps( v3, v2);    // z2?2z3?3
+            
+            lo0 = lo0*vLo;
+            lo1 = lo1*vLo;
+            z = _mm_shuffle_ps(hi0, hi1, 0x88);
+            x = _mm_shuffle_ps(lo0, lo1, 0x88);
+            y = _mm_shuffle_ps(lo0, lo1, 0xdd);
+            z = z*vHi;
+            x = x+y;
+            x = x+z;
+            stack_array[index+3] = x;
+            max = _mm_max_ps( x, max );         // control the order here so that max is never NaN even if x is nan
+            
+            // It is too costly to keep the index of the max here. We will look for it again later.  We save a lot of work this way.
+        }
+        
+        // If we found a new max (any lane of max differs from the previous dotMax)
+        if( 0xf != _mm_movemask_ps( (float4) _mm_cmpeq_ps(max, dotMax)))
+        { 
+            // copy the new max across all lanes of our max accumulator
+            max = _mm_max_ps(max, (float4) _mm_shuffle_ps( max, max, 0x4e));   // 0x4e swaps the two 64-bit halves
+            max = _mm_max_ps(max, (float4) _mm_shuffle_ps( max, max, 0xb1));   // 0xb1 swaps the lanes within each pair
+            
+            dotMax = max;
+            
+            // find first occurrence of that max  
+            size_t test;
+            for( index = 0; 0 == (test=_mm_movemask_ps( _mm_cmpeq_ps( stack_array[index], max))); index++ )   // local_count must be a multiple of 4
+            {}
+            // record where it is.
+            maxIndex = 4*index + segment + indexTable[test];
+        }
+    }
+    
+    // account for work we've already done
+    count -= segment;
+    
+    // Deal with the last < STACK_ARRAY_COUNT vectors
+    max = dotMax;
+    index = 0;
+    
+    
+    if( btUnlikely( count > 16) )
+    {
+        for( ; index + 4 <= count / 4; index+=4 )   
+        { // do four dot products at a time. Carefully avoid touching the w element.
+            float4 v0 = vertices[0];
+            float4 v1 = vertices[1];
+            float4 v2 = vertices[2];
+            float4 v3 = vertices[3];            vertices += 4;
+            
+            float4 lo0 = _mm_movelh_ps( v0, v1);    // x0y0x1y1
+            float4 hi0 = _mm_movehl_ps( v1, v0);    // z0?0z1?1
+            float4 lo1 = _mm_movelh_ps( v2, v3);    // x2y2x3y3
+            float4 hi1 = _mm_movehl_ps( v3, v2);    // z2?2z3?3
+            
+            lo0 = lo0*vLo;
+            lo1 = lo1*vLo;
+            float4 z = _mm_shuffle_ps(hi0, hi1, 0x88);
+            float4 x = _mm_shuffle_ps(lo0, lo1, 0x88);
+            float4 y = _mm_shuffle_ps(lo0, lo1, 0xdd);
+            z = z*vHi;
+            x = x+y;
+            x = x+z;
+            stack_array[index] = x;
+            max = _mm_max_ps( x, max );         // control the order here so that max is never NaN even if x is nan
+            
+            v0 = vertices[0];
+            v1 = vertices[1];
+            v2 = vertices[2];
+            v3 = vertices[3];            vertices += 4;
+            
+            lo0 = _mm_movelh_ps( v0, v1);    // x0y0x1y1
+            hi0 = _mm_movehl_ps( v1, v0);    // z0?0z1?1
+            lo1 = _mm_movelh_ps( v2, v3);    // x2y2x3y3
+            hi1 = _mm_movehl_ps( v3, v2);    // z2?2z3?3
+            
+            lo0 = lo0*vLo;
+            lo1 = lo1*vLo;
+            z = _mm_shuffle_ps(hi0, hi1, 0x88);
+            x = _mm_shuffle_ps(lo0, lo1, 0x88);
+            y = _mm_shuffle_ps(lo0, lo1, 0xdd);
+            z = z*vHi;
+            x = x+y;
+            x = x+z;
+            stack_array[index+1] = x;
+            max = _mm_max_ps( x, max );         // control the order here so that max is never NaN even if x is nan
+            
+            v0 = vertices[0];
+            v1 = vertices[1];
+            v2 = vertices[2];
+            v3 = vertices[3];            vertices += 4;
+            
+            lo0 = _mm_movelh_ps( v0, v1);    // x0y0x1y1
+            hi0 = _mm_movehl_ps( v1, v0);    // z0?0z1?1
+            lo1 = _mm_movelh_ps( v2, v3);    // x2y2x3y3
+            hi1 = _mm_movehl_ps( v3, v2);    // z2?2z3?3
+            
+            lo0 = lo0*vLo;
+            lo1 = lo1*vLo;
+            z = _mm_shuffle_ps(hi0, hi1, 0x88);
+            x = _mm_shuffle_ps(lo0, lo1, 0x88);
+            y = _mm_shuffle_ps(lo0, lo1, 0xdd);
+            z = z*vHi;
+            x = x+y;
+            x = x+z;
+            stack_array[index+2] = x;
+            max = _mm_max_ps( x, max );         // control the order here so that max is never NaN even if x is nan
+            
+            v0 = vertices[0];
+            v1 = vertices[1];
+            v2 = vertices[2];
+            v3 = vertices[3];            vertices += 4;
+            
+            lo0 = _mm_movelh_ps( v0, v1);    // x0y0x1y1
+            hi0 = _mm_movehl_ps( v1, v0);    // z0?0z1?1
+            lo1 = _mm_movelh_ps( v2, v3);    // x2y2x3y3
+            hi1 = _mm_movehl_ps( v3, v2);    // z2?2z3?3
+            
+            lo0 = lo0*vLo;
+            lo1 = lo1*vLo;
+            z = _mm_shuffle_ps(hi0, hi1, 0x88);
+            x = _mm_shuffle_ps(lo0, lo1, 0x88);
+            y = _mm_shuffle_ps(lo0, lo1, 0xdd);
+            z = z*vHi;
+            x = x+y;
+            x = x+z;
+            stack_array[index+3] = x;
+            max = _mm_max_ps( x, max );         // control the order here so that max is never NaN even if x is nan
+            
+            // It is too costly to keep the index of the max here. We will look for it again later.  We save a lot of work this way.
+        }
+    }
+    
+    size_t localCount = (count & -4L) - 4*index;   // vertices left in whole groups of four after the unrolled loop above
+    if( localCount )
+    {
+#ifdef __APPLE__
+        float4 t0, t1, t2, t3, t4;
+        float4 * sap = &stack_array[index + localCount / 4];
+          vertices += localCount;      // counter the offset
+         size_t byteIndex = -(localCount) * sizeof(float);
+        //AT&T Code style assembly
+        asm volatile
+        (   ".align 4                                                                   \n\
+             0: movaps  %[max], %[t2]                            // move max out of the way to avoid propagating NaNs in max \n\
+          movaps  (%[vertices], %[byteIndex], 4),    %[t0]    // vertices[0]      \n\
+          movaps  16(%[vertices], %[byteIndex], 4),  %[t1]    // vertices[1]      \n\
+          movaps  %[t0], %[max]                               // vertices[0]      \n\
+          movlhps %[t1], %[max]                               // x0y0x1y1         \n\
+         movaps  32(%[vertices], %[byteIndex], 4),  %[t3]    // vertices[2]      \n\
+         movaps  48(%[vertices], %[byteIndex], 4),  %[t4]    // vertices[3]      \n\
+          mulps   %[vLo], %[max]                              // x0y0x1y1 * vLo   \n\
+         movhlps %[t0], %[t1]                                // z0w0z1w1         \n\
+         movaps  %[t3], %[t0]                                // vertices[2]      \n\
+         movlhps %[t4], %[t0]                                // x2y2x3y3         \n\
+         mulps   %[vLo], %[t0]                               // x2y2x3y3 * vLo   \n\
+          movhlps %[t3], %[t4]                                // z2w2z3w3         \n\
+          shufps  $0x88, %[t4], %[t1]                         // z0z1z2z3         \n\
+          mulps   %[vHi], %[t1]                               // z0z1z2z3 * vHi   \n\
+         movaps  %[max], %[t3]                               // x0y0x1y1 * vLo   \n\
+         shufps  $0x88, %[t0], %[max]                        // x0x1x2x3 * vLo.x \n\
+         shufps  $0xdd, %[t0], %[t3]                         // y0y1y2y3 * vLo.y \n\
+         addps   %[t3], %[max]                               // x + y            \n\
+         addps   %[t1], %[max]                               // x + y + z        \n\
+         movaps  %[max], (%[sap], %[byteIndex])              // record result for later scrutiny \n\
+         maxps   %[t2], %[max]                               // record max, restore max   \n\
+         add     $16, %[byteIndex]                           // advance loop counter\n\
+         jnz     0b                                          \n\
+     "
+         : [max] "+x" (max), [t0] "=&x" (t0), [t1] "=&x" (t1), [t2] "=&x" (t2), [t3] "=&x" (t3), [t4] "=&x" (t4), [byteIndex] "+r" (byteIndex)
+         : [vLo] "x" (vLo), [vHi] "x" (vHi), [vertices] "r" (vertices), [sap] "r" (sap)
+         : "memory", "cc"
+         );
+        index += localCount/4;
+#else
+        {
+            for( unsigned int i=0; i<localCount/4; i++,index++)   
+            { // do four dot products at a time. Carefully avoid touching the w element.
+                float4 v0 = vertices[0];
+                float4 v1 = vertices[1];
+                float4 v2 = vertices[2];
+                float4 v3 = vertices[3];            
+                vertices += 4;
+                
+                float4 lo0 = _mm_movelh_ps( v0, v1);    // x0y0x1y1
+                float4 hi0 = _mm_movehl_ps( v1, v0);    // z0?0z1?1
+                float4 lo1 = _mm_movelh_ps( v2, v3);    // x2y2x3y3
+                float4 hi1 = _mm_movehl_ps( v3, v2);    // z2?2z3?3
+                
+                lo0 = lo0*vLo;
+                lo1 = lo1*vLo;
+                float4 z = _mm_shuffle_ps(hi0, hi1, 0x88);
+                float4 x = _mm_shuffle_ps(lo0, lo1, 0x88);
+                float4 y = _mm_shuffle_ps(lo0, lo1, 0xdd);
+                z = z*vHi;
+                x = x+y;
+                x = x+z;
+                stack_array[index] = x;
+                max = _mm_max_ps( x, max );         // control the order here so that max is never NaN even if x is nan
+            }
+        }
+#endif //__APPLE__
+    }
+
+    // process the last few points (1 to 3 vertices that do not fill a group of four)
+    if( count & 3 )
+    {
+        float4 v0, v1, v2, x, y, z;
+        switch( count & 3 )
+        {
+            case 3:
+            {
+                v0 = vertices[0];
+                v1 = vertices[1];
+                v2 = vertices[2];
+                
+                // Calculate 3 dot products, transpose, duplicate v2
+                float4 lo0 = _mm_movelh_ps( v0, v1);        // xyxy.lo
+                float4 hi0 = _mm_movehl_ps( v1, v0);        // z?z?.lo
+                lo0 = lo0*vLo;
+                z = _mm_shuffle_ps(hi0, v2,  0xa8 );           // z0z1z2z2
+                z = z*vHi;
+                float4 lo1 = _mm_movelh_ps(v2, v2);          // xyxy
+                lo1 = lo1*vLo;
+                x = _mm_shuffle_ps(lo0, lo1, 0x88);
+                y = _mm_shuffle_ps(lo0, lo1, 0xdd);
+            }
+                break;
+            case 2:
+            {
+                v0 = vertices[0];
+                v1 = vertices[1];
+                float4 xy = _mm_movelh_ps(v0, v1);
+                z = _mm_movehl_ps(v1, v0);
+                xy = xy*vLo;
+                z = _mm_shuffle_ps( z, z,  0xa8);       // z0z1z1z1
+                x = _mm_shuffle_ps( xy, xy, 0xa8);      // x0x1x1x1 (already scaled by vLo)
+                y = _mm_shuffle_ps( xy, xy, 0xfd);      // y0y1y1y1 (already scaled by vLo)
+                z = z*vHi;
+            }
+                break;
+            case 1:
+            {
+                float4 xy = vertices[0];
+                z =  _mm_shuffle_ps( xy, xy, 0xaa);     // z0 in every lane
+                xy = xy*vLo;
+                z = z*vHi;
+                x = _mm_shuffle_ps(xy, xy, 0);          // x0 term in every lane
+                y = _mm_shuffle_ps(xy, xy, 0x55);       // y0 term in every lane
+            }
+                break;
+        }
+        x = x+y;
+        x = x+z;
+        stack_array[index] = x;             // padding lanes repeat a real vertex; the lowest-set-bit lookup picks the real lane first
+        max = _mm_max_ps( x, max );         // control the order here so that max is never NaN even if x is nan
+        index++;
+    }
+    
+    // Search when this partial tile raised the max, or unconditionally when no full tile was processed (segment == 0).
+    if( 0 == segment || 0xf != _mm_movemask_ps( (float4) _mm_cmpeq_ps(max, dotMax)))
+    { // we found a new max. Search for it
+      // find max across the max vector, place in all elements of max -- big latency hit here
+        max = _mm_max_ps(max, (float4) _mm_shuffle_ps( max, max, 0x4e));
+        max = _mm_max_ps(max, (float4) _mm_shuffle_ps( max, max, 0xb1));
+        
+        // It is slightly faster to do this part in scalar code when count < 8. However, the common case for
+        // this where it actually makes a difference is said to be handled by an early out (NOTE(review): no early
+        // out exists in this function -- presumably the caller filters small counts; confirm), so it is less than
+        // a 1% difference here. The scalar variant was dropped for code size, fewer branches and reduced complexity.
+        
+        dotMax = max;
+        
+        // scan for the first occurrence of max in the array  
+        size_t test;
+        for( index = 0; 0 == (test=_mm_movemask_ps( _mm_cmpeq_ps( stack_array[index], max))); index++ )   // stops only when a stored lane equals max. NOTE(review): with count == 0 nothing was stored -- confirm callers never pass 0
+        {}
+        maxIndex = 4*index + segment + indexTable[test];
+    }
+    
+    _mm_store_ss( dotResult, dotMax);   // write lane 0 of dotMax to *dotResult
+    return maxIndex;
+}
+
+long _mindot_large( const float *vv, const float *vec, unsigned long count, float *dotResult );
+
+long _mindot_large( const float *vv, const float *vec, unsigned long count, float *dotResult )
+{
+    const float4 *vertices = (const float4*) vv;
+    static const unsigned char indexTable[16] = {-1, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0 };
+    float4 dotmin = btAssign128( BT_INFINITY,  BT_INFINITY,  BT_INFINITY,  BT_INFINITY );
+    float4 vvec = _mm_loadu_ps( vec );
+    float4 vHi = btCastiTo128f(_mm_shuffle_epi32( btCastfTo128i( vvec), 0xaa ));          /// zzzz
+    float4 vLo = _mm_movelh_ps( vvec, vvec );                               /// xyxy
+    
+    long minIndex = -1L;
+
+    size_t segment = 0;
+    float4 stack_array[ STACK_ARRAY_COUNT ];
+    
+#if DEBUG
+    memset( stack_array, -1, STACK_ARRAY_COUNT * sizeof(stack_array[0]) );
+#endif
+    
+    size_t index;
+    float4 min;
+    // Faster loop without cleanup code for full tiles
+    for ( segment = 0; segment + STACK_ARRAY_COUNT*4 <= count; segment += STACK_ARRAY_COUNT*4 ) 
+    {
+        min = dotmin;
+        
+        for( index = 0; index < STACK_ARRAY_COUNT; index+= 4 )   
+        { // do four dot products at a time. Carefully avoid touching the w element.
+            float4 v0 = vertices[0];
+            float4 v1 = vertices[1];
+            float4 v2 = vertices[2];
+            float4 v3 = vertices[3];            vertices += 4;
+            
+            float4 lo0 = _mm_movelh_ps( v0, v1);    // x0y0x1y1
+            float4 hi0 = _mm_movehl_ps( v1, v0);    // z0?0z1?1
+            float4 lo1 = _mm_movelh_ps( v2, v3);    // x2y2x3y3
+            float4 hi1 = _mm_movehl_ps( v3, v2);    // z2?2z3?3
+            
+            lo0 = lo0*vLo;
+            lo1 = lo1*vLo;
+            float4 z = _mm_shuffle_ps(hi0, hi1, 0x88);
+            float4 x = _mm_shuffle_ps(lo0, lo1, 0x88);
+            float4 y = _mm_shuffle_ps(lo0, lo1, 0xdd);
+            z = z*vHi;
+            x = x+y;
+            x = x+z;
+            stack_array[index] = x;
+            min = _mm_min_ps( x, min );         // control the order here so that min is never NaN even if x is nan
+            
+            v0 = vertices[0];
+            v1 = vertices[1];
+            v2 = vertices[2];
+            v3 = vertices[3];            vertices += 4;
+            
+            lo0 = _mm_movelh_ps( v0, v1);    // x0y0x1y1
+            hi0 = _mm_movehl_ps( v1, v0);    // z0?0z1?1
+            lo1 = _mm_movelh_ps( v2, v3);    // x2y2x3y3
+            hi1 = _mm_movehl_ps( v3, v2);    // z2?2z3?3
+            
+            lo0 = lo0*vLo;
+            lo1 = lo1*vLo;
+            z = _mm_shuffle_ps(hi0, hi1, 0x88);
+            x = _mm_shuffle_ps(lo0, lo1, 0x88);
+            y = _mm_shuffle_ps(lo0, lo1, 0xdd);
+            z = z*vHi;
+            x = x+y;
+            x = x+z;
+            stack_array[index+1] = x;
+            min = _mm_min_ps( x, min );         // control the order here so that min is never NaN even if x is nan
+            
+            v0 = vertices[0];
+            v1 = vertices[1];
+            v2 = vertices[2];
+            v3 = vertices[3];            vertices += 4;
+            
+            lo0 = _mm_movelh_ps( v0, v1);    // x0y0x1y1
+            hi0 = _mm_movehl_ps( v1, v0);    // z0?0z1?1
+            lo1 = _mm_movelh_ps( v2, v3);    // x2y2x3y3
+            hi1 = _mm_movehl_ps( v3, v2);    // z2?2z3?3
+            
+            lo0 = lo0*vLo;
+            lo1 = lo1*vLo;
+            z = _mm_shuffle_ps(hi0, hi1, 0x88);
+            x = _mm_shuffle_ps(lo0, lo1, 0x88);
+            y = _mm_shuffle_ps(lo0, lo1, 0xdd);
+            z = z*vHi;
+            x = x+y;
+            x = x+z;
+            stack_array[index+2] = x;
+            min = _mm_min_ps( x, min );         // control the order here so that min is never NaN even if x is nan
+            
+            v0 = vertices[0];
+            v1 = vertices[1];
+            v2 = vertices[2];
+            v3 = vertices[3];            vertices += 4;
+            
+            lo0 = _mm_movelh_ps( v0, v1);    // x0y0x1y1
+            hi0 = _mm_movehl_ps( v1, v0);    // z0?0z1?1
+            lo1 = _mm_movelh_ps( v2, v3);    // x2y2x3y3
+            hi1 = _mm_movehl_ps( v3, v2);    // z2?2z3?3
+            
+            lo0 = lo0*vLo;
+            lo1 = lo1*vLo;
+            z = _mm_shuffle_ps(hi0, hi1, 0x88);
+            x = _mm_shuffle_ps(lo0, lo1, 0x88);
+            y = _mm_shuffle_ps(lo0, lo1, 0xdd);
+            z = z*vHi;
+            x = x+y;
+            x = x+z;
+            stack_array[index+3] = x;
+            min = _mm_min_ps( x, min );         // control the order here so that min is never NaN even if x is nan
+            
+            // It is too costly to keep the index of the min here. We will look for it again later.  We save a lot of work this way.
+        }
+        
+        // If we found a new min
+        if( 0xf != _mm_movemask_ps( (float4) _mm_cmpeq_ps(min, dotmin)))
+        { 
+            // copy the new min across all lanes of our min accumulator
+            min = _mm_min_ps(min, (float4) _mm_shuffle_ps( min, min, 0x4e));
+            min = _mm_min_ps(min, (float4) _mm_shuffle_ps( min, min, 0xb1));
+            
+            dotmin = min;
+            
+            // find first occurrence of that min  
+            size_t test;
+            for( index = 0; 0 == (test=_mm_movemask_ps( _mm_cmpeq_ps( stack_array[index], min))); index++ )   // local_count must be a multiple of 4
+            {}
+            // record where it is.
+            minIndex = 4*index + segment + indexTable[test];
+        }
+    }
+    
+    // account for work we've already done
+    count -= segment;
+    
+    // Deal with the last < STACK_ARRAY_COUNT vectors
+    min = dotmin;
+    index = 0;
+    
+    
+    if(btUnlikely( count > 16) )
+    {
+        for( ; index + 4 <= count / 4; index+=4 )   
+        { // do four dot products at a time. Carefully avoid touching the w element.
+            float4 v0 = vertices[0];
+            float4 v1 = vertices[1];
+            float4 v2 = vertices[2];
+            float4 v3 = vertices[3];            vertices += 4;
+            
+            float4 lo0 = _mm_movelh_ps( v0, v1);    // x0y0x1y1
+            float4 hi0 = _mm_movehl_ps( v1, v0);    // z0?0z1?1
+            float4 lo1 = _mm_movelh_ps( v2, v3);    // x2y2x3y3
+            float4 hi1 = _mm_movehl_ps( v3, v2);    // z2?2z3?3
+            
+            lo0 = lo0*vLo;
+            lo1 = lo1*vLo;
+            float4 z = _mm_shuffle_ps(hi0, hi1, 0x88);
+            float4 x = _mm_shuffle_ps(lo0, lo1, 0x88);
+            float4 y = _mm_shuffle_ps(lo0, lo1, 0xdd);
+            z = z*vHi;
+            x = x+y;
+            x = x+z;
+            stack_array[index] = x;
+            min = _mm_min_ps( x, min );         // control the order here so that min is never NaN even if x is nan
+            
+            v0 = vertices[0];
+            v1 = vertices[1];
+            v2 = vertices[2];
+            v3 = vertices[3];            vertices += 4;
+            
+            lo0 = _mm_movelh_ps( v0, v1);    // x0y0x1y1
+            hi0 = _mm_movehl_ps( v1, v0);    // z0?0z1?1
+            lo1 = _mm_movelh_ps( v2, v3);    // x2y2x3y3
+            hi1 = _mm_movehl_ps( v3, v2);    // z2?2z3?3
+            
+            lo0 = lo0*vLo;
+            lo1 = lo1*vLo;
+            z = _mm_shuffle_ps(hi0, hi1, 0x88);
+            x = _mm_shuffle_ps(lo0, lo1, 0x88);
+            y = _mm_shuffle_ps(lo0, lo1, 0xdd);
+            z = z*vHi;
+            x = x+y;
+            x = x+z;
+            stack_array[index+1] = x;
+            min = _mm_min_ps( x, min );         // control the order here so that min is never NaN even if x is nan
+            
+            v0 = vertices[0];
+            v1 = vertices[1];
+            v2 = vertices[2];
+            v3 = vertices[3];            vertices += 4;
+            
+            lo0 = _mm_movelh_ps( v0, v1);    // x0y0x1y1
+            hi0 = _mm_movehl_ps( v1, v0);    // z0?0z1?1
+            lo1 = _mm_movelh_ps( v2, v3);    // x2y2x3y3
+            hi1 = _mm_movehl_ps( v3, v2);    // z2?2z3?3
+            
+            lo0 = lo0*vLo;
+            lo1 = lo1*vLo;
+            z = _mm_shuffle_ps(hi0, hi1, 0x88);
+            x = _mm_shuffle_ps(lo0, lo1, 0x88);
+            y = _mm_shuffle_ps(lo0, lo1, 0xdd);
+            z = z*vHi;
+            x = x+y;
+            x = x+z;
+            stack_array[index+2] = x;
+            min = _mm_min_ps( x, min );         // control the order here so that min is never NaN even if x is nan
+            
+            v0 = vertices[0];
+            v1 = vertices[1];
+            v2 = vertices[2];
+            v3 = vertices[3];            vertices += 4;
+            
+            lo0 = _mm_movelh_ps( v0, v1);    // x0y0x1y1
+            hi0 = _mm_movehl_ps( v1, v0);    // z0?0z1?1
+            lo1 = _mm_movelh_ps( v2, v3);    // x2y2x3y3
+            hi1 = _mm_movehl_ps( v3, v2);    // z2?2z3?3
+            
+            lo0 = lo0*vLo;
+            lo1 = lo1*vLo;
+            z = _mm_shuffle_ps(hi0, hi1, 0x88);
+            x = _mm_shuffle_ps(lo0, lo1, 0x88);
+            y = _mm_shuffle_ps(lo0, lo1, 0xdd);
+            z = z*vHi;
+            x = x+y;
+            x = x+z;
+            stack_array[index+3] = x;
+            min = _mm_min_ps( x, min );         // control the order here so that min is never NaN even if x is nan
+            
+            // It is too costly to keep the index of the min here. We will look for it again later.  We save a lot of work this way.
+        }
+    }
+    
+    size_t localCount = (count & -4L) - 4*index;
+    if( localCount )
+    {
+        
+        
+#ifdef __APPLE__
+        vertices += localCount;      // counter the offset
+        float4 t0, t1, t2, t3, t4;
+        size_t byteIndex = -(localCount) * sizeof(float);
+        float4 * sap = &stack_array[index + localCount / 4];
+        
+        asm volatile
+        (   ".align 4                                                                   \n\
+             0: movaps  %[min], %[t2]                            // move min out of the way to avoid propagating NaNs in min \n\
+             movaps  (%[vertices], %[byteIndex], 4),    %[t0]    // vertices[0]      \n\
+             movaps  16(%[vertices], %[byteIndex], 4),  %[t1]    // vertices[1]      \n\
+             movaps  %[t0], %[min]                               // vertices[0]      \n\
+             movlhps %[t1], %[min]                               // x0y0x1y1         \n\
+             movaps  32(%[vertices], %[byteIndex], 4),  %[t3]    // vertices[2]      \n\
+             movaps  48(%[vertices], %[byteIndex], 4),  %[t4]    // vertices[3]      \n\
+             mulps   %[vLo], %[min]                              // x0y0x1y1 * vLo   \n\
+             movhlps %[t0], %[t1]                                // z0w0z1w1         \n\
+             movaps  %[t3], %[t0]                                // vertices[2]      \n\
+             movlhps %[t4], %[t0]                                // x2y2x3y3         \n\
+             movhlps %[t3], %[t4]                                // z2w2z3w3         \n\
+             mulps   %[vLo], %[t0]                               // x2y2x3y3 * vLo   \n\
+             shufps  $0x88, %[t4], %[t1]                         // z0z1z2z3         \n\
+             mulps   %[vHi], %[t1]                               // z0z1z2z3 * vHi   \n\
+             movaps  %[min], %[t3]                               // x0y0x1y1 * vLo   \n\
+             shufps  $0x88, %[t0], %[min]                        // x0x1x2x3 * vLo.x \n\
+             shufps  $0xdd, %[t0], %[t3]                         // y0y1y2y3 * vLo.y \n\
+             addps   %[t3], %[min]                               // x + y            \n\
+             addps   %[t1], %[min]                               // x + y + z        \n\
+             movaps  %[min], (%[sap], %[byteIndex])              // record result for later scrutiny \n\
+             minps   %[t2], %[min]                               // record min, restore min   \n\
+             add     $16, %[byteIndex]                           // advance loop counter\n\
+             jnz     0b                                          \n\
+             "
+         : [min] "+x" (min), [t0] "=&x" (t0), [t1] "=&x" (t1), [t2] "=&x" (t2), [t3] "=&x" (t3), [t4] "=&x" (t4), [byteIndex] "+r" (byteIndex)
+         : [vLo] "x" (vLo), [vHi] "x" (vHi), [vertices] "r" (vertices), [sap] "r" (sap)
+         : "memory", "cc"
+         );
+        index += localCount/4;
+#else
+        {
+            for( unsigned int i=0; i<localCount/4; i++,index++)   
+            { // do four dot products at a time. Carefully avoid touching the w element.
+                float4 v0 = vertices[0];
+                float4 v1 = vertices[1];
+                float4 v2 = vertices[2];
+                float4 v3 = vertices[3];            
+                vertices += 4;
+                
+                float4 lo0 = _mm_movelh_ps( v0, v1);    // x0y0x1y1
+                float4 hi0 = _mm_movehl_ps( v1, v0);    // z0?0z1?1
+                float4 lo1 = _mm_movelh_ps( v2, v3);    // x2y2x3y3
+                float4 hi1 = _mm_movehl_ps( v3, v2);    // z2?2z3?3
+                
+                lo0 = lo0*vLo;
+                lo1 = lo1*vLo;
+                float4 z = _mm_shuffle_ps(hi0, hi1, 0x88);
+                float4 x = _mm_shuffle_ps(lo0, lo1, 0x88);
+                float4 y = _mm_shuffle_ps(lo0, lo1, 0xdd);
+                z = z*vHi;
+                x = x+y;
+                x = x+z;
+                stack_array[index] = x;
+                min = _mm_min_ps( x, min );         // control the order here so that max is never NaN even if x is nan
+            }
+        }
+
+#endif
+    }
+    
+    // process the last few points
+    if( count & 3 )
+    {
+        float4 v0, v1, v2, x, y, z;
+        switch( count & 3 )
+        {
+            case 3:
+            {
+                v0 = vertices[0];
+                v1 = vertices[1];
+                v2 = vertices[2];
+                
+                // Calculate 3 dot products, transpose, duplicate v2
+                float4 lo0 = _mm_movelh_ps( v0, v1);        // xyxy.lo
+                float4 hi0 = _mm_movehl_ps( v1, v0);        // z?z?.lo
+                lo0 = lo0*vLo;
+                z = _mm_shuffle_ps(hi0, v2,  0xa8 );           // z0z1z2z2
+                z = z*vHi;
+                float4 lo1 = _mm_movelh_ps(v2, v2);          // xyxy
+                lo1 = lo1*vLo;
+                x = _mm_shuffle_ps(lo0, lo1, 0x88);
+                y = _mm_shuffle_ps(lo0, lo1, 0xdd);
+            }
+                break;
+            case 2:
+            {
+                v0 = vertices[0];
+                v1 = vertices[1];
+                float4 xy = _mm_movelh_ps(v0, v1);
+                z = _mm_movehl_ps(v1, v0);
+                xy = xy*vLo;
+                z = _mm_shuffle_ps( z, z,  0xa8);
+                x = _mm_shuffle_ps( xy, xy, 0xa8);
+                y = _mm_shuffle_ps( xy, xy, 0xfd);
+                z = z*vHi;
+            }
+                break;
+            case 1:
+            {
+                float4 xy = vertices[0];
+                z =  _mm_shuffle_ps( xy, xy, 0xaa);
+                xy = xy*vLo;
+                z = z*vHi;
+                x = _mm_shuffle_ps(xy, xy, 0);
+                y = _mm_shuffle_ps(xy, xy, 0x55);
+            }
+                break;
+        }
+        x = x+y;
+        x = x+z;
+        stack_array[index] = x;
+        min = _mm_min_ps( x, min );         // control the order here so that min is never NaN even if x is nan
+        index++;
+    }
+    
+    // if we found a new min. 
+    if( 0 == segment || 0xf != _mm_movemask_ps( (float4) _mm_cmpeq_ps(min, dotmin)))
+    { // we found a new min. Search for it
+      // find min across the min vector, place in all elements of min -- big latency hit here
+        min = _mm_min_ps(min, (float4) _mm_shuffle_ps( min, min, 0x4e));
+        min = _mm_min_ps(min, (float4) _mm_shuffle_ps( min, min, 0xb1));
+        
+        // It is slightly faster to do this part in scalar code when count < 8. However, the common case for
+        // this where it actually makes a difference is handled in the early out at the top of the function, 
+        // so it is less than a 1% difference here. I opted for improved code size, fewer branches and reduced 
+        // complexity, and removed it.
+        
+        dotmin = min;
+        
+        // scan for the first occurrence of min in the array
+        size_t test;
+        for( index = 0; 0 == (test=_mm_movemask_ps( _mm_cmpeq_ps( stack_array[index], min))); index++ )   // local_count must be a multiple of 4
+        {}
+        minIndex = 4*index + segment + indexTable[test];
+    }
+    
+    _mm_store_ss( dotResult, dotmin);
+    return minIndex;
+}
+
+
+#elif defined BT_USE_NEON
+#define ARM_NEON_GCC_COMPATIBILITY  1
+#include <arm_neon.h>
+
+
+// Forward declarations of the NEON implementations. For both the max and the min search there are
+// two kernels (_v0 and _v1) and a "_sel" routine that chooses between them.
+static long _maxdot_large_v0( const float *vv, const float *vec, unsigned long count, float *dotResult );
+static long _maxdot_large_v1( const float *vv, const float *vec, unsigned long count, float *dotResult );
+static long _maxdot_large_sel( const float *vv, const float *vec, unsigned long count, float *dotResult );
+static long _mindot_large_v0( const float *vv, const float *vec, unsigned long count, float *dotResult );
+static long _mindot_large_v1( const float *vv, const float *vec, unsigned long count, float *dotResult );
+static long _mindot_large_sel( const float *vv, const float *vec, unsigned long count, float *dotResult );
+
+// Dispatch pointers. Each initially refers to its "_sel" routine; that routine overwrites the pointer
+// with the kernel it picked and forwards the call, so later calls through the pointer reach the kernel directly.
+long (*_maxdot_large)( const float *vv, const float *vec, unsigned long count, float *dotResult ) = _maxdot_large_sel;
+long (*_mindot_large)( const float *vv, const float *vec, unsigned long count, float *dotResult ) = _mindot_large_sel;
+
+// Capability query with C linkage, defined outside this file. The "_sel" routines test bit 0x2000 of its result.
+extern "C" {int  _get_cpu_capabilities( void );}
+
+// First-call resolver behind the _maxdot_large pointer: stores the kernel selected from the CPU
+// capability bits into _maxdot_large, then forwards the current call to that kernel.
+static long _maxdot_large_sel( const float *vv, const float *vec, unsigned long count, float *dotResult )
+{
+    // Capability bit 0x2000 set -> _v1 kernel, clear -> _v0 kernel.
+    const bool useV1 = ( _get_cpu_capabilities() & 0x2000 ) != 0;
+    _maxdot_large = useV1 ? _maxdot_large_v1 : _maxdot_large_v0;
+
+    return _maxdot_large(vv, vec, count, dotResult);
+}
+
+// First-call resolver behind the _mindot_large pointer: stores the kernel selected from the CPU
+// capability bits into _mindot_large, then forwards the current call to that kernel.
+static long _mindot_large_sel( const float *vv, const float *vec, unsigned long count, float *dotResult )
+{
+    // Capability bit 0x2000 set -> _v1 kernel, clear -> _v0 kernel.
+    const bool useV1 = ( _get_cpu_capabilities() & 0x2000 ) != 0;
+    _mindot_large = useV1 ? _mindot_large_v1 : _mindot_large_v0;
+
+    return _mindot_large(vv, vec, count, dotResult);
+}
+
+
+
+// Statement-expression macro (GNU extension) that issues "vld1.f32 {q}, [_ptr, :128]!" and evaluates to
+// the loaded float32x4_t. _ptr is bound as a read-write register operand ("+r"), so the instruction's
+// "!" address writeback updates the caller's pointer variable in place; _ptr must be a modifiable lvalue.
+// ":128" is the instruction's 128-bit alignment qualifier, i.e. _ptr is expected to be 16-byte aligned.
+// NOTE(review): the asm declares neither a memory input nor a "memory" clobber although it reads
+// through _ptr -- confirm the compiler cannot reorder stores to that buffer across it.
+#define vld1q_f32_aligned_postincrement( _ptr ) ({ float32x4_t _r; asm( "vld1.f32  {%0}, [%1, :128]!\n" : "=w" (_r), "+r" (_ptr) ); /*return*/ _r; })
+
+
+// NEON kernel working on two-lane (float32x2_t) vectors: finds the vertex in vv whose dot product
+// with vec is the largest.
+//   vv        - vertex array, read 4 floats per vertex through the aligned post-increment load;
+//               only the first three floats of each vertex contribute to the dot product.
+//   vec       - direction vector, read as 4 floats; the first three are used.
+//   count     - number of vertices in vv.
+//   dotResult - receives the largest dot product found.
+// Returns the index of the winning vertex. All comparisons are strict (vcgt), so a lane keeps its
+// current value when a later candidate merely ties. If nothing ever compares greater (for example
+// count == 0) the initial values are reported: -BT_INFINITY and the all-ones index lane.
+long _maxdot_large_v0( const float *vv, const float *vec, unsigned long count, float *dotResult )
+{
+    unsigned long i = 0;
+    float32x4_t vvec = vld1q_f32_aligned_postincrement( vec );
+    float32x2_t vLo = vget_low_f32(vvec);                           // { vec.x, vec.y }
+    float32x2_t vHi = vdup_lane_f32(vget_high_f32(vvec), 0);        // { vec.z, vec.z }
+    // Running maxima: Lo tracks vertices 0,1 of each group of four, Hi tracks vertices 2,3.
+    float32x2_t dotMaxLo = (float32x2_t) { -BT_INFINITY, -BT_INFINITY };
+    float32x2_t dotMaxHi = (float32x2_t) { -BT_INFINITY, -BT_INFINITY };
+    // Indices of the vertices currently being examined in each lane; bumped by 4 per group.
+    uint32x2_t indexLo = (uint32x2_t) {0, 1};
+    uint32x2_t indexHi = (uint32x2_t) {2, 3};
+    // Indices that produced the running maxima; all-ones until a lane is first updated.
+    uint32x2_t iLo = (uint32x2_t) {-1, -1};
+    uint32x2_t iHi = (uint32x2_t) {-1, -1};
+    const uint32x2_t four = (uint32x2_t) {4,4};
+
+    // Main loop, unrolled twice: each iteration handles two groups of four vertices.
+    for( ; i+8 <= count; i+= 8 )
+    {
+        float32x4_t v0 = vld1q_f32_aligned_postincrement( vv );
+        float32x4_t v1 = vld1q_f32_aligned_postincrement( vv );
+        float32x4_t v2 = vld1q_f32_aligned_postincrement( vv );
+        float32x4_t v3 = vld1q_f32_aligned_postincrement( vv );
+        
+        // { x*vec.x, y*vec.y } for each vertex
+        float32x2_t xy0 = vmul_f32( vget_low_f32(v0), vLo);
+        float32x2_t xy1 = vmul_f32( vget_low_f32(v1), vLo);
+        float32x2_t xy2 = vmul_f32( vget_low_f32(v2), vLo);
+        float32x2_t xy3 = vmul_f32( vget_low_f32(v3), vLo);
+        
+        // val[0] of the transpose gathers lane 0 (z) of both high halves: { z0, z1 } and { z2, z3 }
+        float32x2x2_t z0 = vtrn_f32( vget_high_f32(v0), vget_high_f32(v1));
+        float32x2x2_t z1 = vtrn_f32( vget_high_f32(v2), vget_high_f32(v3));
+        float32x2_t zLo = vmul_f32( z0.val[0], vHi);
+        float32x2_t zHi = vmul_f32( z1.val[0], vHi);
+        
+        // pairwise add folds x*vx + y*vy per vertex; adding the z term completes the dot products
+        float32x2_t rLo = vpadd_f32( xy0, xy1);
+        float32x2_t rHi = vpadd_f32( xy2, xy3);
+        rLo = vadd_f32(rLo, zLo);
+        rHi = vadd_f32(rHi, zHi);
+        
+        // per-lane select: where the new dot product is strictly greater, take it and its index
+        uint32x2_t maskLo = vcgt_f32( rLo, dotMaxLo );
+        uint32x2_t maskHi = vcgt_f32( rHi, dotMaxHi );
+        dotMaxLo = vbsl_f32( maskLo, rLo, dotMaxLo);
+        dotMaxHi = vbsl_f32( maskHi, rHi, dotMaxHi);
+        iLo = vbsl_u32(maskLo, indexLo, iLo);
+        iHi = vbsl_u32(maskHi, indexHi, iHi);
+        indexLo = vadd_u32(indexLo, four); 
+        indexHi = vadd_u32(indexHi, four);
+
+        // second group of four vertices, same steps as above
+        v0 = vld1q_f32_aligned_postincrement( vv );
+        v1 = vld1q_f32_aligned_postincrement( vv );
+        v2 = vld1q_f32_aligned_postincrement( vv );
+        v3 = vld1q_f32_aligned_postincrement( vv );
+        
+        xy0 = vmul_f32( vget_low_f32(v0), vLo);
+        xy1 = vmul_f32( vget_low_f32(v1), vLo);
+        xy2 = vmul_f32( vget_low_f32(v2), vLo);
+        xy3 = vmul_f32( vget_low_f32(v3), vLo);
+        
+        z0 = vtrn_f32( vget_high_f32(v0), vget_high_f32(v1));
+        z1 = vtrn_f32( vget_high_f32(v2), vget_high_f32(v3));
+        zLo = vmul_f32( z0.val[0], vHi);
+        zHi = vmul_f32( z1.val[0], vHi);
+        
+        rLo = vpadd_f32( xy0, xy1);
+        rHi = vpadd_f32( xy2, xy3);
+        rLo = vadd_f32(rLo, zLo);
+        rHi = vadd_f32(rHi, zHi);
+        
+        maskLo = vcgt_f32( rLo, dotMaxLo );
+        maskHi = vcgt_f32( rHi, dotMaxHi );
+        dotMaxLo = vbsl_f32( maskLo, rLo, dotMaxLo);
+        dotMaxHi = vbsl_f32( maskHi, rHi, dotMaxHi);
+        iLo = vbsl_u32(maskLo, indexLo, iLo);
+        iHi = vbsl_u32(maskHi, indexHi, iHi);
+        indexLo = vadd_u32(indexLo, four);
+        indexHi = vadd_u32(indexHi, four);
+    }
+
+    // At most one further complete group of four vertices remains after the unrolled loop.
+    for( ; i+4 <= count; i+= 4 )
+    {
+        float32x4_t v0 = vld1q_f32_aligned_postincrement( vv );
+        float32x4_t v1 = vld1q_f32_aligned_postincrement( vv );
+        float32x4_t v2 = vld1q_f32_aligned_postincrement( vv );
+        float32x4_t v3 = vld1q_f32_aligned_postincrement( vv );
+        
+        float32x2_t xy0 = vmul_f32( vget_low_f32(v0), vLo);
+        float32x2_t xy1 = vmul_f32( vget_low_f32(v1), vLo);
+        float32x2_t xy2 = vmul_f32( vget_low_f32(v2), vLo);
+        float32x2_t xy3 = vmul_f32( vget_low_f32(v3), vLo);
+        
+        float32x2x2_t z0 = vtrn_f32( vget_high_f32(v0), vget_high_f32(v1));
+        float32x2x2_t z1 = vtrn_f32( vget_high_f32(v2), vget_high_f32(v3));
+        float32x2_t zLo = vmul_f32( z0.val[0], vHi);
+        float32x2_t zHi = vmul_f32( z1.val[0], vHi);
+        
+        float32x2_t rLo = vpadd_f32( xy0, xy1);
+        float32x2_t rHi = vpadd_f32( xy2, xy3);
+        rLo = vadd_f32(rLo, zLo);
+        rHi = vadd_f32(rHi, zHi);
+        
+        uint32x2_t maskLo = vcgt_f32( rLo, dotMaxLo );
+        uint32x2_t maskHi = vcgt_f32( rHi, dotMaxHi );
+        dotMaxLo = vbsl_f32( maskLo, rLo, dotMaxLo);
+        dotMaxHi = vbsl_f32( maskHi, rHi, dotMaxHi);
+        iLo = vbsl_u32(maskLo, indexLo, iLo);
+        iHi = vbsl_u32(maskHi, indexHi, iHi);
+        indexLo = vadd_u32(indexLo, four);
+        indexHi = vadd_u32(indexHi, four);
+    }
+    
+    // Leftover 1-3 vertices. Only the vertices that exist are loaded; a lane with no vertex of its
+    // own is filled with a copy of the last vertex's result.
+    switch( count & 3 )
+    {
+        case 3:
+        {
+            float32x4_t v0 = vld1q_f32_aligned_postincrement( vv );
+            float32x4_t v1 = vld1q_f32_aligned_postincrement( vv );
+            float32x4_t v2 = vld1q_f32_aligned_postincrement( vv );
+            
+            float32x2_t xy0 = vmul_f32( vget_low_f32(v0), vLo);
+            float32x2_t xy1 = vmul_f32( vget_low_f32(v1), vLo);
+            float32x2_t xy2 = vmul_f32( vget_low_f32(v2), vLo);
+            
+            float32x2x2_t z0 = vtrn_f32( vget_high_f32(v0), vget_high_f32(v1));
+            float32x2_t zLo = vmul_f32( z0.val[0], vHi);
+            float32x2_t zHi = vmul_f32( vdup_lane_f32(vget_high_f32(v2), 0), vHi);
+            
+            float32x2_t rLo = vpadd_f32( xy0, xy1);
+            // v2's dot product lands in both lanes of rHi. Lane 1 pairs it with an index one past
+            // v2, but lane 0 then holds a value at least as large, so the strict compares in the
+            // final reduction never pick that lane-1 copy.
+            float32x2_t rHi = vpadd_f32( xy2, xy2);
+            rLo = vadd_f32(rLo, zLo);
+            rHi = vadd_f32(rHi, zHi);
+            
+            uint32x2_t maskLo = vcgt_f32( rLo, dotMaxLo );
+            uint32x2_t maskHi = vcgt_f32( rHi, dotMaxHi );
+            dotMaxLo = vbsl_f32( maskLo, rLo, dotMaxLo);
+            dotMaxHi = vbsl_f32( maskHi, rHi, dotMaxHi);
+            iLo = vbsl_u32(maskLo, indexLo, iLo);
+            iHi = vbsl_u32(maskHi, indexHi, iHi);
+        }
+            break;
+        case 2:
+        {
+            // exactly fills the Lo pair; the Hi accumulators are left untouched
+            float32x4_t v0 = vld1q_f32_aligned_postincrement( vv );
+            float32x4_t v1 = vld1q_f32_aligned_postincrement( vv );
+            
+            float32x2_t xy0 = vmul_f32( vget_low_f32(v0), vLo);
+            float32x2_t xy1 = vmul_f32( vget_low_f32(v1), vLo);
+            
+            float32x2x2_t z0 = vtrn_f32( vget_high_f32(v0), vget_high_f32(v1));
+            float32x2_t zLo = vmul_f32( z0.val[0], vHi);
+            
+            float32x2_t rLo = vpadd_f32( xy0, xy1);
+            rLo = vadd_f32(rLo, zLo);
+            
+            uint32x2_t maskLo = vcgt_f32( rLo, dotMaxLo );
+            dotMaxLo = vbsl_f32( maskLo, rLo, dotMaxLo);
+            iLo = vbsl_u32(maskLo, indexLo, iLo);
+        }
+            break;
+        case 1:
+        {
+            // single vertex: its dot product is replicated into both Lo lanes
+            float32x4_t v0 = vld1q_f32_aligned_postincrement( vv );
+            float32x2_t xy0 = vmul_f32( vget_low_f32(v0), vLo);
+            float32x2_t z0 = vdup_lane_f32(vget_high_f32(v0), 0);
+            float32x2_t zLo = vmul_f32( z0, vHi);
+            float32x2_t rLo = vpadd_f32( xy0, xy0);
+            rLo = vadd_f32(rLo, zLo);
+            uint32x2_t maskLo = vcgt_f32( rLo, dotMaxLo );
+            dotMaxLo = vbsl_f32( maskLo, rLo, dotMaxLo);
+            iLo = vbsl_u32(maskLo, indexLo, iLo);
+        }
+            break;
+        
+        default:
+            break;
+    }
+    
+    // select best answer between hi and lo results
+    uint32x2_t mask = vcgt_f32( dotMaxHi, dotMaxLo );
+    dotMaxLo = vbsl_f32(mask, dotMaxHi, dotMaxLo);
+    iLo = vbsl_u32(mask, iHi, iLo);
+    
+    // select best answer between even and odd results
+    // (lane 1 is broadcast and compared against lane 0; the winner ends up in lane 0)
+    dotMaxHi = vdup_lane_f32(dotMaxLo, 1);
+    iHi = vdup_lane_u32(iLo, 1);
+    mask = vcgt_f32( dotMaxHi, dotMaxLo );
+    dotMaxLo = vbsl_f32(mask, dotMaxHi, dotMaxLo);
+    iLo = vbsl_u32(mask, iHi, iLo);
+    
+    *dotResult = vget_lane_f32( dotMaxLo, 0);
+    return vget_lane_u32(iLo, 0);
+}
+
+
+// NEON kernel working on four-lane (float32x4_t) vectors: finds the vertex in vv whose dot product
+// with vec is the largest.
+//   vv        - vertex array, read 4 floats per vertex through the aligned post-increment load;
+//               only the first three floats of each vertex contribute to the dot product.
+//   vec       - direction vector, read as 4 floats; the first three are used.
+//   count     - number of vertices in vv.
+//   dotResult - receives the largest dot product found.
+// Returns the index of the winning vertex. All comparisons are strict (vcgt), so a lane keeps its
+// current value when a later candidate merely ties. If nothing ever compares greater (for example
+// count == 0) the initial values are reported: -BT_INFINITY and the all-ones index lane.
+long _maxdot_large_v1( const float *vv, const float *vec, unsigned long count, float *dotResult )
+{
+    float32x4_t vvec = vld1q_f32_aligned_postincrement( vec );
+    float32x4_t vLo = vcombine_f32(vget_low_f32(vvec), vget_low_f32(vvec));   // { vx, vy, vx, vy }
+    float32x4_t vHi = vdupq_lane_f32(vget_high_f32(vvec), 0);                 // { vz, vz, vz, vz }
+    const uint32x4_t four = (uint32x4_t){ 4, 4, 4, 4 };
+    // indices of the four vertices currently being examined, one per lane
+    uint32x4_t local_index = (uint32x4_t) {0, 1, 2, 3};
+    // per-lane index of the running maximum; all-ones until a lane is first updated
+    uint32x4_t index = (uint32x4_t) { -1, -1, -1, -1 };
+    float32x4_t maxDot = (float32x4_t) { -BT_INFINITY, -BT_INFINITY, -BT_INFINITY, -BT_INFINITY };
+    
+    unsigned long i = 0;
+    // Main loop, unrolled twice: each iteration handles two groups of four vertices.
+    for( ; i + 8 <= count; i += 8 )
+    {
+        float32x4_t v0 = vld1q_f32_aligned_postincrement( vv );
+        float32x4_t v1 = vld1q_f32_aligned_postincrement( vv );
+        float32x4_t v2 = vld1q_f32_aligned_postincrement( vv );
+        float32x4_t v3 = vld1q_f32_aligned_postincrement( vv );
+        
+        // the next two lines should resolve to a single vswp d, d
+        float32x4_t xy0 = vcombine_f32( vget_low_f32(v0), vget_low_f32(v1));     // { x0, y0, x1, y1 }
+        float32x4_t xy1 = vcombine_f32( vget_low_f32(v2), vget_low_f32(v3));     // { x2, y2, x3, y3 }
+        // the next two lines should resolve to a single vswp d, d
+        float32x4_t z0 = vcombine_f32( vget_high_f32(v0), vget_high_f32(v1));    // { z0, w0, z1, w1 }
+        float32x4_t z1 = vcombine_f32( vget_high_f32(v2), vget_high_f32(v3));    // { z2, w2, z3, w3 }
+        
+        xy0 = vmulq_f32(xy0, vLo);
+        xy1 = vmulq_f32(xy1, vLo);
+        
+        // unzip: val[0] collects the even lanes, val[1] the odd lanes
+        float32x4x2_t zb = vuzpq_f32( z0, z1);
+        float32x4_t z = vmulq_f32( zb.val[0], vHi);          // the four z products
+        float32x4x2_t xy = vuzpq_f32( xy0, xy1);
+        float32x4_t x = vaddq_f32(xy.val[0], xy.val[1]);     // x products + y products
+        x = vaddq_f32(x, z);                                 // four complete dot products
+        
+        // per-lane select: where the new dot product is strictly greater, take it and its index
+        uint32x4_t mask = vcgtq_f32(x, maxDot);
+        maxDot = vbslq_f32( mask, x, maxDot);
+        index = vbslq_u32(mask, local_index, index);
+        local_index = vaddq_u32(local_index, four);
+
+        // second group of four vertices, same steps as above
+        v0 = vld1q_f32_aligned_postincrement( vv );
+        v1 = vld1q_f32_aligned_postincrement( vv );
+        v2 = vld1q_f32_aligned_postincrement( vv );
+        v3 = vld1q_f32_aligned_postincrement( vv );
+        
+        // the next two lines should resolve to a single vswp d, d
+        xy0 = vcombine_f32( vget_low_f32(v0), vget_low_f32(v1));
+        xy1 = vcombine_f32( vget_low_f32(v2), vget_low_f32(v3));
+        // the next two lines should resolve to a single vswp d, d
+        z0 = vcombine_f32( vget_high_f32(v0), vget_high_f32(v1));
+        z1 = vcombine_f32( vget_high_f32(v2), vget_high_f32(v3));
+        
+        xy0 = vmulq_f32(xy0, vLo);
+        xy1 = vmulq_f32(xy1, vLo);
+        
+        zb = vuzpq_f32( z0, z1);
+        z = vmulq_f32( zb.val[0], vHi);
+        xy = vuzpq_f32( xy0, xy1);
+        x = vaddq_f32(xy.val[0], xy.val[1]);
+        x = vaddq_f32(x, z);
+        
+        mask = vcgtq_f32(x, maxDot);
+        maxDot = vbslq_f32( mask, x, maxDot);
+        index = vbslq_u32(mask, local_index, index);
+        local_index = vaddq_u32(local_index, four);
+    }
+
+    // At most one further complete group of four vertices remains after the unrolled loop.
+    for( ; i + 4 <= count; i += 4 )
+    {
+        float32x4_t v0 = vld1q_f32_aligned_postincrement( vv );
+        float32x4_t v1 = vld1q_f32_aligned_postincrement( vv );
+        float32x4_t v2 = vld1q_f32_aligned_postincrement( vv );
+        float32x4_t v3 = vld1q_f32_aligned_postincrement( vv );
+
+        // the next two lines should resolve to a single vswp d, d
+        float32x4_t xy0 = vcombine_f32( vget_low_f32(v0), vget_low_f32(v1));
+        float32x4_t xy1 = vcombine_f32( vget_low_f32(v2), vget_low_f32(v3));
+        // the next two lines should resolve to a single vswp d, d
+        float32x4_t z0 = vcombine_f32( vget_high_f32(v0), vget_high_f32(v1));
+        float32x4_t z1 = vcombine_f32( vget_high_f32(v2), vget_high_f32(v3));
+        
+        xy0 = vmulq_f32(xy0, vLo);
+        xy1 = vmulq_f32(xy1, vLo);
+        
+        float32x4x2_t zb = vuzpq_f32( z0, z1);
+        float32x4_t z = vmulq_f32( zb.val[0], vHi);
+        float32x4x2_t xy = vuzpq_f32( xy0, xy1);
+        float32x4_t x = vaddq_f32(xy.val[0], xy.val[1]);
+        x = vaddq_f32(x, z);
+        
+        uint32x4_t mask = vcgtq_f32(x, maxDot);
+        maxDot = vbslq_f32( mask, x, maxDot);
+        index = vbslq_u32(mask, local_index, index);
+        local_index = vaddq_u32(local_index, four);
+    }
+    
+    // Leftover 1-3 vertices. Lanes with no vertex of their own receive a copy of a real vertex's
+    // dot product, paired with a local_index beyond the last vertex. A lower lane always holds a
+    // value at least as large, so the strict compares in the final reduction never pick such a copy.
+    switch (count & 3) {
+        case 3:
+        {
+            float32x4_t v0 = vld1q_f32_aligned_postincrement( vv );
+            float32x4_t v1 = vld1q_f32_aligned_postincrement( vv );
+            float32x4_t v2 = vld1q_f32_aligned_postincrement( vv );
+            
+            // the next two lines should resolve to a single vswp d, d
+            float32x4_t xy0 = vcombine_f32( vget_low_f32(v0), vget_low_f32(v1));
+            float32x4_t xy1 = vcombine_f32( vget_low_f32(v2), vget_low_f32(v2));     // v2 stands in for the missing v3
+            // the next two lines should resolve to a single vswp d, d
+            float32x4_t z0 = vcombine_f32( vget_high_f32(v0), vget_high_f32(v1));
+            float32x4_t z1 = vcombine_f32( vget_high_f32(v2), vget_high_f32(v2));
+            
+            xy0 = vmulq_f32(xy0, vLo);
+            xy1 = vmulq_f32(xy1, vLo);
+            
+            float32x4x2_t zb = vuzpq_f32( z0, z1);
+            float32x4_t z = vmulq_f32( zb.val[0], vHi);
+            float32x4x2_t xy = vuzpq_f32( xy0, xy1);
+            float32x4_t x = vaddq_f32(xy.val[0], xy.val[1]);
+            x = vaddq_f32(x, z);
+            
+            uint32x4_t mask = vcgtq_f32(x, maxDot);
+            maxDot = vbslq_f32( mask, x, maxDot);
+            index = vbslq_u32(mask, local_index, index);
+            local_index = vaddq_u32(local_index, four);
+        }
+            break;
+
+        case 2:
+        {
+            float32x4_t v0 = vld1q_f32_aligned_postincrement( vv );
+            float32x4_t v1 = vld1q_f32_aligned_postincrement( vv );
+            
+            // the next two lines should resolve to a single vswp d, d
+            float32x4_t xy0 = vcombine_f32( vget_low_f32(v0), vget_low_f32(v1));
+            // the next two lines should resolve to a single vswp d, d
+            float32x4_t z0 = vcombine_f32( vget_high_f32(v0), vget_high_f32(v1));
+            
+            xy0 = vmulq_f32(xy0, vLo);
+            
+            // unzipping a vector with itself repeats the two results in lanes 2 and 3
+            float32x4x2_t zb = vuzpq_f32( z0, z0);
+            float32x4_t z = vmulq_f32( zb.val[0], vHi);
+            float32x4x2_t xy = vuzpq_f32( xy0, xy0);
+            float32x4_t x = vaddq_f32(xy.val[0], xy.val[1]);
+            x = vaddq_f32(x, z);
+            
+            uint32x4_t mask = vcgtq_f32(x, maxDot);
+            maxDot = vbslq_f32( mask, x, maxDot);
+            index = vbslq_u32(mask, local_index, index);
+            local_index = vaddq_u32(local_index, four);
+        }
+            break;
+
+        case 1:
+        {
+            float32x4_t v0 = vld1q_f32_aligned_postincrement( vv );
+            
+            // the next two lines should resolve to a single vswp d, d
+            float32x4_t xy0 = vcombine_f32( vget_low_f32(v0), vget_low_f32(v0));
+            // the next two lines should resolve to a single vswp d, d
+            float32x4_t z = vdupq_lane_f32(vget_high_f32(v0), 0); 
+            
+            xy0 = vmulq_f32(xy0, vLo);
+            
+            // the single vertex's dot product ends up in all four lanes
+            z = vmulq_f32( z, vHi);
+            float32x4x2_t xy = vuzpq_f32( xy0, xy0);
+            float32x4_t x = vaddq_f32(xy.val[0], xy.val[1]);
+            x = vaddq_f32(x, z);
+            
+            uint32x4_t mask = vcgtq_f32(x, maxDot);
+            maxDot = vbslq_f32( mask, x, maxDot);
+            index = vbslq_u32(mask, local_index, index);
+            local_index = vaddq_u32(local_index, four);
+        }
+            break;
+
+        default:
+            break;
+    }
+    
+    
+    // select best answer between hi and lo results
+    uint32x2_t mask = vcgt_f32( vget_high_f32(maxDot), vget_low_f32(maxDot));
+    float32x2_t maxDot2 = vbsl_f32(mask, vget_high_f32(maxDot), vget_low_f32(maxDot));
+    uint32x2_t index2 = vbsl_u32(mask, vget_high_u32(index), vget_low_u32(index));
+    
+    // select best answer between even and odd results
+    // (lane 1 is broadcast and compared against lane 0; the winner ends up in lane 0)
+    float32x2_t maxDotO = vdup_lane_f32(maxDot2, 1);
+    uint32x2_t indexHi = vdup_lane_u32(index2, 1);
+    mask = vcgt_f32( maxDotO, maxDot2 );
+    maxDot2 = vbsl_f32(mask, maxDotO, maxDot2);
+    index2 = vbsl_u32(mask, indexHi, index2);
+    
+    *dotResult = vget_lane_f32( maxDot2, 0);
+    return vget_lane_u32(index2, 0);
+    
+}
+
+// NEON kernel working on two-lane (float32x2_t) vectors: finds the vertex in vv whose dot product
+// with vec is the smallest.
+//   vv        - vertex array, read 4 floats per vertex through the aligned post-increment load;
+//               only the first three floats of each vertex contribute to the dot product.
+//   vec       - direction vector, read as 4 floats; the first three are used.
+//   count     - number of vertices in vv.
+//   dotResult - receives the smallest dot product found.
+// Returns the index of the winning vertex. All comparisons are strict (vclt), so a lane keeps its
+// current value when a later candidate merely ties. If nothing ever compares less (for example
+// count == 0) the initial values are reported: BT_INFINITY and the all-ones index lane.
+long _mindot_large_v0( const float *vv, const float *vec, unsigned long count, float *dotResult )
+{
+    unsigned long i = 0;
+    float32x4_t vvec = vld1q_f32_aligned_postincrement( vec );
+    float32x2_t vLo = vget_low_f32(vvec);                           // { vec.x, vec.y }
+    float32x2_t vHi = vdup_lane_f32(vget_high_f32(vvec), 0);        // { vec.z, vec.z }
+    // Running minima: Lo tracks vertices 0,1 of each group of four, Hi tracks vertices 2,3.
+    float32x2_t dotMinLo = (float32x2_t) { BT_INFINITY, BT_INFINITY };
+    float32x2_t dotMinHi = (float32x2_t) { BT_INFINITY, BT_INFINITY };
+    // Indices of the vertices currently being examined in each lane; bumped by 4 per group.
+    uint32x2_t indexLo = (uint32x2_t) {0, 1};
+    uint32x2_t indexHi = (uint32x2_t) {2, 3};
+    // Indices that produced the running minima; all-ones until a lane is first updated.
+    uint32x2_t iLo = (uint32x2_t) {-1, -1};
+    uint32x2_t iHi = (uint32x2_t) {-1, -1};
+    const uint32x2_t four = (uint32x2_t) {4,4};
+    
+    // Main loop, unrolled twice: each iteration handles two groups of four vertices.
+    for( ; i+8 <= count; i+= 8 )
+    {
+        float32x4_t v0 = vld1q_f32_aligned_postincrement( vv );
+        float32x4_t v1 = vld1q_f32_aligned_postincrement( vv );
+        float32x4_t v2 = vld1q_f32_aligned_postincrement( vv );
+        float32x4_t v3 = vld1q_f32_aligned_postincrement( vv );
+        
+        // { x*vec.x, y*vec.y } for each vertex
+        float32x2_t xy0 = vmul_f32( vget_low_f32(v0), vLo);
+        float32x2_t xy1 = vmul_f32( vget_low_f32(v1), vLo);
+        float32x2_t xy2 = vmul_f32( vget_low_f32(v2), vLo);
+        float32x2_t xy3 = vmul_f32( vget_low_f32(v3), vLo);
+        
+        // val[0] of the transpose gathers lane 0 (z) of both high halves: { z0, z1 } and { z2, z3 }
+        float32x2x2_t z0 = vtrn_f32( vget_high_f32(v0), vget_high_f32(v1));
+        float32x2x2_t z1 = vtrn_f32( vget_high_f32(v2), vget_high_f32(v3));
+        float32x2_t zLo = vmul_f32( z0.val[0], vHi);
+        float32x2_t zHi = vmul_f32( z1.val[0], vHi);
+        
+        // pairwise add folds x*vx + y*vy per vertex; adding the z term completes the dot products
+        float32x2_t rLo = vpadd_f32( xy0, xy1);
+        float32x2_t rHi = vpadd_f32( xy2, xy3);
+        rLo = vadd_f32(rLo, zLo);
+        rHi = vadd_f32(rHi, zHi);
+        
+        // per-lane select: where the new dot product is strictly smaller, take it and its index
+        uint32x2_t maskLo = vclt_f32( rLo, dotMinLo );
+        uint32x2_t maskHi = vclt_f32( rHi, dotMinHi );
+        dotMinLo = vbsl_f32( maskLo, rLo, dotMinLo);
+        dotMinHi = vbsl_f32( maskHi, rHi, dotMinHi);
+        iLo = vbsl_u32(maskLo, indexLo, iLo);
+        iHi = vbsl_u32(maskHi, indexHi, iHi);
+        indexLo = vadd_u32(indexLo, four);
+        indexHi = vadd_u32(indexHi, four);
+        
+        // second group of four vertices, same steps as above
+        v0 = vld1q_f32_aligned_postincrement( vv );
+        v1 = vld1q_f32_aligned_postincrement( vv );
+        v2 = vld1q_f32_aligned_postincrement( vv );
+        v3 = vld1q_f32_aligned_postincrement( vv );
+        
+        xy0 = vmul_f32( vget_low_f32(v0), vLo);
+        xy1 = vmul_f32( vget_low_f32(v1), vLo);
+        xy2 = vmul_f32( vget_low_f32(v2), vLo);
+        xy3 = vmul_f32( vget_low_f32(v3), vLo);
+        
+        z0 = vtrn_f32( vget_high_f32(v0), vget_high_f32(v1));
+        z1 = vtrn_f32( vget_high_f32(v2), vget_high_f32(v3));
+        zLo = vmul_f32( z0.val[0], vHi);
+        zHi = vmul_f32( z1.val[0], vHi);
+        
+        rLo = vpadd_f32( xy0, xy1);
+        rHi = vpadd_f32( xy2, xy3);
+        rLo = vadd_f32(rLo, zLo);
+        rHi = vadd_f32(rHi, zHi);
+        
+        maskLo = vclt_f32( rLo, dotMinLo );
+        maskHi = vclt_f32( rHi, dotMinHi );
+        dotMinLo = vbsl_f32( maskLo, rLo, dotMinLo);
+        dotMinHi = vbsl_f32( maskHi, rHi, dotMinHi);
+        iLo = vbsl_u32(maskLo, indexLo, iLo);
+        iHi = vbsl_u32(maskHi, indexHi, iHi);
+        indexLo = vadd_u32(indexLo, four);
+        indexHi = vadd_u32(indexHi, four);
+    }
+
+    // At most one further complete group of four vertices remains after the unrolled loop.
+    for( ; i+4 <= count; i+= 4 )
+    {
+        float32x4_t v0 = vld1q_f32_aligned_postincrement( vv );
+        float32x4_t v1 = vld1q_f32_aligned_postincrement( vv );
+        float32x4_t v2 = vld1q_f32_aligned_postincrement( vv );
+        float32x4_t v3 = vld1q_f32_aligned_postincrement( vv );
+        
+        float32x2_t xy0 = vmul_f32( vget_low_f32(v0), vLo);
+        float32x2_t xy1 = vmul_f32( vget_low_f32(v1), vLo);
+        float32x2_t xy2 = vmul_f32( vget_low_f32(v2), vLo);
+        float32x2_t xy3 = vmul_f32( vget_low_f32(v3), vLo);
+        
+        float32x2x2_t z0 = vtrn_f32( vget_high_f32(v0), vget_high_f32(v1));
+        float32x2x2_t z1 = vtrn_f32( vget_high_f32(v2), vget_high_f32(v3));
+        float32x2_t zLo = vmul_f32( z0.val[0], vHi);
+        float32x2_t zHi = vmul_f32( z1.val[0], vHi);
+        
+        float32x2_t rLo = vpadd_f32( xy0, xy1);
+        float32x2_t rHi = vpadd_f32( xy2, xy3);
+        rLo = vadd_f32(rLo, zLo);
+        rHi = vadd_f32(rHi, zHi);
+        
+        uint32x2_t maskLo = vclt_f32( rLo, dotMinLo );
+        uint32x2_t maskHi = vclt_f32( rHi, dotMinHi );
+        dotMinLo = vbsl_f32( maskLo, rLo, dotMinLo);
+        dotMinHi = vbsl_f32( maskHi, rHi, dotMinHi);
+        iLo = vbsl_u32(maskLo, indexLo, iLo);
+        iHi = vbsl_u32(maskHi, indexHi, iHi);
+        indexLo = vadd_u32(indexLo, four);
+        indexHi = vadd_u32(indexHi, four);
+    }
+    // Leftover 1-3 vertices. Only the vertices that exist are loaded; a lane with no vertex of its
+    // own is filled with a copy of the last vertex's result.
+    switch( count & 3 )
+    {
+        case 3:
+        {
+            float32x4_t v0 = vld1q_f32_aligned_postincrement( vv );
+            float32x4_t v1 = vld1q_f32_aligned_postincrement( vv );
+            float32x4_t v2 = vld1q_f32_aligned_postincrement( vv );
+            
+            float32x2_t xy0 = vmul_f32( vget_low_f32(v0), vLo);
+            float32x2_t xy1 = vmul_f32( vget_low_f32(v1), vLo);
+            float32x2_t xy2 = vmul_f32( vget_low_f32(v2), vLo);
+            
+            float32x2x2_t z0 = vtrn_f32( vget_high_f32(v0), vget_high_f32(v1));
+            float32x2_t zLo = vmul_f32( z0.val[0], vHi);
+            float32x2_t zHi = vmul_f32( vdup_lane_f32(vget_high_f32(v2), 0), vHi);
+            
+            float32x2_t rLo = vpadd_f32( xy0, xy1);
+            // v2's dot product lands in both lanes of rHi. Lane 1 pairs it with an index one past
+            // v2, but lane 0 then holds a value at least as small, so the strict compares in the
+            // final reduction never pick that lane-1 copy.
+            float32x2_t rHi = vpadd_f32( xy2, xy2);
+            rLo = vadd_f32(rLo, zLo);
+            rHi = vadd_f32(rHi, zHi);
+            
+            uint32x2_t maskLo = vclt_f32( rLo, dotMinLo );
+            uint32x2_t maskHi = vclt_f32( rHi, dotMinHi );
+            dotMinLo = vbsl_f32( maskLo, rLo, dotMinLo);
+            dotMinHi = vbsl_f32( maskHi, rHi, dotMinHi);
+            iLo = vbsl_u32(maskLo, indexLo, iLo);
+            iHi = vbsl_u32(maskHi, indexHi, iHi);
+        }
+            break;
+        case 2:
+        {
+            // exactly fills the Lo pair; the Hi accumulators are left untouched
+            float32x4_t v0 = vld1q_f32_aligned_postincrement( vv );
+            float32x4_t v1 = vld1q_f32_aligned_postincrement( vv );
+            
+            float32x2_t xy0 = vmul_f32( vget_low_f32(v0), vLo);
+            float32x2_t xy1 = vmul_f32( vget_low_f32(v1), vLo);
+            
+            float32x2x2_t z0 = vtrn_f32( vget_high_f32(v0), vget_high_f32(v1));
+            float32x2_t zLo = vmul_f32( z0.val[0], vHi);
+            
+            float32x2_t rLo = vpadd_f32( xy0, xy1);
+            rLo = vadd_f32(rLo, zLo);
+            
+            uint32x2_t maskLo = vclt_f32( rLo, dotMinLo );
+            dotMinLo = vbsl_f32( maskLo, rLo, dotMinLo);
+            iLo = vbsl_u32(maskLo, indexLo, iLo);
+        }
+            break;
+        case 1:
+        {
+            // single vertex: its dot product is replicated into both Lo lanes
+            float32x4_t v0 = vld1q_f32_aligned_postincrement( vv );
+            float32x2_t xy0 = vmul_f32( vget_low_f32(v0), vLo);
+            float32x2_t z0 = vdup_lane_f32(vget_high_f32(v0), 0);
+            float32x2_t zLo = vmul_f32( z0, vHi);
+            float32x2_t rLo = vpadd_f32( xy0, xy0);
+            rLo = vadd_f32(rLo, zLo);
+            uint32x2_t maskLo = vclt_f32( rLo, dotMinLo );
+            dotMinLo = vbsl_f32( maskLo, rLo, dotMinLo);
+            iLo = vbsl_u32(maskLo, indexLo, iLo);
+        }
+            break;
+            
+        default:
+            break;
+    }
+    
+    // select best answer between hi and lo results
+    uint32x2_t mask = vclt_f32( dotMinHi, dotMinLo );
+    dotMinLo = vbsl_f32(mask, dotMinHi, dotMinLo);
+    iLo = vbsl_u32(mask, iHi, iLo);
+    
+    // select best answer between even and odd results
+    // (lane 1 is broadcast and compared against lane 0; the winner ends up in lane 0)
+    dotMinHi = vdup_lane_f32(dotMinLo, 1);
+    iHi = vdup_lane_u32(iLo, 1);
+    mask = vclt_f32( dotMinHi, dotMinLo );
+    dotMinLo = vbsl_f32(mask, dotMinHi, dotMinLo);
+    iLo = vbsl_u32(mask, iHi, iLo);
+    
+    *dotResult = vget_lane_f32( dotMinLo, 0);
+    return vget_lane_u32(iLo, 0);
+}
+
+long _mindot_large_v1( const float *vv, const float *vec, unsigned long count, float *dotResult )
+{
+    float32x4_t vvec = vld1q_f32_aligned_postincrement( vec );
+    float32x4_t vLo = vcombine_f32(vget_low_f32(vvec), vget_low_f32(vvec));
+    float32x4_t vHi = vdupq_lane_f32(vget_high_f32(vvec), 0);
+    const uint32x4_t four = (uint32x4_t){ 4, 4, 4, 4 };
+    uint32x4_t local_index = (uint32x4_t) {0, 1, 2, 3};
+    uint32x4_t index = (uint32x4_t) { -1, -1, -1, -1 };
+    float32x4_t minDot = (float32x4_t) { BT_INFINITY, BT_INFINITY, BT_INFINITY, BT_INFINITY };
+    
+    unsigned long i = 0;
+    for( ; i + 8 <= count; i += 8 )
+    {
+        float32x4_t v0 = vld1q_f32_aligned_postincrement( vv );
+        float32x4_t v1 = vld1q_f32_aligned_postincrement( vv );
+        float32x4_t v2 = vld1q_f32_aligned_postincrement( vv );
+        float32x4_t v3 = vld1q_f32_aligned_postincrement( vv );
+        
+        // the next two lines should resolve to a single vswp d, d
+        float32x4_t xy0 = vcombine_f32( vget_low_f32(v0), vget_low_f32(v1));
+        float32x4_t xy1 = vcombine_f32( vget_low_f32(v2), vget_low_f32(v3));
+        // the next two lines should resolve to a single vswp d, d
+        float32x4_t z0 = vcombine_f32( vget_high_f32(v0), vget_high_f32(v1));
+        float32x4_t z1 = vcombine_f32( vget_high_f32(v2), vget_high_f32(v3));
+        
+        xy0 = vmulq_f32(xy0, vLo);
+        xy1 = vmulq_f32(xy1, vLo);
+        
+        float32x4x2_t zb = vuzpq_f32( z0, z1);
+        float32x4_t z = vmulq_f32( zb.val[0], vHi);
+        float32x4x2_t xy = vuzpq_f32( xy0, xy1);
+        float32x4_t x = vaddq_f32(xy.val[0], xy.val[1]);
+        x = vaddq_f32(x, z);
+        
+        uint32x4_t mask = vcltq_f32(x, minDot);
+        minDot = vbslq_f32( mask, x, minDot);
+        index = vbslq_u32(mask, local_index, index);
+        local_index = vaddq_u32(local_index, four);
+        
+        v0 = vld1q_f32_aligned_postincrement( vv );
+        v1 = vld1q_f32_aligned_postincrement( vv );
+        v2 = vld1q_f32_aligned_postincrement( vv );
+        v3 = vld1q_f32_aligned_postincrement( vv );
+        
+        // the next two lines should resolve to a single vswp d, d
+        xy0 = vcombine_f32( vget_low_f32(v0), vget_low_f32(v1));
+        xy1 = vcombine_f32( vget_low_f32(v2), vget_low_f32(v3));
+        // the next two lines should resolve to a single vswp d, d
+        z0 = vcombine_f32( vget_high_f32(v0), vget_high_f32(v1));
+        z1 = vcombine_f32( vget_high_f32(v2), vget_high_f32(v3));
+        
+        xy0 = vmulq_f32(xy0, vLo);
+        xy1 = vmulq_f32(xy1, vLo);
+        
+        zb = vuzpq_f32( z0, z1);
+        z = vmulq_f32( zb.val[0], vHi);
+        xy = vuzpq_f32( xy0, xy1);
+        x = vaddq_f32(xy.val[0], xy.val[1]);
+        x = vaddq_f32(x, z);
+        
+        mask = vcltq_f32(x, minDot);
+        minDot = vbslq_f32( mask, x, minDot);
+        index = vbslq_u32(mask, local_index, index);
+        local_index = vaddq_u32(local_index, four);
+    }
+    
+    for( ; i + 4 <= count; i += 4 )
+    {
+        float32x4_t v0 = vld1q_f32_aligned_postincrement( vv );
+        float32x4_t v1 = vld1q_f32_aligned_postincrement( vv );
+        float32x4_t v2 = vld1q_f32_aligned_postincrement( vv );
+        float32x4_t v3 = vld1q_f32_aligned_postincrement( vv );
+        
+        // the next two lines should resolve to a single vswp d, d
+        float32x4_t xy0 = vcombine_f32( vget_low_f32(v0), vget_low_f32(v1));
+        float32x4_t xy1 = vcombine_f32( vget_low_f32(v2), vget_low_f32(v3));
+        // the next two lines should resolve to a single vswp d, d
+        float32x4_t z0 = vcombine_f32( vget_high_f32(v0), vget_high_f32(v1));
+        float32x4_t z1 = vcombine_f32( vget_high_f32(v2), vget_high_f32(v3));
+        
+        xy0 = vmulq_f32(xy0, vLo);
+        xy1 = vmulq_f32(xy1, vLo);
+        
+        float32x4x2_t zb = vuzpq_f32( z0, z1);
+        float32x4_t z = vmulq_f32( zb.val[0], vHi);
+        float32x4x2_t xy = vuzpq_f32( xy0, xy1);
+        float32x4_t x = vaddq_f32(xy.val[0], xy.val[1]);
+        x = vaddq_f32(x, z);
+        
+        uint32x4_t mask = vcltq_f32(x, minDot);
+        minDot = vbslq_f32( mask, x, minDot);
+        index = vbslq_u32(mask, local_index, index);
+        local_index = vaddq_u32(local_index, four);
+    }
+    
+    switch (count & 3) {
+        case 3:
+        {
+            float32x4_t v0 = vld1q_f32_aligned_postincrement( vv );
+            float32x4_t v1 = vld1q_f32_aligned_postincrement( vv );
+            float32x4_t v2 = vld1q_f32_aligned_postincrement( vv );
+            
+            // the next two lines should resolve to a single vswp d, d
+            float32x4_t xy0 = vcombine_f32( vget_low_f32(v0), vget_low_f32(v1));
+            float32x4_t xy1 = vcombine_f32( vget_low_f32(v2), vget_low_f32(v2));
+            // the next two lines should resolve to a single vswp d, d
+            float32x4_t z0 = vcombine_f32( vget_high_f32(v0), vget_high_f32(v1));
+            float32x4_t z1 = vcombine_f32( vget_high_f32(v2), vget_high_f32(v2));
+            
+            xy0 = vmulq_f32(xy0, vLo);
+            xy1 = vmulq_f32(xy1, vLo);
+            
+            float32x4x2_t zb = vuzpq_f32( z0, z1);
+            float32x4_t z = vmulq_f32( zb.val[0], vHi);
+            float32x4x2_t xy = vuzpq_f32( xy0, xy1);
+            float32x4_t x = vaddq_f32(xy.val[0], xy.val[1]);
+            x = vaddq_f32(x, z);
+            
+            uint32x4_t mask = vcltq_f32(x, minDot);
+            minDot = vbslq_f32( mask, x, minDot);
+            index = vbslq_u32(mask, local_index, index);
+            local_index = vaddq_u32(local_index, four);
+        }
+            break;
+            
+        case 2:
+        {
+            float32x4_t v0 = vld1q_f32_aligned_postincrement( vv );
+            float32x4_t v1 = vld1q_f32_aligned_postincrement( vv );
+            
+            // the next two lines should resolve to a single vswp d, d
+            float32x4_t xy0 = vcombine_f32( vget_low_f32(v0), vget_low_f32(v1));
+            // the next two lines should resolve to a single vswp d, d
+            float32x4_t z0 = vcombine_f32( vget_high_f32(v0), vget_high_f32(v1));
+            
+            xy0 = vmulq_f32(xy0, vLo);
+            
+            float32x4x2_t zb = vuzpq_f32( z0, z0);
+            float32x4_t z = vmulq_f32( zb.val[0], vHi);
+            float32x4x2_t xy = vuzpq_f32( xy0, xy0);
+            float32x4_t x = vaddq_f32(xy.val[0], xy.val[1]);
+            x = vaddq_f32(x, z);
+            
+            uint32x4_t mask = vcltq_f32(x, minDot);
+            minDot = vbslq_f32( mask, x, minDot);
+            index = vbslq_u32(mask, local_index, index);
+            local_index = vaddq_u32(local_index, four);
+        }
+            break;
+            
+        case 1:
+        {
+            float32x4_t v0 = vld1q_f32_aligned_postincrement( vv );
+            
+            // the next two lines should resolve to a single vswp d, d
+            float32x4_t xy0 = vcombine_f32( vget_low_f32(v0), vget_low_f32(v0));
+            // the next two lines should resolve to a single vswp d, d
+            float32x4_t z = vdupq_lane_f32(vget_high_f32(v0), 0); 
+            
+            xy0 = vmulq_f32(xy0, vLo);
+            
+            z = vmulq_f32( z, vHi);
+            float32x4x2_t xy = vuzpq_f32( xy0, xy0);
+            float32x4_t x = vaddq_f32(xy.val[0], xy.val[1]);
+            x = vaddq_f32(x, z);
+            
+            uint32x4_t mask = vcltq_f32(x, minDot);
+            minDot = vbslq_f32( mask, x, minDot);
+            index = vbslq_u32(mask, local_index, index);
+            local_index = vaddq_u32(local_index, four);
+        }
+            break;
+            
+        default:
+            break;
+    }
+    
+    
+    // select best answer between hi and lo results
+    uint32x2_t mask = vclt_f32( vget_high_f32(minDot), vget_low_f32(minDot));
+    float32x2_t minDot2 = vbsl_f32(mask, vget_high_f32(minDot), vget_low_f32(minDot));
+    uint32x2_t index2 = vbsl_u32(mask, vget_high_u32(index), vget_low_u32(index));
+    
+    // select best answer between even and odd results
+    float32x2_t minDotO = vdup_lane_f32(minDot2, 1);
+    uint32x2_t indexHi = vdup_lane_u32(index2, 1);
+    mask = vclt_f32( minDotO, minDot2 );
+    minDot2 = vbsl_f32(mask, minDotO, minDot2);
+    index2 = vbsl_u32(mask, indexHi, index2);
+    
+    *dotResult = vget_lane_f32( minDot2, 0);
+    return vget_lane_u32(index2, 0);
+    
+}
+
+#else
+    #error Unhandled __APPLE__ arch
+#endif
+
+#endif  /* __APPLE__ */
+
+
diff --git a/Engine/lib/bullet/src/LinearMath/btVector3.h b/Engine/lib/bullet/src/LinearMath/btVector3.h
index 4a670f4fb..5001dfa9f 100644
--- a/Engine/lib/bullet/src/LinearMath/btVector3.h
+++ b/Engine/lib/bullet/src/LinearMath/btVector3.h
@@ -14,22 +14,73 @@ subject to the following restrictions:
 
 
 
-#ifndef SIMD__VECTOR3_H
-#define SIMD__VECTOR3_H
+#ifndef BT_VECTOR3_H
+#define BT_VECTOR3_H
 
-
-#include "btScalar.h"
+//#include <stdint.h>
 #include "btScalar.h"
 #include "btMinMax.h"
+#include "btAlignedAllocator.h"
+
+#ifdef BT_USE_DOUBLE_PRECISION
+#define btVector3Data btVector3DoubleData
+#define btVector3DataName "btVector3DoubleData"
+#else
+#define btVector3Data btVector3FloatData
+#define btVector3DataName "btVector3FloatData"
+#endif //BT_USE_DOUBLE_PRECISION
+
+#if defined BT_USE_SSE
+
+//typedef  uint32_t __m128i __attribute__ ((vector_size(16)));
+
+#ifdef _MSC_VER
+#pragma warning(disable: 4556) // value of intrinsic immediate argument '4294967239' is out of range '0 - 255'
+#endif
+
+
+// BT_SHUFFLE packs four lane selectors into an _mm_shuffle_ps immediate; "& 0xff" keeps it in 0..255 even for calls such as bt_splat_ps(v, 0x80).
+#define BT_SHUFFLE(x,y,z,w) (((w)<<6 | (z)<<4 | (y)<<2 | (x)) & 0xff)
+#define bt_pshufd_ps( _a, _mask ) _mm_shuffle_ps((_a), (_a), (_mask) )
+#define bt_splat3_ps( _a, _i ) bt_pshufd_ps((_a), BT_SHUFFLE(_i,_i,_i, 3) )
+#define bt_splat_ps( _a, _i )  bt_pshufd_ps((_a), BT_SHUFFLE(_i,_i,_i,_i) )
+
+#define btv3AbsiMask (_mm_set_epi32(0x00000000, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF))
+#define btvAbsMask (_mm_set_epi32( 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF))
+#define btvFFF0Mask (_mm_set_epi32(0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF))
+#define btv3AbsfMask btCastiTo128f(btv3AbsiMask)
+#define btvFFF0fMask btCastiTo128f(btvFFF0Mask)
+#define btvxyzMaskf btvFFF0fMask
+#define btvAbsfMask btCastiTo128f(btvAbsMask)
+
+
+
+const __m128 ATTRIBUTE_ALIGNED16(btvMzeroMask) = {-0.0f, -0.0f, -0.0f, -0.0f};
+const __m128 ATTRIBUTE_ALIGNED16(v1110) = {1.0f, 1.0f, 1.0f, 0.0f};
+const __m128 ATTRIBUTE_ALIGNED16(vHalf) = {0.5f, 0.5f, 0.5f, 0.5f};
+const __m128 ATTRIBUTE_ALIGNED16(v1_5)  = {1.5f, 1.5f, 1.5f, 1.5f};
+
+#endif
+
+#ifdef BT_USE_NEON
+
+const float32x4_t ATTRIBUTE_ALIGNED16(btvMzeroMask) = (float32x4_t){-0.0f, -0.0f, -0.0f, -0.0f};
+const int32x4_t ATTRIBUTE_ALIGNED16(btvFFF0Mask) = (int32x4_t){0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x0};
+const int32x4_t ATTRIBUTE_ALIGNED16(btvAbsMask) = (int32x4_t){0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF};
+const int32x4_t ATTRIBUTE_ALIGNED16(btv3AbsMask) = (int32x4_t){0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x0};
+
+#endif
+
 /**@brief btVector3 can be used to represent 3D points and vectors.
  * It has an un-used w component to suit 16-byte alignment when btVector3 is stored in containers. This extra component can be used by derived classes (Quaternion?) or by user
  * Ideally, this class should be replaced by a platform optimized SIMD version that keeps the data in registers
  */
-
 ATTRIBUTE_ALIGNED16(class) btVector3
 {
 public:
 
+	BT_DECLARE_ALIGNED_ALLOCATOR();
+
 #if defined (__SPU__) && defined (__CELLOS_LV2__)
 		btScalar	m_floats[4];
 public:
@@ -39,28 +90,31 @@ public:
 	}
 public:
 #else //__CELLOS_LV2__ __SPU__
-#ifdef BT_USE_SSE // WIN32
-	union {
-		__m128 mVec128;
-		btScalar	m_floats[4];
-	};
-	SIMD_FORCE_INLINE	__m128	get128() const
-	{
-		return mVec128;
-	}
-	SIMD_FORCE_INLINE	void	set128(__m128 v128)
-	{
-		mVec128 = v128;
-	}
-#else
-	btScalar	m_floats[4];
-#endif
+    #if defined (BT_USE_SSE) || defined(BT_USE_NEON) // _WIN32 || ARM
+        union {
+            btSimdFloat4      mVec128;
+            btScalar	m_floats[4];
+        };
+        SIMD_FORCE_INLINE	btSimdFloat4	get128() const
+        {
+            return mVec128;
+        }
+        SIMD_FORCE_INLINE	void	set128(btSimdFloat4 v128)
+        {
+            mVec128 = v128;
+        }
+    #else
+        btScalar	m_floats[4];
+    #endif
 #endif //__CELLOS_LV2__ __SPU__
 
 	public:
 
   /**@brief No initialization constructor */
-	SIMD_FORCE_INLINE btVector3() {}
+	SIMD_FORCE_INLINE btVector3() 
+	{
+
+	}
 
  
 	
@@ -69,21 +123,50 @@ public:
    * @param y Y value 
    * @param z Z value 
    */
-	SIMD_FORCE_INLINE btVector3(const btScalar& x, const btScalar& y, const btScalar& z)
+	SIMD_FORCE_INLINE btVector3(const btScalar& _x, const btScalar& _y, const btScalar& _z)
 	{
-		m_floats[0] = x;
-		m_floats[1] = y;
-		m_floats[2] = z;
-		m_floats[3] = btScalar(0.);
+		m_floats[0] = _x;
+		m_floats[1] = _y;
+		m_floats[2] = _z;
+		m_floats[3] = btScalar(0.f);
 	}
 
-	
+#if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE) )|| defined (BT_USE_NEON)
+	// Construct from a raw SIMD register; all four lanes (including w) are taken from v as-is.
+	SIMD_FORCE_INLINE btVector3( btSimdFloat4 v)
+	{
+		mVec128 = v;
+	}
+
+	// Copy constructor: copies the whole 128-bit register in one assignment.
+	SIMD_FORCE_INLINE btVector3(const btVector3& rhs)
+	{
+		mVec128 = rhs.mVec128;
+	}
+
+	// Assignment operator: copies the whole 128-bit register and returns *this.
+	SIMD_FORCE_INLINE btVector3& 
+	operator=(const btVector3& v) 
+	{
+		mVec128 = v.mVec128;
+		
+		return *this;
+	}
+#endif // (BT_USE_SSE_IN_API && BT_USE_SSE) || BT_USE_NEON
+    
 /**@brief Add a vector to this one 
  * @param The vector to add to this one */
 	SIMD_FORCE_INLINE btVector3& operator+=(const btVector3& v)
 	{
-
-		m_floats[0] += v.m_floats[0]; m_floats[1] += v.m_floats[1];m_floats[2] += v.m_floats[2];
+#if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
+		mVec128 = _mm_add_ps(mVec128, v.mVec128);
+#elif defined(BT_USE_NEON)
+		mVec128 = vaddq_f32(mVec128, v.mVec128);
+#else
+		m_floats[0] += v.m_floats[0]; 
+		m_floats[1] += v.m_floats[1];
+		m_floats[2] += v.m_floats[2];
+#endif
 		return *this;
 	}
 
@@ -92,14 +175,33 @@ public:
    * @param The vector to subtract */
 	SIMD_FORCE_INLINE btVector3& operator-=(const btVector3& v) 
 	{
-		m_floats[0] -= v.m_floats[0]; m_floats[1] -= v.m_floats[1];m_floats[2] -= v.m_floats[2];
+#if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
+		mVec128 = _mm_sub_ps(mVec128, v.mVec128);
+#elif defined(BT_USE_NEON)
+		mVec128 = vsubq_f32(mVec128, v.mVec128);
+#else
+		m_floats[0] -= v.m_floats[0]; 
+		m_floats[1] -= v.m_floats[1];
+		m_floats[2] -= v.m_floats[2];
+#endif
 		return *this;
 	}
+	
   /**@brief Scale the vector
    * @param s Scale factor */
 	SIMD_FORCE_INLINE btVector3& operator*=(const btScalar& s)
 	{
-		m_floats[0] *= s; m_floats[1] *= s;m_floats[2] *= s;
+#if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
+		__m128	vs = _mm_load_ss(&s);	//	(S 0 0 0)
+		vs = bt_pshufd_ps(vs, 0x80);	//	(S S S 0.0)
+		mVec128 = _mm_mul_ps(mVec128, vs);
+#elif defined(BT_USE_NEON)
+		mVec128 = vmulq_n_f32(mVec128, s);
+#else
+		m_floats[0] *= s; 
+		m_floats[1] *= s;
+		m_floats[2] *= s;
+#endif
 		return *this;
 	}
 
@@ -108,14 +210,42 @@ public:
 	SIMD_FORCE_INLINE btVector3& operator/=(const btScalar& s) 
 	{
 		btFullAssert(s != btScalar(0.0));
+
+#if 0 //defined(BT_USE_SSE_IN_API)
+// this code is not faster !
+		__m128 vs = _mm_load_ss(&s);
+		vs = _mm_div_ss(v1110, vs);
+		vs = bt_pshufd_ps(vs, 0x00);	//	(S S S S)
+
+		mVec128 = _mm_mul_ps(mVec128, vs);
+		
+		return *this;
+#else
 		return *this *= btScalar(1.0) / s;
+#endif
 	}
 
   /**@brief Return the dot product
    * @param v The other vector in the dot product */
 	SIMD_FORCE_INLINE btScalar dot(const btVector3& v) const
 	{
-		return m_floats[0] * v.m_floats[0] + m_floats[1] * v.m_floats[1] +m_floats[2] * v.m_floats[2];
+#if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)		
+		__m128 vd = _mm_mul_ps(mVec128, v.mVec128);
+		__m128 z = _mm_movehl_ps(vd, vd);
+		__m128 y = _mm_shuffle_ps(vd, vd, 0x55);
+		vd = _mm_add_ss(vd, y);
+		vd = _mm_add_ss(vd, z);
+		return _mm_cvtss_f32(vd);
+#elif defined(BT_USE_NEON)
+		float32x4_t vd = vmulq_f32(mVec128, v.mVec128);
+		float32x2_t x = vpadd_f32(vget_low_f32(vd), vget_low_f32(vd));  
+		x = vadd_f32(x, vget_high_f32(vd));
+		return vget_lane_f32(x, 0);
+#else	
+		return	m_floats[0] * v.m_floats[0] + 
+				m_floats[1] * v.m_floats[1] + 
+				m_floats[2] * v.m_floats[2];
+#endif
 	}
 
   /**@brief Return the length of the vector squared */
@@ -138,20 +268,70 @@ public:
    * This is symantically treating the vector like a point */
 	SIMD_FORCE_INLINE btScalar distance(const btVector3& v) const;
 
+	SIMD_FORCE_INLINE btVector3& safeNormalize() 
+	{
+		btVector3 absVec = this->absolute();
+		int maxIndex = absVec.maxAxis();
+		if (absVec[maxIndex]>0)
+		{
+			*this /= absVec[maxIndex];
+			return *this /= length();
+		}
+		setValue(1,0,0);
+		return *this;
+	}
+
   /**@brief Normalize this vector 
    * x^2 + y^2 + z^2 = 1 */
 	SIMD_FORCE_INLINE btVector3& normalize() 
 	{
+#if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)		
+        // dot product first
+		__m128 vd = _mm_mul_ps(mVec128, mVec128);
+		__m128 z = _mm_movehl_ps(vd, vd);
+		__m128 y = _mm_shuffle_ps(vd, vd, 0x55);
+		vd = _mm_add_ss(vd, y);
+		vd = _mm_add_ss(vd, z);
+		
+        #if 0
+        vd = _mm_sqrt_ss(vd);
+		vd = _mm_div_ss(v1110, vd);
+		vd = bt_splat_ps(vd, 0x80);
+		mVec128 = _mm_mul_ps(mVec128, vd);
+        #else
+        
+        // NR step 1/sqrt(x) - vd is x, y is output 
+        y = _mm_rsqrt_ss(vd); // estimate 
+        
+        //  one step NR 
+        z = v1_5;
+        vd = _mm_mul_ss(vd, vHalf); // vd * 0.5	
+        //x2 = vd;
+        vd = _mm_mul_ss(vd, y); // vd * 0.5 * y0
+        vd = _mm_mul_ss(vd, y); // vd * 0.5 * y0 * y0
+        z = _mm_sub_ss(z, vd);  // 1.5 - vd * 0.5 * y0 * y0 
+
+        y = _mm_mul_ss(y, z);   // y0 * (1.5 - vd * 0.5 * y0 * y0)
+
+		y = bt_splat_ps(y, 0x80);
+		mVec128 = _mm_mul_ps(mVec128, y);
+
+        #endif
+
+		
+		return *this;
+#else	
 		return *this /= length();
+#endif
 	}
 
   /**@brief Return a normalized version of this vector */
 	SIMD_FORCE_INLINE btVector3 normalized() const;
 
-  /**@brief Rotate this vector 
+  /**@brief Return a rotated version of this vector
    * @param wAxis The axis to rotate about 
    * @param angle The angle to rotate by */
-	SIMD_FORCE_INLINE btVector3 rotate( const btVector3& wAxis, const btScalar angle );
+	SIMD_FORCE_INLINE btVector3 rotate( const btVector3& wAxis, const btScalar angle ) const;
 
   /**@brief Return the angle between this and another vector
    * @param v The other vector */
@@ -161,29 +341,111 @@ public:
 		btFullAssert(s != btScalar(0.0));
 		return btAcos(dot(v) / s);
 	}
+	
   /**@brief Return a vector will the absolute values of each element */
 	SIMD_FORCE_INLINE btVector3 absolute() const 
 	{
+#if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE) 
+		return btVector3(_mm_and_ps(mVec128, btv3AbsfMask));
+#elif defined(BT_USE_NEON)
+		return btVector3(vabsq_f32(mVec128));
+#else	
 		return btVector3(
 			btFabs(m_floats[0]), 
 			btFabs(m_floats[1]), 
 			btFabs(m_floats[2]));
+#endif
 	}
+	
   /**@brief Return the cross product between this and another vector 
    * @param v The other vector */
 	SIMD_FORCE_INLINE btVector3 cross(const btVector3& v) const
 	{
+#if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
+		__m128	T, V;
+		
+		T = bt_pshufd_ps(mVec128, BT_SHUFFLE(1, 2, 0, 3));	//	(Y Z X 0)
+		V = bt_pshufd_ps(v.mVec128, BT_SHUFFLE(1, 2, 0, 3));	//	(Y Z X 0)
+		
+		V = _mm_mul_ps(V, mVec128);
+		T = _mm_mul_ps(T, v.mVec128);
+		V = _mm_sub_ps(V, T);
+		
+		V = bt_pshufd_ps(V, BT_SHUFFLE(1, 2, 0, 3));
+		return btVector3(V);
+#elif defined(BT_USE_NEON)
+		float32x4_t T, V;
+		// form (Y, Z, X, _) of mVec128 and v.mVec128
+		float32x2_t Tlow = vget_low_f32(mVec128);
+		float32x2_t Vlow = vget_low_f32(v.mVec128);
+		T = vcombine_f32(vext_f32(Tlow, vget_high_f32(mVec128), 1), Tlow);
+		V = vcombine_f32(vext_f32(Vlow, vget_high_f32(v.mVec128), 1), Vlow);
+		
+		V = vmulq_f32(V, mVec128);
+		T = vmulq_f32(T, v.mVec128);
+		V = vsubq_f32(V, T);
+		Vlow = vget_low_f32(V);
+		// form (Y, Z, X, _);
+		V = vcombine_f32(vext_f32(Vlow, vget_high_f32(V), 1), Vlow);
+		V = (float32x4_t)vandq_s32((int32x4_t)V, btvFFF0Mask);
+		
+		return btVector3(V);
+#else
 		return btVector3(
-			m_floats[1] * v.m_floats[2] -m_floats[2] * v.m_floats[1],
+			m_floats[1] * v.m_floats[2] - m_floats[2] * v.m_floats[1],
 			m_floats[2] * v.m_floats[0] - m_floats[0] * v.m_floats[2],
 			m_floats[0] * v.m_floats[1] - m_floats[1] * v.m_floats[0]);
+#endif
 	}
 
 	SIMD_FORCE_INLINE btScalar triple(const btVector3& v1, const btVector3& v2) const
 	{
-		return m_floats[0] * (v1.m_floats[1] * v2.m_floats[2] - v1.m_floats[2] * v2.m_floats[1]) + 
+#if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
+		// cross:
+		__m128 T = _mm_shuffle_ps(v1.mVec128, v1.mVec128, BT_SHUFFLE(1, 2, 0, 3));	//	(Y Z X 0)
+		__m128 V = _mm_shuffle_ps(v2.mVec128, v2.mVec128, BT_SHUFFLE(1, 2, 0, 3));	//	(Y Z X 0)
+		
+		V = _mm_mul_ps(V, v1.mVec128);
+		T = _mm_mul_ps(T, v2.mVec128);
+		V = _mm_sub_ps(V, T);
+		
+		V = _mm_shuffle_ps(V, V, BT_SHUFFLE(1, 2, 0, 3));
+
+		// dot: 
+		V = _mm_mul_ps(V, mVec128);
+		__m128 z = _mm_movehl_ps(V, V);
+		__m128 y = _mm_shuffle_ps(V, V, 0x55);
+		V = _mm_add_ss(V, y);
+		V = _mm_add_ss(V, z);
+		return _mm_cvtss_f32(V);
+
+#elif defined(BT_USE_NEON)
+		// cross:
+		float32x4_t T, V;
+		// form (Y, Z, X, _) of v1.mVec128 and v2.mVec128
+		float32x2_t Tlow = vget_low_f32(v1.mVec128);
+		float32x2_t Vlow = vget_low_f32(v2.mVec128);
+		T = vcombine_f32(vext_f32(Tlow, vget_high_f32(v1.mVec128), 1), Tlow);
+		V = vcombine_f32(vext_f32(Vlow, vget_high_f32(v2.mVec128), 1), Vlow);
+		
+		V = vmulq_f32(V, v1.mVec128);
+		T = vmulq_f32(T, v2.mVec128);
+		V = vsubq_f32(V, T);
+		Vlow = vget_low_f32(V);
+		// form (Y, Z, X, _);
+		V = vcombine_f32(vext_f32(Vlow, vget_high_f32(V), 1), Vlow);
+
+		// dot: 
+		V = vmulq_f32(mVec128, V);
+		float32x2_t x = vpadd_f32(vget_low_f32(V), vget_low_f32(V));  
+		x = vadd_f32(x, vget_high_f32(V));
+		return vget_lane_f32(x, 0);
+#else
+		return 
+			m_floats[0] * (v1.m_floats[1] * v2.m_floats[2] - v1.m_floats[2] * v2.m_floats[1]) + 
 			m_floats[1] * (v1.m_floats[2] * v2.m_floats[0] - v1.m_floats[0] * v2.m_floats[2]) + 
 			m_floats[2] * (v1.m_floats[0] * v2.m_floats[1] - v1.m_floats[1] * v2.m_floats[0]);
+#endif
 	}
 
   /**@brief Return the axis with the smallest value 
@@ -210,14 +472,31 @@ public:
 		return absolute().maxAxis();
 	}
 
+	
 	SIMD_FORCE_INLINE void setInterpolate3(const btVector3& v0, const btVector3& v1, btScalar rt)
 	{
+#if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
+		__m128	vrt = _mm_load_ss(&rt);	//	(rt 0 0 0)
+		btScalar s = btScalar(1.0) - rt;
+		__m128	vs = _mm_load_ss(&s);	//	(S 0 0 0)
+		vs = bt_pshufd_ps(vs, 0x80);	//	(S S S 0.0)
+		__m128 r0 = _mm_mul_ps(v0.mVec128, vs);
+		vrt = bt_pshufd_ps(vrt, 0x80);	//	(rt rt rt 0.0)
+		__m128 r1 = _mm_mul_ps(v1.mVec128, vrt);
+		__m128 tmp3 = _mm_add_ps(r0,r1);
+		mVec128 = tmp3;
+#elif defined(BT_USE_NEON)
+		mVec128 = vsubq_f32(v1.mVec128, v0.mVec128);
+		mVec128 = vmulq_n_f32(mVec128, rt);
+		mVec128 = vaddq_f32(mVec128, v0.mVec128);
+#else	
 		btScalar s = btScalar(1.0) - rt;
 		m_floats[0] = s * v0.m_floats[0] + rt * v1.m_floats[0];
 		m_floats[1] = s * v0.m_floats[1] + rt * v1.m_floats[1];
 		m_floats[2] = s * v0.m_floats[2] + rt * v1.m_floats[2];
 		//don't do the unused w component
 		//		m_co[3] = s * v0[3] + rt * v1[3];
+#endif
 	}
 
   /**@brief Return the linear interpolation between this and another vector 
@@ -225,16 +504,41 @@ public:
    * @param t The ration of this to v (t = 0 => return this, t=1 => return other) */
 	SIMD_FORCE_INLINE btVector3 lerp(const btVector3& v, const btScalar& t) const 
 	{
-		return btVector3(m_floats[0] + (v.m_floats[0] - m_floats[0]) * t,
-			m_floats[1] + (v.m_floats[1] - m_floats[1]) * t,
-			m_floats[2] + (v.m_floats[2] -m_floats[2]) * t);
+#if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
+		__m128	vt = _mm_load_ss(&t);	//	(t 0 0 0)
+		vt = bt_pshufd_ps(vt, 0x80);	//	(t t t 0.0)
+		__m128 vl = _mm_sub_ps(v.mVec128, mVec128);
+		vl = _mm_mul_ps(vl, vt);
+		vl = _mm_add_ps(vl, mVec128);
+		
+		return btVector3(vl);
+#elif defined(BT_USE_NEON)
+		float32x4_t vl = vsubq_f32(v.mVec128, mVec128);
+		vl = vmulq_n_f32(vl, t);
+		vl = vaddq_f32(vl, mVec128);
+		
+		return btVector3(vl);
+#else	
+		return 
+			btVector3(	m_floats[0] + (v.m_floats[0] - m_floats[0]) * t,
+						m_floats[1] + (v.m_floats[1] - m_floats[1]) * t,
+						m_floats[2] + (v.m_floats[2] - m_floats[2]) * t);
+#endif
 	}
 
   /**@brief Elementwise multiply this vector by the other 
    * @param v The other vector */
 	SIMD_FORCE_INLINE btVector3& operator*=(const btVector3& v)
 	{
-		m_floats[0] *= v.m_floats[0]; m_floats[1] *= v.m_floats[1];m_floats[2] *= v.m_floats[2];
+#if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
+		mVec128 = _mm_mul_ps(mVec128, v.mVec128);
+#elif defined(BT_USE_NEON)
+		mVec128 = vmulq_f32(mVec128, v.mVec128);
+#else	
+		m_floats[0] *= v.m_floats[0]; 
+		m_floats[1] *= v.m_floats[1];
+		m_floats[2] *= v.m_floats[2];
+#endif
 		return *this;
 	}
 
@@ -245,13 +549,13 @@ public:
   /**@brief Return the z value */
 		SIMD_FORCE_INLINE const btScalar& getZ() const { return m_floats[2]; }
   /**@brief Set the x value */
-		SIMD_FORCE_INLINE void	setX(btScalar x) { m_floats[0] = x;};
+		SIMD_FORCE_INLINE void	setX(btScalar _x) { m_floats[0] = _x;};
   /**@brief Set the y value */
-		SIMD_FORCE_INLINE void	setY(btScalar y) { m_floats[1] = y;};
+		SIMD_FORCE_INLINE void	setY(btScalar _y) { m_floats[1] = _y;};
   /**@brief Set the z value */
-		SIMD_FORCE_INLINE void	setZ(btScalar z) {m_floats[2] = z;};
+		SIMD_FORCE_INLINE void	setZ(btScalar _z) { m_floats[2] = _z;};
   /**@brief Set the w value */
-		SIMD_FORCE_INLINE void	setW(btScalar w) { m_floats[3] = w;};
+		SIMD_FORCE_INLINE void	setW(btScalar _w) { m_floats[3] = _w;};
   /**@brief Return the x value */
 		SIMD_FORCE_INLINE const btScalar& x() const { return m_floats[0]; }
   /**@brief Return the y value */
@@ -269,7 +573,14 @@ public:
 
 	SIMD_FORCE_INLINE	bool	operator==(const btVector3& other) const
 	{
-		return ((m_floats[3]==other.m_floats[3]) && (m_floats[2]==other.m_floats[2]) && (m_floats[1]==other.m_floats[1]) && (m_floats[0]==other.m_floats[0]));
+#if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
+        return (0xf == _mm_movemask_ps((__m128)_mm_cmpeq_ps(mVec128, other.mVec128)));
+#else 
+		return ((m_floats[3]==other.m_floats[3]) && 
+                (m_floats[2]==other.m_floats[2]) && 
+                (m_floats[1]==other.m_floats[1]) && 
+                (m_floats[0]==other.m_floats[0]));
+#endif
 	}
 
 	SIMD_FORCE_INLINE	bool	operator!=(const btVector3& other) const
@@ -277,81 +588,230 @@ public:
 		return !(*this == other);
 	}
 
-	 /**@brief Set each element to the max of the current values and the values of another btVector3
+  /**@brief Set each element to the max of the current values and the values of another btVector3
    * @param other The other btVector3 to compare with 
    */
-		SIMD_FORCE_INLINE void	setMax(const btVector3& other)
-		{
-			btSetMax(m_floats[0], other.m_floats[0]);
-			btSetMax(m_floats[1], other.m_floats[1]);
-			btSetMax(m_floats[2], other.m_floats[2]);
-			btSetMax(m_floats[3], other.w());
-		}
+	SIMD_FORCE_INLINE void	setMax(const btVector3& other)
+	{
+#if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
+		mVec128 = _mm_max_ps(mVec128, other.mVec128);
+#elif defined(BT_USE_NEON)
+		mVec128 = vmaxq_f32(mVec128, other.mVec128);
+#else
+		btSetMax(m_floats[0], other.m_floats[0]);
+		btSetMax(m_floats[1], other.m_floats[1]);
+		btSetMax(m_floats[2], other.m_floats[2]);
+		btSetMax(m_floats[3], other.w());
+#endif
+	}
+
   /**@brief Set each element to the min of the current values and the values of another btVector3
    * @param other The other btVector3 to compare with 
    */
-		SIMD_FORCE_INLINE void	setMin(const btVector3& other)
-		{
-			btSetMin(m_floats[0], other.m_floats[0]);
-			btSetMin(m_floats[1], other.m_floats[1]);
-			btSetMin(m_floats[2], other.m_floats[2]);
-			btSetMin(m_floats[3], other.w());
-		}
+	SIMD_FORCE_INLINE void	setMin(const btVector3& other)
+	{
+#if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
+		mVec128 = _mm_min_ps(mVec128, other.mVec128);
+#elif defined(BT_USE_NEON)
+		mVec128 = vminq_f32(mVec128, other.mVec128);
+#else
+		btSetMin(m_floats[0], other.m_floats[0]);
+		btSetMin(m_floats[1], other.m_floats[1]);
+		btSetMin(m_floats[2], other.m_floats[2]);
+		btSetMin(m_floats[3], other.w());
+#endif
+	}
 
-		SIMD_FORCE_INLINE void 	setValue(const btScalar& x, const btScalar& y, const btScalar& z)
-		{
-			m_floats[0]=x;
-			m_floats[1]=y;
-			m_floats[2]=z;
-			m_floats[3] = btScalar(0.);
-		}
+	SIMD_FORCE_INLINE void 	setValue(const btScalar& _x, const btScalar& _y, const btScalar& _z)
+	{
+		m_floats[0]=_x;
+		m_floats[1]=_y;
+		m_floats[2]=_z;
+		m_floats[3] = btScalar(0.f);
+	}
 
-		void	getSkewSymmetricMatrix(btVector3* v0,btVector3* v1,btVector3* v2) const
-		{
-			v0->setValue(0.		,-z()		,y());
-			v1->setValue(z()	,0.			,-x());
-			v2->setValue(-y()	,x()	,0.);
-		}
+	void	getSkewSymmetricMatrix(btVector3* v0,btVector3* v1,btVector3* v2) const
+	{
+#if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
+ 
+		__m128 V  = _mm_and_ps(mVec128, btvFFF0fMask);
+		__m128 V0 = _mm_xor_ps(btvMzeroMask, V);
+		__m128 V2 = _mm_movelh_ps(V0, V);
+		
+		__m128 V1 = _mm_shuffle_ps(V, V0, 0xCE);
+		
+        V0 = _mm_shuffle_ps(V0, V, 0xDB);
+		V2 = _mm_shuffle_ps(V2, V, 0xF9);
+		
+		v0->mVec128 = V0;
+		v1->mVec128 = V1;
+		v2->mVec128 = V2;
+#else
+		v0->setValue(0.		,-z()		,y());
+		v1->setValue(z()	,0.			,-x());
+		v2->setValue(-y()	,x()	,0.);
+#endif
+	}
 
-		void	setZero()
-		{
-			setValue(btScalar(0.),btScalar(0.),btScalar(0.));
-		}
+	void setZero()
+	{
+#if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
+		mVec128 = (__m128)_mm_xor_ps(mVec128, mVec128);
+#elif defined(BT_USE_NEON)
+		int32x4_t vi = vdupq_n_s32(0); 
+		mVec128 = vreinterpretq_f32_s32(vi);
+#else	
+		setValue(btScalar(0.),btScalar(0.),btScalar(0.));
+#endif
+	}
 
+	SIMD_FORCE_INLINE bool isZero() const 
+	{
+		return m_floats[0] == btScalar(0) && m_floats[1] == btScalar(0) && m_floats[2] == btScalar(0);
+	}
+
+	SIMD_FORCE_INLINE bool fuzzyZero() const 
+	{
+		return length2() < SIMD_EPSILON;
+	}
+
+	SIMD_FORCE_INLINE	void	serialize(struct	btVector3Data& dataOut) const;
+
+	SIMD_FORCE_INLINE	void	deSerialize(const struct	btVector3Data& dataIn);
+
+	SIMD_FORCE_INLINE	void	serializeFloat(struct	btVector3FloatData& dataOut) const;
+
+	SIMD_FORCE_INLINE	void	deSerializeFloat(const struct	btVector3FloatData& dataIn);
+
+	SIMD_FORCE_INLINE	void	serializeDouble(struct	btVector3DoubleData& dataOut) const;
+
+	SIMD_FORCE_INLINE	void	deSerializeDouble(const struct	btVector3DoubleData& dataIn);
+    
+        /**@brief returns index of maximum dot product between this and vectors in array[]
+         * @param array The other vectors 
+         * @param array_count The number of other vectors 
+         * @param dotOut The maximum dot product */
+        SIMD_FORCE_INLINE   long    maxDot( const btVector3 *array, long array_count, btScalar &dotOut ) const; 
+
+        /**@brief returns index of minimum dot product between this and vectors in array[]
+         * @param array The other vectors 
+         * @param array_count The number of other vectors 
+         * @param dotOut The minimum dot product */    
+        SIMD_FORCE_INLINE   long    minDot( const btVector3 *array, long array_count, btScalar &dotOut ) const; 
+
+    /* create a vector as  btVector3( this->dot( btVector3 v0 ), this->dot( btVector3 v1), this->dot( btVector3 v2 ))  */
+    SIMD_FORCE_INLINE btVector3  dot3( const btVector3 &v0, const btVector3 &v1, const btVector3 &v2 ) const
+    {
+#if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
+
+        __m128 a0 = _mm_mul_ps( v0.mVec128, this->mVec128 );
+        __m128 a1 = _mm_mul_ps( v1.mVec128, this->mVec128 );
+        __m128 a2 = _mm_mul_ps( v2.mVec128, this->mVec128 );
+        __m128 b0 = _mm_unpacklo_ps( a0, a1 );
+        __m128 b1 = _mm_unpackhi_ps( a0, a1 );
+        __m128 b2 = _mm_unpacklo_ps( a2, _mm_setzero_ps() );
+        __m128 r = _mm_movelh_ps( b0, b2 );
+        r = _mm_add_ps( r, _mm_movehl_ps( b2, b0 ));
+        a2 = _mm_and_ps( a2, btvxyzMaskf);
+        r = _mm_add_ps( r, btCastdTo128f (_mm_move_sd( btCastfTo128d(a2), btCastfTo128d(b1) )));
+        return btVector3(r);
+        
+#elif defined(BT_USE_NEON)
+        static const uint32x4_t xyzMask = (const uint32x4_t){ -1, -1, -1, 0 };
+        float32x4_t a0 = vmulq_f32( v0.mVec128, this->mVec128);
+        float32x4_t a1 = vmulq_f32( v1.mVec128, this->mVec128);
+        float32x4_t a2 = vmulq_f32( v2.mVec128, this->mVec128);
+        float32x2x2_t zLo = vtrn_f32( vget_high_f32(a0), vget_high_f32(a1));
+        a2 = (float32x4_t) vandq_u32((uint32x4_t) a2, xyzMask );
+        float32x2_t b0 = vadd_f32( vpadd_f32( vget_low_f32(a0), vget_low_f32(a1)), zLo.val[0] );
+        float32x2_t b1 = vpadd_f32( vpadd_f32( vget_low_f32(a2), vget_high_f32(a2)), vdup_n_f32(0.0f));
+        return btVector3( vcombine_f32(b0, b1) );
+#else	
+		return btVector3( dot(v0), dot(v1), dot(v2));
+#endif
+    }
 };
 
 /**@brief Return the sum of two vectors (Point symantics)*/
 SIMD_FORCE_INLINE btVector3 
 operator+(const btVector3& v1, const btVector3& v2) 
 {
-	return btVector3(v1.m_floats[0] + v2.m_floats[0], v1.m_floats[1] + v2.m_floats[1], v1.m_floats[2] + v2.m_floats[2]);
+#if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
+	return btVector3(_mm_add_ps(v1.mVec128, v2.mVec128));
+#elif defined(BT_USE_NEON)
+	return btVector3(vaddq_f32(v1.mVec128, v2.mVec128));
+#else
+	return btVector3(
+			v1.m_floats[0] + v2.m_floats[0], 
+			v1.m_floats[1] + v2.m_floats[1], 
+			v1.m_floats[2] + v2.m_floats[2]);
+#endif
 }
 
 /**@brief Return the elementwise product of two vectors */
 SIMD_FORCE_INLINE btVector3 
 operator*(const btVector3& v1, const btVector3& v2) 
 {
-	return btVector3(v1.m_floats[0] * v2.m_floats[0], v1.m_floats[1] * v2.m_floats[1], v1.m_floats[2] * v2.m_floats[2]);
+#if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
+	return btVector3(_mm_mul_ps(v1.mVec128, v2.mVec128));
+#elif defined(BT_USE_NEON)
+	return btVector3(vmulq_f32(v1.mVec128, v2.mVec128));
+#else
+	return btVector3(
+			v1.m_floats[0] * v2.m_floats[0], 
+			v1.m_floats[1] * v2.m_floats[1], 
+			v1.m_floats[2] * v2.m_floats[2]);
+#endif
 }
 
 /**@brief Return the difference between two vectors */
 SIMD_FORCE_INLINE btVector3 
 operator-(const btVector3& v1, const btVector3& v2)
 {
-	return btVector3(v1.m_floats[0] - v2.m_floats[0], v1.m_floats[1] - v2.m_floats[1], v1.m_floats[2] - v2.m_floats[2]);
+#if (defined(BT_USE_SSE_IN_API)  && defined(BT_USE_SSE))
+
+	//	without _mm_and_ps this code causes slowdown in Concave moving
+	__m128 r = _mm_sub_ps(v1.mVec128, v2.mVec128);
+	return btVector3(_mm_and_ps(r, btvFFF0fMask));
+#elif defined(BT_USE_NEON)
+	float32x4_t r = vsubq_f32(v1.mVec128, v2.mVec128);
+	return btVector3((float32x4_t)vandq_s32((int32x4_t)r, btvFFF0Mask));
+#else
+	return btVector3(
+			v1.m_floats[0] - v2.m_floats[0], 
+			v1.m_floats[1] - v2.m_floats[1], 
+			v1.m_floats[2] - v2.m_floats[2]);
+#endif
 }
+
 /**@brief Return the negative of the vector */
 SIMD_FORCE_INLINE btVector3 
 operator-(const btVector3& v)
 {
+#if (defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
+	__m128 r = _mm_xor_ps(v.mVec128, btvMzeroMask);
+	return btVector3(_mm_and_ps(r, btvFFF0fMask)); 
+#elif defined(BT_USE_NEON)
+	return btVector3((btSimdFloat4)veorq_s32((int32x4_t)v.mVec128, (int32x4_t)btvMzeroMask));
+#else	
 	return btVector3(-v.m_floats[0], -v.m_floats[1], -v.m_floats[2]);
+#endif
 }
 
 /**@brief Return the vector scaled by s */
 SIMD_FORCE_INLINE btVector3 
 operator*(const btVector3& v, const btScalar& s)
 {
+#if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
+	__m128	vs = _mm_load_ss(&s);	//	(S 0 0 0)
+	vs = bt_pshufd_ps(vs, 0x80);	//	(S S S 0.0)
+	return btVector3(_mm_mul_ps(v.mVec128, vs));
+#elif defined(BT_USE_NEON)
+	float32x4_t r = vmulq_n_f32(v.mVec128, s);
+	return btVector3((float32x4_t)vandq_s32((int32x4_t)r, btvFFF0Mask));
+#else
 	return btVector3(v.m_floats[0] * s, v.m_floats[1] * s, v.m_floats[2] * s);
+#endif
 }
 
 /**@brief Return the vector scaled by s */
@@ -366,14 +826,46 @@ SIMD_FORCE_INLINE btVector3
 operator/(const btVector3& v, const btScalar& s)
 {
 	btFullAssert(s != btScalar(0.0));
+#if 0 //defined(BT_USE_SSE_IN_API)
+// this code is not faster !
+	__m128 vs = _mm_load_ss(&s);
+    vs = _mm_div_ss(v1110, vs);
+	vs = bt_pshufd_ps(vs, 0x00);	//	(S S S S)
+
+	return btVector3(_mm_mul_ps(v.mVec128, vs));
+#else
 	return v * (btScalar(1.0) / s);
+#endif
 }
 
 /**@brief Return the vector inversely scaled by s */
 SIMD_FORCE_INLINE btVector3
 operator/(const btVector3& v1, const btVector3& v2)
 {
-	return btVector3(v1.m_floats[0] / v2.m_floats[0],v1.m_floats[1] / v2.m_floats[1],v1.m_floats[2] / v2.m_floats[2]);
+#if (defined(BT_USE_SSE_IN_API)&& defined (BT_USE_SSE))
+	__m128 vec = _mm_div_ps(v1.mVec128, v2.mVec128);
+	vec = _mm_and_ps(vec, btvFFF0fMask);
+	return btVector3(vec); 
+#elif defined(BT_USE_NEON)
+	float32x4_t x, y, v, m;
+
+	x = v1.mVec128;
+	y = v2.mVec128;
+	// Reciprocal estimate of y refined by two Newton-Raphson steps; the second step is fused with the multiply by x
+	v = vrecpeq_f32(y);			// v ~ 1/y
+	m = vrecpsq_f32(y, v);		// m = (2-v*y)
+	v = vmulq_f32(v, m);		// vv = v*m ~~ 1/y
+	m = vrecpsq_f32(y, v);		// mm = (2-vv*y)
+	v = vmulq_f32(v, x);		// x*vv
+	v = vmulq_f32(v, m);		// (x*vv)*(2-vv*y) = x*(vv(2-vv*y)) ~~~ x/y
+	// NOTE(review): no mask is applied to the result here, whereas the SSE branch ANDs its result with btvFFF0fMask
+	return btVector3(v);
+#else
+	return btVector3(
+			v1.m_floats[0] / v2.m_floats[0], 
+			v1.m_floats[1] / v2.m_floats[1],
+			v1.m_floats[2] / v2.m_floats[2]);
+#endif
 }
 
 /**@brief Return the dot product between two vectors */
@@ -443,22 +935,135 @@ SIMD_FORCE_INLINE btScalar btVector3::distance(const btVector3& v) const
 
 SIMD_FORCE_INLINE btVector3 btVector3::normalized() const
 {
+#if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
+	btVector3 norm = *this;
+
+	return norm.normalize();
+#else
 	return *this / length();
+#endif
 } 
 
-SIMD_FORCE_INLINE btVector3 btVector3::rotate( const btVector3& wAxis, const btScalar angle )
+SIMD_FORCE_INLINE btVector3 btVector3::rotate( const btVector3& wAxis, const btScalar _angle ) const
 {
 	// wAxis must be a unit lenght vector
 
+#if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
+	// Same formula as the scalar branch: o + x*cos + y*sin, with o = wAxis*(wAxis . this), x = this - o, y = wAxis x this
+    __m128 O = _mm_mul_ps(wAxis.mVec128, mVec128);
+	btScalar ssin = btSin( _angle );
+    __m128 C = wAxis.cross( mVec128 ).mVec128;
+	O = _mm_and_ps(O, btvFFF0fMask);
+    btScalar scos = btCos( _angle );
+	
+	__m128 vsin = _mm_load_ss(&ssin);	//	(S 0 0 0)
+    __m128 vcos = _mm_load_ss(&scos);	//	(S 0 0 0)
+	// Adding O to its two rotations (Y and Z) leaves the whole dot product in each of the first three lanes
+	__m128 Y = bt_pshufd_ps(O, 0xC9);	//	(Y Z X 0)
+	__m128 Z = bt_pshufd_ps(O, 0xD2);	//	(Z X Y 0)
+	O = _mm_add_ps(O, Y);
+	vsin = bt_pshufd_ps(vsin, 0x80);	//	(S S S 0)
+	O = _mm_add_ps(O, Z);
+    vcos = bt_pshufd_ps(vcos, 0x80);	//	(S S S 0)
+	// vsin becomes sin * (wAxis x this), O becomes wAxis * dot (the "o" term), X is this - o
+    vsin = vsin * C; 
+	O = O * wAxis.mVec128; 
+	__m128 X = mVec128 - O; 
+	// Accumulate o + sin*y + cos*x
+    O = O + vsin;
+	vcos = vcos * X;
+	O = O + vcos;	
+	
+	return btVector3(O);
+#else
 	btVector3 o = wAxis * wAxis.dot( *this );
-	btVector3 x = *this - o;
-	btVector3 y;
+	btVector3 _x = *this - o;
+	btVector3 _y;
 
-	y = wAxis.cross( *this );
+	_y = wAxis.cross( *this );
 
-	return ( o + x * btCos( angle ) + y * btSin( angle ) );
+	return ( o + _x * btCos( _angle ) + _y * btSin( _angle ) );
+#endif
 }
 
+/**@brief Find the entry of array whose dot product with this vector is largest; returns its index (-1 when array_count <= 0) and writes that dot product to dotOut */
+SIMD_FORCE_INLINE   long    btVector3::maxDot( const btVector3 *array, long array_count, btScalar &dotOut ) const
+{
+#if defined (BT_USE_SSE) || defined (BT_USE_NEON)
+    #if defined _WIN32 || defined (BT_USE_SSE)
+        const long scalar_cutoff = 10;
+        long _maxdot_large( const float *array, const float *vec, unsigned long array_count, float *dotOut );
+    #elif defined BT_USE_NEON
+        const long scalar_cutoff = 4;
+        extern long (*_maxdot_large)( const float *array, const float *vec, unsigned long array_count, float *dotOut );
+    #endif
+    // With SIMD enabled only arrays shorter than the cutoff use the scalar loop; without SIMD the loop always runs.
+    if( array_count < scalar_cutoff )
+#endif//BT_USE_SSE || BT_USE_NEON
+    {
+        btScalar maxDot = -SIMD_INFINITY;
+        // The indices are long, like array_count and the return type, so they cannot overflow before reaching the bound.
+        long ptIndex = -1;
+        for( long i = 0; i < array_count; i++ )
+        {
+            btScalar dot = array[i].dot(*this);
+            if( dot > maxDot )
+            {
+                maxDot = dot;
+                ptIndex = i;
+            }
+        }
+
+        dotOut = maxDot;
+        return ptIndex;
+    }
+#if defined (BT_USE_SSE) || defined (BT_USE_NEON)
+    // Longer arrays are handed to _maxdot_large, which is only declared (not defined) in this function.
+    return _maxdot_large( (float*) array, (float*) &m_floats[0], array_count, &dotOut );
+#endif
+}
+
+/**@brief Find the entry of array whose dot product with this vector is smallest; returns its index (-1 when array_count <= 0) and writes that dot product to dotOut */
+SIMD_FORCE_INLINE   long    btVector3::minDot( const btVector3 *array, long array_count, btScalar &dotOut ) const
+{
+#if defined (BT_USE_SSE) || defined (BT_USE_NEON)
+    #if defined BT_USE_SSE
+        const long scalar_cutoff = 10;
+        long _mindot_large( const float *array, const float *vec, unsigned long array_count, float *dotOut );
+    #elif defined BT_USE_NEON
+        const long scalar_cutoff = 4;
+        extern long (*_mindot_large)( const float *array, const float *vec, unsigned long array_count, float *dotOut );
+    #else
+        #error unhandled arch!
+    #endif
+    // With SIMD enabled only arrays shorter than the cutoff use the scalar loop; without SIMD the loop always runs.
+    if( array_count < scalar_cutoff )
+#endif//BT_USE_SSE || BT_USE_NEON
+    {
+        btScalar  minDot = SIMD_INFINITY;
+        // The indices are long, like array_count and the return type, so they cannot overflow before reaching the bound.
+        long ptIndex = -1;
+
+        for( long i = 0; i < array_count; i++ )
+        {
+            btScalar dot = array[i].dot(*this);
+            if( dot < minDot )
+            {
+                minDot = dot;
+                ptIndex = i;
+            }
+        }
+
+        dotOut = minDot;
+        return ptIndex;
+    }
+#if defined (BT_USE_SSE) || defined (BT_USE_NEON)
+    // Longer arrays are handed to _mindot_large, which is only declared (not defined) in this function.
+    return _mindot_large( (float*) array, (float*) &m_floats[0], array_count, &dotOut );
+#endif
+}
+
+
 class btVector4 : public btVector3
 {
 public:
@@ -466,24 +1071,47 @@ public:
 	SIMD_FORCE_INLINE btVector4() {}
 
 
-	SIMD_FORCE_INLINE btVector4(const btScalar& x, const btScalar& y, const btScalar& z,const btScalar& w) 
-		: btVector3(x,y,z)
+	SIMD_FORCE_INLINE btVector4(const btScalar& _x, const btScalar& _y, const btScalar& _z,const btScalar& _w) 
+		: btVector3(_x,_y,_z)
 	{
-		m_floats[3] = w;
+		m_floats[3] = _w;
 	}
 
+#if (defined (BT_USE_SSE_IN_API)&& defined (BT_USE_SSE)) || defined (BT_USE_NEON) 
+	SIMD_FORCE_INLINE btVector4(const btSimdFloat4 vec)	// takes vec as the whole SIMD member, unchanged
+	{
+		mVec128 = vec;
+	}
+	// Copies the whole SIMD member of rhs, so whatever its fourth component holds is carried over
+	SIMD_FORCE_INLINE btVector4(const btVector3& rhs)
+	{
+		mVec128 = rhs.mVec128;
+	}
+	// Assignment copies the SIMD member in one step instead of component by component
+	SIMD_FORCE_INLINE btVector4& 
+	operator=(const btVector4& v) 
+	{
+		mVec128 = v.mVec128;
+		return *this;
+	}
+#endif // #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)) || defined (BT_USE_NEON) 
 
 	SIMD_FORCE_INLINE btVector4 absolute4() const 
 	{
+#if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE) 
+		return btVector4(_mm_and_ps(mVec128, btvAbsfMask));
+#elif defined(BT_USE_NEON)
+		return btVector4(vabsq_f32(mVec128));
+#else	
 		return btVector4(
 			btFabs(m_floats[0]), 
 			btFabs(m_floats[1]), 
 			btFabs(m_floats[2]),
 			btFabs(m_floats[3]));
+#endif
 	}
 
 
-
 	btScalar	getW() const { return m_floats[3];}
 
 
@@ -511,12 +1139,8 @@ public:
 			maxIndex = 3;
 			maxVal = m_floats[3];
 		}
-		
-		
-		
 
 		return maxIndex;
-
 	}
 
 
@@ -546,7 +1170,6 @@ public:
 		}
 		
 		return minIndex;
-
 	}
 
 
@@ -578,17 +1201,15 @@ public:
    * @param z Value of z
    * @param w Value of w
    */
-		SIMD_FORCE_INLINE void	setValue(const btScalar& x, const btScalar& y, const btScalar& z,const btScalar& w)
+		SIMD_FORCE_INLINE void	setValue(const btScalar& _x, const btScalar& _y, const btScalar& _z,const btScalar& _w)
 		{
-			m_floats[0]=x;
-			m_floats[1]=y;
-			m_floats[2]=z;
-			m_floats[3]=w;
+			m_floats[0]=_x;
+			m_floats[1]=_y;
+			m_floats[2]=_z;
+			m_floats[3]=_w;
 		}
 
 
- 
-
 };
 
 
@@ -637,24 +1258,86 @@ SIMD_FORCE_INLINE void	btUnSwapVector3Endian(btVector3& vector)
 	vector = swappedVec;
 }
 
-SIMD_FORCE_INLINE void btPlaneSpace1 (const btVector3& n, btVector3& p, btVector3& q)
+template <class T>
+SIMD_FORCE_INLINE void btPlaneSpace1 (const T& n, T& p, T& q)
 {
-  if (btFabs(n.z()) > SIMDSQRT12) {
+  if (btFabs(n[2]) > SIMDSQRT12) {
     // choose p in y-z plane
     btScalar a = n[1]*n[1] + n[2]*n[2];
     btScalar k = btRecipSqrt (a);
-    p.setValue(0,-n[2]*k,n[1]*k);
+    p[0] = 0;
+	p[1] = -n[2]*k;
+	p[2] = n[1]*k;
     // set q = n x p
-    q.setValue(a*k,-n[0]*p[2],n[0]*p[1]);
+    q[0] = a*k;
+	q[1] = -n[0]*p[2];
+	q[2] = n[0]*p[1];
   }
   else {
     // choose p in x-y plane
-    btScalar a = n.x()*n.x() + n.y()*n.y();
+    btScalar a = n[0]*n[0] + n[1]*n[1];
     btScalar k = btRecipSqrt (a);
-    p.setValue(-n.y()*k,n.x()*k,0);
+    p[0] = -n[1]*k;
+	p[1] = n[0]*k;
+	p[2] = 0;
     // set q = n x p
-    q.setValue(-n.z()*p.y(),n.z()*p.x(),a*k);
+    q[0] = -n[2]*p[1];
+	q[1] = n[2]*p[0];
+	q[2] = a*k;
   }
 }
 
-#endif //SIMD__VECTOR3_H
+/// Four-float record of a btVector3; written by btVector3::serializeFloat and read back by btVector3::deSerializeFloat
+struct	btVector3FloatData
+{
+	float	m_floats[4];
+};
+
+/// Four-double record of a btVector3; written by btVector3::serializeDouble and read back by btVector3::deSerializeDouble
+struct	btVector3DoubleData
+{
+	double	m_floats[4];
+};
+
+/// Store all four components in dataOut, converting each to float (could also do a memcpy, check if it is worth it)
+SIMD_FORCE_INLINE	void	btVector3::serializeFloat(struct	btVector3FloatData& dataOut) const
+{
+	for (int lane = 0; lane < 4; ++lane)
+		dataOut.m_floats[lane] = static_cast<float>(m_floats[lane]);
+}
+
+/// Load all four components from dataIn, converting each float to btScalar
+SIMD_FORCE_INLINE void	btVector3::deSerializeFloat(const struct	btVector3FloatData& dataIn)
+{
+	for (int lane = 0; lane < 4; ++lane)
+		m_floats[lane] = static_cast<btScalar>(dataIn.m_floats[lane]);
+}
+
+/// Store all four components in dataOut, converting each to double (could also do a memcpy, check if it is worth it)
+SIMD_FORCE_INLINE	void	btVector3::serializeDouble(struct	btVector3DoubleData& dataOut) const
+{
+	for (int lane = 0; lane < 4; ++lane)
+		dataOut.m_floats[lane] = static_cast<double>(m_floats[lane]);
+}
+
+/// Load all four components from dataIn, converting each double to btScalar
+SIMD_FORCE_INLINE void	btVector3::deSerializeDouble(const struct	btVector3DoubleData& dataIn)
+{
+	for (int lane = 0; lane < 4; ++lane)
+		m_floats[lane] = static_cast<btScalar>(dataIn.m_floats[lane]);
+}
+
+/// Store all four components in dataOut without conversion (could also do a memcpy, check if it is worth it)
+SIMD_FORCE_INLINE	void	btVector3::serialize(struct	btVector3Data& dataOut) const
+{
+	for (int lane = 0; lane < 4; ++lane)
+		dataOut.m_floats[lane] = m_floats[lane];
+}
+
+SIMD_FORCE_INLINE void	btVector3::deSerialize(const struct	btVector3Data& dataIn)
+{
+	for (int lane = 0; lane < 4; ++lane)	// all four stored components are taken over without conversion
+		m_floats[lane] = dataIn.m_floats[lane];
+}
+
+#endif //BT_VECTOR3_H
diff --git a/Engine/lib/bullet/src/LinearMath/ibmsdk/Makefile b/Engine/lib/bullet/src/LinearMath/ibmsdk/Makefile
deleted file mode 100644
index 04148730f..000000000
--- a/Engine/lib/bullet/src/LinearMath/ibmsdk/Makefile
+++ /dev/null
@@ -1,39 +0,0 @@
-#### Source code Dirs
-VPATH =	../
-
-ROOT = ../../..
-
-#### Library
-LIBRARY_ppu = bulletmath.a
-
-#### Compiler flags
-CPPFLAGS        = 		\
--DUSE_LIBSPE2			\
--I$(ROOT)/src			\
--I$(SDKINC)
-
-#### Optimization level flags
-#CC_OPT_LEVEL =  $(CC_OPT_LEVEL_DEBUG)
-CC_OPT_LEVEL =  -O3
-
-##### Objects to be archived in lib
-
-OBJS = 					\
-btAlignedAllocator.o			\
-btGeometryUtil.o			\
-btQuickprof.o
-
-#### Install directories
-INSTALL_DIR	=  $(ROOT)/lib/ibmsdk
-INSTALL_FILES	= $(LIBRARY_ppu)
-
-IBM_CELLSDK_VERSION := $(shell if [ -d /opt/cell ]; then echo "3.0"; fi)
-
-ifeq ("$(IBM_CELLSDK_VERSION)","3.0")
-        CELL_TOP ?= /opt/cell/sdk
-        include $(CELL_TOP)/buildutils/make.footer
-else
-        CELL_TOP ?= /opt/ibm/cell-sdk/prototype
-        include $(CELL_TOP)/make.footer
-endif
-
diff --git a/Engine/lib/bullet/src/LinearMath/premake4.lua b/Engine/lib/bullet/src/LinearMath/premake4.lua
new file mode 100644
index 000000000..0f0a88a4e
--- /dev/null
+++ b/Engine/lib/bullet/src/LinearMath/premake4.lua
@@ -0,0 +1,11 @@
+	project "LinearMath"
+	-- Static library written to ../../lib, with the parent directory on the include path and all .cpp/.h files matched by "**" as sources
+	kind "StaticLib"
+	targetdir "../../lib"
+	includedirs {
+		"..",
+	}
+	files {
+		"**.cpp",
+		"**.h"
+	}
\ No newline at end of file
diff --git a/Engine/lib/bullet/src/Makefile.am b/Engine/lib/bullet/src/Makefile.am
index 6c246d176..ec39e7a05 100644
--- a/Engine/lib/bullet/src/Makefile.am
+++ b/Engine/lib/bullet/src/Makefile.am
@@ -37,10 +37,10 @@ nobase_bullet_include_HEADERS += \
 	BulletMultiThreaded/Win32ThreadSupport.h \
 	BulletMultiThreaded/SequentialThreadSupport.h
 
-lib_LTLIBRARIES	= libbulletmath.la libbulletcollision.la libbulletdynamics.la libbulletsoftbody.la libbulletmultithreaded.la
+lib_LTLIBRARIES	= libLinearMath.la libBulletCollision.la libBulletDynamics.la libBulletSoftBody.la libBulletMultiThreaded.la
 
-libbulletmultithreaded_la_CXXFLAGS = ${CXXFLAGS} -I./BulletMultiThreaded/vectormath/scalar/cpp
-libbulletmultithreaded_la_SOURCES =\
+libBulletMultiThreaded_la_CXXFLAGS = ${CXXFLAGS} -I./BulletMultiThreaded/vectormath/scalar/cpp
+libBulletMultiThreaded_la_SOURCES =\
 		BulletMultiThreaded/SpuCollisionObjectWrapper.cpp \
 		BulletMultiThreaded/SpuSampleTask/SpuSampleTask.cpp \
 		BulletMultiThreaded/SpuLibspe2Support.cpp \
@@ -85,21 +85,26 @@ libbulletmultithreaded_la_SOURCES =\
 		BulletMultiThreaded/SpuNarrowPhaseCollisionTask/Box.h
 
 else
-lib_LTLIBRARIES	= libbulletmath.la libbulletcollision.la libbulletdynamics.la libbulletsoftbody.la
+lib_LTLIBRARIES	= libLinearMath.la libBulletCollision.la libBulletDynamics.la libBulletSoftBody.la
 endif
 
 
-libbulletmath_la_SOURCES	= \
+libLinearMath_la_SOURCES	= \
 		LinearMath/btQuickprof.cpp \
 		LinearMath/btGeometryUtil.cpp \
 		LinearMath/btAlignedAllocator.cpp \
+		LinearMath/btSerializer.cpp \
 		LinearMath/btConvexHull.cpp \
+		LinearMath/btPolarDecomposition.cpp \
+		LinearMath/btVector3.cpp \
+		LinearMath/btConvexHullComputer.cpp \
 		LinearMath/btHashMap.h \
 		LinearMath/btConvexHull.h \
 		LinearMath/btAabbUtil2.h \
 		LinearMath/btGeometryUtil.h \
 		LinearMath/btQuadWord.h \
 		LinearMath/btPoolAllocator.h \
+		LinearMath/btPolarDecomposition.h \
 		LinearMath/btScalar.h \
 		LinearMath/btMinMax.h \
 		LinearMath/btVector3.h \
@@ -111,6 +116,7 @@ libbulletmath_la_SOURCES	= \
 		LinearMath/btQuaternion.h \
 		LinearMath/btAlignedObjectArray.h \
 		LinearMath/btQuickprof.h \
+		LinearMath/btSerializer.h \
 		LinearMath/btTransformUtil.h \
 		LinearMath/btTransform.h \
 		LinearMath/btDefaultMotionState.h \
@@ -118,7 +124,7 @@ libbulletmath_la_SOURCES	= \
 		LinearMath/btRandom.h
 
 
-libbulletcollision_la_SOURCES = \
+libBulletCollision_la_SOURCES = \
 		BulletCollision/NarrowPhaseCollision/btRaycastCallback.cpp \
 		BulletCollision/NarrowPhaseCollision/btMinkowskiPenetrationDepthSolver.cpp \
 		BulletCollision/NarrowPhaseCollision/btSubSimplexConvexCast.cpp \
@@ -126,6 +132,7 @@ libbulletcollision_la_SOURCES = \
 		BulletCollision/NarrowPhaseCollision/btGjkConvexCast.cpp \
 		BulletCollision/NarrowPhaseCollision/btPersistentManifold.cpp \
 		BulletCollision/NarrowPhaseCollision/btConvexCast.cpp \
+		BulletCollision/NarrowPhaseCollision/btPolyhedralContactClipping.cpp \
 		BulletCollision/NarrowPhaseCollision/btContinuousConvexCollision.cpp \
 		BulletCollision/NarrowPhaseCollision/btGjkPairDetector.cpp \
 		BulletCollision/NarrowPhaseCollision/btGjkEpa2.cpp \
@@ -143,11 +150,14 @@ libbulletcollision_la_SOURCES = \
 		BulletCollision/CollisionDispatch/btConvexPlaneCollisionAlgorithm.cpp \
 		BulletCollision/CollisionDispatch/btConvexConcaveCollisionAlgorithm.cpp \
 		BulletCollision/CollisionDispatch/btBoxBoxCollisionAlgorithm.cpp \
+		BulletCollision/CollisionDispatch/btBox2dBox2dCollisionAlgorithm.cpp \
 		BulletCollision/CollisionDispatch/SphereTriangleDetector.cpp \
+		BulletCollision/CollisionDispatch/btInternalEdgeUtility.cpp \
 		BulletCollision/CollisionDispatch/btManifoldResult.cpp \
 		BulletCollision/CollisionDispatch/btCollisionWorld.cpp \
 		BulletCollision/CollisionDispatch/btSphereTriangleCollisionAlgorithm.cpp \
 		BulletCollision/CollisionDispatch/btConvexConvexAlgorithm.cpp \
+		BulletCollision/CollisionDispatch/btConvex2dConvex2dAlgorithm.cpp \
 		BulletCollision/CollisionDispatch/btUnionFind.cpp \
 		BulletCollision/CollisionDispatch/btCompoundCollisionAlgorithm.cpp \
 		BulletCollision/CollisionShapes/btTetrahedronShape.cpp \
@@ -155,6 +165,7 @@ libbulletcollision_la_SOURCES = \
 		BulletCollision/CollisionShapes/btMinkowskiSumShape.cpp \
 		BulletCollision/CollisionShapes/btCompoundShape.cpp \
 		BulletCollision/CollisionShapes/btConeShape.cpp \
+		BulletCollision/CollisionShapes/btConvexPolyhedron.cpp \
 		BulletCollision/CollisionShapes/btMultiSphereShape.cpp \
 		BulletCollision/CollisionShapes/btUniformScalingShape.cpp \
 		BulletCollision/CollisionShapes/btSphereShape.cpp \
@@ -167,6 +178,7 @@ libbulletcollision_la_SOURCES = \
 		BulletCollision/CollisionShapes/btEmptyShape.cpp \
 		BulletCollision/CollisionShapes/btCollisionShape.cpp \
 		BulletCollision/CollisionShapes/btConvexShape.cpp \
+		BulletCollision/CollisionShapes/btConvex2dShape.cpp \
 		BulletCollision/CollisionShapes/btConvexInternalShape.cpp \
 		BulletCollision/CollisionShapes/btConvexHullShape.cpp \
 		BulletCollision/CollisionShapes/btTriangleCallback.cpp \
@@ -175,6 +187,7 @@ libbulletcollision_la_SOURCES = \
 		BulletCollision/CollisionShapes/btConcaveShape.cpp \
 		BulletCollision/CollisionShapes/btConvexPointCloudShape.cpp \
 		BulletCollision/CollisionShapes/btBoxShape.cpp \
+		BulletCollision/CollisionShapes/btBox2dShape.cpp \
 		BulletCollision/CollisionShapes/btOptimizedBvh.cpp \
 		BulletCollision/CollisionShapes/btHeightfieldTerrainShape.cpp \
 		BulletCollision/CollisionShapes/btMultimaterialTriangleMeshShape.cpp \
@@ -210,13 +223,16 @@ libbulletcollision_la_SOURCES = \
 		BulletCollision/NarrowPhaseCollision/btManifoldPoint.h \
 		BulletCollision/NarrowPhaseCollision/btDiscreteCollisionDetectorInterface.h \
 		BulletCollision/CollisionDispatch/btCollisionObject.h \
+        BulletCollision/CollisionDispatch/btCollisionObjectWrapper.h \
 		BulletCollision/CollisionDispatch/btGhostObject.h \
 		BulletCollision/CollisionDispatch/btSphereTriangleCollisionAlgorithm.h \
 		BulletCollision/CollisionDispatch/btBoxBoxCollisionAlgorithm.h \
+		BulletCollision/CollisionDispatch/btBox2dBox2dCollisionAlgorithm.h \
 		BulletCollision/CollisionDispatch/btConvexPlaneCollisionAlgorithm.h \
 		BulletCollision/CollisionDispatch/btEmptyCollisionAlgorithm.h \
 		BulletCollision/CollisionDispatch/btCollisionCreateFunc.h \
 		BulletCollision/CollisionDispatch/btConvexConvexAlgorithm.h \
+		BulletCollision/CollisionDispatch/btConvex2dConvex2dAlgorithm.h \
 		BulletCollision/CollisionDispatch/btBoxBoxDetector.h \
 		BulletCollision/CollisionDispatch/btCollisionDispatcher.h \
 		BulletCollision/CollisionDispatch/SphereTriangleDetector.h \
@@ -226,16 +242,19 @@ libbulletcollision_la_SOURCES = \
 		BulletCollision/CollisionDispatch/btSimulationIslandManager.h \
 		BulletCollision/CollisionDispatch/btDefaultCollisionConfiguration.h \
 		BulletCollision/CollisionDispatch/btCollisionWorld.h \
+		BulletCollision/CollisionDispatch/btInternalEdgeUtility.h \
 		BulletCollision/CollisionDispatch/btManifoldResult.h \
 		BulletCollision/CollisionDispatch/btSphereSphereCollisionAlgorithm.h \
 		BulletCollision/CollisionDispatch/btSphereBoxCollisionAlgorithm.h \
 		BulletCollision/CollisionDispatch/btCollisionConfiguration.h \
 		BulletCollision/CollisionShapes/btConvexShape.h \
+		BulletCollision/CollisionShapes/btConvex2dShape.h \
 		BulletCollision/CollisionShapes/btTriangleCallback.h \
 		BulletCollision/CollisionShapes/btPolyhedralConvexShape.h \
 		BulletCollision/CollisionShapes/btMultimaterialTriangleMeshShape.h \
 		BulletCollision/CollisionShapes/btCompoundShape.h \
 		BulletCollision/CollisionShapes/btBoxShape.h \
+		BulletCollision/CollisionShapes/btBox2dShape.h \
 		BulletCollision/CollisionShapes/btMultiSphereShape.h \
 		BulletCollision/CollisionShapes/btCollisionMargin.h \
 		BulletCollision/CollisionShapes/btConcaveShape.h \
@@ -245,6 +264,7 @@ libbulletcollision_la_SOURCES = \
 		BulletCollision/CollisionShapes/btScaledBvhTriangleMeshShape.h \
 		BulletCollision/CollisionShapes/btMaterial.h \
 		BulletCollision/CollisionShapes/btTriangleIndexVertexArray.h \
+		BulletCollision/CollisionShapes/btTriangleInfoMap.h \
 		BulletCollision/CollisionShapes/btSphereShape.h \
 		BulletCollision/CollisionShapes/btConvexPointCloudShape.h \
 		BulletCollision/CollisionShapes/btCapsuleShape.h \
@@ -288,12 +308,12 @@ libbulletcollision_la_SOURCES = \
                 BulletCollision/Gimpact/gim_memory.cpp\
                 BulletCollision/Gimpact/gim_tri_collision.cpp
 
-libbulletdynamics_la_SOURCES = \
-		BulletDynamics/Dynamics/btContinuousDynamicsWorld.cpp \
+libBulletDynamics_la_SOURCES = \
 		BulletDynamics/Dynamics/btRigidBody.cpp \
 		BulletDynamics/Dynamics/btSimpleDynamicsWorld.cpp \
 		BulletDynamics/Dynamics/Bullet-C-API.cpp \
 		BulletDynamics/Dynamics/btDiscreteDynamicsWorld.cpp \
+		BulletDynamics/ConstraintSolver/btGearConstraint.cpp \
 		BulletDynamics/ConstraintSolver/btGeneric6DofConstraint.cpp \
 		BulletDynamics/ConstraintSolver/btGeneric6DofSpringConstraint.cpp \
 		BulletDynamics/ConstraintSolver/btSolve2LinearConstraint.cpp \
@@ -310,8 +330,8 @@ libbulletdynamics_la_SOURCES = \
 		BulletDynamics/Vehicle/btRaycastVehicle.cpp \
 		BulletDynamics/Character/btKinematicCharacterController.cpp \
 		BulletDynamics/Character/btKinematicCharacterController.h \
-		BulletDynamics/Character/btCharacterControllerInterface.h \		
-		BulletDynamics/Dynamics/btContinuousDynamicsWorld.h \
+		BulletDynamics/Character/btCharacterControllerInterface.h \
+		BulletDynamics/Dynamics/btActionInterface.h \
 		BulletDynamics/Dynamics/btSimpleDynamicsWorld.h \
 		BulletDynamics/Dynamics/btRigidBody.h \
 		BulletDynamics/Dynamics/btDiscreteDynamicsWorld.h \
@@ -326,6 +346,7 @@ libbulletdynamics_la_SOURCES = \
 		BulletDynamics/ConstraintSolver/btJacobianEntry.h \
 		BulletDynamics/ConstraintSolver/btSolverConstraint.h \
 		BulletDynamics/ConstraintSolver/btSequentialImpulseConstraintSolver.h \
+		BulletDynamics/ConstraintSolver/btGearConstraint.h \
 		BulletDynamics/ConstraintSolver/btGeneric6DofConstraint.h \
 		BulletDynamics/ConstraintSolver/btGeneric6DofSpringConstraint.h \
 		BulletDynamics/ConstraintSolver/btSliderConstraint.h \
@@ -337,7 +358,8 @@ libbulletdynamics_la_SOURCES = \
 		BulletDynamics/Vehicle/btRaycastVehicle.h \
 		BulletDynamics/Vehicle/btWheelInfo.h
 
-libbulletsoftbody_la_SOURCES = \
+libBulletSoftBody_la_SOURCES = \
+		BulletSoftBody/btDefaultSoftBodySolver.cpp \
 		BulletSoftBody/btSoftBodyRigidBodyCollisionConfiguration.cpp \
 		BulletSoftBody/btSoftBody.cpp \
 		BulletSoftBody/btSoftRigidCollisionAlgorithm.cpp \
@@ -370,7 +392,7 @@ nobase_bullet_include_HEADERS += \
 	BulletDynamics/Vehicle/btRaycastVehicle.h \
 	BulletDynamics/Vehicle/btWheelInfo.h \
 	BulletDynamics/Vehicle/btVehicleRaycaster.h \
-	BulletDynamics/Dynamics/btContinuousDynamicsWorld.h \
+	BulletDynamics/Dynamics/btActionInterface.h \
 	BulletDynamics/Dynamics/btRigidBody.h \
 	BulletDynamics/Dynamics/btDynamicsWorld.h \
 	BulletDynamics/Dynamics/btSimpleDynamicsWorld.h \
@@ -383,6 +405,7 @@ nobase_bullet_include_HEADERS += \
 	BulletDynamics/ConstraintSolver/btConstraintSolver.h \
 	BulletDynamics/ConstraintSolver/btContactConstraint.h \
 	BulletDynamics/ConstraintSolver/btContactSolverInfo.h \
+	BulletDynamics/ConstraintSolver/btGearConstraint.h \
 	BulletDynamics/ConstraintSolver/btGeneric6DofConstraint.h \
 	BulletDynamics/ConstraintSolver/btGeneric6DofSpringConstraint.h \
 	BulletDynamics/ConstraintSolver/btJacobianEntry.h \
@@ -418,13 +441,16 @@ nobase_bullet_include_HEADERS += \
 	BulletCollision/CollisionShapes/btTriangleCallback.h \
 	BulletCollision/CollisionShapes/btTriangleIndexVertexMaterialArray.h \
 	BulletCollision/CollisionShapes/btTriangleIndexVertexArray.h \
+	BulletCollision/CollisionShapes/btTriangleInfoMap.h \
 	BulletCollision/CollisionShapes/btTriangleBuffer.h \
 	BulletCollision/CollisionShapes/btConvexShape.h \
+	BulletCollision/CollisionShapes/btConvex2dShape.h \
 	BulletCollision/CollisionShapes/btStaticPlaneShape.h \
 	BulletCollision/CollisionShapes/btConeShape.h \
 	BulletCollision/CollisionShapes/btCollisionShape.h \
 	BulletCollision/CollisionShapes/btTriangleShape.h \
 	BulletCollision/CollisionShapes/btBoxShape.h \
+	BulletCollision/CollisionShapes/btBox2dShape.h \
 	BulletCollision/CollisionShapes/btMinkowskiSumShape.h \
 	BulletCollision/CollisionShapes/btTriangleMeshShape.h \
 	BulletCollision/CollisionShapes/btMaterial.h \
@@ -467,9 +493,12 @@ nobase_bullet_include_HEADERS += \
 	BulletCollision/CollisionDispatch/btCollisionCreateFunc.h \
 	BulletCollision/CollisionDispatch/btSphereTriangleCollisionAlgorithm.h \
 	BulletCollision/CollisionDispatch/btConvexConvexAlgorithm.h \
+	BulletCollision/CollisionDispatch/btConvex2dConvex2dAlgorithm.h \
 	BulletCollision/CollisionDispatch/btCollisionObject.h \
+    BulletCollision/CollisionDispatch/btCollisionObjectWrapper.h \
 	BulletCollision/CollisionDispatch/btConvexPlaneCollisionAlgorithm.h \
 	BulletCollision/CollisionDispatch/btBoxBoxCollisionAlgorithm.h \
+	BulletCollision/CollisionDispatch/btBox2dBox2dCollisionAlgorithm.h \
 	BulletCollision/CollisionDispatch/btDefaultCollisionConfiguration.h \
 	BulletCollision/CollisionDispatch/btCompoundCollisionAlgorithm.h \
 	BulletCollision/CollisionDispatch/btSphereBoxCollisionAlgorithm.h \
@@ -479,6 +508,7 @@ nobase_bullet_include_HEADERS += \
 	BulletCollision/CollisionDispatch/btConvexConcaveCollisionAlgorithm.h \
 	BulletCollision/CollisionDispatch/btBoxBoxDetector.h \
 	BulletCollision/CollisionDispatch/btSphereSphereCollisionAlgorithm.h \
+	BulletCollision/CollisionDispatch/btInternalEdgeUtility.h \
 	BulletCollision/CollisionDispatch/btManifoldResult.h \
 	BulletCollision/Gimpact/gim_memory.h \
 	BulletCollision/Gimpact/gim_clip_polygon.h \
@@ -513,6 +543,7 @@ nobase_bullet_include_HEADERS += \
 	LinearMath/btMatrix3x3.h \
 	LinearMath/btVector3.h \
 	LinearMath/btPoolAllocator.h \
+	LinearMath/btPolarDecomposition.h \
 	LinearMath/btScalar.h \
 	LinearMath/btDefaultMotionState.h \
 	LinearMath/btTransform.h \
@@ -528,4 +559,5 @@ nobase_bullet_include_HEADERS += \
 	LinearMath/btStackAlloc.h \
 	LinearMath/btAlignedObjectArray.h \
 	LinearMath/btHashMap.h \
-	LinearMath/btQuickprof.h
+	LinearMath/btQuickprof.h\
+	LinearMath/btSerializer.h
diff --git a/Engine/lib/bullet/src/MiniCL/CMakeLists.txt b/Engine/lib/bullet/src/MiniCL/CMakeLists.txt
new file mode 100644
index 000000000..ed47db0cb
--- /dev/null
+++ b/Engine/lib/bullet/src/MiniCL/CMakeLists.txt
@@ -0,0 +1,66 @@
+#MiniCL provides a small subset of OpenCL; this file defines the MiniCL library target and its install rules
+
+INCLUDE_DIRECTORIES(
+	${BULLET_PHYSICS_SOURCE_DIR}/src
+	${VECTOR_MATH_INCLUDE}
+)
+# Translation units compiled into the library
+SET(MiniCL_SRCS
+	MiniCL.cpp
+	MiniCLTaskScheduler.cpp
+	MiniCLTask/MiniCLTask.cpp
+)
+# Headers that sit next to this file; they become the framework's PUBLIC_HEADER set further down
+SET(Root_HDRS
+	MiniCLTaskScheduler.h
+	cl.h
+	cl_gl.h
+	cl_platform.h
+	cl_MiniCL_Defs.h
+)
+# Header from the MiniCLTask sub-directory; it gets its own framework location further down
+SET(MiniCLTask_HDRS
+	MiniCLTask/MiniCLTask.h
+)
+# Every header, listed on the target together with the sources
+SET(MiniCL_HDRS
+	${Root_HDRS}
+	${MiniCLTask_HDRS}
+)
+
+ADD_LIBRARY(MiniCL ${MiniCL_SRCS} ${MiniCL_HDRS} )
+SET_TARGET_PROPERTIES(MiniCL PROPERTIES VERSION ${BULLET_VERSION})
+SET_TARGET_PROPERTIES(MiniCL PROPERTIES SOVERSION ${BULLET_VERSION})
+
+# The Bullet libraries are linked explicitly only when building shared libraries
+IF (BUILD_SHARED_LIBS)
+	TARGET_LINK_LIBRARIES(MiniCL BulletMultiThreaded BulletDynamics BulletCollision)
+ENDIF (BUILD_SHARED_LIBS)
+
+IF (INSTALL_LIBS)
+	IF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES)
+		#INSTALL of other files requires CMake 2.6 (NOTE(review): GREATER compares "major.minor" as a real number, not as a version)
+		IF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5)
+#			IF(INSTALL_EXTRA_LIBS)
+				IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
+					INSTALL(TARGETS MiniCL DESTINATION .)
+				ELSE (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
+				INSTALL(TARGETS MiniCL DESTINATION lib${LIB_SUFFIX})
+					INSTALL(DIRECTORY
+${CMAKE_CURRENT_SOURCE_DIR} DESTINATION ${INCLUDE_INSTALL_DIR} FILES_MATCHING
+PATTERN "*.h"  PATTERN ".svn" EXCLUDE PATTERN "CMakeFiles" EXCLUDE)
+				ENDIF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
+#			ENDIF (INSTALL_EXTRA_LIBS)
+		ENDIF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5)
+
+		IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
+			SET_TARGET_PROPERTIES(MiniCL PROPERTIES FRAMEWORK true)
+		
+			SET_TARGET_PROPERTIES(MiniCL PROPERTIES PUBLIC_HEADER "${Root_HDRS}")
+			# Have to list out sub-directories manually:
+			SET_PROPERTY(SOURCE ${MiniCLTask_HDRS} PROPERTY MACOSX_PACKAGE_LOCATION Headers/MiniCLTask)
+		
+		ENDIF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK)
+	ENDIF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES)
+ENDIF (INSTALL_LIBS)
+
diff --git a/Engine/lib/bullet/src/MiniCL/MiniCL.cpp b/Engine/lib/bullet/src/MiniCL/MiniCL.cpp
new file mode 100644
index 000000000..ba0865aa7
--- /dev/null
+++ b/Engine/lib/bullet/src/MiniCL/MiniCL.cpp
@@ -0,0 +1,788 @@
+/*
+   Copyright (C) 2010 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+
+*/
+
+
+#include "MiniCL/cl.h"
+#define __PHYSICS_COMMON_H__ 1
+#ifdef _WIN32
+#include "BulletMultiThreaded/Win32ThreadSupport.h"
+#endif
+
+#include "BulletMultiThreaded/PlatformDefinitions.h"
+#ifdef USE_PTHREADS
+#include "BulletMultiThreaded/PosixThreadSupport.h"
+#endif
+
+
+#include "BulletMultiThreaded/SequentialThreadSupport.h"
+#include "MiniCLTaskScheduler.h"
+#include "MiniCLTask/MiniCLTask.h"
+#include "LinearMath/btMinMax.h"
+#include <stdio.h>
+#include <stddef.h>
+
+//#define DEBUG_MINICL_KERNELS 1
+
+static const char* spPlatformID = "MiniCL, SCEA";
+static const char* spDriverVersion= "1.0";
+
+CL_API_ENTRY cl_int CL_API_CALL clGetPlatformIDs(
+	cl_uint           num_entries,
+    cl_platform_id *  platforms,
+    cl_uint *         num_platforms ) CL_API_SUFFIX__VERSION_1_0
+{
+	if(platforms != NULL)
+	{
+		if(num_entries <= 0)
+		{
+			return CL_INVALID_VALUE; 
+		}
+		*((const char**)platforms) = spPlatformID;
+	}
+	if(num_platforms != NULL)
+	{
+		*num_platforms = 1;
+	}
+	return CL_SUCCESS;
+}
+
+
+CL_API_ENTRY cl_int CL_API_CALL clGetPlatformInfo(
+	cl_platform_id   platform, 
+	cl_platform_info param_name,
+	size_t           param_value_size, 
+	void *           param_value,
+	size_t *         param_value_size_ret) CL_API_SUFFIX__VERSION_1_0
+{
+	// Reports a string property of the single MiniCL platform.
+	//   platform             handle whose text must compare equal to spPlatformID
+	//   param_name           CL_PLATFORM_VERSION, CL_PLATFORM_NAME or CL_PLATFORM_VENDOR
+	//   param_value_size     capacity of param_value in bytes
+	//   param_value          destination buffer; may be NULL to query only the required size
+	//   param_value_size_ret optional; receives the string length including the terminator
+	// Returns CL_SUCCESS, CL_INVALID_PLATFORM for a NULL or foreign handle, or CL_INVALID_VALUE
+	// for an unsupported param_name or a destination buffer that is too small.
+	const char* pId = (const char*)platform;
+	if(pId == NULL || strcmp(pId, spPlatformID))
+	{
+		return CL_INVALID_PLATFORM; 
+	}
+
+	const char* text = NULL;
+	switch(param_name)
+	{
+	case CL_PLATFORM_VERSION:
+		text = spDriverVersion;
+		break;
+	case CL_PLATFORM_NAME:
+	case CL_PLATFORM_VENDOR:
+		text = spPlatformID;	// name and vendor share one identification string
+		break;
+	default:
+		return CL_INVALID_VALUE; 
+	}
+
+	const size_t needed = strlen(text) + 1;
+	if(param_value != NULL)
+	{	// a NULL destination is a pure size query, so only validate and copy when a buffer is given
+		if(param_value_size < needed) return CL_INVALID_VALUE; 
+		strcpy((char*)param_value, text);
+	}
+	if(param_value_size_ret != NULL) *param_value_size_ret = needed;
+	return CL_SUCCESS;
+}
+
+
+
+
+CL_API_ENTRY cl_int CL_API_CALL clGetDeviceInfo(
+	cl_device_id            device ,
+	cl_device_info          param_name ,
+	size_t                  param_value_size ,
+	void *                  param_value ,
+	size_t *                param_value_size_ret) CL_API_SUFFIX__VERSION_1_0
+{
+	// Writes the requested property of the emulated "MiniCL CPU" device into param_value; the device handle is not consulted.
+	switch (param_name)
+	{
+	case CL_DEVICE_NAME:
+		{
+			char deviceName[] = "MiniCL CPU";
+			unsigned int nameLen = (unsigned int)strlen(deviceName)+1;	// bytes needed, terminator included
+			btAssert(param_value_size>strlen(deviceName));
+			if (nameLen <= param_value_size)
+			{	// "<=": a buffer of exactly nameLen bytes is large enough, matching the assert above
+				const char* cpuName = "MiniCL CPU";
+				sprintf((char*)param_value,"%s",cpuName);
+			} else
+			{
+				printf("error: param_value_size should be at least %u, but it is %zu\n",nameLen,param_value_size);
+				return CL_INVALID_VALUE; 
+			}
+			break;
+		}
+	case CL_DEVICE_TYPE:
+		{
+			if (param_value_size>=sizeof(cl_device_type))
+			{
+				cl_device_type* deviceType = (cl_device_type*)param_value;
+				*deviceType = CL_DEVICE_TYPE_CPU;
+			} else
+			{
+				printf("error: param_value_size should be at least %zu\n",sizeof(cl_device_type));
+				return CL_INVALID_VALUE; 
+			}
+			break;
+		}
+	case CL_DEVICE_MAX_COMPUTE_UNITS:
+		{
+			if (param_value_size>=sizeof(cl_uint))
+			{
+				cl_uint* numUnits = (cl_uint*)param_value;
+				*numUnits= 4;
+			} else
+			{
+				printf("error: param_value_size should be at least %zu\n",sizeof(cl_uint));
+				return CL_INVALID_VALUE; 
+			}
+
+			break;
+		}
+	case CL_DEVICE_MAX_WORK_ITEM_SIZES:
+		{
+			size_t workitem_size[3];
+
+			if (param_value_size>=sizeof(workitem_size))
+			{
+				size_t* workItemSize = (size_t*)param_value;
+				workItemSize[0] = 64;
+				workItemSize[1] = 24;
+				workItemSize[2] = 16;
+			} else
+			{	// report the size that was actually required (three size_t values)
+				printf("error: param_value_size should be at least %zu\n",sizeof(workitem_size));
+				return CL_INVALID_VALUE; 
+			}
+			break;
+		}
+	case CL_DEVICE_MAX_CLOCK_FREQUENCY:
+		{
+			 cl_uint* clock_frequency = (cl_uint*)param_value;
+			 *clock_frequency = 3*1024;
+			break;
+		}
+
+	case CL_DEVICE_VENDOR	:
+		{
+			if(param_value_size < (strlen(spPlatformID) + 1))
+			{
+				return CL_INVALID_VALUE; 
+			}
+			strcpy((char*)param_value, spPlatformID);
+			if(param_value_size_ret != NULL)
+			{
+				*param_value_size_ret = strlen(spPlatformID) + 1;
+			}
+			break;
+		}
+	case CL_DRIVER_VERSION:
+		{
+			if(param_value_size < (strlen(spDriverVersion) + 1))
+			{
+				return CL_INVALID_VALUE; 
+			}
+			strcpy((char*)param_value, spDriverVersion);
+			if(param_value_size_ret != NULL)
+			{
+				*param_value_size_ret = strlen(spDriverVersion) + 1;
+			}
+
+			break;
+		}
+	case CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS:
+		{
+			 cl_uint* maxDimensions = (cl_uint*)param_value;
+			 *maxDimensions = 1;
+			 break;
+		}
+		case CL_DEVICE_MAX_WORK_GROUP_SIZE:
+		{
+			 cl_uint* maxWorkGroupSize = (cl_uint*)param_value;
+			 *maxWorkGroupSize = 128;//1;
+			 break;
+		}
+		case CL_DEVICE_ADDRESS_BITS:
+		{
+			 cl_uint* addressBits = (cl_uint*)param_value;
+			 *addressBits= 32; //@todo: should this be 64 for 64bit builds?
+			 break;
+		}
+		case CL_DEVICE_MAX_MEM_ALLOC_SIZE:
+			{
+				cl_ulong* maxMemAlloc = (cl_ulong*)param_value;
+				*maxMemAlloc= 512*1024*1024; //this "should be enough for everyone" ?
+			 break;
+			}
+		case CL_DEVICE_GLOBAL_MEM_SIZE:
+			{
+				cl_ulong* maxMemAlloc = (cl_ulong*)param_value;
+				*maxMemAlloc= 1024*1024*1024; //this "should be enough for everyone" ?
+			 break;
+			}
+
+		case CL_DEVICE_ERROR_CORRECTION_SUPPORT:
+			{
+			cl_bool* error_correction_support = (cl_bool*)param_value;
+			*error_correction_support = CL_FALSE;
+			break;
+			}
+
+		case CL_DEVICE_LOCAL_MEM_TYPE:
+			{
+			cl_device_local_mem_type* local_mem_type = (cl_device_local_mem_type*)param_value;
+			*local_mem_type = CL_GLOBAL;
+			break;
+			}
+		case CL_DEVICE_LOCAL_MEM_SIZE:
+			{
+				cl_ulong* localmem = (cl_ulong*) param_value;
+				*localmem = 32*1024;
+				break;
+			}
+
+		case CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE:
+			{
+				cl_ulong* localmem = (cl_ulong*) param_value;
+				*localmem = 64*1024;
+				break;
+			}
+		case CL_DEVICE_QUEUE_PROPERTIES:
+			{
+				cl_command_queue_properties* queueProp = (cl_command_queue_properties*) param_value;
+				memset(queueProp,0,param_value_size);
+
+				break;
+			}
+		case CL_DEVICE_IMAGE_SUPPORT:
+			{
+				cl_bool* imageSupport = (cl_bool*) param_value;
+				*imageSupport = CL_FALSE;
+				break;
+			}
+
+		case CL_DEVICE_MAX_WRITE_IMAGE_ARGS:
+		case CL_DEVICE_MAX_READ_IMAGE_ARGS:
+			{
+				cl_uint* imageArgs = (cl_uint*) param_value;
+				*imageArgs = 0;
+				break;
+			}
+		case CL_DEVICE_IMAGE3D_MAX_DEPTH:
+		case CL_DEVICE_IMAGE3D_MAX_HEIGHT:
+		case CL_DEVICE_IMAGE2D_MAX_HEIGHT:
+		case CL_DEVICE_IMAGE3D_MAX_WIDTH:
+		case CL_DEVICE_IMAGE2D_MAX_WIDTH:
+			{
+				size_t* maxSize = (size_t*) param_value;
+				*maxSize = 0;
+				break;
+			}
+
+		case CL_DEVICE_EXTENSIONS:
+			{
+				char* extensions = (char*) param_value;
+				*extensions = 0;
+				break;
+			}
+
+		case CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE:
+		case CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT:
+		case CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG:
+		case CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT:
+		case CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT:
+		case CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR:
+			{
+				cl_uint* width  = (cl_uint*) param_value;
+				*width = 1;
+				break;
+			}
+			
+	default:
+		{
+			printf("error: unsupported param_name:%d\n",param_name);
+		}
+	}
+	// NOTE(review): many cases store through param_value without checking param_value_size or for NULL,
+	// only the vendor/driver-version cases fill param_value_size_ret, and unsupported queries still return 0.
+	return 0;
+}
+
+CL_API_ENTRY cl_int CL_API_CALL clReleaseMemObject(cl_mem /* memobj */) CL_API_SUFFIX__VERSION_1_0
+{
+	return 0;
+}
+
+
+
+CL_API_ENTRY cl_int CL_API_CALL clReleaseCommandQueue(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0
+{
+	return 0;
+}
+
+CL_API_ENTRY cl_int CL_API_CALL clReleaseProgram(cl_program /* program */) CL_API_SUFFIX__VERSION_1_0
+{
+	return 0;
+}
+
+CL_API_ENTRY cl_int CL_API_CALL clReleaseKernel(cl_kernel   /* kernel */) CL_API_SUFFIX__VERSION_1_0
+{
+	return 0;
+}
+
+
+// Enqueued Commands APIs
+CL_API_ENTRY cl_int CL_API_CALL clEnqueueReadBuffer(cl_command_queue     command_queue ,
+                    cl_mem               buffer ,
+                    cl_bool             /* blocking_read */,
+                    size_t               offset ,
+                    size_t               cb , 
+                    void *               ptr ,
+                    cl_uint             /* num_events_in_wait_list */,
+                    const cl_event *    /* event_wait_list */,
+                    cl_event *          /* event */) CL_API_SUFFIX__VERSION_1_0
+{
+	MiniCLTaskScheduler* scheduler = (MiniCLTaskScheduler*) command_queue;
+
+	///wait for all work items to be completed
+	scheduler->flush();
+
+	memcpy(ptr,(char*)buffer + offset,cb);
+	return 0;
+}
+
+
+CL_API_ENTRY cl_int clGetProgramBuildInfo(cl_program            /* program */,
+                      cl_device_id          /* device */,
+                      cl_program_build_info /* param_name */,
+                      size_t                /* param_value_size */,
+                      void *                /* param_value */,
+                      size_t *              /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0
+{
+
+	return 0;
+}
+
+
+// Program Object APIs
+CL_API_ENTRY cl_program
+clCreateProgramWithSource(cl_context         context ,
+                          cl_uint           /* count */,
+                          const char **     /* strings */,
+                          const size_t *    /* lengths */,
+                          cl_int *          errcode_ret ) CL_API_SUFFIX__VERSION_1_0
+{	// Nothing is compiled here: the source arguments are ignored and the context handle is returned as the program.
+	if (errcode_ret != NULL) *errcode_ret = CL_SUCCESS;	// errcode_ret is optional, so never dereference it blindly
+	return (cl_program)context;
+}
+
+CL_API_ENTRY cl_int CL_API_CALL clEnqueueWriteBuffer(cl_command_queue     command_queue ,
+                    cl_mem               buffer ,
+                    cl_bool             /* blocking_read */,
+                    size_t              offset,
+                    size_t               cb , 
+                    const void *         ptr ,
+                    cl_uint             /* num_events_in_wait_list */,
+                    const cl_event *    /* event_wait_list */,
+                    cl_event *          /* event */) CL_API_SUFFIX__VERSION_1_0
+{
+	MiniCLTaskScheduler* scheduler = (MiniCLTaskScheduler*) command_queue;
+
+	///wait for all work items to be completed
+	scheduler->flush();
+
+	memcpy((char*)buffer + offset, ptr,cb);
+	return 0;
+}
+
+CL_API_ENTRY cl_int CL_API_CALL clFlush(cl_command_queue  command_queue)
+{
+	MiniCLTaskScheduler* scheduler = (MiniCLTaskScheduler*) command_queue;
+	///wait for all work items to be completed
+	scheduler->flush();
+	return 0;
+}
+
+
+CL_API_ENTRY cl_int CL_API_CALL clEnqueueNDRangeKernel(cl_command_queue /* command_queue */,
+                       cl_kernel         clKernel ,
+                       cl_uint           work_dim ,
+                       const size_t *   /* global_work_offset */,
+                       const size_t *    global_work_size ,
+                       const size_t *   /* local_work_size */,
+                       cl_uint          /* num_events_in_wait_list */,
+                       const cl_event * /* event_wait_list */,
+                       cl_event *       /* event */) CL_API_SUFFIX__VERSION_1_0
+{
+	// Cuts each dimension's global work size into consecutive index ranges and passes every range
+	// to the kernel's scheduler through issueTask(); no flush is issued here. Always returns 0.
+	MiniCLKernel* kernel = (MiniCLKernel*) clKernel;
+	for (unsigned int ii=0;ii<work_dim;ii++)
+	{
+		int maxTask = kernel->m_scheduler->getMaxNumOutstandingTasks();
+		int numWorkItems = global_work_size[ii];
+
+		// Items per task = work items divided by the scheduler's task count, but never less than one
+		// (an alternative floor of 64 items per task, btMax(64,numWorkItems / maxTask), is left disabled).
+		int numWorkItemsPerTask = numWorkItems / maxTask;
+		if (!numWorkItemsPerTask) numWorkItemsPerTask = 1;
+
+		for (int t=0;t<numWorkItems;)
+		{
+			//Performance Hint: tweak this number during benchmarking
+			int endIndex = (t+numWorkItemsPerTask) < numWorkItems ? t+numWorkItemsPerTask : numWorkItems;
+			kernel->m_scheduler->issueTask(t, endIndex, kernel);
+			t = endIndex;
+		}
+	}
+/*
+	Notes:
+	 - only clKernel, work_dim and global_work_size are used; the command queue, offsets,
+	   local work size and event arguments are ignored.
+	 - every dimension is scheduled on its own as a 1-D range; the final range of a
+	   dimension is clipped to numWorkItems.
+	 - NOTE(review): assumes getMaxNumOutstandingTasks() never returns 0 (division above) - confirm.
+	*/
+
+	return 0;
+}
+
+#define LOCAL_BUF_SIZE 32768	// arena capacity, counted in 16-byte units
+static int sLocalMemBuf[LOCAL_BUF_SIZE * 4 + 16];	// LOCAL_BUF_SIZE*4 ints of payload plus 16 ints of slack for alignment
+static int* spLocalBufCurr = NULL;
+static int sLocalBufUsed = LOCAL_BUF_SIZE; // so it will be reset at the first call
+static void* localBufMalloc(int size)
+{	// Hands out 16-byte aligned scratch memory from sLocalMemBuf; space is reclaimed only by wrapping to the start.
+	int size16 = (size + 15) >> 4; // in 16-byte units
+	if((sLocalBufUsed + size16) > LOCAL_BUF_SIZE)
+	{ // reset
+		spLocalBufCurr = sLocalMemBuf;
+		while((size_t)spLocalBufCurr & 0x0F) spLocalBufCurr++; // align to 16 bytes
+		sLocalBufUsed = 0;
+	}
+	void* ret = spLocalBufCurr;
+	spLocalBufCurr += size16 * 4;	// advance in ints: 4 ints per 16-byte unit (sLocalMemBuf is an int array)
+	sLocalBufUsed += size16;	// count in the same 16-byte units the capacity check uses; counting bytes wrapped far too early
+	return ret;
+}
+
+
+
+CL_API_ENTRY cl_int CL_API_CALL clSetKernelArg(cl_kernel    clKernel ,
+               cl_uint      arg_index ,
+               size_t       arg_size ,
+               const void *  arg_value ) CL_API_SUFFIX__VERSION_1_0
+{
+	// Stores one kernel argument in the MiniCLKernel behind clKernel; returns 0, or CL_INVALID_ARG_INDEX / CL_INVALID_ARG_SIZE when rejected.
+	//   arg_index  slot to fill; valid slots are 0 .. MINI_CL_MAX_ARG-1
+	//   arg_size   size of the argument in bytes; at most MINICL_MAX_ARGLENGTH
+	//   arg_value  bytes copied into the slot, or NULL to request __local scratch memory
+	MiniCLKernel* kernel = (MiniCLKernel* ) clKernel;
+	btAssert(arg_size <= MINICL_MAX_ARGLENGTH);
+	if (arg_index>=MINI_CL_MAX_ARG)
+	{	// ">=": index MINI_CL_MAX_ARG is already one past the last slot (assumes the kernel's arrays have MINI_CL_MAX_ARG entries, like MiniCLTaskDesc)
+		printf("error: clSetKernelArg arg_index (%u) exceeds %u\n",arg_index,MINI_CL_MAX_ARG);
+		return CL_INVALID_ARG_INDEX;
+	}
+	if (arg_size>MINICL_MAX_ARGLENGTH)
+	{
+		printf("error: clSetKernelArg argdata too large: %zu (maximum is %zu)\n",arg_size,MINICL_MAX_ARGLENGTH);
+		return CL_INVALID_ARG_SIZE;
+	}
+	if(arg_value == NULL)
+	{	// this is only for __local memory qualifier
+		void* ptr = localBufMalloc(arg_size);
+		kernel->m_argData[arg_index] = ptr;
+	}
+	else
+	{	// the raw argument bytes are stored in place of the slot's pointer value
+		memcpy(&(kernel->m_argData[arg_index]), arg_value, arg_size);
+	}
+	kernel->m_argSizes[arg_index] = arg_size;
+	if(arg_index >= kernel->m_numArgs)
+	{	// a new highest slot changes the argument count, so the launcher is refreshed
+		kernel->m_numArgs = arg_index + 1;
+		kernel->updateLauncher();
+	}
+	return 0;
+}
+
+// Kernel Object APIs
+CL_API_ENTRY cl_kernel CL_API_CALL clCreateKernel(cl_program       program ,
+               const char *     kernel_name ,
+               cl_int *         errcode_ret ) CL_API_SUFFIX__VERSION_1_0
+{
+	// Creates a MiniCLKernel called kernel_name and attaches it to the scheduler behind program.
+	//   program      cast to MiniCLTaskScheduler*; stored in the kernel as m_scheduler
+	//   kernel_name  copied into the kernel's m_name; must be non-NULL and shorter than MINI_CL_MAX_KERNEL_NAME
+	//   errcode_ret  optional; receives CL_SUCCESS or CL_INVALID_KERNEL_NAME
+	// Returns the kernel handle, or NULL when the name is rejected or registerSelf() returns NULL.
+	MiniCLTaskScheduler* scheduler = (MiniCLTaskScheduler*) program;
+	MiniCLKernel* kernel = NULL;
+	cl_int status = CL_INVALID_KERNEL_NAME;
+
+	// A NULL name is treated like an over-long one instead of being handed to strlen().
+	const int nameLen = (kernel_name != NULL) ? (int)strlen(kernel_name) : MINI_CL_MAX_KERNEL_NAME;
+	if(nameLen < MINI_CL_MAX_KERNEL_NAME)
+	{
+		kernel = new MiniCLKernel();
+		// presumably m_name holds MINI_CL_MAX_KERNEL_NAME chars, which the length check above relies on
+		strcpy(kernel->m_name, kernel_name);
+		kernel->m_numArgs = 0;
+		kernel->m_scheduler = scheduler;
+
+		if(kernel->registerSelf() == NULL)
+		{	// registration failed: release the half-built kernel and report the name as invalid
+			delete kernel;
+			kernel = NULL;
+		}
+		else
+		{
+			status = CL_SUCCESS;
+		}
+	}
+
+	if(errcode_ret != NULL)
+	{	// a NULL errcode_ret is allowed; the status is only stored when the caller asked for it
+		*errcode_ret = status;
+	}
+	return (cl_kernel)kernel;
+}
+
+
+CL_API_ENTRY cl_int CL_API_CALL clBuildProgram(cl_program           /* program */,
+               cl_uint              /* num_devices */,
+               const cl_device_id * /* device_list */,
+               const char *         /* options */, 
+               void (*pfn_notify)(cl_program /* program */, void * /* user_data */),
+               void *               /* user_data */) CL_API_SUFFIX__VERSION_1_0
+{
+	return CL_SUCCESS;
+}
+
+CL_API_ENTRY cl_program CL_API_CALL clCreateProgramWithBinary(cl_context                     context ,
+                          cl_uint                        /* num_devices */,
+                          const cl_device_id *           /* device_list */,
+                          const size_t *                 /* lengths */,
+                          const unsigned char **         /* binaries */,
+                          cl_int *                       /* binary_status */,
+                          cl_int *                       /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0
+{
+	return (cl_program)context;
+}
+
+
+// Memory Object APIs
+CL_API_ENTRY cl_mem CL_API_CALL clCreateBuffer(cl_context   /* context */,
+               cl_mem_flags flags ,
+               size_t       size,
+               void *       host_ptr ,
+               cl_int *     errcode_ret ) CL_API_SUFFIX__VERSION_1_0
+{	// A MiniCL buffer is plain malloc() memory and the cl_mem handle is that raw pointer; errcode_ret is optional.
+	cl_mem buf = (cl_mem)malloc(size);
+	if (buf && (flags&CL_MEM_COPY_HOST_PTR) && host_ptr)
+	{	// contents are seeded from host memory only when the caller asked for a copy and the allocation succeeded
+		memcpy(buf,host_ptr,size);
+	}
+	if (errcode_ret) *errcode_ret = (buf || size == 0) ? CL_SUCCESS : CL_OUT_OF_HOST_MEMORY;	// report a failed allocation instead of success
+	return buf;
+}
+
+// Command Queue APIs
+CL_API_ENTRY cl_command_queue CL_API_CALL clCreateCommandQueue(cl_context                      context , 
+                     cl_device_id                   /* device */, 
+                     cl_command_queue_properties    /* properties */,
+                     cl_int *                        errcode_ret ) CL_API_SUFFIX__VERSION_1_0
+{	// No separate queue object is created: the context handle is returned and serves as the command queue.
+	if (errcode_ret != NULL) *errcode_ret = 0;	// errcode_ret is optional, so never dereference it blindly
+	return (cl_command_queue) context;
+}
+
+extern CL_API_ENTRY cl_int CL_API_CALL clGetContextInfo(cl_context         /* context */, 
+                 cl_context_info    param_name , 
+                 size_t             param_value_size , 
+                 void *             param_value, 
+                 size_t *           param_value_size_ret ) CL_API_SUFFIX__VERSION_1_0
+{
+	// Only CL_CONTEXT_DEVICES is answered, with the placeholder text "MiniCL_Test."; always returns 0.
+	switch (param_name)
+	{
+	case CL_CONTEXT_DEVICES:
+		{
+			const char* testName = "MiniCL_Test.";
+			const size_t needed = strlen(testName) + 1;	// 13 bytes, the value the size query has always reported
+			if (!param_value_size)
+			{	// size query: the out-parameter is optional, so guard it
+				if (param_value_size_ret != NULL) *param_value_size_ret = needed;
+			} else if (param_value != NULL)
+			{	// never write past the caller's buffer: a short buffer receives a truncated, terminated copy
+				const size_t count = (param_value_size < needed) ? param_value_size : needed;
+				memcpy(param_value, testName, count);
+				((char*)param_value)[count - 1] = 0;
+			}
+			break;
+		};
+	default:
+		printf("unsupported\n");
+	}
+	return 0;
+}
+
+
+
+CL_API_ENTRY cl_context CL_API_CALL clCreateContextFromType(const cl_context_properties * /* properties */,
+                        cl_device_type           device_type ,
+                        void (*pfn_notify)(const char *, const void *, size_t, void *) /* pfn_notify */,
+                        void *                  /* user_data */,
+                        cl_int *                 errcode_ret ) CL_API_SUFFIX__VERSION_1_0
+{
+	// Creates a thread-support backend and a MiniCLTaskScheduler on top of it; the scheduler pointer is the
+	// returned context handle. errcode_ret is optional and receives 0; properties/pfn_notify/user_data are unused.
+	int maxNumOutstandingTasks = 4;
+	gMiniCLNumOutstandingTasks = maxNumOutstandingTasks;
+	const int maxNumOfThreadSupports = 8;
+	static int sUniqueThreadSupportIndex = 0;
+	static const char* sUniqueThreadSupportName[maxNumOfThreadSupports] = 
+	{
+		"MiniCL_0", "MiniCL_1", "MiniCL_2", "MiniCL_3", "MiniCL_4", "MiniCL_5", "MiniCL_6", "MiniCL_7" 
+	};
+
+	btThreadSupportInterface* threadSupport = 0;
+
+	if (device_type==CL_DEVICE_TYPE_DEBUG)
+	{
+		SequentialThreadSupport::SequentialThreadConstructionInfo stc("MiniCL",processMiniCLTask,createMiniCLLocalStoreMemory);
+		threadSupport = new SequentialThreadSupport(stc);
+	} else
+	{
+		// pick the backend available at compile time: Win32 threads, pthreads, otherwise sequential
+#ifdef _WIN32
+	btAssert(sUniqueThreadSupportIndex < maxNumOfThreadSupports);
+	// NOTE(review): if btAssert is compiled out, a 9th context created here would index past the name table.
+	threadSupport = new Win32ThreadSupport(Win32ThreadSupport::Win32ThreadConstructionInfo(
+								// each context created on this path takes the next name from the table
+								sUniqueThreadSupportName[sUniqueThreadSupportIndex++],
+								processMiniCLTask, //processCollisionTask,
+								createMiniCLLocalStoreMemory,//createCollisionLocalStoreMemory,
+								maxNumOutstandingTasks));
+#else
+
+#ifdef USE_PTHREADS
+		PosixThreadSupport::ThreadConstructionInfo constructionInfo("PosixThreads",
+																	processMiniCLTask,
+																	createMiniCLLocalStoreMemory,
+																	maxNumOutstandingTasks);
+		threadSupport = new PosixThreadSupport(constructionInfo);
+
+#else
+	///todo: add posix thread support for other platforms
+	SequentialThreadSupport::SequentialThreadConstructionInfo stc("MiniCL",processMiniCLTask,createMiniCLLocalStoreMemory);
+	threadSupport = new SequentialThreadSupport(stc);
+#endif //USE_PTHREADS
+#endif
+
+	}
+	
+	
+	MiniCLTaskScheduler* scheduler = new MiniCLTaskScheduler(threadSupport,maxNumOutstandingTasks);
+
+	if (errcode_ret != NULL) *errcode_ret = 0;	// errcode_ret is optional, so never dereference it blindly
+	return (cl_context)scheduler;
+}
+
+CL_API_ENTRY cl_int CL_API_CALL
+clGetDeviceIDs(cl_platform_id   /* platform */,
+               cl_device_type   /* device_type */, 
+               cl_uint          /* num_entries */, 
+               cl_device_id *   /* devices */, 
+               cl_uint *        /* num_devices */) CL_API_SUFFIX__VERSION_1_0
+{
+	return 0;
+}
+
+CL_API_ENTRY cl_context CL_API_CALL
+clCreateContext(const cl_context_properties *  properties ,
+                cl_uint                        num_devices ,
+                const cl_device_id *           devices ,
+                 void (*pfn_notify)(const char *, const void *, size_t, void *),
+                void *                         user_data ,
+                cl_int *                       errcode_ret ) CL_API_SUFFIX__VERSION_1_0
+{
+	
+	return	clCreateContextFromType(properties,CL_DEVICE_TYPE_ALL,pfn_notify,user_data,errcode_ret);
+}
+
+CL_API_ENTRY cl_int CL_API_CALL clReleaseContext(cl_context  context ) CL_API_SUFFIX__VERSION_1_0
+{
+
+	MiniCLTaskScheduler* scheduler = (MiniCLTaskScheduler*) context;
+	
+	btThreadSupportInterface* threadSupport = scheduler->getThreadSupportInterface();
+	delete scheduler;
+	delete threadSupport;
+	
+	return 0;
+}
+extern CL_API_ENTRY cl_int CL_API_CALL
+clFinish(cl_command_queue command_queue ) CL_API_SUFFIX__VERSION_1_0
+{
+	MiniCLTaskScheduler* scheduler = (MiniCLTaskScheduler*) command_queue;
+	///wait for all work items to be completed
+	scheduler->flush();
+	return CL_SUCCESS;
+}
+
+extern CL_API_ENTRY cl_int CL_API_CALL 
+clGetProgramInfo(cl_program         /* program */,
+                 cl_program_info    /* param_name */,
+                 size_t             /* param_value_size */,
+                 void *             /* param_value */,
+                 size_t *           /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0
+{
+   return 0;
+}
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetKernelWorkGroupInfo(cl_kernel                   kernel ,
+                         cl_device_id               /* device */,
+                         cl_kernel_work_group_info  wgi/* param_name */,
+                         size_t   sz                  /* param_value_size */,
+                         void *     ptr                /* param_value */,
+                         size_t *                   /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0
+{
+	if((wgi == CL_KERNEL_WORK_GROUP_SIZE)
+	 &&(sz == sizeof(size_t))
+	 &&(ptr != NULL))
+	{
+		MiniCLKernel* miniCLKernel = (MiniCLKernel*)kernel;
+		MiniCLTaskScheduler* scheduler = miniCLKernel->m_scheduler;
+		*((size_t*)ptr) = scheduler->getMaxNumOutstandingTasks();
+		return CL_SUCCESS;
+	}
+	else
+	{
+		return CL_INVALID_VALUE;
+	}
+}
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/MiniCLTask/MiniCLTask.cpp b/Engine/lib/bullet/src/MiniCL/MiniCLTask/MiniCLTask.cpp
similarity index 50%
rename from Engine/lib/bullet/src/BulletMultiThreaded/MiniCLTask/MiniCLTask.cpp
rename to Engine/lib/bullet/src/MiniCL/MiniCLTask/MiniCLTask.cpp
index b9680eaa2..a56e96a0c 100644
--- a/Engine/lib/bullet/src/BulletMultiThreaded/MiniCLTask/MiniCLTask.cpp
+++ b/Engine/lib/bullet/src/MiniCL/MiniCLTask/MiniCLTask.cpp
@@ -15,10 +15,12 @@ subject to the following restrictions:
 
 
 #include "MiniCLTask.h"
-#include "../PlatformDefinitions.h"
-#include "../SpuFakeDma.h"
+#include "BulletMultiThreaded/PlatformDefinitions.h"
+#include "BulletMultiThreaded/SpuFakeDma.h"
 #include "LinearMath/btMinMax.h"
-#include "BulletMultiThreaded/MiniCLTask/MiniCLTask.h"
+#include "MiniCLTask.h"
+#include "MiniCL/MiniCLTaskScheduler.h"
+
 
 #ifdef __SPU__
 #include <spu_printf.h>
@@ -27,9 +29,7 @@ subject to the following restrictions:
 #define spu_printf printf
 #endif
 
-#define __kernel
-#define __global
-#define get_global_id(a) guid
+int gMiniCLNumOutstandingTasks = 0;
 
 struct MiniCLTask_LocalStoreMemory
 {
@@ -37,34 +37,6 @@ struct MiniCLTask_LocalStoreMemory
 };
 
 
-///////////////////////////////////////////////////
-// OpenCL Kernel Function for element by element vector addition
-__kernel void VectorAdd(__global const float8* a, __global const float8* b, __global float8* c, int guid)
-{
-    // get oct-float index into global data array
-    int iGID = get_global_id(0);
-
-    // read inputs into registers
-    float8 f8InA = a[iGID];
-    float8 f8InB = b[iGID];
-    float8 f8Out = (float8)0.0f;
-    
-    // add the vector elements
-    f8Out.s0 = f8InA.s0 + f8InB.s0;
-    f8Out.s1 = f8InA.s1 + f8InB.s1;
-    f8Out.s2 = f8InA.s2 + f8InB.s2;
-    f8Out.s3 = f8InA.s3 + f8InB.s3;
-    f8Out.s4 = f8InA.s4 + f8InB.s4;
-    f8Out.s5 = f8InA.s5 + f8InB.s5;
-    f8Out.s6 = f8InA.s6 + f8InB.s6;
-    f8Out.s7 = f8InA.s7 + f8InB.s7;
-
-    // write back out to GMEM
-    c[get_global_id(0)] = f8Out;
-}
-///////////////////////////////////////////////////
-
-
 //-- MAIN METHOD
 void processMiniCLTask(void* userPtr, void* lsMemory)
 {
@@ -75,27 +47,13 @@ void processMiniCLTask(void* userPtr, void* lsMemory)
 	MiniCLTaskDesc* taskDescPtr = (MiniCLTaskDesc*)userPtr;
 	MiniCLTaskDesc& taskDesc = *taskDescPtr;
 
-	printf("Compute Unit[%d] executed kernel %d work items [%d..%d)\n",taskDesc.m_taskId,taskDesc.m_kernelProgramId,taskDesc.m_firstWorkUnit,taskDesc.m_lastWorkUnit);
-	
-	
-	switch (taskDesc.m_kernelProgramId)
+	for (unsigned int i=taskDesc.m_firstWorkUnit;i<taskDesc.m_lastWorkUnit;i++)
 	{
-	case CMD_MINICL_ADDVECTOR:
-		{
-			for (unsigned int i=taskDesc.m_firstWorkUnit;i<taskDesc.m_lastWorkUnit;i++)
-			{
-				VectorAdd(*(const float8**)&taskDesc.m_argData[0][0],*(const float8**)&taskDesc.m_argData[1][0],*(float8**)&taskDesc.m_argData[2][0],i);
-			}
-			break;
-		}
-
-	default:
-		{
-			printf("error in processMiniCLTask: unknown command id: %d\n",taskDesc.m_kernelProgramId);
-
-		}
-	};
+		taskDesc.m_kernel->m_launcher(&taskDesc, i);
+	}
 
+//	printf("Compute Unit[%d] executed kernel %d work items [%d..%d)\n",taskDesc.m_taskId,taskDesc.m_kernelProgramId,taskDesc.m_firstWorkUnit,taskDesc.m_lastWorkUnit);
+	
 }
 
 
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/MiniCLTask/MiniCLTask.h b/Engine/lib/bullet/src/MiniCL/MiniCLTask/MiniCLTask.h
similarity index 72%
rename from Engine/lib/bullet/src/BulletMultiThreaded/MiniCLTask/MiniCLTask.h
rename to Engine/lib/bullet/src/MiniCL/MiniCLTask/MiniCLTask.h
index 87fea318a..7e78be085 100644
--- a/Engine/lib/bullet/src/BulletMultiThreaded/MiniCLTask/MiniCLTask.h
+++ b/Engine/lib/bullet/src/MiniCL/MiniCLTask/MiniCLTask.h
@@ -16,39 +16,17 @@ subject to the following restrictions:
 #ifndef MINICL__TASK_H
 #define MINICL__TASK_H
 
-#include "../PlatformDefinitions.h"
+#include "BulletMultiThreaded/PlatformDefinitions.h"
 #include "LinearMath/btScalar.h"
 
 #include "LinearMath/btAlignedAllocator.h"
 
 
-enum
-{
-	CMD_MINICL_1= 1,
-	CMD_MINICL_ADDVECTOR
-};
+#define MINICL_MAX_ARGLENGTH (sizeof(void*))
+#define MINI_CL_MAX_ARG 16
+#define MINI_CL_MAX_KERNEL_NAME 256
 
-
-
-struct float8
-{
-	float s0;
-	float s1;
-	float s2;
-	float s3;
-	float s4;
-	float s5;
-	float s6;
-	float s7;
-
-	float8(float scalar)
-	{
-		s0=s1=s2=s3=s4=s5=s6=s7=scalar;
-	}
-};
-
-#define MINICL_MAX_ARGLENGTH 128
-#define MINI_CL_MAX_ARG 8
+struct MiniCLKernel;
 
 ATTRIBUTE_ALIGNED16(struct) MiniCLTaskDesc
 {
@@ -62,16 +40,19 @@ ATTRIBUTE_ALIGNED16(struct) MiniCLTaskDesc
 		}
 	}
 
-	uint32_t	m_taskId;
+	uint32_t		m_taskId;
 
-	uint32_t	m_kernelProgramId;
-	uint32_t	m_firstWorkUnit;
-	uint32_t	m_lastWorkUnit;
+	uint32_t		m_firstWorkUnit;
+	uint32_t		m_lastWorkUnit;
 
-	char		m_argData[MINI_CL_MAX_ARG][MINICL_MAX_ARGLENGTH];
-	int			m_argSizes[MINI_CL_MAX_ARG];
+	MiniCLKernel*	m_kernel;
+
+	void*			m_argData[MINI_CL_MAX_ARG];
+	int				m_argSizes[MINI_CL_MAX_ARG];
 };
 
+extern "C" int gMiniCLNumOutstandingTasks;
+
 
 void	processMiniCLTask(void* userPtr, void* lsMemory);
 void*	createMiniCLLocalStoreMemory();
diff --git a/Engine/lib/bullet/src/MiniCL/MiniCLTaskScheduler.cpp b/Engine/lib/bullet/src/MiniCL/MiniCLTaskScheduler.cpp
new file mode 100644
index 000000000..18cf64576
--- /dev/null
+++ b/Engine/lib/bullet/src/MiniCL/MiniCLTaskScheduler.cpp
@@ -0,0 +1,519 @@
+/*
+Bullet Continuous Collision Detection and Physics Library
+Copyright (c) 2003-2007 Erwin Coumans  http://bulletphysics.com
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+
+//#define __CELLOS_LV2__ 1
+#define __BT_SKIP_UINT64_H 1
+
+#define USE_SAMPLE_PROCESS 1
+#ifdef USE_SAMPLE_PROCESS
+
+
+#include "MiniCLTaskScheduler.h"
+#include <stdio.h>
+
+#ifdef __SPU__
+
+
+
+void	SampleThreadFunc(void* userPtr,void* lsMemory)
+{
+	//do nothing
+	printf("hello world\n");
+}
+
+
+void*	SamplelsMemoryFunc()
+{
+	//don't create local store memory, just return 0
+	return 0;
+}
+
+
+#else
+
+
+#include "BulletMultiThreaded/btThreadSupportInterface.h"
+
+//#	include "SPUAssert.h"
+#include <string.h>
+
+#include "MiniCL/cl_platform.h"
+
+extern "C" {
+	extern char SPU_SAMPLE_ELF_SYMBOL[];
+}
+
+
+MiniCLTaskScheduler::MiniCLTaskScheduler(btThreadSupportInterface*	threadInterface,  int maxNumOutstandingTasks)
+:m_threadInterface(threadInterface),
+m_maxNumOutstandingTasks(maxNumOutstandingTasks)
+{
+
+	m_taskBusy.resize(m_maxNumOutstandingTasks);
+	m_spuSampleTaskDesc.resize(m_maxNumOutstandingTasks);
+
+	m_kernels.resize(0);
+
+	for (int i = 0; i < m_maxNumOutstandingTasks; i++)
+	{
+		m_taskBusy[i] = false;
+	}
+	m_numBusyTasks = 0;
+	m_currentTask = 0;
+
+	m_initialized = false;
+
+	m_threadInterface->startSPU();
+
+
+}
+
+MiniCLTaskScheduler::~MiniCLTaskScheduler()
+{
+	m_threadInterface->stopSPU();
+	
+}
+
+
+
+void	MiniCLTaskScheduler::initialize()
+{
+#ifdef DEBUG_SPU_TASK_SCHEDULING
+	printf("MiniCLTaskScheduler::initialize()\n");
+#endif //DEBUG_SPU_TASK_SCHEDULING
+	
+	for (int i = 0; i < m_maxNumOutstandingTasks; i++)
+	{
+		m_taskBusy[i] = false;
+	}
+	m_numBusyTasks = 0;
+	m_currentTask = 0;
+	m_initialized = true;
+
+}
+
+
+void MiniCLTaskScheduler::issueTask(int firstWorkUnit, int lastWorkUnit, MiniCLKernel* kernel)
+{
+	// Fills the current task slot from 'kernel', sends it to the thread interface, then selects the next free slot.
+#ifdef DEBUG_SPU_TASK_SCHEDULING
+	printf("MiniCLTaskScheduler::issueTask (m_currentTask= %d)\n", m_currentTask);
+#endif //DEBUG_SPU_TASK_SCHEDULING
+
+	m_taskBusy[m_currentTask] = true;
+	m_numBusyTasks++;
+
+	MiniCLTaskDesc& taskDesc = m_spuSampleTaskDesc[m_currentTask];
+	{
+		// send task description in event message
+		taskDesc.m_firstWorkUnit = firstWorkUnit;
+		taskDesc.m_lastWorkUnit = lastWorkUnit;
+		taskDesc.m_kernel = kernel;
+		//some bookkeeping to recognize finished tasks
+		taskDesc.m_taskId = m_currentTask;
+		
+//		for (int i=0;i<MINI_CL_MAX_ARG;i++)
+		for (unsigned int i=0; i < kernel->m_numArgs; i++)
+		{
+			taskDesc.m_argSizes[i] = kernel->m_argSizes[i];
+			if (taskDesc.m_argSizes[i])
+			{
+				taskDesc.m_argData[i] = kernel->m_argData[i];
+//				memcpy(&taskDesc.m_argData[i],&argData[MINICL_MAX_ARGLENGTH*i],taskDesc.m_argSizes[i]);
+			}
+		}
+	}
+
+
+	m_threadInterface->sendRequest(1, (ppu_address_t) &taskDesc, m_currentTask);
+
+	// if all tasks busy, wait for spu event to clear the task.
+	
+	if (m_numBusyTasks >= m_maxNumOutstandingTasks)
+	{
+		unsigned int taskId;
+		unsigned int outputSize;
+
+		for (int i=0;i<m_maxNumOutstandingTasks;i++)
+	  {
+		  if (m_taskBusy[i])
+		  {
+			  taskId = i;
+			  break;
+		  }
+	  }
+		m_threadInterface->waitForResponse(&taskId, &outputSize);
+
+		//printf("PPU: after issue, received event: %u %d\n", taskId, outputSize);
+
+		postProcess(taskId, outputSize);
+
+		m_taskBusy[taskId] = false;
+
+		m_numBusyTasks--;
+	}
+
+	// find new task buffer
+	for (int i = 0; i < m_maxNumOutstandingTasks; i++)
+	{
+		if (!m_taskBusy[i])
+		{
+			m_currentTask = i;
+			break;
+		}
+	}
+}
+
+
+///Optional PPU-side post processing for each task
+void MiniCLTaskScheduler::postProcess(int taskId, int outputSize)
+{
+	// no-op here: called by issueTask() and flush() after waitForResponse(); both arguments are unused
+}
+
+
+void MiniCLTaskScheduler::flush()
+{
+#ifdef DEBUG_SPU_TASK_SCHEDULING
+	printf("\nMiniCLTaskScheduler::flush()\n");
+#endif //DEBUG_SPU_TASK_SCHEDULING
+	// Blocks until m_numBusyTasks reaches zero, running postProcess() once per received response.
+
+	// all tasks are issued, wait for all tasks to be complete
+	while(m_numBusyTasks > 0)
+	{
+// Consolidating SPU code
+	  unsigned int taskId;
+	  unsigned int outputSize;
+	  
+	  for (int i=0;i<m_maxNumOutstandingTasks;i++)
+	  {
+		  if (m_taskBusy[i])
+		  {
+			  taskId = i;
+			  break;
+		  }
+	  }
+	  {
+			
+		  m_threadInterface->waitForResponse(&taskId, &outputSize);
+	  }
+
+		//printf("PPU: flushing, received event: %u %d\n", taskId, outputSize);
+
+		postProcess(taskId, outputSize);
+
+		m_taskBusy[taskId] = false;
+
+		m_numBusyTasks--;
+	}
+
+
+}
+
+
+
+typedef void (*MiniCLKernelLauncher0)(int);
+typedef void (*MiniCLKernelLauncher1)(void*, int);
+typedef void (*MiniCLKernelLauncher2)(void*, void*, int);
+typedef void (*MiniCLKernelLauncher3)(void*, void*, void*, int);
+typedef void (*MiniCLKernelLauncher4)(void*, void*, void*, void*, int);
+typedef void (*MiniCLKernelLauncher5)(void*, void*, void*, void*, void*, int);
+typedef void (*MiniCLKernelLauncher6)(void*, void*, void*, void*, void*, void*, int);
+typedef void (*MiniCLKernelLauncher7)(void*, void*, void*, void*, void*, void*, void*, int);
+typedef void (*MiniCLKernelLauncher8)(void*, void*, void*, void*, void*, void*, void*, void*, int);
+typedef void (*MiniCLKernelLauncher9)(void*, void*, void*, void*, void*, void*, void*, void*, void*, int);
+typedef void (*MiniCLKernelLauncher10)(void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, int);
+typedef void (*MiniCLKernelLauncher11)(void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, int);
+typedef void (*MiniCLKernelLauncher12)(void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, int);
+typedef void (*MiniCLKernelLauncher13)(void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, int);
+typedef void (*MiniCLKernelLauncher14)(void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, int);
+typedef void (*MiniCLKernelLauncher15)(void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, int);
+typedef void (*MiniCLKernelLauncher16)(void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, int);
+
+
+static void kernelLauncher0(MiniCLTaskDesc* taskDesc, int guid)
+{	// zero-argument kernel: call the kernel entry point (m_pCode) like the other launchers, not the launcher callback
+	((MiniCLKernelLauncher0)(taskDesc->m_kernel->m_pCode))(guid);
+}
+static void kernelLauncher1(MiniCLTaskDesc* taskDesc, int guid)
+{
+	((MiniCLKernelLauncher1)(taskDesc->m_kernel->m_pCode))(	taskDesc->m_argData[0], 
+												guid);
+}
+static void kernelLauncher2(MiniCLTaskDesc* taskDesc, int guid)
+{
+	((MiniCLKernelLauncher2)(taskDesc->m_kernel->m_pCode))(	taskDesc->m_argData[0], 
+												taskDesc->m_argData[1], 
+												guid);
+}
+static void kernelLauncher3(MiniCLTaskDesc* taskDesc, int guid)
+{
+	((MiniCLKernelLauncher3)(taskDesc->m_kernel->m_pCode))(	taskDesc->m_argData[0], 
+												taskDesc->m_argData[1], 
+												taskDesc->m_argData[2], 
+												guid);
+}
+static void kernelLauncher4(MiniCLTaskDesc* taskDesc, int guid)
+{
+	((MiniCLKernelLauncher4)(taskDesc->m_kernel->m_pCode))(	taskDesc->m_argData[0], 
+												taskDesc->m_argData[1], 
+												taskDesc->m_argData[2], 
+												taskDesc->m_argData[3], 
+												guid);
+}
+static void kernelLauncher5(MiniCLTaskDesc* taskDesc, int guid)
+{
+	((MiniCLKernelLauncher5)(taskDesc->m_kernel->m_pCode))(	taskDesc->m_argData[0], 
+												taskDesc->m_argData[1], 
+												taskDesc->m_argData[2], 
+												taskDesc->m_argData[3], 
+												taskDesc->m_argData[4], 
+												guid);
+}
+static void kernelLauncher6(MiniCLTaskDesc* taskDesc, int guid)
+{
+	((MiniCLKernelLauncher6)(taskDesc->m_kernel->m_pCode))(	taskDesc->m_argData[0], 
+												taskDesc->m_argData[1], 
+												taskDesc->m_argData[2], 
+												taskDesc->m_argData[3], 
+												taskDesc->m_argData[4], 
+												taskDesc->m_argData[5], 
+												guid);
+}
+static void kernelLauncher7(MiniCLTaskDesc* taskDesc, int guid)
+{
+	((MiniCLKernelLauncher7)(taskDesc->m_kernel->m_pCode))(	taskDesc->m_argData[0], 
+												taskDesc->m_argData[1], 
+												taskDesc->m_argData[2], 
+												taskDesc->m_argData[3], 
+												taskDesc->m_argData[4], 
+												taskDesc->m_argData[5], 
+												taskDesc->m_argData[6], 
+												guid);
+}
+static void kernelLauncher8(MiniCLTaskDesc* taskDesc, int guid)
+{
+	((MiniCLKernelLauncher8)(taskDesc->m_kernel->m_pCode))(	taskDesc->m_argData[0], 
+												taskDesc->m_argData[1], 
+												taskDesc->m_argData[2], 
+												taskDesc->m_argData[3], 
+												taskDesc->m_argData[4], 
+												taskDesc->m_argData[5], 
+												taskDesc->m_argData[6], 
+												taskDesc->m_argData[7], 
+												guid);
+}
+static void kernelLauncher9(MiniCLTaskDesc* taskDesc, int guid)
+{
+	((MiniCLKernelLauncher9)(taskDesc->m_kernel->m_pCode))(	taskDesc->m_argData[0], 
+												taskDesc->m_argData[1], 
+												taskDesc->m_argData[2], 
+												taskDesc->m_argData[3], 
+												taskDesc->m_argData[4], 
+												taskDesc->m_argData[5], 
+												taskDesc->m_argData[6], 
+												taskDesc->m_argData[7], 
+												taskDesc->m_argData[8], 
+												guid);
+}
+static void kernelLauncher10(MiniCLTaskDesc* taskDesc, int guid)
+{
+	((MiniCLKernelLauncher10)(taskDesc->m_kernel->m_pCode))(taskDesc->m_argData[0], 
+												taskDesc->m_argData[1], 
+												taskDesc->m_argData[2], 
+												taskDesc->m_argData[3], 
+												taskDesc->m_argData[4], 
+												taskDesc->m_argData[5], 
+												taskDesc->m_argData[6], 
+												taskDesc->m_argData[7], 
+												taskDesc->m_argData[8], 
+												taskDesc->m_argData[9], 
+												guid);
+}
+static void kernelLauncher11(MiniCLTaskDesc* taskDesc, int guid)
+{
+	((MiniCLKernelLauncher11)(taskDesc->m_kernel->m_pCode))(taskDesc->m_argData[0], 
+												taskDesc->m_argData[1], 
+												taskDesc->m_argData[2], 
+												taskDesc->m_argData[3], 
+												taskDesc->m_argData[4], 
+												taskDesc->m_argData[5], 
+												taskDesc->m_argData[6], 
+												taskDesc->m_argData[7], 
+												taskDesc->m_argData[8], 
+												taskDesc->m_argData[9], 
+												taskDesc->m_argData[10], 
+												guid);
+}
+static void kernelLauncher12(MiniCLTaskDesc* taskDesc, int guid)
+{
+	((MiniCLKernelLauncher12)(taskDesc->m_kernel->m_pCode))(taskDesc->m_argData[0], 
+												taskDesc->m_argData[1], 
+												taskDesc->m_argData[2], 
+												taskDesc->m_argData[3], 
+												taskDesc->m_argData[4], 
+												taskDesc->m_argData[5], 
+												taskDesc->m_argData[6], 
+												taskDesc->m_argData[7], 
+												taskDesc->m_argData[8], 
+												taskDesc->m_argData[9], 
+												taskDesc->m_argData[10], 
+												taskDesc->m_argData[11], 
+												guid);
+}
+static void kernelLauncher13(MiniCLTaskDesc* taskDesc, int guid)
+{
+	((MiniCLKernelLauncher13)(taskDesc->m_kernel->m_pCode))(taskDesc->m_argData[0], 
+												taskDesc->m_argData[1], 
+												taskDesc->m_argData[2], 
+												taskDesc->m_argData[3], 
+												taskDesc->m_argData[4], 
+												taskDesc->m_argData[5], 
+												taskDesc->m_argData[6], 
+												taskDesc->m_argData[7], 
+												taskDesc->m_argData[8], 
+												taskDesc->m_argData[9], 
+												taskDesc->m_argData[10], 
+												taskDesc->m_argData[11], 
+												taskDesc->m_argData[12], 
+												guid);
+}
+static void kernelLauncher14(MiniCLTaskDesc* taskDesc, int guid)
+{
+	((MiniCLKernelLauncher14)(taskDesc->m_kernel->m_pCode))(taskDesc->m_argData[0], 
+												taskDesc->m_argData[1], 
+												taskDesc->m_argData[2], 
+												taskDesc->m_argData[3], 
+												taskDesc->m_argData[4], 
+												taskDesc->m_argData[5], 
+												taskDesc->m_argData[6], 
+												taskDesc->m_argData[7], 
+												taskDesc->m_argData[8], 
+												taskDesc->m_argData[9], 
+												taskDesc->m_argData[10], 
+												taskDesc->m_argData[11], 
+												taskDesc->m_argData[12], 
+												taskDesc->m_argData[13], 
+												guid);
+}
+static void kernelLauncher15(MiniCLTaskDesc* taskDesc, int guid)
+{
+	((MiniCLKernelLauncher15)(taskDesc->m_kernel->m_pCode))(taskDesc->m_argData[0], 
+												taskDesc->m_argData[1], 
+												taskDesc->m_argData[2], 
+												taskDesc->m_argData[3], 
+												taskDesc->m_argData[4], 
+												taskDesc->m_argData[5], 
+												taskDesc->m_argData[6], 
+												taskDesc->m_argData[7], 
+												taskDesc->m_argData[8], 
+												taskDesc->m_argData[9], 
+												taskDesc->m_argData[10], 
+												taskDesc->m_argData[11], 
+												taskDesc->m_argData[12], 
+												taskDesc->m_argData[13], 
+												taskDesc->m_argData[14], 
+												guid);
+}
+static void kernelLauncher16(MiniCLTaskDesc* taskDesc, int guid)
+{
+	((MiniCLKernelLauncher16)(taskDesc->m_kernel->m_pCode))(taskDesc->m_argData[0], 
+												taskDesc->m_argData[1], 
+												taskDesc->m_argData[2], 
+												taskDesc->m_argData[3], 
+												taskDesc->m_argData[4], 
+												taskDesc->m_argData[5], 
+												taskDesc->m_argData[6], 
+												taskDesc->m_argData[7], 
+												taskDesc->m_argData[8], 
+												taskDesc->m_argData[9], 
+												taskDesc->m_argData[10], 
+												taskDesc->m_argData[11], 
+												taskDesc->m_argData[12], 
+												taskDesc->m_argData[13], 
+												taskDesc->m_argData[14], 
+												taskDesc->m_argData[15], 
+												guid);
+}
+
+static kernelLauncherCB spLauncherList[MINI_CL_MAX_ARG+1] = 
+{
+	kernelLauncher0,
+	kernelLauncher1,
+	kernelLauncher2,
+	kernelLauncher3,
+	kernelLauncher4,
+	kernelLauncher5,
+	kernelLauncher6,
+	kernelLauncher7,
+	kernelLauncher8,
+	kernelLauncher9,
+	kernelLauncher10,
+	kernelLauncher11,
+	kernelLauncher12,
+	kernelLauncher13,
+	kernelLauncher14,
+	kernelLauncher15,
+	kernelLauncher16
+};
+
+void MiniCLKernel::updateLauncher()
+{
+	m_launcher = spLauncherList[m_numArgs];
+}
+
+struct MiniCLKernelDescEntry
+{
+	void* pCode;
+	const char* pName;
+};
+static MiniCLKernelDescEntry spKernelDesc[256];
+static int sNumKernelDesc = 0;
+
+MiniCLKernelDesc::MiniCLKernelDesc(void* pCode, const char* pName)
+{	// Records (pCode, pName) in the static spKernelDesc table unless an entry with the same name already exists.
+	for(int i = 0; i < sNumKernelDesc; i++)
+	{
+		if(!strcmp(pName, spKernelDesc[i].pName))
+		{	// already registered
+			btAssert(spKernelDesc[i].pCode == pCode);
+			return; 
+		}
+	}
+	if(sNumKernelDesc >= (int)(sizeof(spKernelDesc) / sizeof(spKernelDesc[0]))) { btAssert(0); return; }	// table full: drop the entry instead of writing past the array
+	spKernelDesc[sNumKernelDesc].pCode = pCode;
+	spKernelDesc[sNumKernelDesc++].pName = pName;	// pointer is stored, not copied
+}
+
+
+MiniCLKernel* MiniCLKernel::registerSelf()
+{	// Adds this kernel to its scheduler, then resolves m_pCode by name from spKernelDesc; returns this, or NULL if no name matches.
+	m_scheduler->registerKernel(this);	// done before the lookup, so it happens even when NULL is returned below
+	for(int i = 0; i < sNumKernelDesc; i++)
+	{
+		if(!strcmp(m_name, spKernelDesc[i].pName))
+		{
+			m_pCode = spKernelDesc[i].pCode;
+			return this;
+		}
+	}
+	return NULL;	// m_pCode is left untouched on this path
+}
+
+#endif
+
+
+#endif //USE_SAMPLE_PROCESS
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/MiniCLTaskScheduler.h b/Engine/lib/bullet/src/MiniCL/MiniCLTaskScheduler.h
similarity index 87%
rename from Engine/lib/bullet/src/BulletMultiThreaded/MiniCLTaskScheduler.h
rename to Engine/lib/bullet/src/MiniCL/MiniCLTaskScheduler.h
index 580b509b8..3061a7134 100644
--- a/Engine/lib/bullet/src/BulletMultiThreaded/MiniCLTaskScheduler.h
+++ b/Engine/lib/bullet/src/MiniCL/MiniCLTaskScheduler.h
@@ -21,7 +21,7 @@ subject to the following restrictions:
 #include <assert.h>
 
 
-#include "PlatformDefinitions.h"
+#include "BulletMultiThreaded/PlatformDefinitions.h"
 
 #include <stdlib.h>
 
@@ -30,11 +30,10 @@ subject to the following restrictions:
 
 #include "MiniCLTask/MiniCLTask.h"
 
-
 //just add your commands here, try to keep them globally unique for debugging purposes
 #define CMD_SAMPLE_TASK_COMMAND 10
 
-
+struct MiniCLKernel;
 
 /// MiniCLTaskScheduler handles SPU processing of collision pairs.
 /// When PPU issues a task, it will look for completed task buffers
@@ -44,7 +43,11 @@ class MiniCLTaskScheduler
 	// track task buffers that are being used, and total busy tasks
 	btAlignedObjectArray<bool>	m_taskBusy;
 	btAlignedObjectArray<MiniCLTaskDesc>	m_spuSampleTaskDesc;
-	
+
+
+	btAlignedObjectArray<const MiniCLKernel*>	m_kernels;
+
+
 	int   m_numBusyTasks;
 
 	// the current task and the current entry to insert a new work unit
@@ -68,7 +71,7 @@ public:
 	///call initialize in the beginning of the frame, before addCollisionPairToTask
 	void initialize();
 
-	void issueTask(int firstWorkUnit, int lastWorkUnit,int kernelProgramId,char* argData,int* argSizes);
+	void issueTask(int firstWorkUnit, int lastWorkUnit, MiniCLKernel* kernel);
 
 	///call flush to submit potential outstanding work to SPUs and wait for all involved SPUs to be finished
 	void flush();
@@ -78,25 +81,35 @@ public:
 		return m_threadInterface;
 	}
 
-	int	findProgramCommandIdByName(const char* programName) const
-	{
-		return CMD_MINICL_ADDVECTOR;//hardcoded temp value, todo: implement multi-program support
-	}
+	int	findProgramCommandIdByName(const char* programName) const;
 
 	int getMaxNumOutstandingTasks() const
 	{
 		return m_maxNumOutstandingTasks;
 	}
+
+	void registerKernel(MiniCLKernel* kernel)
+	{
+		m_kernels.push_back(kernel);
+	}
 };
 
+typedef void (*kernelLauncherCB)(MiniCLTaskDesc* taskDesc, int guid);
 
 struct	MiniCLKernel
 {
 	MiniCLTaskScheduler* m_scheduler;
 	
-	int	m_kernelProgramCommandId;
+//	int	m_kernelProgramCommandId;
 
-	char	m_argData[MINI_CL_MAX_ARG][MINICL_MAX_ARGLENGTH];
+	char	m_name[MINI_CL_MAX_KERNEL_NAME];
+	unsigned int	m_numArgs;
+	kernelLauncherCB	m_launcher;
+	void* m_pCode;
+	void updateLauncher();
+	MiniCLKernel* registerSelf();
+
+	void*	m_argData[MINI_CL_MAX_ARG];
 	int				m_argSizes[MINI_CL_MAX_ARG];
 };
 
diff --git a/Engine/lib/bullet/src/MiniCL/cl.h b/Engine/lib/bullet/src/MiniCL/cl.h
index b0cda4237..352829883 100644
--- a/Engine/lib/bullet/src/MiniCL/cl.h
+++ b/Engine/lib/bullet/src/MiniCL/cl.h
@@ -155,8 +155,10 @@ typedef struct _cl_image_format {
 #define CL_DEVICE_TYPE_CPU                          (1 << 1)
 #define CL_DEVICE_TYPE_GPU                          (1 << 2)
 #define CL_DEVICE_TYPE_ACCELERATOR                  (1 << 3)
+#define CL_DEVICE_TYPE_DEBUG						(1 << 4)
 #define CL_DEVICE_TYPE_ALL                          0xFFFFFFFF
 
+
 // cl_device_info
 #define CL_DEVICE_TYPE                              0x1000
 #define CL_DEVICE_VENDOR_ID                         0x1001
@@ -435,7 +437,7 @@ clGetDeviceInfo(cl_device_id    /* device */,
 
 // Context APIs  
 extern CL_API_ENTRY cl_context CL_API_CALL
-clCreateContext(cl_context_properties * /* properties */,
+clCreateContext(const cl_context_properties * /* properties */,
                 cl_uint                 /* num_devices */,
                 const cl_device_id *    /* devices */,
                 void (*pfn_notify)(const char *, const void *, size_t, void *) /* pfn_notify */,
@@ -443,7 +445,7 @@ clCreateContext(cl_context_properties * /* properties */,
                 cl_int *                /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
 
 extern CL_API_ENTRY cl_context CL_API_CALL
-clCreateContextFromType(cl_context_properties * /* properties */,
+clCreateContextFromType(const cl_context_properties * /* properties */,
                         cl_device_type          /* device_type */,
                         void (*pfn_notify)(const char *, const void *, size_t, void *) /* pfn_notify */,
                         void *                  /* user_data */,
diff --git a/Engine/lib/bullet/src/MiniCL/cl_MiniCL_Defs.h b/Engine/lib/bullet/src/MiniCL/cl_MiniCL_Defs.h
new file mode 100644
index 000000000..0773c8575
--- /dev/null
+++ b/Engine/lib/bullet/src/MiniCL/cl_MiniCL_Defs.h
@@ -0,0 +1,439 @@
+/*
+Bullet Continuous Collision Detection and Physics Library, Copyright (c) 2007 Erwin Coumans
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+
+*/
+
+#include <float.h>
+#include <math.h>
+#include "LinearMath/btScalar.h"
+
+#include "MiniCL/cl.h"
+
+
+#define __kernel
+#define __global
+#define __local
+#define get_global_id(a)	__guid_arg
+#define get_local_id(a)		((__guid_arg) % gMiniCLNumOutstandingTasks)
+#define get_local_size(a)	(gMiniCLNumOutstandingTasks)
+#define get_group_id(a)		((__guid_arg) / gMiniCLNumOutstandingTasks)
+
+//static unsigned int as_uint(float val) { return *((unsigned int*)&val); }
+
+
+#define CLK_LOCAL_MEM_FENCE		0x01
+#define CLK_GLOBAL_MEM_FENCE	0x02
+
+static void barrier(unsigned int a)
+{
+	// TODO : implement
+}
+
+//ATTRIBUTE_ALIGNED16(struct) float8
+struct float8
+{
+	float s0;
+	float s1;
+	float s2;
+	float s3;
+	float s4;
+	float s5;
+	float s6;
+	float s7;
+
+	float8(float scalar)
+	{
+		s0=s1=s2=s3=s4=s5=s6=s7=scalar;
+	}
+};
+
+
+static float select( float arg0, float arg1, bool select)	// static like the other helpers here: avoids duplicate symbols across translation units
+{	// Returns arg0 when 'select' is true, otherwise arg1.
+	if (select)	// NOTE(review): OpenCL's select(a,b,c) yields b when c is set; this order is the reverse - confirm intended
+		return arg0;
+	return arg1;
+}
+
+#define __constant
+
+
+struct float3
+{
+	float x,y,z;
+
+	float3& operator+=(const float3& other)
+	{
+		x += other.x;
+		y += other.y;
+		z += other.z;
+		return *this;
+	}
+
+	float3& operator-=(const float3& other)
+	{
+		x -= other.x;
+		y -= other.y;
+		z -= other.z;
+		return *this;
+	}
+
+};
+
+static float dot(const float3&a ,const float3& b)
+{
+	float3 tmp;
+	tmp.x = a.x*b.x;
+	tmp.y = a.y*b.y;
+	tmp.z = a.z*b.z;
+	return tmp.x+tmp.y+tmp.z;
+}
+
+static float3 operator-(const float3& a,const float3& b)
+{
+	float3 tmp;
+	tmp.x = a.x - b.x;
+	tmp.y = a.y - b.y;
+	tmp.z = a.z - b.z;
+	return tmp;
+}
+
+static float3 operator*(const float& scalar,const float3& b)
+{
+	float3 tmp;
+	tmp.x = scalar * b.x;
+	tmp.y = scalar * b.y;
+	tmp.z = scalar * b.z;
+	return tmp;
+}
+
+static float3 operator*(const float3& a,const float& scalar)
+{
+	float3 tmp;
+	tmp.x = a.x * scalar;
+	tmp.y = a.y * scalar;
+	tmp.z = a.z * scalar;
+	return tmp;
+}
+
+
+static float3 operator*(const float3& a,const float3& b)
+{
+	float3 tmp;
+	tmp.x = a.x * b.x;
+	tmp.y = a.y * b.y;
+	tmp.z = a.z * b.z;
+	return tmp;
+}
+	
+
+//ATTRIBUTE_ALIGNED16(struct) float4
+struct float4
+{
+	union
+	{
+		struct {
+			float x;
+			float y;
+			float z;
+		};
+		float3 xyz;
+	};
+	float w;
+
+	float4() {}
+
+	float4(float v0, float v1, float v2, float v3)
+	{
+		x=v0;
+		y=v1;
+		z=v2;
+		w=v3;
+
+	}
+	float4(float3 xyz, float scalarW) 
+	{
+		x = xyz.x;
+		y = xyz.y;
+		z = xyz.z;
+		w = scalarW;
+	}
+
+	float4(float v) 
+	{
+		x = y = z = w = v; 
+	}
+	float4 operator*(const float4& other)
+	{
+		float4 tmp;
+		tmp.x = x*other.x;
+		tmp.y = y*other.y;
+		tmp.z = z*other.z;
+		tmp.w = w*other.w;
+		return tmp;
+	}
+
+	
+
+	float4 operator*(const float& other)
+	{
+		float4 tmp;
+		tmp.x = x*other;
+		tmp.y = y*other;
+		tmp.z = z*other;
+		tmp.w = w*other;
+		return tmp;
+	}
+
+	
+
+	float4& operator+=(const float4& other)
+	{
+		x += other.x;
+		y += other.y;
+		z += other.z;
+		w += other.w;
+		return *this;
+	}
+
+	float4& operator-=(const float4& other)
+	{
+		x -= other.x;
+		y -= other.y;
+		z -= other.z;
+		w -= other.w;
+		return *this;
+	}
+
+	float4& operator *=(float scalar)
+	{
+		x *= scalar;
+		y *= scalar;
+		z *= scalar;
+		w *= scalar;
+		return (*this);
+	}
+
+	
+	
+	
+	
+};
+
+static float4 fabs(const float4& a)
+{	// Component-wise absolute value: negative components are negated (previously they were clamped to 0).
+	float4 tmp;
+	tmp.x = a.x < 0.f ? -a.x : a.x;
+	tmp.y = a.y < 0.f ? -a.y : a.y;
+	tmp.z = a.z < 0.f ? -a.z : a.z;
+	tmp.w = a.w < 0.f ? -a.w : a.w;
+	return tmp;
+}
+static float4 operator+(const float4& a,const float4& b)
+{
+	float4 tmp;
+	tmp.x = a.x + b.x;
+	tmp.y = a.y + b.y;
+	tmp.z = a.z + b.z;
+	tmp.w = a.w + b.w;
+	return tmp;
+}
+
+
+static float8 operator+(const float8& a,const float8& b)
+{
+	float8 tmp(0);
+	tmp.s0  = a.s0 + b.s0;
+	tmp.s1  = a.s1 + b.s1;
+	tmp.s2  = a.s2 + b.s2;
+	tmp.s3  = a.s3 + b.s3;
+	tmp.s4  = a.s4 + b.s4;
+	tmp.s5  = a.s5 + b.s5;
+	tmp.s6  = a.s6 + b.s6;
+	tmp.s7  = a.s7 + b.s7;
+	return tmp;
+}
+
+
+static float4 operator-(const float4& a,const float4& b)
+{
+	float4 tmp;
+	tmp.x = a.x - b.x;
+	tmp.y = a.y - b.y;
+	tmp.z = a.z - b.z;
+	tmp.w = a.w - b.w;
+	return tmp;
+}
+
+static float8 operator-(const float8& a,const float8& b)
+{
+	float8 tmp(0);
+	tmp.s0  = a.s0 - b.s0;
+	tmp.s1  = a.s1 - b.s1;
+	tmp.s2  = a.s2 - b.s2;
+	tmp.s3  = a.s3 - b.s3;
+	tmp.s4  = a.s4 - b.s4;
+	tmp.s5  = a.s5 - b.s5;
+	tmp.s6  = a.s6 - b.s6;
+	tmp.s7  = a.s7 - b.s7;
+	return tmp;
+}
+
+static float4 operator*(float a,const float4& b)
+{
+	float4 tmp;
+	tmp.x = a * b.x;
+	tmp.y = a * b.y;
+	tmp.z = a * b.z;
+	tmp.w = a * b.w;
+	return tmp;
+}
+
+static float4 operator/(const float4& b,float a)
+{
+	float4 tmp;
+	tmp.x = b.x/a;
+	tmp.y = b.y/a;
+	tmp.z = b.z/a;
+	tmp.w = b.w/a;
+	return tmp;
+}
+
+
+
+
+
+static float dot(const float4&a ,const float4& b)
+{
+	float4 tmp;
+	tmp.x = a.x*b.x;
+	tmp.y = a.y*b.y;
+	tmp.z = a.z*b.z;
+	tmp.w = a.w*b.w;
+	return tmp.x+tmp.y+tmp.z+tmp.w;
+}
+
+static float length(const float4&a)
+{
+	float l = sqrtf(a.x*a.x+a.y*a.y+a.z*a.z);
+	return l;
+}
+
+static float4 normalize(const float4&a)
+{
+	float4 tmp;
+	float l = length(a);
+	tmp = 1.f/l*a;
+	return tmp;
+}
+
+
+
+static float4 cross(const float4&a ,const float4& b)
+{
+	float4 tmp;
+	tmp.x =  a.y*b.z - a.z*b.y;
+	tmp.y = -a.x*b.z + a.z*b.x;
+	tmp.z =  a.x*b.y - a.y*b.x;
+	tmp.w = 0.f;
+	return tmp;
+}
+
+static float max(float a, float b) 
+{
+	return (a >= b) ? a : b;
+}
+
+
+static float min(float a, float b) 
+{
+	return (a <= b) ? a : b;
+}
+
+static float fmax(float a, float b) 
+{
+	return (a >= b) ? a : b;
+}
+
+static float fmin(float a, float b) 
+{
+	return (a <= b) ? a : b;
+}
+
+struct int2
+{
+	int x,y;
+};
+
+struct uint2
+{
+	unsigned int x,y;
+};
+
+//typedef int2 uint2;
+
+typedef unsigned int uint;
+
+struct int4
+{
+	int x,y,z,w;
+};
+
+struct uint4
+{
+	unsigned int x,y,z,w;
+	uint4() {}
+	uint4(uint val) { x = y = z = w = val; }
+	uint4& operator+=(const uint4& other)
+	{
+		x += other.x;
+		y += other.y;
+		z += other.z;
+		w += other.w;
+		return *this;
+	}
+};
+static uint4 operator+(const uint4& a,const uint4& b)
+{
+	uint4 tmp;
+	tmp.x = a.x + b.x;
+	tmp.y = a.y + b.y;
+	tmp.z = a.z + b.z;
+	tmp.w = a.w + b.w;
+	return tmp;
+}
+static uint4 operator-(const uint4& a,const uint4& b)
+{
+	uint4 tmp;
+	tmp.x = a.x - b.x;
+	tmp.y = a.y - b.y;
+	tmp.z = a.z - b.z;
+	tmp.w = a.w - b.w;
+	return tmp;
+}
+
+#define native_sqrt sqrtf
+#define native_sin sinf
+#define native_cos cosf
+#define native_powr powf
+
+#define GUID_ARG ,int __guid_arg
+#define GUID_ARG_VAL ,__guid_arg
+
+
+#define as_int(a) (*((int*)&(a)))
+
+extern "C" int gMiniCLNumOutstandingTasks;
+//	extern "C" void __kernel_func();
+
+
diff --git a/Engine/lib/bullet/src/MiniCL/cl_gl.h b/Engine/lib/bullet/src/MiniCL/cl_gl.h
index 71bdaaa6e..0a69d6ecb 100644
--- a/Engine/lib/bullet/src/MiniCL/cl_gl.h
+++ b/Engine/lib/bullet/src/MiniCL/cl_gl.h
@@ -27,7 +27,7 @@
 #ifdef __APPLE__
 #include <OpenCL/cl_platform.h>
 #else
-#include <CL/cl_platform.h>
+#include <MiniCL/cl_platform.h>
 #endif	
 
 #ifdef __cplusplus
diff --git a/Engine/lib/bullet/src/MiniCL/cl_platform.h b/Engine/lib/bullet/src/MiniCL/cl_platform.h
index 522512996..43219e141 100644
--- a/Engine/lib/bullet/src/MiniCL/cl_platform.h
+++ b/Engine/lib/bullet/src/MiniCL/cl_platform.h
@@ -24,6 +24,16 @@
 #ifndef __CL_PLATFORM_H
 #define __CL_PLATFORM_H
 
+#define CL_PLATFORM_MINI_CL  0x12345
+
+struct MiniCLKernelDesc
+{
+	MiniCLKernelDesc(void* pCode, const char* pName);
+};
+
+#define MINICL_REGISTER(__kernel_func) static MiniCLKernelDesc __kernel_func##Desc((void*)__kernel_func, #__kernel_func);
+
+
 #ifdef __APPLE__
     /* Contains #defines for AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER below */
     #include <AvailabilityMacros.h>
@@ -43,7 +53,7 @@ extern "C" {
 #define CL_EXTENSION_WEAK_LINK                         
 #endif
 
-#ifdef WIN32
+#if defined (_WIN32) && ! defined (__MINGW32__)
 typedef signed   __int8  int8_t;
 typedef unsigned __int8  uint8_t;
 typedef signed   __int16 int16_t;
diff --git a/Engine/lib/bullet/src/btBulletCollisionCommon.h b/Engine/lib/bullet/src/btBulletCollisionCommon.h
index 4b14f6d00..af981b5d3 100644
--- a/Engine/lib/bullet/src/btBulletCollisionCommon.h
+++ b/Engine/lib/bullet/src/btBulletCollisionCommon.h
@@ -33,6 +33,7 @@ subject to the following restrictions:
 #include "BulletCollision/CollisionShapes/btTriangleMesh.h"
 #include "BulletCollision/CollisionShapes/btConvexTriangleMeshShape.h"
 #include "BulletCollision/CollisionShapes/btBvhTriangleMeshShape.h"
+#include "BulletCollision/CollisionShapes/btScaledBvhTriangleMeshShape.h"
 #include "BulletCollision/CollisionShapes/btTriangleMeshShape.h"
 #include "BulletCollision/CollisionShapes/btTriangleIndexVertexArray.h"
 #include "BulletCollision/CollisionShapes/btCompoundShape.h"
@@ -44,7 +45,6 @@ subject to the following restrictions:
 ///Narrowphase Collision Detector
 #include "BulletCollision/CollisionDispatch/btSphereSphereCollisionAlgorithm.h"
 
-//btSphereBoxCollisionAlgorithm is broken, use gjk for now
 //#include "BulletCollision/CollisionDispatch/btSphereBoxCollisionAlgorithm.h"
 #include "BulletCollision/CollisionDispatch/btDefaultCollisionConfiguration.h"
 
@@ -61,6 +61,8 @@ subject to the following restrictions:
 #include "LinearMath/btDefaultMotionState.h"
 #include "LinearMath/btQuickprof.h"
 #include "LinearMath/btIDebugDraw.h"
+#include "LinearMath/btSerializer.h"
+
 
 #endif //BULLET_COLLISION_COMMON_H
 
diff --git a/Engine/lib/bullet/src/btBulletDynamicsCommon.h b/Engine/lib/bullet/src/btBulletDynamicsCommon.h
index db8b37989..dbd175c3f 100644
--- a/Engine/lib/bullet/src/btBulletDynamicsCommon.h
+++ b/Engine/lib/bullet/src/btBulletDynamicsCommon.h
@@ -20,7 +20,6 @@ subject to the following restrictions:
 #include "btBulletCollisionCommon.h"
 
 #include "BulletDynamics/Dynamics/btDiscreteDynamicsWorld.h"
-#include "BulletDynamics/Dynamics/btContinuousDynamicsWorld.h"
 
 #include "BulletDynamics/Dynamics/btSimpleDynamicsWorld.h"
 #include "BulletDynamics/Dynamics/btRigidBody.h"
@@ -33,6 +32,7 @@ subject to the following restrictions:
 #include "BulletDynamics/ConstraintSolver/btGeneric6DofSpringConstraint.h"
 #include "BulletDynamics/ConstraintSolver/btUniversalConstraint.h"
 #include "BulletDynamics/ConstraintSolver/btHinge2Constraint.h"
+#include "BulletDynamics/ConstraintSolver/btGearConstraint.h"
 
 #include "BulletDynamics/ConstraintSolver/btSequentialImpulseConstraintSolver.h"
 
diff --git a/Engine/lib/bullet/src/ibmsdk/Makefile b/Engine/lib/bullet/src/ibmsdk/Makefile
deleted file mode 100644
index 1a0453eef..000000000
--- a/Engine/lib/bullet/src/ibmsdk/Makefile
+++ /dev/null
@@ -1,16 +0,0 @@
-#### Visit Bullet library ibmsdk dirs and build  code
-
-DIRS :=				\
-../BulletCollision/ibmsdk	\
-../BulletDynamics/ibmsdk	\
-../LinearMath/ibmsdk
-
-IBM_CELLSDK_VERSION := $(shell if [ -d /opt/cell ]; then echo "3.0"; fi)
-
-ifeq ("$(IBM_CELLSDK_VERSION)","3.0")
-        CELL_TOP ?= /opt/cell/sdk
-        include $(CELL_TOP)/buildutils/make.footer
-else
-        CELL_TOP ?= /opt/ibm/cell-sdk/prototype
-        include $(CELL_TOP)/make.footer
-endif
diff --git a/Engine/lib/bullet/src/vectormath/neon/boolInVec.h b/Engine/lib/bullet/src/vectormath/neon/boolInVec.h
new file mode 100644
index 000000000..ba16838c0
--- /dev/null
+++ b/Engine/lib/bullet/src/vectormath/neon/boolInVec.h
@@ -0,0 +1,226 @@
+/*
+   Copyright (C) 2009 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+
+*/
+
+#ifndef _BOOLINVEC_H
+#define _BOOLINVEC_H
+
+#include <math.h>
+namespace Vectormath {
+
+class floatInVec;
+
+//--------------------------------------------------------------------------------------------------
+// boolInVec class
+//
+
+class boolInVec
+{
+private:
+    unsigned int mData; // 0 = false, all bits set = true (as written by boolInVec(bool))
+
+public:
+    // Default constructor; does no initialization
+    // (mData is left indeterminate until assigned)
+    inline boolInVec( ) { };
+
+    // Construct from a value converted from float
+    // (true when vec compares unequal to 0.0f)
+    inline boolInVec(floatInVec vec);
+
+    // Explicit cast from bool
+    // (stores -(int)scalar, i.e. all bits set for true and 0 for false)
+    explicit inline boolInVec(bool scalar);
+
+    // Explicit cast to bool
+    // (true when the stored value is non-zero)
+    inline bool getAsBool() const;
+
+#ifndef _VECTORMATH_NO_SCALAR_CAST
+    // Implicit cast to bool; same result as getAsBool().
+    // Compiled out when _VECTORMATH_NO_SCALAR_CAST is defined.
+    inline operator bool() const;
+#endif
+
+    // Boolean negation operator
+    // (returns a new value; *this is not modified)
+    inline const boolInVec operator ! () const;
+
+    // Assignment operator
+    // (copies the stored value of vec and returns *this)
+    inline boolInVec& operator = (boolInVec vec);
+
+    // Boolean and assignment operator
+    // (assigns *this & vec to *this and returns *this)
+    inline boolInVec& operator &= (boolInVec vec);
+
+    // Boolean exclusive or assignment operator
+    // (assigns *this ^ vec to *this and returns *this)
+    inline boolInVec& operator ^= (boolInVec vec);
+
+    // Boolean or assignment operator
+    // (assigns *this | vec to *this and returns *this)
+    inline boolInVec& operator |= (boolInVec vec);
+
+};
+
+// Equal operator
+//
+inline const boolInVec operator == (boolInVec vec0, boolInVec vec1);
+
+// Not equal operator
+//
+inline const boolInVec operator != (boolInVec vec0, boolInVec vec1);
+
+// And operator
+//
+inline const boolInVec operator & (boolInVec vec0, boolInVec vec1);
+
+// Exclusive or operator
+//
+inline const boolInVec operator ^ (boolInVec vec0, boolInVec vec1);
+
+// Or operator
+//
+inline const boolInVec operator | (boolInVec vec0, boolInVec vec1);
+
+// Conditionally select between two values
+//
+inline const boolInVec select(boolInVec vec0, boolInVec vec1, boolInVec select_vec1);
+
+
+} // namespace Vectormath
+
+
+//--------------------------------------------------------------------------------------------------
+// boolInVec implementation
+//
+
+#include "floatInVec.h"
+
+namespace Vectormath {
+
+// Build from a float wrapper: true exactly when vec != 0.0f.
+inline boolInVec::boolInVec(floatInVec vec)
+{
+    mData = (vec != floatInVec(0.0f)).mData;
+}
+
+// Build from a plain bool: true is stored as all bits set, false as zero.
+inline boolInVec::boolInVec(bool scalar)
+{
+    mData = scalar ? ~0u : 0u;
+}
+
+// Read the stored flag back as a plain bool (any non-zero value is true).
+inline bool
+boolInVec::getAsBool() const
+{
+    return mData != 0u;
+}
+
+#ifndef _VECTORMATH_NO_SCALAR_CAST
+// Implicit conversion to bool; same result as getAsBool().
+inline boolInVec::operator bool() const
+{
+    return mData != 0u;
+}
+#endif
+
+// Logical negation; returns a new value and leaves *this untouched.
+inline const boolInVec
+boolInVec::operator ! () const
+{
+    return boolInVec(mData == 0u);
+}
+
+// Copy the stored value of vec into *this.
+inline boolInVec&
+boolInVec::operator = (boolInVec vec)
+{
+    this->mData = vec.mData;
+    return *this;
+}
+
+// Compound AND, expressed through the free operator &.
+inline boolInVec&
+boolInVec::operator &= (boolInVec vec)
+{
+    const boolInVec combined = *this & vec;
+    return (*this = combined);
+}
+
+// Compound XOR, expressed through the free operator ^.
+inline boolInVec&
+boolInVec::operator ^= (boolInVec vec)
+{
+    const boolInVec combined = *this ^ vec;
+    return (*this = combined);
+}
+
+// Compound OR, expressed through the free operator |.
+inline boolInVec&
+boolInVec::operator |= (boolInVec vec)
+{
+    const boolInVec combined = *this | vec;
+    return (*this = combined);
+}
+
+// Equality of the two truth values (not of the raw stored bits).
+inline const boolInVec
+operator == (boolInVec vec0, boolInVec vec1)
+{
+    return boolInVec(vec1.getAsBool() == vec0.getAsBool());
+}
+
+// Inequality of the two truth values.
+inline const boolInVec
+operator != (boolInVec vec0, boolInVec vec1)
+{
+    return boolInVec(vec0.getAsBool() != vec1.getAsBool());
+}
+
+// Logical AND of the two truth values.
+inline const boolInVec
+operator & (boolInVec vec0, boolInVec vec1)
+{
+    return boolInVec(vec0.getAsBool() && vec1.getAsBool());
+}
+
+// Logical OR of the two truth values.
+inline const boolInVec
+operator | (boolInVec vec0, boolInVec vec1)
+{
+    return boolInVec(vec0.getAsBool() || vec1.getAsBool());
+}
+
+// Logical XOR of the two truth values.
+inline const boolInVec
+operator ^ (boolInVec vec0, boolInVec vec1)
+{
+    return boolInVec(vec0.getAsBool() != vec1.getAsBool());
+}
+
+// Return vec1 when select_vec1 is true, otherwise vec0.
+inline const boolInVec
+select(boolInVec vec0, boolInVec vec1, boolInVec select_vec1)
+{
+    return select_vec1.getAsBool() ? vec1 : vec0;
+}
+
+} // namespace Vectormath
+
+#endif // _BOOLINVEC_H
+
diff --git a/Engine/lib/bullet/src/vectormath/neon/floatInVec.h b/Engine/lib/bullet/src/vectormath/neon/floatInVec.h
new file mode 100644
index 000000000..26147d22b
--- /dev/null
+++ b/Engine/lib/bullet/src/vectormath/neon/floatInVec.h
@@ -0,0 +1,344 @@
+/*
+   Copyright (C) 2009 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+
+*/
+#ifndef _FLOATINVEC_H
+#define _FLOATINVEC_H
+
+#include <math.h>
+namespace Vectormath {
+
+class boolInVec;
+
+//--------------------------------------------------------------------------------------------------
+// floatInVec class
+//
+
+// A class representing a scalar float value contained in a vector register
+// This class does not support fastmath
+class floatInVec
+{
+private:
+    float mData; // the wrapped scalar value
+
+public:
+    // Default constructor; does no initialization
+    // (mData is left indeterminate until assigned)
+    inline floatInVec( ) { };
+
+    // Construct from a value converted from bool
+    // (stores float(vec.getAsBool()): 1.0f for true, 0.0f for false)
+    inline floatInVec(boolInVec vec);
+
+    // Explicit cast from float
+    // (stores scalar unchanged)
+    explicit inline floatInVec(float scalar);
+
+    // Explicit cast to float
+    // (returns the stored value)
+    inline float getAsFloat() const;
+
+#ifndef _VECTORMATH_NO_SCALAR_CAST
+    // Implicit cast to float; same result as getAsFloat().
+    // Compiled out when _VECTORMATH_NO_SCALAR_CAST is defined.
+    inline operator float() const;
+#endif
+
+    // Post increment (add 1.0f)
+    // (returns the value held before the increment)
+    inline const floatInVec operator ++ (int);
+
+    // Post decrement (subtract 1.0f)
+    // (returns the value held before the decrement)
+    inline const floatInVec operator -- (int);
+
+    // Pre increment (add 1.0f)
+    // (returns *this after the increment)
+    inline floatInVec& operator ++ ();
+
+    // Pre decrement (subtract 1.0f)
+    // (returns *this after the decrement)
+    inline floatInVec& operator -- ();
+
+    // Negation operator
+    // (returns a new value; *this is not modified)
+    inline const floatInVec operator - () const;
+
+    // Assignment operator
+    // (copies the stored value of vec and returns *this)
+    inline floatInVec& operator = (floatInVec vec);
+
+    // Multiplication assignment operator
+    // (assigns *this * vec to *this and returns *this)
+    inline floatInVec& operator *= (floatInVec vec);
+
+    // Division assignment operator
+    // (assigns *this / vec to *this and returns *this)
+    inline floatInVec& operator /= (floatInVec vec);
+
+    // Addition assignment operator
+    // (assigns *this + vec to *this and returns *this)
+    inline floatInVec& operator += (floatInVec vec);
+
+    // Subtraction assignment operator
+    // (assigns *this - vec to *this and returns *this)
+    inline floatInVec& operator -= (floatInVec vec);
+
+};
+
+// Multiplication operator
+//
+inline const floatInVec operator * (floatInVec vec0, floatInVec vec1);
+
+// Division operator
+//
+inline const floatInVec operator / (floatInVec vec0, floatInVec vec1);
+
+// Addition operator
+//
+inline const floatInVec operator + (floatInVec vec0, floatInVec vec1);
+
+// Subtraction operator
+//
+inline const floatInVec operator - (floatInVec vec0, floatInVec vec1);
+
+// Less than operator
+//
+inline const boolInVec operator < (floatInVec vec0, floatInVec vec1);
+
+// Less than or equal operator
+//
+inline const boolInVec operator <= (floatInVec vec0, floatInVec vec1);
+
+// Greater than operator
+//
+inline const boolInVec operator > (floatInVec vec0, floatInVec vec1);
+
+// Greater than or equal operator
+//
+inline const boolInVec operator >= (floatInVec vec0, floatInVec vec1);
+
+// Equal operator
+//
+inline const boolInVec operator == (floatInVec vec0, floatInVec vec1);
+
+// Not equal operator
+//
+inline const boolInVec operator != (floatInVec vec0, floatInVec vec1);
+
+// Conditionally select between two values
+//
+inline const floatInVec select(floatInVec vec0, floatInVec vec1, boolInVec select_vec1);
+
+
+} // namespace Vectormath
+
+
+//--------------------------------------------------------------------------------------------------
+// floatInVec implementation
+//
+
+#include "boolInVec.h"
+
+namespace Vectormath {
+
+// Build from a boolInVec: 1.0f for true, 0.0f for false.
+inline floatInVec::floatInVec(boolInVec vec)
+{
+    mData = float(vec.getAsBool());
+}
+
+// Wrap a plain float.
+inline floatInVec::floatInVec(float scalar)
+{
+    mData = scalar;
+}
+
+// Return the wrapped float.
+inline float
+floatInVec::getAsFloat() const
+{
+    return mData;
+}
+
+#ifndef _VECTORMATH_NO_SCALAR_CAST
+// Implicit conversion to float; same result as getAsFloat().
+inline floatInVec::operator float() const
+{
+    return getAsFloat();
+}
+#endif
+
+// Post increment: adds 1.0f and returns the value held beforehand.
+inline const floatInVec
+floatInVec::operator ++ (int)
+{
+    float olddata = mData;
+    operator ++();
+    return floatInVec(olddata);
+}
+
+// Post decrement: subtracts 1.0f and returns the value held beforehand.
+inline const floatInVec
+floatInVec::operator -- (int)
+{
+    float olddata = mData;
+    operator --();
+    return floatInVec(olddata);
+}
+
+// Pre increment: adds 1.0f and returns *this.
+inline floatInVec&
+floatInVec::operator ++ ()
+{
+    *this += floatInVec(1.0f);
+    return *this;
+}
+
+// Pre decrement: subtracts 1.0f and returns *this.
+inline floatInVec&
+floatInVec::operator -- ()
+{
+    *this -= floatInVec(1.0f);
+    return *this;
+}
+
+// Unary minus; returns a new value and leaves *this untouched.
+inline const floatInVec
+floatInVec::operator - () const
+{
+    return floatInVec(-mData);
+}
+
+// Copy the stored value of vec into *this.
+inline floatInVec&
+floatInVec::operator = (floatInVec vec)
+{
+    mData = vec.mData;
+    return *this;
+}
+
+// Compound multiply, expressed through the free operator *.
+inline floatInVec&
+floatInVec::operator *= (floatInVec vec)
+{
+    *this = *this * vec;
+    return *this;
+}
+
+// Compound divide, expressed through the free operator /.
+inline floatInVec&
+floatInVec::operator /= (floatInVec vec)
+{
+    *this = *this / vec;
+    return *this;
+}
+
+// Compound add, expressed through the free operator +.
+inline floatInVec&
+floatInVec::operator += (floatInVec vec)
+{
+    *this = *this + vec;
+    return *this;
+}
+
+// Compound subtract, expressed through the free operator -.
+inline floatInVec&
+floatInVec::operator -= (floatInVec vec)
+{
+    *this = *this - vec;
+    return *this;
+}
+
+// Product of the two wrapped floats.
+inline const floatInVec
+operator * (floatInVec vec0, floatInVec vec1)
+{
+    return floatInVec(vec0.getAsFloat() * vec1.getAsFloat());
+}
+
+// Quotient num / den; den is not checked against zero.
+inline const floatInVec
+operator / (floatInVec num, floatInVec den)
+{
+    return floatInVec(num.getAsFloat() / den.getAsFloat());
+}
+
+// Sum of the two wrapped floats.
+inline const floatInVec
+operator + (floatInVec vec0, floatInVec vec1)
+{
+    return floatInVec(vec0.getAsFloat() + vec1.getAsFloat());
+}
+
+// Difference vec0 - vec1 of the two wrapped floats.
+inline const floatInVec
+operator - (floatInVec vec0, floatInVec vec1)
+{
+    return floatInVec(vec0.getAsFloat() - vec1.getAsFloat());
+}
+
+// Less than; false when either operand is NaN.
+inline const boolInVec
+operator < (floatInVec vec0, floatInVec vec1)
+{
+    return boolInVec(vec0.getAsFloat() < vec1.getAsFloat());
+}
+
+// Less than or equal; compared directly (not as !(a > b)) so NaN yields false.
+inline const boolInVec
+operator <= (floatInVec vec0, floatInVec vec1)
+{
+    return boolInVec(vec0.getAsFloat() <= vec1.getAsFloat());
+}
+
+// Greater than; false when either operand is NaN.
+inline const boolInVec
+operator > (floatInVec vec0, floatInVec vec1)
+{
+    return boolInVec(vec0.getAsFloat() > vec1.getAsFloat());
+}
+
+// Greater than or equal; compared directly (not as !(a < b)) so NaN yields false.
+inline const boolInVec
+operator >= (floatInVec vec0, floatInVec vec1)
+{
+    return boolInVec(vec0.getAsFloat() >= vec1.getAsFloat());
+}
+
+// Exact float equality; false when either operand is NaN.
+inline const boolInVec
+operator == (floatInVec vec0, floatInVec vec1)
+{
+    return boolInVec(vec0.getAsFloat() == vec1.getAsFloat());
+}
+
+// Negation of operator ==; therefore true when either operand is NaN.
+inline const boolInVec
+operator != (floatInVec vec0, floatInVec vec1)
+{
+    return !(vec0 == vec1);
+}
+
+// Return vec1 when select_vec1 is true, otherwise vec0.
+inline const floatInVec
+select(floatInVec vec0, floatInVec vec1, boolInVec select_vec1)
+{
+    return (select_vec1.getAsBool() == 0) ? vec0 : vec1;
+}
+
+} // namespace Vectormath
+
+#endif // _FLOATINVEC_H
+
diff --git a/Engine/lib/bullet/src/vectormath/neon/mat_aos.h b/Engine/lib/bullet/src/vectormath/neon/mat_aos.h
new file mode 100644
index 000000000..e61f601c3
--- /dev/null
+++ b/Engine/lib/bullet/src/vectormath/neon/mat_aos.h
@@ -0,0 +1,1631 @@
+/*
+   Copyright (C) 2009 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+
+*/
+
+#ifndef _VECTORMATH_MAT_AOS_CPP_H
+#define _VECTORMATH_MAT_AOS_CPP_H
+
+namespace Vectormath {
+namespace Aos {
+
+//-----------------------------------------------------------------------------
+// Constants
+
+#define _VECTORMATH_PI_OVER_2 1.570796327f
+
+//-----------------------------------------------------------------------------
+// Definitions
+
+inline Matrix3::Matrix3( const Matrix3 & mat )
+{
+    mCol0 = mat.mCol0;
+    mCol1 = mat.mCol1;
+    mCol2 = mat.mCol2;
+}
+
+inline Matrix3::Matrix3( float scalar )
+{
+    mCol0 = Vector3( scalar );
+    mCol1 = Vector3( scalar );
+    mCol2 = Vector3( scalar );
+}
+
+inline Matrix3::Matrix3( const Quat & unitQuat ) // builds the three columns from the quaternion's components
+{ // NOTE(review): unitQuat is neither normalized nor checked here; presumably callers pass a unit quaternion - confirm
+    float qx, qy, qz, qw, qx2, qy2, qz2, qxqx2, qyqy2, qzqz2, qxqy2, qyqz2, qzqw2, qxqz2, qyqw2, qxqw2;
+    qx = unitQuat.getX();
+    qy = unitQuat.getY();
+    qz = unitQuat.getZ();
+    qw = unitQuat.getW();
+    qx2 = ( qx + qx ); // 2*x
+    qy2 = ( qy + qy ); // 2*y
+    qz2 = ( qz + qz ); // 2*z
+    qxqx2 = ( qx * qx2 ); // 2*x*x
+    qxqy2 = ( qx * qy2 ); // 2*x*y
+    qxqz2 = ( qx * qz2 ); // 2*x*z
+    qxqw2 = ( qw * qx2 ); // 2*x*w
+    qyqy2 = ( qy * qy2 ); // 2*y*y
+    qyqz2 = ( qy * qz2 ); // 2*y*z
+    qyqw2 = ( qw * qy2 ); // 2*y*w
+    qzqz2 = ( qz * qz2 ); // 2*z*z
+    qzqw2 = ( qw * qz2 ); // 2*z*w
+    mCol0 = Vector3( ( ( 1.0f - qyqy2 ) - qzqz2 ), ( qxqy2 + qzqw2 ), ( qxqz2 - qyqw2 ) );
+    mCol1 = Vector3( ( qxqy2 - qzqw2 ), ( ( 1.0f - qxqx2 ) - qzqz2 ), ( qyqz2 + qxqw2 ) );
+    mCol2 = Vector3( ( qxqz2 + qyqw2 ), ( qyqz2 - qxqw2 ), ( ( 1.0f - qxqx2 ) - qyqy2 ) );
+}
+
+inline Matrix3::Matrix3( const Vector3 & _col0, const Vector3 & _col1, const Vector3 & _col2 )
+{
+    mCol0 = _col0;
+    mCol1 = _col1;
+    mCol2 = _col2;
+}
+
+inline Matrix3 & Matrix3::setCol0( const Vector3 & _col0 )
+{
+    mCol0 = _col0;
+    return *this;
+}
+
+inline Matrix3 & Matrix3::setCol1( const Vector3 & _col1 )
+{
+    mCol1 = _col1;
+    return *this;
+}
+
+inline Matrix3 & Matrix3::setCol2( const Vector3 & _col2 )
+{
+    mCol2 = _col2;
+    return *this;
+}
+
+inline Matrix3 & Matrix3::setCol( int col, const Vector3 & vec ) // overwrite column number col with vec; returns *this
+{
+    *(&mCol0 + col) = vec; // col is not range-checked; NOTE(review): relies on mCol0..mCol2 being laid out contiguously - confirm against the class definition
+    return *this;
+}
+
+inline Matrix3 & Matrix3::setRow( int row, const Vector3 & vec )
+{
+    mCol0.setElem( row, vec.getElem( 0 ) );
+    mCol1.setElem( row, vec.getElem( 1 ) );
+    mCol2.setElem( row, vec.getElem( 2 ) );
+    return *this;
+}
+
+// Replace the single element at column col, row row with val and return *this.
+// Works on a copy of the column and stores it back through setCol().
+inline Matrix3 & Matrix3::setElem( int col, int row, float val )
+{
+    Vector3 column = getCol( col );
+    column.setElem( row, val );
+    return setCol( col, column );
+}
+
+inline float Matrix3::getElem( int col, int row ) const
+{
+    return this->getCol( col ).getElem( row );
+}
+
+inline const Vector3 Matrix3::getCol0( ) const
+{
+    return mCol0;
+}
+
+inline const Vector3 Matrix3::getCol1( ) const
+{
+    return mCol1;
+}
+
+inline const Vector3 Matrix3::getCol2( ) const
+{
+    return mCol2;
+}
+
+inline const Vector3 Matrix3::getCol( int col ) const // returns a copy of column number col
+{
+    return *(&mCol0 + col); // col is not range-checked; NOTE(review): relies on mCol0..mCol2 being laid out contiguously - confirm against the class definition
+}
+
+inline const Vector3 Matrix3::getRow( int row ) const
+{
+    return Vector3( mCol0.getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ) );
+}
+
+inline Vector3 & Matrix3::operator []( int col ) // mutable reference to column number col
+{
+    return *(&mCol0 + col); // col is not range-checked; NOTE(review): relies on mCol0..mCol2 being laid out contiguously - confirm against the class definition
+}
+
+inline const Vector3 Matrix3::operator []( int col ) const // returns a copy of column number col
+{
+    return *(&mCol0 + col); // col is not range-checked; NOTE(review): relies on mCol0..mCol2 being laid out contiguously - confirm against the class definition
+}
+
+inline Matrix3 & Matrix3::operator =( const Matrix3 & mat )
+{
+    mCol0 = mat.mCol0;
+    mCol1 = mat.mCol1;
+    mCol2 = mat.mCol2;
+    return *this;
+}
+
+inline const Matrix3 transpose( const Matrix3 & mat )
+{
+    return Matrix3(
+        Vector3( mat.getCol0().getX(), mat.getCol1().getX(), mat.getCol2().getX() ),
+        Vector3( mat.getCol0().getY(), mat.getCol1().getY(), mat.getCol2().getY() ),
+        Vector3( mat.getCol0().getZ(), mat.getCol1().getZ(), mat.getCol2().getZ() )
+    );
+}
+
+inline const Matrix3 inverse( const Matrix3 & mat ) // general 3x3 inverse built from cross products of the columns
+{
+    Vector3 tmp0, tmp1, tmp2;
+    float detinv;
+    tmp0 = cross( mat.getCol1(), mat.getCol2() );
+    tmp1 = cross( mat.getCol2(), mat.getCol0() );
+    tmp2 = cross( mat.getCol0(), mat.getCol1() );
+    detinv = ( 1.0f / dot( mat.getCol2(), tmp2 ) ); // reciprocal of the same expression determinant() returns; no guard against a zero determinant
+    return Matrix3( // the rows of the result are tmp0, tmp1 and tmp2, each scaled by detinv
+        Vector3( ( tmp0.getX() * detinv ), ( tmp1.getX() * detinv ), ( tmp2.getX() * detinv ) ),
+        Vector3( ( tmp0.getY() * detinv ), ( tmp1.getY() * detinv ), ( tmp2.getY() * detinv ) ),
+        Vector3( ( tmp0.getZ() * detinv ), ( tmp1.getZ() * detinv ), ( tmp2.getZ() * detinv ) )
+    );
+}
+
+inline float determinant( const Matrix3 & mat ) // determinant of a 3x3 matrix
+{
+    return dot( mat.getCol2(), cross( mat.getCol0(), mat.getCol1() ) ); // scalar triple product of the three columns
+}
+
+inline const Matrix3 Matrix3::operator +( const Matrix3 & mat ) const
+{
+    return Matrix3(
+        ( mCol0 + mat.mCol0 ),
+        ( mCol1 + mat.mCol1 ),
+        ( mCol2 + mat.mCol2 )
+    );
+}
+
+inline const Matrix3 Matrix3::operator -( const Matrix3 & mat ) const
+{
+    return Matrix3(
+        ( mCol0 - mat.mCol0 ),
+        ( mCol1 - mat.mCol1 ),
+        ( mCol2 - mat.mCol2 )
+    );
+}
+
+inline Matrix3 & Matrix3::operator +=( const Matrix3 & mat )
+{
+    *this = *this + mat;
+    return *this;
+}
+
+inline Matrix3 & Matrix3::operator -=( const Matrix3 & mat )
+{
+    *this = *this - mat;
+    return *this;
+}
+
+inline const Matrix3 Matrix3::operator -( ) const
+{
+    return Matrix3(
+        ( -mCol0 ),
+        ( -mCol1 ),
+        ( -mCol2 )
+    );
+}
+
+inline const Matrix3 absPerElem( const Matrix3 & mat )
+{
+    return Matrix3(
+        absPerElem( mat.getCol0() ),
+        absPerElem( mat.getCol1() ),
+        absPerElem( mat.getCol2() )
+    );
+}
+
+inline const Matrix3 Matrix3::operator *( float scalar ) const
+{
+    return Matrix3(
+        ( mCol0 * scalar ),
+        ( mCol1 * scalar ),
+        ( mCol2 * scalar )
+    );
+}
+
+inline Matrix3 & Matrix3::operator *=( float scalar )
+{
+    *this = *this * scalar;
+    return *this;
+}
+
+inline const Matrix3 operator *( float scalar, const Matrix3 & mat )
+{
+    return mat * scalar;
+}
+
+inline const Vector3 Matrix3::operator *( const Vector3 & vec ) const
+{
+    return Vector3(
+        ( ( ( mCol0.getX() * vec.getX() ) + ( mCol1.getX() * vec.getY() ) ) + ( mCol2.getX() * vec.getZ() ) ),
+        ( ( ( mCol0.getY() * vec.getX() ) + ( mCol1.getY() * vec.getY() ) ) + ( mCol2.getY() * vec.getZ() ) ),
+        ( ( ( mCol0.getZ() * vec.getX() ) + ( mCol1.getZ() * vec.getY() ) ) + ( mCol2.getZ() * vec.getZ() ) )
+    );
+}
+
+inline const Matrix3 Matrix3::operator *( const Matrix3 & mat ) const
+{
+    return Matrix3(
+        ( *this * mat.mCol0 ),
+        ( *this * mat.mCol1 ),
+        ( *this * mat.mCol2 )
+    );
+}
+
+inline Matrix3 & Matrix3::operator *=( const Matrix3 & mat )
+{
+    *this = *this * mat;
+    return *this;
+}
+
+inline const Matrix3 mulPerElem( const Matrix3 & mat0, const Matrix3 & mat1 )
+{
+    return Matrix3(
+        mulPerElem( mat0.getCol0(), mat1.getCol0() ),
+        mulPerElem( mat0.getCol1(), mat1.getCol1() ),
+        mulPerElem( mat0.getCol2(), mat1.getCol2() )
+    );
+}
+
+inline const Matrix3 Matrix3::identity( )
+{
+    return Matrix3(
+        Vector3::xAxis( ),
+        Vector3::yAxis( ),
+        Vector3::zAxis( )
+    );
+}
+
+// Rotation by the given angle (radians) that keeps Vector3::xAxis() as its first column.
+inline const Matrix3 Matrix3::rotationX( float radians )
+{
+    const float sinA = sinf( radians );
+    const float cosA = cosf( radians );
+    return Matrix3(
+        Vector3::xAxis( ),
+        Vector3( 0.0f, cosA, sinA ),
+        Vector3( 0.0f, -sinA, cosA )
+    );
+}
+
+// Rotation by the given angle (radians) that keeps Vector3::yAxis() as its second column.
+inline const Matrix3 Matrix3::rotationY( float radians )
+{
+    const float sinA = sinf( radians );
+    const float cosA = cosf( radians );
+    return Matrix3(
+        Vector3( cosA, 0.0f, -sinA ),
+        Vector3::yAxis( ),
+        Vector3( sinA, 0.0f, cosA )
+    );
+}
+
+// Rotation by the given angle (radians) that keeps Vector3::zAxis() as its third column.
+inline const Matrix3 Matrix3::rotationZ( float radians )
+{
+    const float sinA = sinf( radians );
+    const float cosA = cosf( radians );
+    return Matrix3(
+        Vector3( cosA, sinA, 0.0f ),
+        Vector3( -sinA, cosA, 0.0f ),
+        Vector3::zAxis( )
+    );
+}
+
+// Combined rotation from per-axis angles in radians; the columns written out below equal rotationZ(z) * rotationY(y) * rotationX(x).
+inline const Matrix3 Matrix3::rotationZYX( const Vector3 & radiansXYZ )
+{
+    const float sinX = sinf( radiansXYZ.getX() );
+    const float cosX = cosf( radiansXYZ.getX() );
+    const float sinY = sinf( radiansXYZ.getY() );
+    const float cosY = cosf( radiansXYZ.getY() );
+    const float sinZ = sinf( radiansXYZ.getZ() );
+    const float cosZ = cosf( radiansXYZ.getZ() );
+    const float cosZsinY = ( cosZ * sinY );
+    const float sinZsinY = ( sinZ * sinY );
+    return Matrix3(
+        Vector3( ( cosZ * cosY ), ( sinZ * cosY ), -sinY ),
+        Vector3( ( ( cosZsinY * sinX ) - ( sinZ * cosX ) ), ( ( sinZsinY * sinX ) + ( cosZ * cosX ) ), ( cosY * sinX ) ),
+        Vector3( ( ( cosZsinY * cosX ) + ( sinZ * sinX ) ), ( ( sinZsinY * cosX ) - ( cosZ * sinX ) ), ( cosY * cosX ) )
+    );
+}
+
+// Rotation by radians about the axis unitVec; unitVec is used as given (it is not normalized here).
+inline const Matrix3 Matrix3::rotation( float radians, const Vector3 & unitVec )
+{
+    const float sinA = sinf( radians );
+    const float cosA = cosf( radians );
+    const float ax = unitVec.getX();
+    const float ay = unitVec.getY();
+    const float az = unitVec.getZ();
+    const float axay = ( ax * ay );
+    const float ayaz = ( ay * az );
+    const float azax = ( az * ax );
+    const float versine = ( 1.0f - cosA );
+    return Matrix3(
+        Vector3( ( ( ( ax * ax ) * versine ) + cosA ), ( ( axay * versine ) + ( az * sinA ) ), ( ( azax * versine ) - ( ay * sinA ) ) ),
+        Vector3( ( ( axay * versine ) - ( az * sinA ) ), ( ( ( ay * ay ) * versine ) + cosA ), ( ( ayaz * versine ) + ( ax * sinA ) ) ),
+        Vector3( ( ( azax * versine ) + ( ay * sinA ) ), ( ( ayaz * versine ) - ( ax * sinA ) ), ( ( ( az * az ) * versine ) + cosA ) )
+    );
+}
+
+inline const Matrix3 Matrix3::rotation( const Quat & unitQuat )
+{
+    return Matrix3( unitQuat );
+}
+
+inline const Matrix3 Matrix3::scale( const Vector3 & scaleVec )
+{
+    return Matrix3(
+        Vector3( scaleVec.getX(), 0.0f, 0.0f ),
+        Vector3( 0.0f, scaleVec.getY(), 0.0f ),
+        Vector3( 0.0f, 0.0f, scaleVec.getZ() )
+    );
+}
+
+inline const Matrix3 appendScale( const Matrix3 & mat, const Vector3 & scaleVec )
+{
+    return Matrix3(
+        ( mat.getCol0() * scaleVec.getX( ) ),
+        ( mat.getCol1() * scaleVec.getY( ) ),
+        ( mat.getCol2() * scaleVec.getZ( ) )
+    );
+}
+
+inline const Matrix3 prependScale( const Vector3 & scaleVec, const Matrix3 & mat )
+{
+    return Matrix3(
+        mulPerElem( mat.getCol0(), scaleVec ),
+        mulPerElem( mat.getCol1(), scaleVec ),
+        mulPerElem( mat.getCol2(), scaleVec )
+    );
+}
+
+inline const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, bool select1 )
+{
+    return Matrix3(
+        select( mat0.getCol0(), mat1.getCol0(), select1 ),
+        select( mat0.getCol1(), mat1.getCol1(), select1 ),
+        select( mat0.getCol2(), mat1.getCol2(), select1 )
+    );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+inline void print( const Matrix3 & mat )
+{
+    print( mat.getRow( 0 ) );
+    print( mat.getRow( 1 ) );
+    print( mat.getRow( 2 ) );
+}
+
+inline void print( const Matrix3 & mat, const char * name )
+{
+    printf("%s:\n", name);
+    print( mat );
+}
+
+#endif
+
+inline Matrix4::Matrix4( const Matrix4 & mat )
+{
+    mCol0 = mat.mCol0;
+    mCol1 = mat.mCol1;
+    mCol2 = mat.mCol2;
+    mCol3 = mat.mCol3;
+}
+
+inline Matrix4::Matrix4( float scalar )
+{
+    mCol0 = Vector4( scalar );
+    mCol1 = Vector4( scalar );
+    mCol2 = Vector4( scalar );
+    mCol3 = Vector4( scalar );
+}
+
+inline Matrix4::Matrix4( const Transform3 & mat ) // extend each of the transform's four columns with a fourth component
+{
+    mCol0 = Vector4( mat.getCol0(), 0.0f ); // 0.0f is appended to the first three columns
+    mCol1 = Vector4( mat.getCol1(), 0.0f );
+    mCol2 = Vector4( mat.getCol2(), 0.0f );
+    mCol3 = Vector4( mat.getCol3(), 1.0f ); // 1.0f is appended to the fourth column
+}
+
+inline Matrix4::Matrix4( const Vector4 & _col0, const Vector4 & _col1, const Vector4 & _col2, const Vector4 & _col3 )
+{
+    mCol0 = _col0;
+    mCol1 = _col1;
+    mCol2 = _col2;
+    mCol3 = _col3;
+}
+
+inline Matrix4::Matrix4( const Matrix3 & mat, const Vector3 & translateVec )
+{
+    mCol0 = Vector4( mat.getCol0(), 0.0f );
+    mCol1 = Vector4( mat.getCol1(), 0.0f );
+    mCol2 = Vector4( mat.getCol2(), 0.0f );
+    mCol3 = Vector4( translateVec, 1.0f );
+}
+
+// Build from a rotation quaternion plus a translation: rotation columns get 0.0f appended, the translation gets 1.0f.
+inline Matrix4::Matrix4( const Quat & unitQuat, const Vector3 & translateVec )
+{
+    const Matrix3 rot3( unitQuat );
+    mCol0 = Vector4( rot3.getCol0(), 0.0f );
+    mCol1 = Vector4( rot3.getCol1(), 0.0f );
+    mCol2 = Vector4( rot3.getCol2(), 0.0f );
+    mCol3 = Vector4( translateVec, 1.0f );
+}
+
+inline Matrix4 & Matrix4::setCol0( const Vector4 & _col0 )
+{
+    mCol0 = _col0;
+    return *this;
+}
+
+inline Matrix4 & Matrix4::setCol1( const Vector4 & _col1 )
+{
+    mCol1 = _col1;
+    return *this;
+}
+
+inline Matrix4 & Matrix4::setCol2( const Vector4 & _col2 )
+{
+    mCol2 = _col2;
+    return *this;
+}
+
+inline Matrix4 & Matrix4::setCol3( const Vector4 & _col3 )
+{
+    mCol3 = _col3;
+    return *this;
+}
+
+inline Matrix4 & Matrix4::setCol( int col, const Vector4 & vec ) // overwrite column number col with vec; returns *this
+{
+    *(&mCol0 + col) = vec; // col is not range-checked; NOTE(review): relies on mCol0..mCol3 being laid out contiguously - confirm against the class definition
+    return *this;
+}
+
+inline Matrix4 & Matrix4::setRow( int row, const Vector4 & vec )
+{
+    mCol0.setElem( row, vec.getElem( 0 ) );
+    mCol1.setElem( row, vec.getElem( 1 ) );
+    mCol2.setElem( row, vec.getElem( 2 ) );
+    mCol3.setElem( row, vec.getElem( 3 ) );
+    return *this;
+}
+
+// Replace the single element at column col, row row with val and return *this.
+// Works on a copy of the column and stores it back through setCol().
+inline Matrix4 & Matrix4::setElem( int col, int row, float val )
+{
+    Vector4 column = getCol( col );
+    column.setElem( row, val );
+    return setCol( col, column );
+}
+
+inline float Matrix4::getElem( int col, int row ) const
+{
+    return this->getCol( col ).getElem( row );
+}
+
+inline const Vector4 Matrix4::getCol0( ) const
+{
+    return mCol0;
+}
+
+inline const Vector4 Matrix4::getCol1( ) const
+{
+    return mCol1;
+}
+
+inline const Vector4 Matrix4::getCol2( ) const
+{
+    return mCol2;
+}
+
+inline const Vector4 Matrix4::getCol3( ) const
+{
+    return mCol3;
+}
+
+inline const Vector4 Matrix4::getCol( int col ) const // returns a copy of column number col
+{
+    return *(&mCol0 + col); // col is not range-checked; NOTE(review): relies on mCol0..mCol3 being laid out contiguously - confirm against the class definition
+}
+
+inline const Vector4 Matrix4::getRow( int row ) const
+{
+    return Vector4( mCol0.getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ), mCol3.getElem( row ) );
+}
+
+inline Vector4 & Matrix4::operator []( int col ) // mutable reference to column number col
+{
+    return *(&mCol0 + col); // col is not range-checked; NOTE(review): relies on mCol0..mCol3 being laid out contiguously - confirm against the class definition
+}
+
+inline const Vector4 Matrix4::operator []( int col ) const // returns a copy of column number col
+{
+    return *(&mCol0 + col); // col is not range-checked; NOTE(review): relies on mCol0..mCol3 being laid out contiguously - confirm against the class definition
+}
+
+inline Matrix4 & Matrix4::operator =( const Matrix4 & mat )
+{
+    mCol0 = mat.mCol0;
+    mCol1 = mat.mCol1;
+    mCol2 = mat.mCol2;
+    mCol3 = mat.mCol3;
+    return *this;
+}
+
+inline const Matrix4 transpose( const Matrix4 & mat )
+{
+    return Matrix4(
+        Vector4( mat.getCol0().getX(), mat.getCol1().getX(), mat.getCol2().getX(), mat.getCol3().getX() ),
+        Vector4( mat.getCol0().getY(), mat.getCol1().getY(), mat.getCol2().getY(), mat.getCol3().getY() ),
+        Vector4( mat.getCol0().getZ(), mat.getCol1().getZ(), mat.getCol2().getZ(), mat.getCol3().getZ() ),
+        Vector4( mat.getCol0().getW(), mat.getCol1().getW(), mat.getCol2().getW(), mat.getCol3().getW() )
+    );
+}
+
+inline const Matrix4 inverse( const Matrix4 & mat ) // general 4x4 inverse computed element by element
+{
+    Vector4 res0, res1, res2, res3;
+    float mA, mB, mC, mD, mE, mF, mG, mH, mI, mJ, mK, mL, mM, mN, mO, mP, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, detInv;
+    mA = mat.getCol0().getX(); // mA..mD: column 0, x..w
+    mB = mat.getCol0().getY();
+    mC = mat.getCol0().getZ();
+    mD = mat.getCol0().getW();
+    mE = mat.getCol1().getX(); // mE..mH: column 1, x..w
+    mF = mat.getCol1().getY();
+    mG = mat.getCol1().getZ();
+    mH = mat.getCol1().getW();
+    mI = mat.getCol2().getX(); // mI..mL: column 2, x..w
+    mJ = mat.getCol2().getY();
+    mK = mat.getCol2().getZ();
+    mL = mat.getCol2().getW();
+    mM = mat.getCol3().getX(); // mM..mP: column 3, x..w
+    mN = mat.getCol3().getY();
+    mO = mat.getCol3().getZ();
+    mP = mat.getCol3().getW();
+    tmp0 = ( ( mK * mD ) - ( mC * mL ) ); // first set of 2x2 sub-determinants
+    tmp1 = ( ( mO * mH ) - ( mG * mP ) );
+    tmp2 = ( ( mB * mK ) - ( mJ * mC ) );
+    tmp3 = ( ( mF * mO ) - ( mN * mG ) );
+    tmp4 = ( ( mJ * mD ) - ( mB * mL ) );
+    tmp5 = ( ( mN * mH ) - ( mF * mP ) );
+    res0.setX( ( ( ( mJ * tmp1 ) - ( mL * tmp3 ) ) - ( mK * tmp5 ) ) );
+    res0.setY( ( ( ( mN * tmp0 ) - ( mP * tmp2 ) ) - ( mO * tmp4 ) ) );
+    res0.setZ( ( ( ( mD * tmp3 ) + ( mC * tmp5 ) ) - ( mB * tmp1 ) ) );
+    res0.setW( ( ( ( mH * tmp2 ) + ( mG * tmp4 ) ) - ( mF * tmp0 ) ) );
+    detInv = ( 1.0f / ( ( ( ( mA * res0.getX() ) + ( mE * res0.getY() ) ) + ( mI * res0.getZ() ) ) + ( mM * res0.getW() ) ) ); // reciprocal of the determinant; no guard against a zero determinant
+    res1.setX( ( mI * tmp1 ) ); // partial terms; res1..res3 are completed below once tmp0..tmp5 are recomputed
+    res1.setY( ( mM * tmp0 ) );
+    res1.setZ( ( mA * tmp1 ) );
+    res1.setW( ( mE * tmp0 ) );
+    res3.setX( ( mI * tmp3 ) );
+    res3.setY( ( mM * tmp2 ) );
+    res3.setZ( ( mA * tmp3 ) );
+    res3.setW( ( mE * tmp2 ) );
+    res2.setX( ( mI * tmp5 ) );
+    res2.setY( ( mM * tmp4 ) );
+    res2.setZ( ( mA * tmp5 ) );
+    res2.setW( ( mE * tmp4 ) );
+    tmp0 = ( ( mI * mB ) - ( mA * mJ ) ); // tmp0..tmp5 are reused for a second set of 2x2 sub-determinants
+    tmp1 = ( ( mM * mF ) - ( mE * mN ) );
+    tmp2 = ( ( mI * mD ) - ( mA * mL ) );
+    tmp3 = ( ( mM * mH ) - ( mE * mP ) );
+    tmp4 = ( ( mI * mC ) - ( mA * mK ) );
+    tmp5 = ( ( mM * mG ) - ( mE * mO ) );
+    res2.setX( ( ( ( mL * tmp1 ) - ( mJ * tmp3 ) ) + res2.getX() ) );
+    res2.setY( ( ( ( mP * tmp0 ) - ( mN * tmp2 ) ) + res2.getY() ) );
+    res2.setZ( ( ( ( mB * tmp3 ) - ( mD * tmp1 ) ) - res2.getZ() ) );
+    res2.setW( ( ( ( mF * tmp2 ) - ( mH * tmp0 ) ) - res2.getW() ) );
+    res3.setX( ( ( ( mJ * tmp5 ) - ( mK * tmp1 ) ) + res3.getX() ) );
+    res3.setY( ( ( ( mN * tmp4 ) - ( mO * tmp0 ) ) + res3.getY() ) );
+    res3.setZ( ( ( ( mC * tmp1 ) - ( mB * tmp5 ) ) - res3.getZ() ) );
+    res3.setW( ( ( ( mG * tmp0 ) - ( mF * tmp4 ) ) - res3.getW() ) );
+    res1.setX( ( ( ( mK * tmp3 ) - ( mL * tmp5 ) ) - res1.getX() ) );
+    res1.setY( ( ( ( mO * tmp2 ) - ( mP * tmp4 ) ) - res1.getY() ) );
+    res1.setZ( ( ( ( mD * tmp5 ) - ( mC * tmp3 ) ) + res1.getZ() ) );
+    res1.setW( ( ( ( mH * tmp4 ) - ( mG * tmp2 ) ) + res1.getW() ) );
+    return Matrix4( // every result column is scaled by the reciprocal determinant
+        ( res0 * detInv ),
+        ( res1 * detInv ),
+        ( res2 * detInv ),
+        ( res3 * detInv )
+    );
+}
+
+// Inverts mat through the Transform3 inverse, using only the xyz part of each column.
+inline const Matrix4 affineInverse( const Matrix4 & mat )
+{
+    const Transform3 affinePart(
+        mat.getCol0().getXYZ( ), mat.getCol1().getXYZ( ),
+        mat.getCol2().getXYZ( ), mat.getCol3().getXYZ( )
+    );
+    return Matrix4( inverse( affinePart ) );
+}
+
+// Same as affineInverse, but routed through the Transform3 orthoInverse overload.
+inline const Matrix4 orthoInverse( const Matrix4 & mat )
+{
+    const Transform3 affinePart(
+        mat.getCol0().getXYZ( ), mat.getCol1().getXYZ( ),
+        mat.getCol2().getXYZ( ), mat.getCol3().getXYZ( )
+    );
+    return Matrix4( orthoInverse( affinePart ) );
+}
+
+// Determinant of mat: the x entries of the four columns weighted by the terms dx..dw.
+inline float determinant( const Matrix4 & mat )
+{
+    const float mA = mat.getCol0().getX();
+    const float mB = mat.getCol0().getY();
+    const float mC = mat.getCol0().getZ();
+    const float mD = mat.getCol0().getW();
+    const float mE = mat.getCol1().getX();
+    const float mF = mat.getCol1().getY();
+    const float mG = mat.getCol1().getZ();
+    const float mH = mat.getCol1().getW();
+    const float mI = mat.getCol2().getX();
+    const float mJ = mat.getCol2().getY();
+    const float mK = mat.getCol2().getZ();
+    const float mL = mat.getCol2().getW();
+    const float mM = mat.getCol3().getX();
+    const float mN = mat.getCol3().getY();
+    const float mO = mat.getCol3().getZ();
+    const float mP = mat.getCol3().getW();
+    const float tmp0 = mK * mD - mC * mL;
+    const float tmp1 = mO * mH - mG * mP;
+    const float tmp2 = mB * mK - mJ * mC;
+    const float tmp3 = mF * mO - mN * mG;
+    const float tmp4 = mJ * mD - mB * mL;
+    const float tmp5 = mN * mH - mF * mP;
+    const float dx = mJ * tmp1 - mL * tmp3 - mK * tmp5;
+    const float dy = mN * tmp0 - mP * tmp2 - mO * tmp4;
+    const float dz = mD * tmp3 + mC * tmp5 - mB * tmp1;
+    const float dw = mH * tmp2 + mG * tmp4 - mF * tmp0;
+    return mA * dx + mE * dy + mI * dz + mM * dw;
+}
+
+// Matrix sum: adds the matching columns of the two matrices.
+inline const Matrix4 Matrix4::operator +( const Matrix4 & mat ) const
+{
+    const Vector4 sum0 = mCol0 + mat.mCol0;
+    const Vector4 sum1 = mCol1 + mat.mCol1;
+    const Vector4 sum2 = mCol2 + mat.mCol2;
+    const Vector4 sum3 = mCol3 + mat.mCol3;
+    return Matrix4( sum0, sum1, sum2, sum3 );
+}
+
+// Matrix difference: subtracts the columns of mat from the matching columns of this matrix.
+inline const Matrix4 Matrix4::operator -( const Matrix4 & mat ) const
+{
+    const Vector4 diff0 = mCol0 - mat.mCol0;
+    const Vector4 diff1 = mCol1 - mat.mCol1;
+    const Vector4 diff2 = mCol2 - mat.mCol2;
+    const Vector4 diff3 = mCol3 - mat.mCol3;
+    return Matrix4( diff0, diff1, diff2, diff3 );
+}
+
+// In-place addition: stores ( *this + mat ) back into this matrix and returns it.
+inline Matrix4 & Matrix4::operator +=( const Matrix4 & mat )
+{
+    return ( *this = *this + mat );
+}
+
+// In-place subtraction: stores ( *this - mat ) back into this matrix and returns it.
+inline Matrix4 & Matrix4::operator -=( const Matrix4 & mat )
+{
+    return ( *this = *this - mat );
+}
+
+// Negation: returns a matrix whose columns are the negated columns of this one.
+inline const Matrix4 Matrix4::operator -( ) const
+{
+    const Vector4 neg0 = -mCol0;
+    const Vector4 neg1 = -mCol1;
+    const Vector4 neg2 = -mCol2;
+    const Vector4 neg3 = -mCol3;
+    return Matrix4( neg0, neg1, neg2, neg3 );
+}
+
+// Applies the Vector4 absPerElem to each of the four columns of mat.
+inline const Matrix4 absPerElem( const Matrix4 & mat )
+{
+    const Vector4 abs0 = absPerElem( mat.getCol0() );
+    const Vector4 abs1 = absPerElem( mat.getCol1() );
+    const Vector4 abs2 = absPerElem( mat.getCol2() );
+    const Vector4 abs3 = absPerElem( mat.getCol3() );
+    return Matrix4( abs0, abs1, abs2, abs3 );
+}
+
+// Uniform scaling: multiplies every column by the same scalar.
+inline const Matrix4 Matrix4::operator *( float scalar ) const
+{
+    const Vector4 scaled0 = mCol0 * scalar;
+    const Vector4 scaled1 = mCol1 * scalar;
+    const Vector4 scaled2 = mCol2 * scalar;
+    const Vector4 scaled3 = mCol3 * scalar;
+    return Matrix4( scaled0, scaled1, scaled2, scaled3 );
+}
+
+// In-place scalar multiply: stores ( *this * scalar ) back into this matrix and returns it.
+inline Matrix4 & Matrix4::operator *=( float scalar )
+{
+    return ( *this = *this * scalar );
+}
+
+inline const Matrix4 operator *( float scalar, const Matrix4 & mat )
+{
+    return mat.operator *( scalar );  // scalar-on-the-left form forwards to the member operator
+}
+
+// Matrix * Vector4: each output component is one matrix row dotted with vec.
+inline const Vector4 Matrix4::operator *( const Vector4 & vec ) const
+{
+    const float outX = mCol0.getX() * vec.getX() + mCol1.getX() * vec.getY() + mCol2.getX() * vec.getZ() + mCol3.getX() * vec.getW();
+    const float outY = mCol0.getY() * vec.getX() + mCol1.getY() * vec.getY() + mCol2.getY() * vec.getZ() + mCol3.getY() * vec.getW();
+    const float outZ = mCol0.getZ() * vec.getX() + mCol1.getZ() * vec.getY() + mCol2.getZ() * vec.getZ() + mCol3.getZ() * vec.getW();
+    const float outW = mCol0.getW() * vec.getX() + mCol1.getW() * vec.getY() + mCol2.getW() * vec.getZ() + mCol3.getW() * vec.getW();
+    return Vector4( outX, outY, outZ, outW );
+}
+
+// Matrix * Vector3: like the Vector4 product with an implied w of 0, so column 3 is never read.
+inline const Vector4 Matrix4::operator *( const Vector3 & vec ) const
+{
+    const float outX = mCol0.getX() * vec.getX() + mCol1.getX() * vec.getY() + mCol2.getX() * vec.getZ();
+    const float outY = mCol0.getY() * vec.getX() + mCol1.getY() * vec.getY() + mCol2.getY() * vec.getZ();
+    const float outZ = mCol0.getZ() * vec.getX() + mCol1.getZ() * vec.getY() + mCol2.getZ() * vec.getZ();
+    const float outW = mCol0.getW() * vec.getX() + mCol1.getW() * vec.getY() + mCol2.getW() * vec.getZ();
+    return Vector4( outX, outY, outZ, outW );
+}
+
+// Matrix * Point3: like the Vector4 product with an implied w of 1, so column 3 is added unscaled.
+inline const Vector4 Matrix4::operator *( const Point3 & pnt ) const
+{
+    const float outX = mCol0.getX() * pnt.getX() + mCol1.getX() * pnt.getY() + mCol2.getX() * pnt.getZ() + mCol3.getX();
+    const float outY = mCol0.getY() * pnt.getX() + mCol1.getY() * pnt.getY() + mCol2.getY() * pnt.getZ() + mCol3.getY();
+    const float outZ = mCol0.getZ() * pnt.getX() + mCol1.getZ() * pnt.getY() + mCol2.getZ() * pnt.getZ() + mCol3.getZ();
+    const float outW = mCol0.getW() * pnt.getX() + mCol1.getW() * pnt.getY() + mCol2.getW() * pnt.getZ() + mCol3.getW();
+    return Vector4( outX, outY, outZ, outW );
+}
+
+// Matrix product: column i of the result is this matrix applied to column i of mat.
+inline const Matrix4 Matrix4::operator *( const Matrix4 & mat ) const
+{
+    const Vector4 prod0 = *this * mat.mCol0;
+    const Vector4 prod1 = *this * mat.mCol1;
+    const Vector4 prod2 = *this * mat.mCol2;
+    const Vector4 prod3 = *this * mat.mCol3;
+    return Matrix4( prod0, prod1, prod2, prod3 );
+}
+
+// In-place matrix product: stores ( *this * mat ) back into this matrix and returns it.
+inline Matrix4 & Matrix4::operator *=( const Matrix4 & mat )
+{
+    return ( *this = *this * mat );
+}
+
+// Matrix4 * Transform3: columns 0-2 use the Vector3 product, column 3 the Point3 product.
+inline const Matrix4 Matrix4::operator *( const Transform3 & tfrm ) const
+{
+    const Vector4 prod0 = *this * tfrm.getCol0();
+    const Vector4 prod1 = *this * tfrm.getCol1();
+    const Vector4 prod2 = *this * tfrm.getCol2();
+    const Vector4 prod3 = *this * Point3( tfrm.getCol3() );
+    return Matrix4( prod0, prod1, prod2, prod3 );
+}
+
+// In-place product with a Transform3: stores ( *this * tfrm ) back into this matrix.
+inline Matrix4 & Matrix4::operator *=( const Transform3 & tfrm )
+{
+    return ( *this = *this * tfrm );
+}
+
+// Applies the Vector4 mulPerElem to each pair of matching columns of mat0 and mat1.
+inline const Matrix4 mulPerElem( const Matrix4 & mat0, const Matrix4 & mat1 )
+{
+    const Vector4 prod0 = mulPerElem( mat0.getCol0(), mat1.getCol0() );
+    const Vector4 prod1 = mulPerElem( mat0.getCol1(), mat1.getCol1() );
+    const Vector4 prod2 = mulPerElem( mat0.getCol2(), mat1.getCol2() );
+    const Vector4 prod3 = mulPerElem( mat0.getCol3(), mat1.getCol3() );
+    return Matrix4( prod0, prod1, prod2, prod3 );
+}
+
+// Identity matrix assembled from the four Vector4 axis helpers.
+inline const Matrix4 Matrix4::identity( )
+{
+    const Vector4 axisX = Vector4::xAxis( );
+    const Vector4 axisY = Vector4::yAxis( );
+    const Vector4 axisZ = Vector4::zAxis( );
+    const Vector4 axisW = Vector4::wAxis( );
+    return Matrix4( axisX, axisY, axisZ, axisW );
+}
+
+inline Matrix4 & Matrix4::setUpper3x3( const Matrix3 & mat3 )
+{
+    this->mCol0.setXYZ( mat3.getCol0() );  // columns 0-2 receive mat3's columns through setXYZ;
+    this->mCol1.setXYZ( mat3.getCol1() );  // mCol3 is not written by this setter
+    this->mCol2.setXYZ( mat3.getCol2() );
+    return *this;
+}
+
+// Returns a Matrix3 built from the xyz parts of columns 0-2.
+inline const Matrix3 Matrix4::getUpper3x3( ) const
+{
+    const Vector3 upper0 = mCol0.getXYZ( );
+    const Vector3 upper1 = mCol1.getXYZ( );
+    const Vector3 upper2 = mCol2.getXYZ( );
+    return Matrix3( upper0, upper1, upper2 );
+}
+
+inline Matrix4 & Matrix4::setTranslation( const Vector3 & translateVec )
+{
+    this->mCol3.setXYZ( translateVec );  // only column 3 is touched, and only through setXYZ
+    return *this;                        // returned for call chaining
+}
+
+inline const Vector3 Matrix4::getTranslation( ) const
+{
+    return this->mCol3.getXYZ( );  // the translation is the xyz part of column 3
+}
+
+// Rotation about the x axis by `radians`.
+// Columns 0 and 3 are the plain x and w axes; columns 1 and 2 carry the
+// sine/cosine terms ( 0, c, s, 0 ) and ( 0, -s, c, 0 ).
+inline const Matrix4 Matrix4::rotationX( float radians )
+{
+    const float sinA = sinf( radians );
+    const float cosA = cosf( radians );
+    const Vector4 col1( 0.0f, cosA, sinA, 0.0f );
+    const Vector4 col2( 0.0f, -sinA, cosA, 0.0f );
+
+    return Matrix4( Vector4::xAxis( ), col1, col2, Vector4::wAxis( ) );
+}
+
+// Rotation about the y axis by `radians`.
+// Columns 1 and 3 are the plain y and w axes; columns 0 and 2 carry the
+// sine/cosine terms ( c, 0, -s, 0 ) and ( s, 0, c, 0 ).
+inline const Matrix4 Matrix4::rotationY( float radians )
+{
+    const float sinA = sinf( radians );
+    const float cosA = cosf( radians );
+    const Vector4 col0( cosA, 0.0f, -sinA, 0.0f );
+    const Vector4 col2( sinA, 0.0f, cosA, 0.0f );
+
+    return Matrix4( col0, Vector4::yAxis( ), col2, Vector4::wAxis( ) );
+}
+
+// Rotation about the z axis by `radians`.
+// Columns 2 and 3 are the plain z and w axes; columns 0 and 1 carry the
+// sine/cosine terms ( c, s, 0, 0 ) and ( -s, c, 0, 0 ).
+inline const Matrix4 Matrix4::rotationZ( float radians )
+{
+    const float sinA = sinf( radians );
+    const float cosA = cosf( radians );
+    const Vector4 col0( cosA, sinA, 0.0f, 0.0f );
+    const Vector4 col1( -sinA, cosA, 0.0f, 0.0f );
+
+    return Matrix4( col0, col1, Vector4::zAxis( ), Vector4::wAxis( ) );
+}
+
+// Rotation built from the three angles stored in radiansXYZ (x, y and z components).
+// The columns written below are those of rotationZ * rotationY * rotationX
+// (up to floating-point rounding); column 3 is wAxis.
+inline const Matrix4 Matrix4::rotationZYX( const Vector3 & radiansXYZ )
+{
+    const float sX = sinf( radiansXYZ.getX() );
+    const float cX = cosf( radiansXYZ.getX() );
+    const float sY = sinf( radiansXYZ.getY() );
+    const float cY = cosf( radiansXYZ.getY() );
+    const float sZ = sinf( radiansXYZ.getZ() );
+    const float cZ = cosf( radiansXYZ.getZ() );
+    const float cZsY = cZ * sY;
+    const float sZsY = sZ * sY;
+    const Vector4 col0( cZ * cY, sZ * cY, -sY, 0.0f );
+    const Vector4 col1( cZsY * sX - sZ * cX, sZsY * sX + cZ * cX, cY * sX, 0.0f );
+    const Vector4 col2( cZsY * cX + sZ * sX, sZsY * cX - cZ * sX, cY * cX, 0.0f );
+    return Matrix4( col0, col1, col2, Vector4::wAxis( ) );
+}
+
+// Rotation of `radians` about the axis unitVec (expected to be unit length; it is
+// neither normalised nor checked here). Columns 0-2 hold the axis-angle terms,
+// column 3 is wAxis.
+inline const Matrix4 Matrix4::rotation( float radians, const Vector3 & unitVec )
+{
+    const float s = sinf( radians );
+    const float c = cosf( radians );
+    const float x = unitVec.getX();
+    const float y = unitVec.getY();
+    const float z = unitVec.getZ();
+    const float xy = x * y;
+    const float yz = y * z;
+    const float zx = z * x;
+    const float oneMinusC = 1.0f - c;
+    const Vector4 col0( x * x * oneMinusC + c, xy * oneMinusC + z * s, zx * oneMinusC - y * s, 0.0f );
+    const Vector4 col1( xy * oneMinusC - z * s, y * y * oneMinusC + c, yz * oneMinusC + x * s, 0.0f );
+    const Vector4 col2( zx * oneMinusC + y * s, yz * oneMinusC - x * s, z * z * oneMinusC + c, 0.0f );
+    return Matrix4( col0, col1, col2, Vector4::wAxis( ) );
+}
+
+inline const Matrix4 Matrix4::rotation( const Quat & unitQuat )
+{
+    return Matrix4( Transform3::rotation( unitQuat ) );  // build the Transform3 rotation, then convert it to a Matrix4
+}
+
+// Scale matrix: scaleVec's x, y, z on the diagonal of columns 0-2, wAxis as column 3.
+inline const Matrix4 Matrix4::scale( const Vector3 & scaleVec )
+{
+    const Vector4 col0( scaleVec.getX(), 0.0f, 0.0f, 0.0f );
+    const Vector4 col1( 0.0f, scaleVec.getY(), 0.0f, 0.0f );
+    const Vector4 col2( 0.0f, 0.0f, scaleVec.getZ(), 0.0f );
+    const Vector4 col3 = Vector4::wAxis( );
+    return Matrix4( col0, col1, col2, col3 );
+}
+
+// Multiplies columns 0, 1, 2 of mat by scaleVec's x, y, z; column 3 is copied unchanged.
+// Mathematically this is mat * Matrix4::scale( scaleVec ) without the full product.
+inline const Matrix4 appendScale( const Matrix4 & mat, const Vector3 & scaleVec )
+{
+    const Vector4 scaled0 = mat.getCol0() * scaleVec.getX( );
+    const Vector4 scaled1 = mat.getCol1() * scaleVec.getY( );
+    const Vector4 scaled2 = mat.getCol2() * scaleVec.getZ( );
+    return Matrix4( scaled0, scaled1, scaled2, mat.getCol3() );
+}
+
+// Multiplies every column of mat element-wise (mulPerElem) by
+// Vector4( scaleVec, 1.0f ), including the last column.
+inline const Matrix4 prependScale( const Vector3 & scaleVec, const Matrix4 & mat )
+{
+    const Vector4 rowScale( scaleVec, 1.0f );
+    const Vector4 scaled0 = mulPerElem( mat.getCol0(), rowScale );
+    const Vector4 scaled1 = mulPerElem( mat.getCol1(), rowScale );
+    const Vector4 scaled2 = mulPerElem( mat.getCol2(), rowScale );
+    const Vector4 scaled3 = mulPerElem( mat.getCol3(), rowScale );
+    return Matrix4( scaled0, scaled1, scaled2, scaled3 );
+}
+
+// Translation matrix: x, y, z axes as columns 0-2 and Vector4( translateVec, 1.0f ) as column 3.
+inline const Matrix4 Matrix4::translation( const Vector3 & translateVec )
+{
+    const Vector4 col0 = Vector4::xAxis( );
+    const Vector4 col1 = Vector4::yAxis( );
+    const Vector4 col2 = Vector4::zAxis( );
+    const Vector4 col3( translateVec, 1.0f );
+    return Matrix4( col0, col1, col2, col3 );
+}
+
+// View matrix for an eye at eyePos looking toward lookAtPos: builds the eye frame
+// (its x, y, z axes and eyePos as the four columns) and returns its orthoInverse.
+inline const Matrix4 Matrix4::lookAt( const Point3 & eyePos, const Point3 & lookAtPos, const Vector3 & upVec )
+{
+    const Vector3 upHint = normalize( upVec );
+    const Vector3 backward = normalize( eyePos - lookAtPos );
+    const Vector3 right = normalize( cross( upHint, backward ) );
+    const Vector3 up = cross( backward, right );
+    const Matrix4 eyeFrame( Vector4( right ), Vector4( up ), Vector4( backward ), Vector4( eyePos ) );
+    return orthoInverse( eyeFrame );
+}
+
+// Perspective projection from a vertical field of view (radians), an aspect ratio and
+// near/far distances. f is tanf( _VECTORMATH_PI_OVER_2 - fovyRadians / 2 ).
+inline const Matrix4 Matrix4::perspective( float fovyRadians, float aspect, float zNear, float zFar )
+{
+    const float f = tanf( (float)( _VECTORMATH_PI_OVER_2 ) - 0.5f * fovyRadians );
+    const float rangeInv = 1.0f / ( zNear - zFar );
+    const Vector4 col0( f / aspect, 0.0f, 0.0f, 0.0f );
+    const Vector4 col1( 0.0f, f, 0.0f, 0.0f );
+    const Vector4 col2( 0.0f, 0.0f, ( zNear + zFar ) * rangeInv, -1.0f );
+    const Vector4 col3( 0.0f, 0.0f, zNear * zFar * rangeInv * 2.0f, 0.0f );
+    return Matrix4( col0, col1, col2, col3 );
+}
+
+// Perspective projection for the view volume given by the left/right and bottom/top
+// extents and the zNear/zFar distances. Equal extents or distances divide by zero.
+inline const Matrix4 Matrix4::frustum( float left, float right, float bottom, float top, float zNear, float zFar )
+{
+    const float sumRL = right + left;
+    const float sumTB = top + bottom;
+    const float sumNF = zNear + zFar;
+    const float invRL = 1.0f / ( right - left );
+    const float invTB = 1.0f / ( top - bottom );
+    const float invNF = 1.0f / ( zNear - zFar );
+    const float twoNear = zNear + zNear;
+    const Vector4 col0( twoNear * invRL, 0.0f, 0.0f, 0.0f );
+    const Vector4 col1( 0.0f, twoNear * invTB, 0.0f, 0.0f );
+    const Vector4 col2( sumRL * invRL, sumTB * invTB, sumNF * invNF, -1.0f );
+    const Vector4 col3( 0.0f, 0.0f, twoNear * invNF * zFar, 0.0f );
+    return Matrix4( col0, col1, col2, col3 );
+}
+
+// Orthographic projection for the box given by left/right, bottom/top and zNear/zFar.
+// Columns 0-2 hold the per-axis scale, column 3 the offsets (with w = 1).
+inline const Matrix4 Matrix4::orthographic( float left, float right, float bottom, float top, float zNear, float zFar )
+{
+    const float sumRL = right + left;
+    const float sumTB = top + bottom;
+    const float sumNF = zNear + zFar;
+    const float invRL = 1.0f / ( right - left );
+    const float invTB = 1.0f / ( top - bottom );
+    const float invNF = 1.0f / ( zNear - zFar );
+    const Vector4 col0( invRL + invRL, 0.0f, 0.0f, 0.0f );
+    const Vector4 col1( 0.0f, invTB + invTB, 0.0f, 0.0f );
+    const Vector4 col2( 0.0f, 0.0f, invNF + invNF, 0.0f );
+    const Vector4 col3( -sumRL * invRL, -sumTB * invTB, sumNF * invNF, 1.0f );
+    return Matrix4( col0, col1, col2, col3 );
+}
+
+// Column-wise select: forwards each pair of columns and the select1 flag to the Vector4 select.
+inline const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, bool select1 )
+{
+    const Vector4 sel0 = select( mat0.getCol0(), mat1.getCol0(), select1 );
+    const Vector4 sel1 = select( mat0.getCol1(), mat1.getCol1(), select1 );
+    const Vector4 sel2 = select( mat0.getCol2(), mat1.getCol2(), select1 );
+    const Vector4 sel3 = select( mat0.getCol3(), mat1.getCol3(), select1 );
+    return Matrix4( sel0, sel1, sel2, sel3 );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+// Debug-only helper (compiled under _VECTORMATH_DEBUG):
+// prints rows 0..3 of mat, one print() call per row.
+inline void print( const Matrix4 & mat )
+{
+    for ( int row = 0; row < 4; ++row )
+        print( mat.getRow( row ) );
+}
+
+inline void print( const Matrix4 & mat, const char * name )
+{
+    printf( "%s:\n", name );  // label line: the name followed by a colon
+    print( mat );             // then the matrix itself, row by row
+}
+
+#endif
+
+// Copy constructor: takes over all four columns of tfrm.
+inline Transform3::Transform3( const Transform3 & tfrm )
+    : mCol0( tfrm.mCol0 ), mCol1( tfrm.mCol1 ),
+      mCol2( tfrm.mCol2 ), mCol3( tfrm.mCol3 )
+{
+    // every member is set by the initialiser list above
+}
+
+// Sets each of the four columns to Vector3( scalar ).
+inline Transform3::Transform3( float scalar )
+    : mCol0( Vector3( scalar ) ), mCol1( Vector3( scalar ) ),
+      mCol2( Vector3( scalar ) ), mCol3( Vector3( scalar ) )
+{
+    // every member is set by the initialiser list above
+}
+
+// Builds the transform directly from its four columns.
+inline Transform3::Transform3( const Vector3 & _col0, const Vector3 & _col1, const Vector3 & _col2, const Vector3 & _col3 )
+    : mCol0( _col0 ), mCol1( _col1 ),
+      mCol2( _col2 ), mCol3( _col3 )
+{
+    // every member is set by the initialiser list above
+}
+
+// Builds the transform from a 3x3 part (columns 0-2) and a translation (column 3).
+inline Transform3::Transform3( const Matrix3 & tfrm, const Vector3 & translateVec )
+{
+    setUpper3x3( tfrm ).setTranslation( translateVec );
+}
+
+// Builds the transform from Matrix3( unitQuat ) (columns 0-2) and a translation (column 3).
+inline Transform3::Transform3( const Quat & unitQuat, const Vector3 & translateVec )
+{
+    setUpper3x3( Matrix3( unitQuat ) ).setTranslation( translateVec );
+}
+
+inline Transform3 & Transform3::setCol0( const Vector3 & _col0 )
+{
+    this->mCol0 = _col0;  // replaces column 0 only
+    return *this;         // returned so setter calls can be chained
+}
+
+inline Transform3 & Transform3::setCol1( const Vector3 & _col1 )
+{
+    this->mCol1 = _col1;  // replaces column 1 only
+    return *this;         // returned so setter calls can be chained
+}
+
+inline Transform3 & Transform3::setCol2( const Vector3 & _col2 )
+{
+    this->mCol2 = _col2;  // replaces column 2 only
+    return *this;         // returned so setter calls can be chained
+}
+
+inline Transform3 & Transform3::setCol3( const Vector3 & _col3 )
+{
+    this->mCol3 = _col3;  // replaces column 3 only
+    return *this;         // returned so setter calls can be chained
+}
+
+inline Transform3 & Transform3::setCol( int col, const Vector3 & vec )
+{
+    ( &mCol0 )[ col ] = vec;  // columns are addressed as an array starting at mCol0; col is not range-checked
+    return *this;
+}
+
+inline Transform3 & Transform3::setRow( int row, const Vector4 & vec )
+{
+    this->mCol0.setElem( row, vec.getElem( 0 ) );  // element i of vec goes to column i,
+    this->mCol1.setElem( row, vec.getElem( 1 ) );  // always at index `row` of that column
+    this->mCol2.setElem( row, vec.getElem( 2 ) );
+    this->mCol3.setElem( row, vec.getElem( 3 ) );
+    return *this;
+}
+
+// Sets a single element: copies column `col`, changes entry `row` on the copy,
+// then writes the copy back through setCol (which returns *this).
+inline Transform3 & Transform3::setElem( int col, int row, float val )
+{
+    Vector3 column = getCol( col );
+    column.setElem( row, val );
+    return setCol( col, column );
+}
+
+inline float Transform3::getElem( int col, int row ) const
+{
+    return getCol( col ).getElem( row );  // fetch a copy of the column, then index into it
+}
+
+inline const Vector3 Transform3::getCol0( ) const
+{
+    return this->mCol0;  // column 0, returned by value
+}
+
+inline const Vector3 Transform3::getCol1( ) const
+{
+    return this->mCol1;  // column 1, returned by value
+}
+
+inline const Vector3 Transform3::getCol2( ) const
+{
+    return this->mCol2;  // column 2, returned by value
+}
+
+inline const Vector3 Transform3::getCol3( ) const
+{
+    return this->mCol3;  // column 3, returned by value
+}
+
+inline const Vector3 Transform3::getCol( int col ) const
+{
+    return ( &mCol0 )[ col ];  // columns are read as an array starting at mCol0; col is not range-checked
+}
+
+inline const Vector4 Transform3::getRow( int row ) const
+{
+    return Vector4( mCol0.getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ), mCol3.getElem( row ) );  // element `row` of each column, in column order
+}
+
+inline Vector3 & Transform3::operator []( int col )
+{
+    return ( &mCol0 )[ col ];  // writable reference to column `col`; the index is not range-checked
+}
+
+inline const Vector3 Transform3::operator []( int col ) const
+{
+    return ( &mCol0 )[ col ];  // copy of column `col`; the index is not range-checked
+}
+
+// Copy assignment: takes all four columns from tfrm and returns *this.
+inline Transform3 & Transform3::operator =( const Transform3 & tfrm )
+{
+    return setCol0( tfrm.mCol0 )
+        .setCol1( tfrm.mCol1 )
+        .setCol2( tfrm.mCol2 )
+        .setCol3( tfrm.mCol3 );
+}
+
+// General inverse of an affine transform.
+// The 3x3 part is inverted from the cross products of its columns divided by the
+// determinant; the new translation is -( inverse 3x3 ) * translation.
+// There is no guard against a zero determinant.
+inline const Transform3 inverse( const Transform3 & tfrm )
+{
+    const Vector3 c1xc2 = cross( tfrm.getCol1(), tfrm.getCol2() );
+    const Vector3 c2xc0 = cross( tfrm.getCol2(), tfrm.getCol0() );
+    const Vector3 c0xc1 = cross( tfrm.getCol0(), tfrm.getCol1() );
+    const float detInv = 1.0f / dot( tfrm.getCol2(), c0xc1 );
+
+    const Vector3 inv0( c1xc2.getX() * detInv, c2xc0.getX() * detInv, c0xc1.getX() * detInv );
+    const Vector3 inv1( c1xc2.getY() * detInv, c2xc0.getY() * detInv, c0xc1.getY() * detInv );
+    const Vector3 inv2( c1xc2.getZ() * detInv, c2xc0.getZ() * detInv, c0xc1.getZ() * detInv );
+    const Vector3 t = tfrm.getCol3();
+    const Vector3 invTranslation = -( inv0 * t.getX() + ( inv1 * t.getY() + inv2 * t.getZ() ) );
+    return Transform3( inv0, inv1, inv2, invTranslation );
+}
+
+// Inverse for a transform whose 3x3 part is orthonormal (assumed, not verified):
+// the 3x3 part is transposed and the new translation is -( transpose ) * translation.
+inline const Transform3 orthoInverse( const Transform3 & tfrm )
+{
+    const Vector3 c0 = tfrm.getCol0();
+    const Vector3 c1 = tfrm.getCol1();
+    const Vector3 c2 = tfrm.getCol2();
+    const Vector3 t = tfrm.getCol3();
+    const Vector3 row0( c0.getX(), c1.getX(), c2.getX() );
+    const Vector3 row1( c0.getY(), c1.getY(), c2.getY() );
+    const Vector3 row2( c0.getZ(), c1.getZ(), c2.getZ() );
+    return Transform3( row0, row1, row2, -( row0 * t.getX() + ( row1 * t.getY() + row2 * t.getZ() ) ) );
+}
+
+// Applies the Vector3 absPerElem to each of the four columns of tfrm.
+inline const Transform3 absPerElem( const Transform3 & tfrm )
+{
+    const Vector3 abs0 = absPerElem( tfrm.getCol0() );
+    const Vector3 abs1 = absPerElem( tfrm.getCol1() );
+    const Vector3 abs2 = absPerElem( tfrm.getCol2() );
+    const Vector3 abs3 = absPerElem( tfrm.getCol3() );
+    return Transform3( abs0, abs1, abs2, abs3 );
+}
+
+// Applies only the 3x3 part (columns 0-2) to vec; the translation column is not used.
+inline const Vector3 Transform3::operator *( const Vector3 & vec ) const
+{
+    const float outX = mCol0.getX() * vec.getX() + mCol1.getX() * vec.getY() + mCol2.getX() * vec.getZ();
+    const float outY = mCol0.getY() * vec.getX() + mCol1.getY() * vec.getY() + mCol2.getY() * vec.getZ();
+    const float outZ = mCol0.getZ() * vec.getX() + mCol1.getZ() * vec.getY() + mCol2.getZ() * vec.getZ();
+    return Vector3( outX, outY, outZ );
+}
+
+// Transforms a point: the 3x3 part is applied to pnt and the translation column is added.
+inline const Point3 Transform3::operator *( const Point3 & pnt ) const
+{
+    const float outX = mCol0.getX() * pnt.getX() + mCol1.getX() * pnt.getY() + mCol2.getX() * pnt.getZ() + mCol3.getX();
+    const float outY = mCol0.getY() * pnt.getX() + mCol1.getY() * pnt.getY() + mCol2.getY() * pnt.getZ() + mCol3.getY();
+    const float outZ = mCol0.getZ() * pnt.getX() + mCol1.getZ() * pnt.getY() + mCol2.getZ() * pnt.getZ() + mCol3.getZ();
+    return Point3( outX, outY, outZ );
+}
+
+// Composition: columns 0-2 of tfrm are transformed as vectors, column 3 as a point.
+inline const Transform3 Transform3::operator *( const Transform3 & tfrm ) const
+{
+    const Vector3 prod0 = *this * tfrm.mCol0;
+    const Vector3 prod1 = *this * tfrm.mCol1;
+    const Vector3 prod2 = *this * tfrm.mCol2;
+    const Vector3 prod3 = Vector3( *this * Point3( tfrm.mCol3 ) );
+    return Transform3( prod0, prod1, prod2, prod3 );
+}
+
+// In-place composition: stores ( *this * tfrm ) back into this transform and returns it.
+inline Transform3 & Transform3::operator *=( const Transform3 & tfrm )
+{
+    return ( *this = *this * tfrm );
+}
+
+// Applies the Vector3 mulPerElem to each pair of matching columns of tfrm0 and tfrm1.
+inline const Transform3 mulPerElem( const Transform3 & tfrm0, const Transform3 & tfrm1 )
+{
+    const Vector3 prod0 = mulPerElem( tfrm0.getCol0(), tfrm1.getCol0() );
+    const Vector3 prod1 = mulPerElem( tfrm0.getCol1(), tfrm1.getCol1() );
+    const Vector3 prod2 = mulPerElem( tfrm0.getCol2(), tfrm1.getCol2() );
+    const Vector3 prod3 = mulPerElem( tfrm0.getCol3(), tfrm1.getCol3() );
+    return Transform3( prod0, prod1, prod2, prod3 );
+}
+
+// Identity transform: the three Vector3 axis helpers plus Vector3( 0.0f ) as translation.
+inline const Transform3 Transform3::identity( )
+{
+    const Vector3 axisX = Vector3::xAxis( );
+    const Vector3 axisY = Vector3::yAxis( );
+    const Vector3 axisZ = Vector3::zAxis( );
+    const Vector3 noTranslation( 0.0f );
+    return Transform3( axisX, axisY, axisZ, noTranslation );
+}
+
+// Replaces columns 0-2 with the columns of tfrm; column 3 is not written.
+inline Transform3 & Transform3::setUpper3x3( const Matrix3 & tfrm )
+{
+    return setCol0( tfrm.getCol0() )
+        .setCol1( tfrm.getCol1() )
+        .setCol2( tfrm.getCol2() );
+}
+
+inline const Matrix3 Transform3::getUpper3x3( ) const
+{
+    return Matrix3( this->mCol0, this->mCol1, this->mCol2 );  // columns 0-2 only; column 3 is left out
+}
+
+// The translation is stored in column 3, so this forwards to setCol3.
+inline Transform3 & Transform3::setTranslation( const Vector3 & translateVec )
+{
+    return setCol3( translateVec );
+}
+
+inline const Vector3 Transform3::getTranslation( ) const
+{
+    return getCol3( );  // the translation is stored in column 3
+}
+
+// Rotation about the x axis by `radians`, with Vector3( 0.0f ) as translation.
+// Column 0 is the plain x axis; columns 1 and 2 carry the sine/cosine
+// terms ( 0, c, s ) and ( 0, -s, c ).
+inline const Transform3 Transform3::rotationX( float radians )
+{
+    const float sinA = sinf( radians );
+    const float cosA = cosf( radians );
+    const Vector3 col1( 0.0f, cosA, sinA );
+    const Vector3 col2( 0.0f, -sinA, cosA );
+
+    return Transform3( Vector3::xAxis( ), col1, col2, Vector3( 0.0f ) );
+}
+
+// Rotation about the y axis by `radians`, with Vector3( 0.0f ) as translation.
+// Column 1 is the plain y axis; columns 0 and 2 carry the sine/cosine
+// terms ( c, 0, -s ) and ( s, 0, c ).
+inline const Transform3 Transform3::rotationY( float radians )
+{
+    const float sinA = sinf( radians );
+    const float cosA = cosf( radians );
+    const Vector3 col0( cosA, 0.0f, -sinA );
+    const Vector3 col2( sinA, 0.0f, cosA );
+
+    return Transform3( col0, Vector3::yAxis( ), col2, Vector3( 0.0f ) );
+}
+
+// Rotation about the z axis by `radians`, with Vector3( 0.0f ) as translation.
+// Column 2 is the plain z axis; columns 0 and 1 carry the sine/cosine
+// terms ( c, s, 0 ) and ( -s, c, 0 ).
+inline const Transform3 Transform3::rotationZ( float radians )
+{
+    const float sinA = sinf( radians );
+    const float cosA = cosf( radians );
+    const Vector3 col0( cosA, sinA, 0.0f );
+    const Vector3 col1( -sinA, cosA, 0.0f );
+
+    return Transform3( col0, col1, Vector3::zAxis( ), Vector3( 0.0f ) );
+}
+
+// Rotation built from the three angles stored in radiansXYZ (x, y and z components).
+// The columns written below are those of rotationZ * rotationY * rotationX
+// (up to floating-point rounding); the translation column is Vector3( 0.0f ).
+inline const Transform3 Transform3::rotationZYX( const Vector3 & radiansXYZ )
+{
+    const float sX = sinf( radiansXYZ.getX() );
+    const float cX = cosf( radiansXYZ.getX() );
+    const float sY = sinf( radiansXYZ.getY() );
+    const float cY = cosf( radiansXYZ.getY() );
+    const float sZ = sinf( radiansXYZ.getZ() );
+    const float cZ = cosf( radiansXYZ.getZ() );
+    const float cZsY = cZ * sY;
+    const float sZsY = sZ * sY;
+    const Vector3 col0( cZ * cY, sZ * cY, -sY );
+    const Vector3 col1( cZsY * sX - sZ * cX, sZsY * sX + cZ * cX, cY * sX );
+    const Vector3 col2( cZsY * cX + sZ * sX, sZsY * cX - cZ * sX, cY * cX );
+    return Transform3( col0, col1, col2, Vector3( 0.0f ) );
+}
+
+inline const Transform3 Transform3::rotation( float radians, const Vector3 & unitVec )
+{
+    return Transform3( Matrix3::rotation( radians, unitVec ), Vector3( 0.0f ) );  // Matrix3 axis-angle rotation with a Vector3( 0.0f ) translation
+}
+
+inline const Transform3 Transform3::rotation( const Quat & unitQuat )
+{
+    return Transform3( Matrix3( unitQuat ), Vector3( 0.0f ) );  // Matrix3 built from the quaternion, with a Vector3( 0.0f ) translation
+}
+
+// Scale transform: scaleVec's x, y, z on the diagonal and Vector3( 0.0f ) as translation.
+inline const Transform3 Transform3::scale( const Vector3 & scaleVec )
+{
+    const Vector3 col0( scaleVec.getX(), 0.0f, 0.0f );
+    const Vector3 col1( 0.0f, scaleVec.getY(), 0.0f );
+    const Vector3 col2( 0.0f, 0.0f, scaleVec.getZ() );
+    const Vector3 col3( 0.0f );
+    return Transform3( col0, col1, col2, col3 );
+}
+
+// Multiplies columns 0, 1, 2 of tfrm by scaleVec's x, y, z; the translation
+// column is copied unchanged.
+inline const Transform3 appendScale( const Transform3 & tfrm, const Vector3 & scaleVec )
+{
+    const Vector3 scaled0 = tfrm.getCol0() * scaleVec.getX( );
+    const Vector3 scaled1 = tfrm.getCol1() * scaleVec.getY( );
+    const Vector3 scaled2 = tfrm.getCol2() * scaleVec.getZ( );
+    return Transform3( scaled0, scaled1, scaled2, tfrm.getCol3() );
+}
+
+// Multiplies every column of tfrm, translation included, element-wise (mulPerElem) by scaleVec.
+inline const Transform3 prependScale( const Vector3 & scaleVec, const Transform3 & tfrm )
+{
+    const Vector3 scaled0 = mulPerElem( tfrm.getCol0(), scaleVec );
+    const Vector3 scaled1 = mulPerElem( tfrm.getCol1(), scaleVec );
+    const Vector3 scaled2 = mulPerElem( tfrm.getCol2(), scaleVec );
+    const Vector3 scaled3 = mulPerElem( tfrm.getCol3(), scaleVec );
+    return Transform3( scaled0, scaled1, scaled2, scaled3 );
+}
+
+// Pure translation: the three Vector3 axis helpers as columns 0-2 and
+// translateVec as column 3.
+inline const Transform3 Transform3::translation( const Vector3 & translateVec )
+{
+    const Vector3 axisX = Vector3::xAxis( );
+    const Vector3 axisY = Vector3::yAxis( );
+    const Vector3 axisZ = Vector3::zAxis( );
+    return Transform3( axisX, axisY, axisZ, translateVec );
+}
+
+// Column-wise select: forwards each pair of columns and the select1 flag to the Vector3 select.
+inline const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, bool select1 )
+{
+    const Vector3 sel0 = select( tfrm0.getCol0(), tfrm1.getCol0(), select1 );
+    const Vector3 sel1 = select( tfrm0.getCol1(), tfrm1.getCol1(), select1 );
+    const Vector3 sel2 = select( tfrm0.getCol2(), tfrm1.getCol2(), select1 );
+    const Vector3 sel3 = select( tfrm0.getCol3(), tfrm1.getCol3(), select1 );
+    return Transform3( sel0, sel1, sel2, sel3 );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+// Debug-only helper: prints rows 0..2 of tfrm, one print() call per row.
+inline void print( const Transform3 & tfrm )
+{
+    for ( int row = 0; row < 3; ++row )
+        print( tfrm.getRow( row ) );
+}
+
+inline void print( const Transform3 & tfrm, const char * name )
+{
+    printf( "%s:\n", name );  // label line: the name followed by a colon
+    print( tfrm );            // then the transform itself, row by row
+}
+
+#endif
+
+inline Quat::Quat( const Matrix3 & tfrm )
+{
+    float trace, radicand, scale, xx, yx, zx, xy, yy, zy, xz, yz, zz, tmpx, tmpy, tmpz, tmpw, qx, qy, qz, qw;
+    int negTrace, ZgtX, ZgtY, YgtX;
+    int largestXorY, largestYorZ, largestZorX;
+    // Builds the quaternion from the 3x3 matrix tfrm (presumably a pure rotation; not checked here).
+    xx = tfrm.getCol0().getX();  // naming: first letter = element (row), second letter = column
+    yx = tfrm.getCol0().getY();
+    zx = tfrm.getCol0().getZ();
+    xy = tfrm.getCol1().getX();
+    yy = tfrm.getCol1().getY();
+    zy = tfrm.getCol1().getZ();
+    xz = tfrm.getCol2().getX();
+    yz = tfrm.getCol2().getY();
+    zz = tfrm.getCol2().getZ();
+
+    trace = ( ( xx + yy ) + zz );
+    // All three "largest" flags require a negative trace; with a non-negative trace none is set.
+    negTrace = ( trace < 0.0f );
+    ZgtX = zz > xx;
+    ZgtY = zz > yy;
+    YgtX = yy > xx;
+    largestXorY = ( !ZgtX || !ZgtY ) && negTrace;
+    largestYorZ = ( YgtX || ZgtX ) && negTrace;
+    largestZorX = ( ZgtY || !YgtX ) && negTrace;
+    // Each set flag negates one diagonal and one off-diagonal entry before the radicand is formed.
+    if ( largestXorY )
+    {
+        zz = -zz;
+        xy = -xy;
+    }
+    if ( largestYorZ )
+    {
+        xx = -xx;
+        yz = -yz;
+    }
+    if ( largestZorX )
+    {
+        yy = -yy;
+        zx = -zx;
+    }
+
+    radicand = ( ( ( xx + yy ) + zz ) + 1.0f );
+    scale = ( 0.5f * ( 1.0f / sqrtf( radicand ) ) );
+    // scale is 0.5 / sqrt( radicand ), so tmpw = radicand * scale equals 0.5 * sqrt( radicand ).
+    tmpx = ( ( zy - yz ) * scale );
+    tmpy = ( ( xz - zx ) * scale );
+    tmpz = ( ( yx - xy ) * scale );
+    tmpw = ( radicand * scale );
+    qx = tmpx;
+    qy = tmpy;
+    qz = tmpz;
+    qw = tmpw;
+    // Permute the four components according to the flags (both swaps below can apply in one call).
+    if ( largestXorY )
+    {
+        qx = tmpw;
+        qy = tmpz;
+        qz = tmpy;
+        qw = tmpx;
+    }
+    if ( largestYorZ )
+    {
+        tmpx = qx;
+        tmpz = qz;
+        qx = qy;
+        qy = tmpx;
+        qz = qw;
+        qw = tmpz;
+    }
+    // Store the result in x, y, z, w order.
+    mXYZW[0] = qx;
+    mXYZW[1] = qy;
+    mXYZW[2] = qz;
+    mXYZW[3] = qw;
+}
+
+// Outer product: column i of the result is tfrm0 scaled by component i of tfrm1.
+inline const Matrix3 outer( const Vector3 & tfrm0, const Vector3 & tfrm1 )
+{
+    const Vector3 col0 = tfrm0 * tfrm1.getX( );
+    const Vector3 col1 = tfrm0 * tfrm1.getY( );
+    const Vector3 col2 = tfrm0 * tfrm1.getZ( );
+    return Matrix3( col0, col1, col2 );
+}
+
+// Outer product: column i of the result is tfrm0 scaled by component i of tfrm1.
+inline const Matrix4 outer( const Vector4 & tfrm0, const Vector4 & tfrm1 )
+{
+    const Vector4 col0 = tfrm0 * tfrm1.getX( );
+    const Vector4 col1 = tfrm0 * tfrm1.getY( );
+    const Vector4 col2 = tfrm0 * tfrm1.getZ( );
+    const Vector4 col3 = tfrm0 * tfrm1.getW( );
+    return Matrix4( col0, col1, col2, col3 );
+}
+
+// Row vector times matrix: output component i is vec dotted with column i of mat.
+inline const Vector3 rowMul( const Vector3 & vec, const Matrix3 & mat )
+{
+    const float outX = vec.getX() * mat.getCol0().getX() + vec.getY() * mat.getCol0().getY() + vec.getZ() * mat.getCol0().getZ();
+    const float outY = vec.getX() * mat.getCol1().getX() + vec.getY() * mat.getCol1().getY() + vec.getZ() * mat.getCol1().getZ();
+    const float outZ = vec.getX() * mat.getCol2().getX() + vec.getY() * mat.getCol2().getY() + vec.getZ() * mat.getCol2().getZ();
+    return Vector3( outX, outY, outZ );
+}
+
+// Skew-symmetric matrix of vec: multiplying it by a vector w yields cross( vec, w ).
+inline const Matrix3 crossMatrix( const Vector3 & vec )
+{
+    const Vector3 col0( 0.0f, vec.getZ(), -vec.getY() );
+    const Vector3 col1( -vec.getZ(), 0.0f, vec.getX() );
+    const Vector3 col2( vec.getY(), -vec.getX(), 0.0f );
+    return Matrix3( col0, col1, col2 );
+}
+
+inline const Matrix3 crossMatrixMul( const Vector3 & vec, const Matrix3 & mat )
+{
+    return Matrix3( cross( vec, mat.getCol0() ), cross( vec, mat.getCol1() ), cross( vec, mat.getCol2() ) );  // column i of the result is cross( vec, column i of mat )
+}
+
+} // namespace Aos
+} // namespace Vectormath
+
+#endif
+
diff --git a/Engine/lib/bullet/src/vectormath/neon/quat_aos.h b/Engine/lib/bullet/src/vectormath/neon/quat_aos.h
new file mode 100644
index 000000000..d06184603
--- /dev/null
+++ b/Engine/lib/bullet/src/vectormath/neon/quat_aos.h
@@ -0,0 +1,413 @@
+/*
+   Copyright (C) 2009 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+
+*/
+
+#ifndef _VECTORMATH_QUAT_AOS_CPP_H
+#define _VECTORMATH_QUAT_AOS_CPP_H
+
+//-----------------------------------------------------------------------------
+// Definitions
+
+#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
+#define _VECTORMATH_INTERNAL_FUNCTIONS
+
+#endif
+
+namespace Vectormath {
+namespace Aos {
+
+    // Copy constructor: duplicates the source's four-lane float32x4_t value.
+    inline Quat::Quat( const Quat & quat ) {
+        this->vXYZW = quat.getvXYZW();
+    }
+    
+    // Builds a quaternion from four scalars, stored as elements 0..3 in x, y, z, w order.
+    inline Quat::Quat( float _x, float _y, float _z, float _w ) {
+        const float lanes[4] = { _x, _y, _z, _w };
+        for ( int i = 0; i < 4; ++i ) {
+            mXYZW[i] = lanes[i];
+        }
+    }
+    
+    // Adopts an existing float32x4_t as the quaternion's storage, lanes unchanged.
+    inline Quat::Quat( float32x4_t fXYZW ) {
+        this->vXYZW = fXYZW;
+    }
+    
+    // Builds a quaternion from a vector part (x, y, z) and a scalar part w.
+    inline Quat::Quat( const Vector3 & xyz, float _w ) {
+        setXYZ( xyz );
+        setW( _w );
+    }
+    
+    // Converts a Vector4 into a quaternion by copying x, y, z and w one by one.
+    inline Quat::Quat( const Vector4 & vec ) {
+        setX( vec.getX() );
+        setY( vec.getY() );
+        setZ( vec.getZ() );
+        setW( vec.getW() );
+    }
+    
+    // Broadcast constructor: all four components are set to scalar.
+    inline Quat::Quat( float scalar ) {
+        this->vXYZW = vdupq_n_f32( scalar );
+    }
+    
+    // Identity quaternion: zero vector part, w = 1.
+    inline const Quat Quat::identity( ) {
+        return Quat( 0.0f, 0.0f, 0.0f, 1.0f );
+    }
+    
+    // Linear blend quat0 + ( quat1 - quat0 ) * t; the result is not renormalized.
+    inline const Quat lerp( float t, const Quat & quat0, const Quat & quat1 ) {
+        return quat0 + ( quat1 - quat0 ) * t;
+    }
+    
+    // Spherical linear interpolation between two unit quaternions.
+    // unitQuat0 is negated when the dot product is negative, so the blend
+    // takes the shorter arc. Once the (then non-negative) cosine reaches
+    // _VECTORMATH_SLERP_TOL the weights stay linear, which avoids dividing
+    // by a near-zero sine for nearly identical inputs.
+    inline const Quat slerp( float t, const Quat & unitQuat0, const Quat & unitQuat1 ) {
+        float cosAngle = dot( unitQuat0, unitQuat1 );
+        Quat start;
+        if ( cosAngle < 0.0f ) {
+            cosAngle = -cosAngle;
+            start = -unitQuat0;
+        } else {
+            start = unitQuat0;
+        }
+        float scale0 = 1.0f - t;
+        float scale1 = t;
+        if ( cosAngle < _VECTORMATH_SLERP_TOL ) {
+            const float angle = acosf( cosAngle );
+            const float recipSinAngle = 1.0f / sinf( angle );
+            scale0 = sinf( ( 1.0f - t ) * angle ) * recipSinAngle;
+            scale1 = sinf( t * angle ) * recipSinAngle;
+        }
+        return start * scale0 + unitQuat1 * scale1;
+    }
+    
+    // Spherical quadrangle interpolation: slerps the pair (0, 3) and the pair
+    // (1, 2) at t, then slerps between those two results at 2t( 1 - t ).
+    inline const Quat squad( float t, const Quat & unitQuat0, const Quat & unitQuat1, const Quat & unitQuat2, const Quat & unitQuat3 ) {
+        const Quat ends = slerp( t, unitQuat0, unitQuat3 );
+        const Quat mids = slerp( t, unitQuat1, unitQuat2 );
+        return slerp( ( 2.0f * t ) * ( 1.0f - t ), ends, mids );
+    }
+    
+    // Loads x, y, z, w from the four consecutive floats at fptr with one NEON load (the counterpart of storeXYZW's vst1q_f32).
+    inline void loadXYZW( Quat & quat, const float * fptr ) {
+        quat = Quat( vld1q_f32( fptr ) );
+    }
+    
+    // Writes x, y, z, w to the four consecutive floats at fptr with one NEON store.
+    inline void storeXYZW( const Quat & quat, float * fptr ) {
+        vst1q_f32( fptr, quat.getvXYZW( ) );
+    }
+    
+    // Assignment: copies all four lanes from quat and returns *this.
+    inline Quat & Quat::operator =( const Quat & quat ) {
+        this->vXYZW = quat.vXYZW;
+        return *this;
+    }
+    
+    // Overwrites x, y and z from vec; w keeps its current value. Returns *this.
+    inline Quat & Quat::setXYZ( const Vector3 & vec ) {
+        setX( vec.getX() );
+        setY( vec.getY() );
+        setZ( vec.getZ() );
+        return *this;
+    }
+    
+    // Returns the vector part (x, y, z) as a Vector3; w is not included.
+    inline const Vector3 Quat::getXYZ( ) const {
+        return Vector3( getX(), getY(), getZ() );
+    }
+    
+    // Returns the underlying float32x4_t holding all four components, by value.
+    inline float32x4_t Quat::getvXYZW( ) const {
+        return this->vXYZW;
+    }
+    
+    // Sets the x component (element 0) and returns *this for chaining.
+    inline Quat & Quat::setX( float _x ) {
+        this->mXYZW[0] = _x;
+        return *this;
+    }
+    
+    // Reads the x component (element 0).
+    inline float Quat::getX( ) const {
+        return this->mXYZW[0];
+    }
+    
+    // Sets the y component (element 1) and returns *this for chaining.
+    inline Quat & Quat::setY( float _y ) {
+        this->mXYZW[1] = _y;
+        return *this;
+    }
+    
+    // Reads the y component (element 1).
+    inline float Quat::getY( ) const {
+        return this->mXYZW[1];
+    }
+    
+    // Sets the z component (element 2) and returns *this for chaining.
+    inline Quat & Quat::setZ( float _z ) {
+        this->mXYZW[2] = _z;
+        return *this;
+    }
+    
+    // Reads the z component (element 2).
+    inline float Quat::getZ( ) const {
+        return this->mXYZW[2];
+    }
+    
+    // Sets the w component (element 3) and returns *this for chaining.
+    inline Quat & Quat::setW( float _w ) {
+        this->mXYZW[3] = _w;
+        return *this;
+    }
+    
+    // Reads the w component (element 3).
+    inline float Quat::getW( ) const {
+        return this->mXYZW[3];
+    }
+    
+    // Writes component idx (0 = x ... 3 = w); idx is not range-checked. Returns *this.
+    inline Quat & Quat::setElem( int idx, float value ) {
+        mXYZW[idx] = value;
+        return *this;
+    }
+    
+    // Reads component idx (0 = x ... 3 = w); idx is not range-checked.
+    inline float Quat::getElem( int idx ) const {
+        return mXYZW[idx];
+    }
+    
+    // Mutable reference to component idx (0 = x ... 3 = w); no bounds check.
+    inline float & Quat::operator []( int idx ) {
+        return mXYZW[idx];
+    }
+    
+    // Read-only subscript: value of component idx (0 = x ... 3 = w); no bounds check.
+    inline float Quat::operator []( int idx ) const {
+        return mXYZW[idx];
+    }
+    
+    // Component-wise sum, computed with a single NEON add.
+    inline const Quat Quat::operator +( const Quat & quat ) const {
+        return Quat( vaddq_f32( getvXYZW(), quat.getvXYZW() ) );
+    }
+    
+    // Component-wise difference (*this - quat), computed with a single NEON subtract.
+    inline const Quat Quat::operator -( const Quat & quat ) const {
+        return Quat( vsubq_f32( getvXYZW(), quat.getvXYZW() ) );
+    }
+    
+    // Scales every component by scalar; the scalar is broadcast to all four lanes first.
+    inline const Quat Quat::operator *( float scalar ) const {
+        const float32x4_t factor = vdupq_n_f32( scalar );
+        return Quat( vmulq_f32( getvXYZW(), factor ) );
+    }
+    
+    // In-place component-wise addition, expressed through operator + and
+    // assignment. Returns *this.
+    inline Quat & Quat::operator +=( const Quat & quat ) {
+        return ( *this = *this + quat );
+    }
+    
+    // In-place component-wise subtraction, expressed through operator - and
+    // assignment. Returns *this.
+    inline Quat & Quat::operator -=( const Quat & quat ) {
+        return ( *this = *this - quat );
+    }
+    
+    // In-place scaling by scalar, expressed through operator *( float ) and
+    // assignment. Returns *this.
+    inline Quat & Quat::operator *=( float scalar ) {
+        return ( *this = *this * scalar );
+    }
+    
+    // Divides every component by scalar, one scalar division per element
+    // (no reciprocal is formed, and scalar == 0 is not guarded against).
+    inline const Quat Quat::operator /( float scalar ) const {
+        const float qx = mXYZW[0] / scalar;
+        const float qy = mXYZW[1] / scalar;
+        const float qz = mXYZW[2] / scalar;
+        const float qw = mXYZW[3] / scalar;
+        return Quat( qx, qy, qz, qw );
+    }
+    
+    // In-place division by scalar, expressed through operator / and
+    // assignment. Returns *this.
+    inline Quat & Quat::operator /=( float scalar ) {
+        return ( *this = *this / scalar );
+    }
+    
+    // Unary minus: negates all four components with one NEON negate.
+    inline const Quat Quat::operator -( ) const {
+        return Quat( vnegq_f32( getvXYZW() ) );
+    }
+    
+    // Scalar-on-the-left multiplication; forwards to Quat::operator *( float ).
+    inline const Quat operator *( float scalar, const Quat & quat ) {
+        return quat.operator *( scalar );
+    }
+    
+    // Four-component dot product of two quaternions, accumulated left to
+    // right in x, y, z, w order.
+    inline float dot( const Quat & quat0, const Quat & quat1 ) {
+        float acc = quat0.getX() * quat1.getX();
+        acc += quat0.getY() * quat1.getY();
+        acc += quat0.getZ() * quat1.getZ();
+        acc += quat0.getW() * quat1.getW();
+        return acc;
+    }
+    
+    // Sum of the squares of the four components (x*x + y*y + z*z + w*w).
+    // No square root is taken here.
+    inline float norm( const Quat & quat ) {
+        float sumSq = quat.getX() * quat.getX();
+        sumSq += quat.getY() * quat.getY();
+        sumSq += quat.getZ() * quat.getZ();
+        sumSq += quat.getW() * quat.getW();
+        return sumSq;
+    }
+    
+    // Magnitude of quat: the square root of norm( quat ).
+    inline float length( const Quat & quat ) {
+        return ::sqrtf( norm( quat ) );
+    }
+    
+    // Rescales quat to unit magnitude by multiplying each component with
+    // 1 / sqrt( norm( quat ) ). A zero quaternion is not special-cased, so
+    // the reciprocal is then a division by zero.
+    inline const Quat normalize( const Quat & quat ) {
+        const float lenSqr = norm( quat );
+        const float lenInv = 1.0f / sqrtf( lenSqr );
+        const float nx = quat.getX() * lenInv;
+        const float ny = quat.getY() * lenInv;
+        const float nz = quat.getZ() * lenInv;
+        const float nw = quat.getW() * lenInv;
+        return Quat( nx, ny, nz, nw );
+    }
+    
+    // Rotation taking unitVec0 onto unitVec1 (both expected to be unit length).
+    // Exactly opposite vectors make cosHalfAngleX2 zero, so the reciprocal below divides by zero.
+    inline const Quat Quat::rotation( const Vector3 & unitVec0, const Vector3 & unitVec1 ) {
+        const float cosHalfAngleX2 = sqrtf( 2.0f * ( 1.0f + dot( unitVec0, unitVec1 ) ) );
+        const Vector3 vectorPart = cross( unitVec0, unitVec1 ) * ( 1.0f / cosHalfAngleX2 );
+        return Quat( vectorPart, cosHalfAngleX2 * 0.5f );
+    }
+    
+    // Rotation by `radians` about unitVec (expected to be unit length):
+    // vector part = unitVec * sin( radians / 2 ), w = cos( radians / 2 ).
+    inline const Quat Quat::rotation( float radians, const Vector3 & unitVec ) {
+        const float halfAngle = radians * 0.5f;
+        const float sinHalf = sinf( halfAngle );
+        const float cosHalf = cosf( halfAngle );
+        return Quat( unitVec * sinHalf, cosHalf );
+    }
+    
+    // Rotation by `radians` about the x axis:
+    // ( sin( radians / 2 ), 0, 0, cos( radians / 2 ) ).
+    inline const Quat Quat::rotationX( float radians ) {
+        const float halfAngle = radians * 0.5f;
+        const float sinHalf = sinf( halfAngle );
+        const float cosHalf = cosf( halfAngle );
+        return Quat( sinHalf, 0.0f, 0.0f, cosHalf );
+    }
+    
+    // Rotation by `radians` about the y axis:
+    // ( 0, sin( radians / 2 ), 0, cos( radians / 2 ) ).
+    inline const Quat Quat::rotationY( float radians ) {
+        const float halfAngle = radians * 0.5f;
+        const float sinHalf = sinf( halfAngle );
+        const float cosHalf = cosf( halfAngle );
+        return Quat( 0.0f, sinHalf, 0.0f, cosHalf );
+    }
+    
+    // Rotation by `radians` about the z axis:
+    // ( 0, 0, sin( radians / 2 ), cos( radians / 2 ) ).
+    inline const Quat Quat::rotationZ( float radians ) {
+        const float halfAngle = radians * 0.5f;
+        const float sinHalf = sinf( halfAngle );
+        const float cosHalf = cosf( halfAngle );
+        return Quat( 0.0f, 0.0f, sinHalf, cosHalf );
+    }
+    
+    // Hamilton product (*this) * quat; a* are this quaternion's components, b* are quat's.
+    inline const Quat Quat::operator *( const Quat & quat ) const {
+        const float ax = mXYZW[0], ay = mXYZW[1], az = mXYZW[2], aw = mXYZW[3];
+        const float bx = quat.mXYZW[0], by = quat.mXYZW[1], bz = quat.mXYZW[2], bw = quat.mXYZW[3];
+        return Quat( ( ( aw * bx + ax * bw ) + ay * bz ) - az * by,
+                     ( ( aw * by + ay * bw ) + az * bx ) - ax * bz,
+                     ( ( aw * bz + az * bw ) + ax * by ) - ay * bx,
+                     ( ( aw * bw - ax * bx ) - ay * by ) - az * bz );
+    }
+    
+    // In-place quaternion product: *this becomes (*this) * quat, with quat
+    // as the right-hand factor. Returns *this.
+    inline Quat & Quat::operator *=( const Quat & quat ) {
+        return ( *this = *this * quat );
+    }
+    
+    // Rotates vec by quat (expected unit length). tmpX..tmpZ are the vector part of quat * ( vec, 0 ) and tmpW is
+    // dot( quat.xyz, vec ), the negated scalar part; the return expression multiplies that by the conjugate of quat.
+    inline const Vector3 rotate( const Quat & quat, const Vector3 & vec ) {
+        const float qx = quat.getX(), qy = quat.getY(), qz = quat.getZ(), qw = quat.getW();
+        const float vx = vec.getX(), vy = vec.getY(), vz = vec.getZ();
+        const float tmpX = ( qw * vx + qy * vz ) - qz * vy;
+        const float tmpY = ( qw * vy + qz * vx ) - qx * vz;
+        const float tmpZ = ( qw * vz + qx * vy ) - qy * vx;
+        const float tmpW = ( qx * vx + qy * vy ) + qz * vz;
+        return Vector3( ( ( tmpW * qx + tmpX * qw ) - tmpY * qz ) + tmpZ * qy,
+                        ( ( tmpW * qy + tmpY * qw ) - tmpZ * qx ) + tmpX * qz,
+                        ( ( tmpW * qz + tmpZ * qw ) - tmpX * qy ) + tmpY * qx );
+    }
+    
+    // Conjugate: negates the vector part (x, y, z) and keeps w.
+    inline const Quat conj( const Quat & quat ) {
+        return Quat( -quat.getXYZ(), quat.getW() );
+    }
+    
+    // Returns a copy of quat1's components when select1 is true and of
+    // quat0's otherwise; the same flag applies to all four components.
+    inline const Quat select( const Quat & quat0, const Quat & quat1, bool select1 ) {
+        if ( select1 ) {
+            return Quat( quat1.getX(), quat1.getY(), quat1.getZ(), quat1.getW() );
+        } else {
+            return Quat( quat0.getX(), quat0.getY(), quat0.getZ(), quat0.getW() );
+        }
+    }
+
+#ifdef _VECTORMATH_DEBUG
+
+// Debug helper: prints the four components on one line with printf.
+inline void print( const Quat & quat ) {
+    printf( "( %f %f %f %f )\n", quat.getX( ), quat.getY( ), quat.getZ( ), quat.getW( ) );
+}
+
+// Debug helper: prints name, a colon, then the four components on one line.
+inline void print( const Quat & quat, const char * name ) {
+    printf( "%s: ( %f %f %f %f )\n", name, quat.getX( ), quat.getY( ), quat.getZ( ), quat.getW( ) );
+}
+
+#endif
+
+} // namespace Aos
+} // namespace Vectormath
+
+#endif
+
diff --git a/Engine/lib/bullet/src/vectormath/neon/vec_aos.h b/Engine/lib/bullet/src/vectormath/neon/vec_aos.h
new file mode 100644
index 000000000..7bcf8dbec
--- /dev/null
+++ b/Engine/lib/bullet/src/vectormath/neon/vec_aos.h
@@ -0,0 +1,1427 @@
+/*
+   Copyright (C) 2009 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+
+*/
+
+#ifndef _VECTORMATH_VEC_AOS_CPP_H
+#define _VECTORMATH_VEC_AOS_CPP_H
+
+//-----------------------------------------------------------------------------
+// Constants
+
+#define _VECTORMATH_SLERP_TOL 0.999f
+
+//-----------------------------------------------------------------------------
+// Definitions
+
+#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
+#define _VECTORMATH_INTERNAL_FUNCTIONS
+
+#endif
+
+namespace Vectormath {
+namespace Aos {
+
+// Copy constructor: takes x, y and z from vec.
+inline Vector3::Vector3( const Vector3 & vec ) {
+    this->mX = vec.getX();
+    this->mY = vec.getY();
+    this->mZ = vec.getZ();
+}
+
+// Builds a vector from three scalars.
+inline Vector3::Vector3( float _x, float _y, float _z ) {
+    this->mX = _x;
+    this->mY = _y;
+    this->mZ = _z;
+}
+
+// Converts a Point3 to a Vector3 by copying its x, y and z.
+inline Vector3::Vector3( const Point3 & pnt ) {
+    this->mX = pnt.getX();
+    this->mY = pnt.getY();
+    this->mZ = pnt.getZ();
+}
+
+// Broadcast constructor: every component is set to scalar.
+inline Vector3::Vector3( float scalar ) {
+    this->mX = scalar;
+    this->mY = scalar;
+    this->mZ = scalar;
+}
+
+// Unit vector along x: ( 1, 0, 0 ).
+inline const Vector3 Vector3::xAxis( ) {
+    return Vector3( 1.0f, 0.0f, 0.0f );
+}
+
+// Unit vector along y: ( 0, 1, 0 ).
+inline const Vector3 Vector3::yAxis( ) {
+    return Vector3( 0.0f, 1.0f, 0.0f );
+}
+
+// Unit vector along z: ( 0, 0, 1 ).
+inline const Vector3 Vector3::zAxis( ) {
+    return Vector3( 0.0f, 0.0f, 1.0f );
+}
+
+// Linear interpolation vec0 + ( vec1 - vec0 ) * t; t is not clamped.
+inline const Vector3 lerp( float t, const Vector3 & vec0, const Vector3 & vec1 ) {
+    return vec0 + ( vec1 - vec0 ) * t;
+}
+
+// Spherical interpolation between two unit vectors. Once the cosine of the
+// angle reaches _VECTORMATH_SLERP_TOL the weights stay linear, which avoids
+// dividing by a near-zero sine for nearly parallel inputs.
+inline const Vector3 slerp( float t, const Vector3 & unitVec0, const Vector3 & unitVec1 ) {
+    const float cosAngle = dot( unitVec0, unitVec1 );
+    float scale0 = 1.0f - t;
+    float scale1 = t;
+    if ( cosAngle < _VECTORMATH_SLERP_TOL ) {
+        const float angle = acosf( cosAngle );
+        const float recipSinAngle = 1.0f / sinf( angle );
+        scale0 = sinf( ( 1.0f - t ) * angle ) * recipSinAngle;
+        scale1 = sinf( t * angle ) * recipSinAngle;
+    }
+    return unitVec0 * scale0 + unitVec1 * scale1;
+}
+
+// Reads fptr[0..2] into vec as x, y, z.
+inline void loadXYZ( Vector3 & vec, const float * fptr ) {
+    vec = Vector3( fptr[0], fptr[1], fptr[2] );
+}
+
+// Writes x, y, z to fptr[0..2], in that order; nothing past the third float is written.
+inline void storeXYZ( const Vector3 & vec, float * fptr ) {
+    *fptr++ = vec.getX();
+    *fptr++ = vec.getY();
+    *fptr = vec.getZ();
+}
+
+// Expands three 16-bit half floats (1 sign, 5 exponent, 10 mantissa bits) at
+// hfptr into vec. The sign bit is always carried over. Exponent 0 becomes a
+// signed zero, exponent 31 becomes a signed infinity, and any other value is
+// rebiased from 15 to 127 with its mantissa shifted up by 13 bits.
+inline void loadHalfFloats( Vector3 & vec, const unsigned short * hfptr ) {
+    union Data32 {
+        unsigned int u32;
+        float f32;
+    };
+    for ( int i = 0; i < 3; ++i ) {
+        const unsigned int half = hfptr[i];
+        const unsigned int sign = half >> 15;
+        unsigned int exponent = ( half >> 10 ) & 0x1Fu;
+        unsigned int mantissa = half & 0x3FFu;
+
+        switch ( exponent ) {
+        case 0:     // zero (any mantissa bits are discarded)
+            mantissa = 0;
+            break;
+        case 31:    // infinity or nan -> infinity
+            exponent = 255;
+            mantissa = 0;
+            break;
+        default:    // ordinary value: rebias and widen the mantissa
+            exponent += 127 - 15;
+            mantissa <<= 13;
+        }
+        Data32 bits;
+        bits.u32 = ( sign << 31 ) | ( exponent << 23 ) | mantissa;
+        vec[i] = bits.f32;
+    }
+}
+
+// Packs vec's three components into 16-bit half floats (1 sign, 5 exponent,
+// 10 mantissa bits) at hfptr. The sign bit is always kept.
+//  - A single-precision exponent too large for a half, which includes
+//    infinity and NaN, is stored as a signed infinity.
+//  - An exponent too small for a normal half, which includes zero and
+//    denormals, is stored as a signed zero.
+//  - Anything else is rebiased from 127 to 15 and its mantissa is truncated
+//    (not rounded) to the top 10 bits.
+inline void storeHalfFloats( const Vector3 & vec, unsigned short * hfptr ) {
+    union Data32 {
+        unsigned int u32;
+        float f32;
+    };
+
+    const unsigned int kBiasShift = 127 - 15;           // float bias minus half bias
+    const unsigned int kOverflowExp = kBiasShift + 31;  // smallest exponent that maps to half exponent 31
+
+    for ( int i = 0; i < 3; ++i ) {
+        // View the float's bit pattern through the union.
+        Data32 bits;
+        bits.f32 = vec[i];
+
+        const unsigned int sign = bits.u32 >> 31;
+        unsigned int exponent = ( bits.u32 >> 23 ) & 0xFFu;
+        unsigned int mantissa = bits.u32 & 0x7FFFFFu;
+
+        if ( exponent >= kOverflowExp ) {
+            // overflow, infinity or nan -> infinity
+            exponent = 31;
+            mantissa = 0;
+        } else if ( exponent <= kBiasShift ) {
+            // zero, denorm or underflow -> zero
+            exponent = 0;
+            mantissa = 0;
+        } else {
+            exponent -= kBiasShift;
+            mantissa >>= 13;
+        }
+
+        hfptr[i] = (unsigned short)( ( sign << 15 ) | ( exponent << 10 ) | mantissa );
+    }
+}
+
+// Assignment: copies x, y and z from vec and returns *this.
+inline Vector3 & Vector3::operator =( const Vector3 & vec ) {
+    this->mX = vec.mX;
+    this->mY = vec.mY;
+    this->mZ = vec.mZ;
+    return *this;
+}
+
+// Sets the x component and returns *this for chaining.
+inline Vector3 & Vector3::setX( float _x ) {
+    this->mX = _x;
+    return *this;
+}
+
+// Reads the x component.
+inline float Vector3::getX( ) const {
+    return this->mX;
+}
+
+// Sets the y component and returns *this for chaining.
+inline Vector3 & Vector3::setY( float _y ) {
+    this->mY = _y;
+    return *this;
+}
+
+// Reads the y component.
+inline float Vector3::getY( ) const {
+    return this->mY;
+}
+
+// Sets the z component and returns *this for chaining.
+inline Vector3 & Vector3::setZ( float _z ) {
+    this->mZ = _z;
+    return *this;
+}
+
+// Reads the z component.
+inline float Vector3::getZ( ) const {
+    return this->mZ;
+}
+
+// Writes the float idx positions after mX (0 = x, 1 = y, 2 = z); idx is not range-checked.
+inline Vector3 & Vector3::setElem( int idx, float value ) {
+    ( &mX )[idx] = value;
+    return *this;
+}
+
+// Reads the float idx positions after mX (0 = x, 1 = y, 2 = z); idx is not range-checked.
+inline float Vector3::getElem( int idx ) const {
+    return ( &mX )[idx];
+}
+
+// Mutable reference to the float idx positions after mX (0 = x, 1 = y, 2 = z); no bounds check.
+inline float & Vector3::operator []( int idx ) {
+    return ( &mX )[idx];
+}
+
+// Read-only subscript: value of the float idx positions after mX; no bounds check.
+inline float Vector3::operator []( int idx ) const {
+    return ( &mX )[idx];
+}
+
+// Component-wise sum of this vector and vec, returned as a new Vector3;
+// neither operand is modified.
+inline const Vector3 Vector3::operator +( const Vector3 & vec ) const {
+    const float sx = mX + vec.mX;
+    const float sy = mY + vec.mY;
+    const float sz = mZ + vec.mZ;
+    return Vector3( sx, sy, sz );
+}
+
+// Component-wise difference (this vector minus vec), returned as a new
+// Vector3; neither operand is modified.
+inline const Vector3 Vector3::operator -( const Vector3 & vec ) const {
+    const float dx = mX - vec.mX;
+    const float dy = mY - vec.mY;
+    const float dz = mZ - vec.mZ;
+    return Vector3( dx, dy, dz );
+}
+
+// Adds this vector's components to pnt's components pairwise and returns
+// the result as a Point3.
+inline const Point3 Vector3::operator +( const Point3 & pnt ) const {
+    const float px = mX + pnt.getX();
+    const float py = mY + pnt.getY();
+    const float pz = mZ + pnt.getZ();
+    return Point3( px, py, pz );
+}
+
+// Multiplies each component by scalar and returns the result as a new
+// Vector3.
+inline const Vector3 Vector3::operator *( float scalar ) const {
+    const float sx = mX * scalar;
+    const float sy = mY * scalar;
+    const float sz = mZ * scalar;
+    return Vector3( sx, sy, sz );
+}
+
+// In-place component-wise addition, expressed through operator + and
+// assignment. Returns *this.
+inline Vector3 & Vector3::operator +=( const Vector3 & vec ) {
+    return ( *this = *this + vec );
+}
+
+// In-place component-wise subtraction, expressed through operator - and
+// assignment. Returns *this.
+inline Vector3 & Vector3::operator -=( const Vector3 & vec ) {
+    return ( *this = *this - vec );
+}
+
+// In-place scaling by scalar, expressed through operator * and assignment.
+// Returns *this.
+inline Vector3 & Vector3::operator *=( float scalar ) {
+    return ( *this = *this * scalar );
+}
+
+// Divides each component by scalar, one division per component. A zero
+// scalar is not guarded against.
+inline const Vector3 Vector3::operator /( float scalar ) const {
+    const float qx = mX / scalar;
+    const float qy = mY / scalar;
+    const float qz = mZ / scalar;
+    return Vector3( qx, qy, qz );
+}
+
+// In-place division by scalar, expressed through operator / and assignment.
+// Returns *this.
+inline Vector3 & Vector3::operator /=( float scalar ) {
+    return ( *this = *this / scalar );
+}
+
+// Unary minus: returns a new Vector3 with each component negated; this
+// vector is left unchanged.
+inline const Vector3 Vector3::operator -( ) const {
+    const float nx = -mX;
+    const float ny = -mY;
+    const float nz = -mZ;
+    return Vector3( nx, ny, nz );
+}
+
+// Scalar-on-the-left multiplication; forwards to Vector3::operator *( float ).
+inline const Vector3 operator *( float scalar, const Vector3 & vec ) {
+    return vec.operator *( scalar );
+}
+
+// Component-wise product: each component of vec0 is multiplied by the
+// matching component of vec1.
+inline const Vector3 mulPerElem( const Vector3 & vec0, const Vector3 & vec1 ) {
+    const float px = vec0.getX() * vec1.getX();
+    const float py = vec0.getY() * vec1.getY();
+    const float pz = vec0.getZ() * vec1.getZ();
+    return Vector3( px, py, pz );
+}
+
+// Component-wise quotient vec0 / vec1. Zero components in vec1 are not
+// guarded against.
+inline const Vector3 divPerElem( const Vector3 & vec0, const Vector3 & vec1 ) {
+    const float qx = vec0.getX() / vec1.getX();
+    const float qy = vec0.getY() / vec1.getY();
+    const float qz = vec0.getZ() / vec1.getZ();
+    return Vector3( qx, qy, qz );
+}
+
+// Component-wise reciprocal 1 / component. Zero components are not guarded
+// against.
+inline const Vector3 recipPerElem( const Vector3 & vec ) {
+    const float rx = 1.0f / vec.getX();
+    const float ry = 1.0f / vec.getY();
+    const float rz = 1.0f / vec.getZ();
+    return Vector3( rx, ry, rz );
+}
+
+// Component-wise square root, using sqrtf on each component; negative
+// components are passed to sqrtf unchanged.
+inline const Vector3 sqrtPerElem( const Vector3 & vec ) {
+    const float rx = sqrtf( vec.getX() );
+    const float ry = sqrtf( vec.getY() );
+    const float rz = sqrtf( vec.getZ() );
+    return Vector3( rx, ry, rz );
+}
+
+// Component-wise reciprocal square root, computed as 1 / sqrtf( component )
+// with a full-precision divide rather than an estimate.
+inline const Vector3 rsqrtPerElem( const Vector3 & vec ) {
+    const float rx = 1.0f / sqrtf( vec.getX() );
+    const float ry = 1.0f / sqrtf( vec.getY() );
+    const float rz = 1.0f / sqrtf( vec.getZ() );
+    return Vector3( rx, ry, rz );
+}
+
+// Component-wise absolute value, using fabsf on each component and
+// returning the result as a new Vector3.
+inline const Vector3 absPerElem( const Vector3 & vec ) {
+    const float ax = fabsf( vec.getX() );
+    const float ay = fabsf( vec.getY() );
+    const float az = fabsf( vec.getZ() );
+    return Vector3( ax, ay, az );
+}
+
+// Gives each component of vec0 the sign of the matching component of vec1.
+// Only "< 0" is tested, so a negative zero in vec1 yields a non-negative result.
+inline const Vector3 copySignPerElem( const Vector3 & vec0, const Vector3 & vec1 ) {
+    const float rx = ( vec1.getX() < 0.0f ) ? -fabsf( vec0.getX() ) : fabsf( vec0.getX() );
+    const float ry = ( vec1.getY() < 0.0f ) ? -fabsf( vec0.getY() ) : fabsf( vec0.getY() );
+    const float rz = ( vec1.getZ() < 0.0f ) ? -fabsf( vec0.getZ() ) : fabsf( vec0.getZ() );
+    return Vector3( rx, ry, rz );
+}
+
+// Component-wise maximum. vec0's component wins only when it is strictly
+// greater; otherwise vec1's component is taken.
+inline const Vector3 maxPerElem( const Vector3 & vec0, const Vector3 & vec1 ) {
+    const float mx = ( vec0.getX() > vec1.getX() ) ? vec0.getX() : vec1.getX();
+    const float my = ( vec0.getY() > vec1.getY() ) ? vec0.getY() : vec1.getY();
+    const float mz = ( vec0.getZ() > vec1.getZ() ) ? vec0.getZ() : vec1.getZ();
+    return Vector3( mx, my, mz );
+}
+
+// Largest of the three components: x and y are compared first, then z
+// against the winner.
+inline float maxElem( const Vector3 & vec ) {
+    const float x = vec.getX(), y = vec.getY(), z = vec.getZ();
+    const float xyMax = ( x > y ) ? x : y;
+    return ( z > xyMax ) ? z : xyMax;
+}
+
+// Component-wise minimum. vec0's component wins only when it is strictly
+// smaller; otherwise vec1's component is taken.
+inline const Vector3 minPerElem( const Vector3 & vec0, const Vector3 & vec1 ) {
+    const float mx = ( vec0.getX() < vec1.getX() ) ? vec0.getX() : vec1.getX();
+    const float my = ( vec0.getY() < vec1.getY() ) ? vec0.getY() : vec1.getY();
+    const float mz = ( vec0.getZ() < vec1.getZ() ) ? vec0.getZ() : vec1.getZ();
+    return Vector3( mx, my, mz );
+}
+
+// Smallest of the three components: x and y are compared first, then z
+// against the winner.
+inline float minElem( const Vector3 & vec ) {
+    const float x = vec.getX(), y = vec.getY(), z = vec.getZ();
+    const float xyMin = ( x < y ) ? x : y;
+    return ( z < xyMin ) ? z : xyMin;
+}
+
+// Sum of the three components, evaluated as ( x + y ) + z.
+inline float sum( const Vector3 & vec ) {
+    float total = vec.getX();
+    total += vec.getY();
+    total += vec.getZ();
+    return total;
+}
+
+// Three-component dot product, accumulated left to right in x, y, z
+// order.
+inline float dot( const Vector3 & vec0, const Vector3 & vec1 ) {
+    float acc = vec0.getX() * vec1.getX();
+    acc += vec0.getY() * vec1.getY();
+    acc += vec0.getZ() * vec1.getZ();
+    return acc;
+}
+
+// Squared length of vec (x*x + y*y + z*z), accumulated in x, y, z order.
+// No square root is taken.
+inline float lengthSqr( const Vector3 & vec ) {
+    float sumSq = vec.getX() * vec.getX();
+    sumSq += vec.getY() * vec.getY();
+    sumSq += vec.getZ() * vec.getZ();
+    return sumSq;
+}
+
+// Euclidean length of vec: the square root of lengthSqr( vec ).
+inline float length( const Vector3 & vec ) {
+    return ::sqrtf( lengthSqr( vec ) );
+}
+
+// Returns vec scaled to unit length: each component is multiplied by
+// 1 / sqrt( lengthSqr( vec ) ). A zero vector is not special-cased, so the
+// reciprocal is then a division by zero.
+inline const Vector3 normalize( const Vector3 & vec ) {
+    const float lenSqr = lengthSqr( vec );
+    const float lenInv = 1.0f / sqrtf( lenSqr );
+    const float nx = vec.getX() * lenInv;
+    const float ny = vec.getY() * lenInv;
+    const float nz = vec.getZ() * lenInv;
+    return Vector3( nx, ny, nz );
+}
+
+// Cross product vec0 x vec1 (a* are vec0's components, b* are vec1's).
+inline const Vector3 cross( const Vector3 & vec0, const Vector3 & vec1 ) {
+    const float ax = vec0.getX(), ay = vec0.getY(), az = vec0.getZ();
+    const float bx = vec1.getX(), by = vec1.getY(), bz = vec1.getZ();
+    return Vector3( ay * bz - az * by,
+                    az * bx - ax * bz,
+                    ax * by - ay * bx );
+}
+
+// Returns a copy of vec1's components when select1 is true, of vec0's otherwise.
+inline const Vector3 select( const Vector3 & vec0, const Vector3 & vec1, bool select1 ) {
+    if ( select1 ) {
+        return Vector3( vec1.getX(), vec1.getY(), vec1.getZ() );
+    } else {
+        return Vector3( vec0.getX(), vec0.getY(), vec0.getZ() );
+    }
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+// Debug helper: prints the three components on one line with printf.
+inline void print( const Vector3 & vec ) {
+    printf( "( %f %f %f )\n", vec.getX( ), vec.getY( ), vec.getZ( ) );
+}
+
+// Debug helper: prints name, a colon, then the three components on one line.
+inline void print( const Vector3 & vec, const char * name ) {
+    printf( "%s: ( %f %f %f )\n", name, vec.getX( ), vec.getY( ), vec.getZ( ) );
+}
+
+#endif
+
+// Copy constructor: takes x, y, z and w from vec.
+inline Vector4::Vector4( const Vector4 & vec ) {
+    this->mX = vec.getX();
+    this->mY = vec.getY();
+    this->mZ = vec.getZ();
+    this->mW = vec.getW();
+}
+
+// Builds a vector from four scalars.
+inline Vector4::Vector4( float _x, float _y, float _z, float _w ) {
+    this->mX = _x;
+    this->mY = _y;
+    this->mZ = _z;
+    this->mW = _w;
+}
+
+// Builds a vector from a Vector3 (x, y, z) plus an explicit w.
+inline Vector4::Vector4( const Vector3 & xyz, float _w ) {
+    setXYZ( xyz );
+    setW( _w );
+}
+
+// Extends a Vector3 to four components with w = 0.
+inline Vector4::Vector4( const Vector3 & vec ) {
+    this->mX = vec.getX();
+    this->mY = vec.getY();
+    this->mZ = vec.getZ();
+    this->mW = 0.0f;
+}
+
+// Extends a Point3 to four components with w = 1.
+inline Vector4::Vector4( const Point3 & pnt ) {
+    this->mX = pnt.getX();
+    this->mY = pnt.getY();
+    this->mZ = pnt.getZ();
+    this->mW = 1.0f;
+}
+
+// Converts a quaternion to a Vector4 by copying x, y, z and w.
+inline Vector4::Vector4( const Quat & quat ) {
+    this->mX = quat.getX();
+    this->mY = quat.getY();
+    this->mZ = quat.getZ();
+    this->mW = quat.getW();
+}
+
+// Broadcast constructor: every component is set to scalar.
+inline Vector4::Vector4( float scalar ) {
+    this->mX = scalar;
+    this->mY = scalar;
+    this->mZ = scalar;
+    this->mW = scalar;
+}
+
+// Unit vector along x: ( 1, 0, 0, 0 ).
+inline const Vector4 Vector4::xAxis( ) {
+    return Vector4( 1.0f, 0.0f, 0.0f, 0.0f );
+}
+
+// Unit vector along y: ( 0, 1, 0, 0 ).
+inline const Vector4 Vector4::yAxis( ) {
+    return Vector4( 0.0f, 1.0f, 0.0f, 0.0f );
+}
+
+// Unit vector along z: ( 0, 0, 1, 0 ).
+inline const Vector4 Vector4::zAxis( ) {
+    return Vector4( 0.0f, 0.0f, 1.0f, 0.0f );
+}
+
+// Unit vector along w: ( 0, 0, 0, 1 ).
+inline const Vector4 Vector4::wAxis( ) {
+    return Vector4( 0.0f, 0.0f, 0.0f, 1.0f );
+}
+
+// Linear interpolation vec0 + ( vec1 - vec0 ) * t; t is not clamped.
+inline const Vector4 lerp( float t, const Vector4 & vec0, const Vector4 & vec1 ) {
+    return vec0 + ( vec1 - vec0 ) * t;
+}
+
+// Spherical interpolation between two unit 4-D vectors. Once the cosine of
+// the angle reaches _VECTORMATH_SLERP_TOL the weights stay linear, which
+// avoids dividing by a near-zero sine for nearly parallel inputs.
+inline const Vector4 slerp( float t, const Vector4 & unitVec0, const Vector4 & unitVec1 ) {
+    const float cosAngle = dot( unitVec0, unitVec1 );
+    float scale0 = 1.0f - t;
+    float scale1 = t;
+    if ( cosAngle < _VECTORMATH_SLERP_TOL ) {
+        const float angle = acosf( cosAngle );
+        const float recipSinAngle = 1.0f / sinf( angle );
+        scale0 = sinf( ( 1.0f - t ) * angle ) * recipSinAngle;
+        scale1 = sinf( t * angle ) * recipSinAngle;
+    }
+    return unitVec0 * scale0 + unitVec1 * scale1;
+}
+
+// Reads fptr[0..3] into vec as x, y, z, w.
+inline void loadXYZW( Vector4 & vec, const float * fptr ) {
+    vec = Vector4( fptr[0], fptr[1], fptr[2], fptr[3] );
+}
+
+// Writes x, y, z, w to fptr[0..3], in that order.
+inline void storeXYZW( const Vector4 & vec, float * fptr ) {
+    *fptr++ = vec.getX();
+    *fptr++ = vec.getY();
+    *fptr++ = vec.getZ();
+    *fptr = vec.getW();
+}
+
+// Expands four 16-bit half floats (1 sign, 5 exponent, 10 mantissa bits) at
+// hfptr into vec. The sign bit is always carried over. Exponent 0 becomes a
+// signed zero, exponent 31 becomes a signed infinity, and any other value is
+// rebiased from 15 to 127 with its mantissa shifted up by 13 bits.
+inline void loadHalfFloats( Vector4 & vec, const unsigned short * hfptr ) {
+    union Data32 {
+        unsigned int u32;
+        float f32;
+    };
+    for ( int i = 0; i < 4; ++i ) {
+        const unsigned int half = hfptr[i];
+        const unsigned int sign = half >> 15;
+        unsigned int exponent = ( half >> 10 ) & 0x1Fu;
+        unsigned int mantissa = half & 0x3FFu;
+
+        switch ( exponent ) {
+        case 0:     // zero (any mantissa bits are discarded)
+            mantissa = 0;
+            break;
+        case 31:    // infinity or nan -> infinity
+            exponent = 255;
+            mantissa = 0;
+            break;
+        default:    // ordinary value: rebias and widen the mantissa
+            exponent += 127 - 15;
+            mantissa <<= 13;
+        }
+        Data32 bits;
+        bits.u32 = ( sign << 31 ) | ( exponent << 23 ) | mantissa;
+        vec[i] = bits.f32;
+    }
+}
+
+// Packs vec's four components into 16-bit half floats (1 sign, 5 exponent,
+// 10 mantissa bits) at hfptr. The sign bit is always kept.
+//  - A single-precision exponent too large for a half, which includes
+//    infinity and NaN, is stored as a signed infinity.
+//  - An exponent too small for a normal half, which includes zero and
+//    denormals, is stored as a signed zero.
+//  - Anything else is rebiased from 127 to 15 and its mantissa is truncated
+//    (not rounded) to the top 10 bits.
+inline void storeHalfFloats( const Vector4 & vec, unsigned short * hfptr ) {
+    union Data32 {
+        unsigned int u32;
+        float f32;
+    };
+
+    const unsigned int kBiasShift = 127 - 15;           // float bias minus half bias
+    const unsigned int kOverflowExp = kBiasShift + 31;  // smallest exponent that maps to half exponent 31
+
+    for ( int i = 0; i < 4; ++i ) {
+        // View the float's bit pattern through the union.
+        Data32 bits;
+        bits.f32 = vec[i];
+
+        const unsigned int sign = bits.u32 >> 31;
+        unsigned int exponent = ( bits.u32 >> 23 ) & 0xFFu;
+        unsigned int mantissa = bits.u32 & 0x7FFFFFu;
+
+        if ( exponent >= kOverflowExp ) {
+            // overflow, infinity or nan -> infinity
+            exponent = 31;
+            mantissa = 0;
+        } else if ( exponent <= kBiasShift ) {
+            // zero, denorm or underflow -> zero
+            exponent = 0;
+            mantissa = 0;
+        } else {
+            exponent -= kBiasShift;
+            mantissa >>= 13;
+        }
+
+        hfptr[i] = (unsigned short)( ( sign << 15 ) | ( exponent << 10 ) | mantissa );
+    }
+}
+
+// Assignment: copies x, y, z and w from vec and returns *this.
+inline Vector4 & Vector4::operator =( const Vector4 & vec ) {
+    this->mX = vec.mX;
+    this->mY = vec.mY;
+    this->mZ = vec.mZ;
+    this->mW = vec.mW;
+    return *this;
+}
+
+// Overwrites x, y and z from vec; w keeps its current value. Returns *this.
+inline Vector4 & Vector4::setXYZ( const Vector3 & vec ) {
+    this->mX = vec.getX();
+    this->mY = vec.getY();
+    this->mZ = vec.getZ();
+    return *this;
+}
+
+// Returns x, y and z as a Vector3; w is not included.
+inline const Vector3 Vector4::getXYZ( ) const {
+    return Vector3( getX(), getY(), getZ() );
+}
+
+// Sets the x component and returns *this for chaining.
+inline Vector4 & Vector4::setX( float _x ) {
+    this->mX = _x;
+    return *this;
+}
+
+// Reads the x component.
+inline float Vector4::getX( ) const {
+    return this->mX;
+}
+
+// Sets the y component and returns *this for chaining.
+inline Vector4 & Vector4::setY( float _y ) {
+    this->mY = _y;
+    return *this;
+}
+
+// Reads the y component.
+inline float Vector4::getY( ) const {
+    return this->mY;
+}
+
+// Sets the z component and returns *this for chaining.
+inline Vector4 & Vector4::setZ( float _z ) {
+    this->mZ = _z;
+    return *this;
+}
+
+// Reads the z component.
+inline float Vector4::getZ( ) const {
+    return this->mZ;
+}
+
+// Sets the w component and returns *this for chaining.
+inline Vector4 & Vector4::setW( float _w ) {
+    this->mW = _w;
+    return *this;
+}
+
+// Reads the w component.
+inline float Vector4::getW( ) const {
+    return this->mW;
+}
+
+// Writes the float idx positions after mX (0 = x ... 3 = w); idx is not range-checked.
+inline Vector4 & Vector4::setElem( int idx, float value ) {
+    ( &mX )[idx] = value;
+    return *this;
+}
+
+// Reads the float idx positions after mX (0 = x ... 3 = w); idx is not range-checked.
+inline float Vector4::getElem( int idx ) const {
+    return ( &mX )[idx];
+}
+
+// Mutable reference to the float idx positions after mX (0 = x ... 3 = w); no bounds check.
+inline float & Vector4::operator []( int idx ) {
+    return ( &mX )[idx];
+}
+
+// Read-only subscript: value of the float idx positions after mX; no bounds check.
+inline float Vector4::operator []( int idx ) const {
+    return ( &mX )[idx];
+}
+
+// Component-wise sum of this vector and vec, returned as a new Vector4;
+// neither operand is modified.
+inline const Vector4 Vector4::operator +( const Vector4 & vec ) const {
+    const float sx = mX + vec.mX;
+    const float sy = mY + vec.mY;
+    const float sz = mZ + vec.mZ;
+    const float sw = mW + vec.mW;
+    return Vector4( sx, sy, sz, sw );
+}
+
+// Component-wise difference (this vector minus vec), returned as a new
+// Vector4; neither operand is modified.
+inline const Vector4 Vector4::operator -( const Vector4 & vec ) const {
+    const float dx = mX - vec.mX;
+    const float dy = mY - vec.mY;
+    const float dz = mZ - vec.mZ;
+    const float dw = mW - vec.mW;
+    return Vector4( dx, dy, dz, dw );
+}
+
+// Multiplies each component by scalar and returns the result as a new
+// Vector4.
+inline const Vector4 Vector4::operator *( float scalar ) const {
+    const float sx = mX * scalar;
+    const float sy = mY * scalar;
+    const float sz = mZ * scalar;
+    const float sw = mW * scalar;
+    return Vector4( sx, sy, sz, sw );
+}
+
+// In-place component-wise addition, expressed through operator + and
+// assignment. Returns *this.
+inline Vector4 & Vector4::operator +=( const Vector4 & vec ) {
+    return ( *this = *this + vec );
+}
+
+// In-place component-wise subtraction, expressed through operator - and
+// assignment. Returns *this.
+inline Vector4 & Vector4::operator -=( const Vector4 & vec ) {
+    return ( *this = *this - vec );
+}
+
+// In-place scaling by scalar, expressed through operator * and assignment.
+// Returns *this.
+inline Vector4 & Vector4::operator *=( float scalar ) {
+    return ( *this = *this * scalar );
+}
+
+// Divide every element by scalar (a true division per element, not a
+// multiplication by the reciprocal).
+inline const Vector4 Vector4::operator /( float scalar ) const
+{
+    const float x = mX / scalar;
+    const float y = mY / scalar;
+    const float z = mZ / scalar;
+    const float w = mW / scalar;
+    return Vector4( x, y, z, w );
+}
+
+// Divide this vector by scalar in place.
+inline Vector4 & Vector4::operator /=( float scalar )
+{
+    const Vector4 quotient = *this / scalar;
+    *this = quotient;
+    return *this;
+}
+
+// Negate every element.
+inline const Vector4 Vector4::operator -( ) const
+{
+    const float x = -mX;
+    const float y = -mY;
+    const float z = -mZ;
+    const float w = -mW;
+    return Vector4( x, y, z, w );
+}
+
+// scalar * vec: forwards to the member form vec * scalar.
+inline const Vector4 operator *( float scalar, const Vector4 & vec )
+{
+    const Vector4 scaled = vec * scalar;
+    return scaled;
+}
+
+// Multiply two 4-D vectors element by element.
+inline const Vector4 mulPerElem( const Vector4 & vec0, const Vector4 & vec1 )
+{
+    const float x = vec0.getX() * vec1.getX();
+    const float y = vec0.getY() * vec1.getY();
+    const float z = vec0.getZ() * vec1.getZ();
+    const float w = vec0.getW() * vec1.getW();
+    return Vector4( x, y, z, w );
+}
+
+// Divide vec0 by vec1 element by element; zero divisors are not checked.
+inline const Vector4 divPerElem( const Vector4 & vec0, const Vector4 & vec1 )
+{
+    const float x = vec0.getX() / vec1.getX();
+    const float y = vec0.getY() / vec1.getY();
+    const float z = vec0.getZ() / vec1.getZ();
+    const float w = vec0.getW() / vec1.getW();
+    return Vector4( x, y, z, w );
+}
+
+// Reciprocal (1 / element) of every element; zero elements are not checked.
+inline const Vector4 recipPerElem( const Vector4 & vec )
+{
+    const float x = 1.0f / vec.getX();
+    const float y = 1.0f / vec.getY();
+    const float z = 1.0f / vec.getZ();
+    const float w = 1.0f / vec.getW();
+    return Vector4( x, y, z, w );
+}
+
+// Square root of every element, computed with sqrtf.
+inline const Vector4 sqrtPerElem( const Vector4 & vec )
+{
+    const float x = sqrtf( vec.getX() );
+    const float y = sqrtf( vec.getY() );
+    const float z = sqrtf( vec.getZ() );
+    const float w = sqrtf( vec.getW() );
+    return Vector4( x, y, z, w );
+}
+
+// Reciprocal square root (1 / sqrtf(element)) of every element.
+inline const Vector4 rsqrtPerElem( const Vector4 & vec )
+{
+    const float x = 1.0f / sqrtf( vec.getX() );
+    const float y = 1.0f / sqrtf( vec.getY() );
+    const float z = 1.0f / sqrtf( vec.getZ() );
+    const float w = 1.0f / sqrtf( vec.getW() );
+    return Vector4( x, y, z, w );
+}
+
+// Absolute value of every element, computed with fabsf.
+inline const Vector4 absPerElem( const Vector4 & vec )
+{
+    const float x = fabsf( vec.getX() );
+    const float y = fabsf( vec.getY() );
+    const float z = fabsf( vec.getZ() );
+    const float w = fabsf( vec.getW() );
+    return Vector4( x, y, z, w );
+}
+
+// Give each element of vec0 the sign of the matching element of vec1.
+// The sign test is "vec1 element < 0.0f", so anything that does not compare
+// below zero (including -0.0f) yields a non-negative result.
+inline const Vector4 copySignPerElem( const Vector4 & vec0, const Vector4 & vec1 )
+{
+    const float magX = fabsf( vec0.getX() );
+    const float magY = fabsf( vec0.getY() );
+    const float magZ = fabsf( vec0.getZ() );
+    const float magW = fabsf( vec0.getW() );
+    return Vector4(
+        ( vec1.getX() < 0.0f )? -magX : magX,
+        ( vec1.getY() < 0.0f )? -magY : magY,
+        ( vec1.getZ() < 0.0f )? -magZ : magZ,
+        ( vec1.getW() < 0.0f )? -magW : magW
+    );
+}
+
+// Per-element maximum: the vec0 element is taken only when it compares
+// strictly greater than the vec1 element.
+inline const Vector4 maxPerElem( const Vector4 & vec0, const Vector4 & vec1 )
+{
+    float x = vec1.getX();
+    float y = vec1.getY();
+    float z = vec1.getZ();
+    float w = vec1.getW();
+    if ( vec0.getX() > x ) { x = vec0.getX(); }
+    if ( vec0.getY() > y ) { y = vec0.getY(); }
+    if ( vec0.getZ() > z ) { z = vec0.getZ(); }
+    if ( vec0.getW() > w ) { w = vec0.getW(); }
+    return Vector4( x, y, z, w );
+}
+
+// Largest of the four elements, found by successive strict '>' comparisons.
+inline float maxElem( const Vector4 & vec )
+{
+    float largest = vec.getY();
+    if ( vec.getX() > largest ) { largest = vec.getX(); }
+    if ( vec.getZ() > largest ) { largest = vec.getZ(); }
+    if ( vec.getW() > largest ) { largest = vec.getW(); }
+    return largest;
+}
+
+// Per-element minimum: the vec0 element is taken only when it compares
+// strictly less than the vec1 element.
+inline const Vector4 minPerElem( const Vector4 & vec0, const Vector4 & vec1 )
+{
+    float x = vec1.getX();
+    float y = vec1.getY();
+    float z = vec1.getZ();
+    float w = vec1.getW();
+    if ( vec0.getX() < x ) { x = vec0.getX(); }
+    if ( vec0.getY() < y ) { y = vec0.getY(); }
+    if ( vec0.getZ() < z ) { z = vec0.getZ(); }
+    if ( vec0.getW() < w ) { w = vec0.getW(); }
+    return Vector4( x, y, z, w );
+}
+
+// Smallest of the four elements, found by successive strict '<' comparisons.
+inline float minElem( const Vector4 & vec )
+{
+    float smallest = vec.getY();
+    if ( vec.getX() < smallest ) { smallest = vec.getX(); }
+    if ( vec.getZ() < smallest ) { smallest = vec.getZ(); }
+    if ( vec.getW() < smallest ) { smallest = vec.getW(); }
+    return smallest;
+}
+
+// Sum of all four elements, accumulated in x, y, z, w order.
+inline float sum( const Vector4 & vec )
+{
+    float total = vec.getX() + vec.getY();
+    total += vec.getZ();
+    total += vec.getW();
+    return total;
+}
+
+// 4-D dot product, accumulated in x, y, z, w order.
+inline float dot( const Vector4 & vec0, const Vector4 & vec1 )
+{
+    float acc = vec0.getX() * vec1.getX();
+    acc += vec0.getY() * vec1.getY();
+    acc += vec0.getZ() * vec1.getZ();
+    acc += vec0.getW() * vec1.getW();
+    return acc;
+}
+
+// Squared length: the sum of the squares of all four elements.
+inline float lengthSqr( const Vector4 & vec )
+{
+    float acc = vec.getX() * vec.getX();
+    acc += vec.getY() * vec.getY();
+    acc += vec.getZ() * vec.getZ();
+    acc += vec.getW() * vec.getW();
+    return acc;
+}
+
+// Length of a 4-D vector: square root of lengthSqr.
+inline float length( const Vector4 & vec )
+{
+    const float squared = lengthSqr( vec );
+    return ::sqrtf( squared );
+}
+
+// Scale vec by 1 / sqrtf(lengthSqr(vec)).
+// No guard against a zero-length input: the reciprocal is then a division by zero.
+inline const Vector4 normalize( const Vector4 & vec )
+{
+    const float invLen = 1.0f / sqrtf( lengthSqr( vec ) );
+    // Vector4::operator *(float) multiplies each element by invLen.
+    return vec * invLen;
+}
+
+// Return a copy of vec1 when select1 is true, otherwise a copy of vec0.
+inline const Vector4 select( const Vector4 & vec0, const Vector4 & vec1, bool select1 )
+{
+    const Vector4 & chosen = select1 ? vec1 : vec0;
+    return Vector4( chosen.getX(), chosen.getY(), chosen.getZ(), chosen.getW() );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+// Write the four elements to stdout as "( x y z w )" followed by a newline.
+inline void print( const Vector4 & vec )
+{
+    const float x = vec.getX();
+    const float y = vec.getY();
+    const float z = vec.getZ();
+    const float w = vec.getW();
+    printf( "( %f %f %f %f )\n", x, y, z, w );
+}
+
+// Same as above, prefixed with "name: ".
+inline void print( const Vector4 & vec, const char * name )
+{
+    const float x = vec.getX();
+    const float y = vec.getY();
+    const float z = vec.getZ();
+    const float w = vec.getW();
+    printf( "%s: ( %f %f %f %f )\n", name, x, y, z, w );
+}
+
+#endif
+
+// Copy the x, y, and z elements of another point.
+inline Point3::Point3( const Point3 & pnt )
+    : mX( pnt.mX ), mY( pnt.mY ), mZ( pnt.mZ )
+{
+}
+
+// Build a point from explicit x, y, and z values.
+inline Point3::Point3( float _x, float _y, float _z )
+    : mX( _x ), mY( _y ), mZ( _z )
+{
+}
+
+// Build a point from the x, y, and z elements of a 3-D vector.
+inline Point3::Point3( const Vector3 & vec )
+    : mX( vec.getX() ), mY( vec.getY() ), mZ( vec.getZ() )
+{
+}
+
+// Build a point whose three elements all equal scalar.
+inline Point3::Point3( float scalar )
+    : mX( scalar ), mY( scalar ), mZ( scalar )
+{
+}
+
+// Linear interpolation pnt0 + (pnt1 - pnt0) * t; t is not clamped.
+inline const Point3 lerp( float t, const Point3 & pnt0, const Point3 & pnt1 )
+{
+    const Vector3 span = pnt1 - pnt0;
+    return pnt0 + ( span * t );
+}
+
+// Read fptr[0..2] and assign them to pnt as x, y, and z.
+inline void loadXYZ( Point3 & pnt, const float * fptr )
+{
+    const Point3 loaded( fptr[0], fptr[1], fptr[2] );
+    pnt = loaded;
+}
+
+// Write the x, y, and z elements of pnt to fptr[0], fptr[1], and fptr[2].
+inline void storeXYZ( const Point3 & pnt, float * fptr )
+{
+    float * out = fptr;
+    *out++ = pnt.getX();
+    *out++ = pnt.getY();
+    *out = pnt.getZ();
+}
+
+// Expand three 16-bit half-float bit patterns from hfptr into the elements of vec.
+// A half exponent of 0 (zero or denormal) becomes signed zero; a half exponent
+// of 31 (infinity or NaN) becomes signed infinity.
+inline void loadHalfFloats( Point3 & vec, const unsigned short * hfptr )
+{
+    union Data32 {
+        unsigned int u32;
+        float f32;
+    };
+
+    const unsigned int halfExpMask = (1 << 5) - 1;    // 5 exponent bits
+    const unsigned int halfFracMask = (1 << 10) - 1;  // 10 mantissa bits
+
+    for (int elem = 0; elem < 3; ++elem) {
+        const unsigned int half = hfptr[elem];
+        const unsigned int signBit = half >> 15;
+        unsigned int exp32 = (half >> 10) & halfExpMask;
+        unsigned int frac32 = half & halfFracMask;
+
+        if (exp32 == 31) {
+            // infinity or nan -> infinity
+            exp32 = 255;
+            frac32 = 0;
+        } else if (exp32 != 0) {
+            // normal value: rebias the exponent (15 -> 127) and widen the mantissa (10 -> 23 bits)
+            exp32 += 127 - 15;
+            frac32 <<= 13;
+        } else {
+            // exponent 0 -> zero (the exponent field is already 0)
+            frac32 = 0;
+        }
+
+        Data32 bits;
+        bits.u32 = (signBit << 31) | (exp32 << 23) | frac32;
+        vec[elem] = bits.f32;
+    }
+}
+
+// Pack the three elements of vec into 16-bit half-float bit patterns at hfptr[0..2].
+// The mantissa is truncated (shifted right by 13), not rounded.
+// Values whose float exponent is too large, including infinity and NaN, become
+// signed infinity; values whose exponent is too small, including zero and
+// denormals, become signed zero.
+inline void storeHalfFloats( const Point3 & vec, unsigned short * hfptr )
+{
+    union Data32 {
+        unsigned int u32;
+        float f32;
+    };
+
+    for (int elem = 0; elem < 3; ++elem) {
+        Data32 bits;
+        bits.f32 = vec[elem];
+
+        const unsigned int signBit = bits.u32 >> 31;
+        const unsigned int exp32 = (bits.u32 >> 23) & ((1 << 8) - 1);
+        unsigned int exp16;
+        unsigned int frac16;
+
+        if (exp32 >= 127 - 15 + 31) {
+            // overflow, infinity, or nan -> infinity
+            exp16 = 31;
+            frac16 = 0;
+        } else if (exp32 <= 127 - 15) {
+            // zero, denorm, or underflow -> zero
+            exp16 = 0;
+            frac16 = 0;
+        } else {
+            // representable: rebias the exponent (127 -> 15) and keep the top 10 mantissa bits
+            exp16 = exp32 - (127 - 15);
+            frac16 = (bits.u32 & ((1 << 23) - 1)) >> 13;
+        }
+
+        hfptr[elem] = (unsigned short)((signBit << 15) | (exp16 << 10) | frac16);
+    }
+}
+
+// Copy the x, y, and z elements of pnt into this point; returns *this.
+inline Point3 & Point3::operator =( const Point3 & pnt )
+{
+    this->mX = pnt.mX;
+    this->mY = pnt.mY;
+    this->mZ = pnt.mZ;
+    return *this;
+}
+
+// Store a new x element; returns *this so calls can be chained.
+inline Point3 & Point3::setX( float _x )
+{
+    this->mX = _x;
+    return *this;
+}
+
+// Read the x element.
+inline float Point3::getX( ) const
+{
+    return this->mX;
+}
+
+// Store a new y element; returns *this so calls can be chained.
+inline Point3 & Point3::setY( float _y )
+{
+    this->mY = _y;
+    return *this;
+}
+
+// Read the y element.
+inline float Point3::getY( ) const
+{
+    return this->mY;
+}
+
+// Store a new z element; returns *this so calls can be chained.
+inline Point3 & Point3::setZ( float _z )
+{
+    this->mZ = _z;
+    return *this;
+}
+
+// Read the z element.
+inline float Point3::getZ( ) const
+{
+    return this->mZ;
+}
+
+// Write value to the element idx floats past mX (idx 0 selects x).
+// idx is not range-checked.
+inline Point3 & Point3::setElem( int idx, float value )
+{
+    float * const elems = &mX;
+    elems[idx] = value;
+    return *this;
+}
+
+// Read the element idx floats past mX; idx is not range-checked.
+inline float Point3::getElem( int idx ) const
+{
+    const float * const elems = &mX;
+    return elems[idx];
+}
+
+// Mutable subscript: yields a reference to the element idx floats past mX.
+inline float & Point3::operator []( int idx )
+{
+    return ( &mX )[idx];
+}
+
+// Read-only subscript: yields a copy of the element idx floats past mX.
+inline float Point3::operator []( int idx ) const
+{
+    return ( &mX )[idx];
+}
+
+// Difference of two points, returned as a 3-D vector (this point minus pnt).
+inline const Vector3 Point3::operator -( const Point3 & pnt ) const
+{
+    const float dx = mX - pnt.mX;
+    const float dy = mY - pnt.mY;
+    const float dz = mZ - pnt.mZ;
+    return Vector3( dx, dy, dz );
+}
+
+// Translate this point by vec; the result is a point.
+inline const Point3 Point3::operator +( const Vector3 & vec ) const
+{
+    const float x = mX + vec.getX();
+    const float y = mY + vec.getY();
+    const float z = mZ + vec.getZ();
+    return Point3( x, y, z );
+}
+
+// Translate this point by the negative of vec; the result is a point.
+inline const Point3 Point3::operator -( const Vector3 & vec ) const
+{
+    const float x = mX - vec.getX();
+    const float y = mY - vec.getY();
+    const float z = mZ - vec.getZ();
+    return Point3( x, y, z );
+}
+
+// Translate this point by vec in place.
+inline Point3 & Point3::operator +=( const Vector3 & vec )
+{
+    const Point3 moved = *this + vec;
+    *this = moved;
+    return *this;
+}
+
+// Translate this point by the negative of vec in place.
+inline Point3 & Point3::operator -=( const Vector3 & vec )
+{
+    const Point3 moved = *this - vec;
+    *this = moved;
+    return *this;
+}
+
+// Multiply two 3-D points element by element.
+inline const Point3 mulPerElem( const Point3 & pnt0, const Point3 & pnt1 )
+{
+    const float x = pnt0.getX() * pnt1.getX();
+    const float y = pnt0.getY() * pnt1.getY();
+    const float z = pnt0.getZ() * pnt1.getZ();
+    return Point3( x, y, z );
+}
+
+// Divide pnt0 by pnt1 element by element; zero divisors are not checked.
+inline const Point3 divPerElem( const Point3 & pnt0, const Point3 & pnt1 )
+{
+    const float x = pnt0.getX() / pnt1.getX();
+    const float y = pnt0.getY() / pnt1.getY();
+    const float z = pnt0.getZ() / pnt1.getZ();
+    return Point3( x, y, z );
+}
+
+// Reciprocal (1 / element) of every element; zero elements are not checked.
+inline const Point3 recipPerElem( const Point3 & pnt )
+{
+    const float x = 1.0f / pnt.getX();
+    const float y = 1.0f / pnt.getY();
+    const float z = 1.0f / pnt.getZ();
+    return Point3( x, y, z );
+}
+
+// Square root of every element, computed with sqrtf.
+inline const Point3 sqrtPerElem( const Point3 & pnt )
+{
+    const float x = sqrtf( pnt.getX() );
+    const float y = sqrtf( pnt.getY() );
+    const float z = sqrtf( pnt.getZ() );
+    return Point3( x, y, z );
+}
+
+// Reciprocal square root (1 / sqrtf(element)) of every element.
+inline const Point3 rsqrtPerElem( const Point3 & pnt )
+{
+    const float x = 1.0f / sqrtf( pnt.getX() );
+    const float y = 1.0f / sqrtf( pnt.getY() );
+    const float z = 1.0f / sqrtf( pnt.getZ() );
+    return Point3( x, y, z );
+}
+
+// Absolute value of every element, computed with fabsf.
+inline const Point3 absPerElem( const Point3 & pnt )
+{
+    const float x = fabsf( pnt.getX() );
+    const float y = fabsf( pnt.getY() );
+    const float z = fabsf( pnt.getZ() );
+    return Point3( x, y, z );
+}
+
+// Give each element of pnt0 the sign of the matching element of pnt1.
+// The sign test is "pnt1 element < 0.0f", so anything that does not compare
+// below zero (including -0.0f) yields a non-negative result.
+inline const Point3 copySignPerElem( const Point3 & pnt0, const Point3 & pnt1 )
+{
+    const float magX = fabsf( pnt0.getX() );
+    const float magY = fabsf( pnt0.getY() );
+    const float magZ = fabsf( pnt0.getZ() );
+    return Point3(
+        ( pnt1.getX() < 0.0f )? -magX : magX,
+        ( pnt1.getY() < 0.0f )? -magY : magY,
+        ( pnt1.getZ() < 0.0f )? -magZ : magZ
+    );
+}
+
+// Per-element maximum: the pnt0 element is taken only when it compares
+// strictly greater than the pnt1 element.
+inline const Point3 maxPerElem( const Point3 & pnt0, const Point3 & pnt1 )
+{
+    float x = pnt1.getX();
+    float y = pnt1.getY();
+    float z = pnt1.getZ();
+    if ( pnt0.getX() > x ) { x = pnt0.getX(); }
+    if ( pnt0.getY() > y ) { y = pnt0.getY(); }
+    if ( pnt0.getZ() > z ) { z = pnt0.getZ(); }
+    return Point3( x, y, z );
+}
+
+// Largest of the three elements, found by successive strict '>' comparisons.
+inline float maxElem( const Point3 & pnt )
+{
+    float largest = pnt.getY();
+    if ( pnt.getX() > largest ) { largest = pnt.getX(); }
+    if ( pnt.getZ() > largest ) { largest = pnt.getZ(); }
+    return largest;
+}
+
+// Per-element minimum: the pnt0 element is taken only when it compares
+// strictly less than the pnt1 element.
+inline const Point3 minPerElem( const Point3 & pnt0, const Point3 & pnt1 )
+{
+    float x = pnt1.getX();
+    float y = pnt1.getY();
+    float z = pnt1.getZ();
+    if ( pnt0.getX() < x ) { x = pnt0.getX(); }
+    if ( pnt0.getY() < y ) { y = pnt0.getY(); }
+    if ( pnt0.getZ() < z ) { z = pnt0.getZ(); }
+    return Point3( x, y, z );
+}
+
+// Smallest of the three elements, found by successive strict '<' comparisons.
+inline float minElem( const Point3 & pnt )
+{
+    float smallest = pnt.getY();
+    if ( pnt.getX() < smallest ) { smallest = pnt.getX(); }
+    if ( pnt.getZ() < smallest ) { smallest = pnt.getZ(); }
+    return smallest;
+}
+
+// Sum of the three elements, accumulated in x, y, z order.
+inline float sum( const Point3 & pnt )
+{
+    float total = pnt.getX() + pnt.getY();
+    total += pnt.getZ();
+    return total;
+}
+
+// Uniform scale: multiply every element of pnt by scaleVal.
+inline const Point3 scale( const Point3 & pnt, float scaleVal )
+{
+    const Point3 factors( scaleVal );
+    return mulPerElem( pnt, factors );
+}
+
+// Non-uniform scale: multiply each element of pnt by the matching element of scaleVec.
+inline const Point3 scale( const Point3 & pnt, const Vector3 & scaleVec )
+{
+    const Point3 factors( scaleVec );
+    return mulPerElem( pnt, factors );
+}
+
+// Dot product of the point's coordinates with unitVec, accumulated in
+// x, y, z order. unitVec is used as given; it is not normalized here.
+inline float projection( const Point3 & pnt, const Vector3 & unitVec )
+{
+    float acc = pnt.getX() * unitVec.getX();
+    acc += pnt.getY() * unitVec.getY();
+    acc += pnt.getZ() * unitVec.getZ();
+    return acc;
+}
+
+// Squared distance from the origin: lengthSqr of the point viewed as a vector.
+inline float distSqrFromOrigin( const Point3 & pnt )
+{
+    const Vector3 fromOrigin( pnt );
+    return lengthSqr( fromOrigin );
+}
+
+// Distance from the origin: length of the point viewed as a vector.
+inline float distFromOrigin( const Point3 & pnt )
+{
+    const Vector3 fromOrigin( pnt );
+    return length( fromOrigin );
+}
+
+// Squared distance between two points: lengthSqr of (pnt1 - pnt0).
+inline float distSqr( const Point3 & pnt0, const Point3 & pnt1 )
+{
+    const Vector3 offset = pnt1 - pnt0;
+    return lengthSqr( offset );
+}
+
+// Distance between two points: length of (pnt1 - pnt0).
+inline float dist( const Point3 & pnt0, const Point3 & pnt1 )
+{
+    const Vector3 offset = pnt1 - pnt0;
+    return length( offset );
+}
+
+// Return a copy of pnt1 when select1 is true, otherwise a copy of pnt0.
+inline const Point3 select( const Point3 & pnt0, const Point3 & pnt1, bool select1 )
+{
+    const Point3 & chosen = select1 ? pnt1 : pnt0;
+    return Point3( chosen.getX(), chosen.getY(), chosen.getZ() );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+// Write the three elements to stdout as "( x y z )" followed by a newline.
+inline void print( const Point3 & pnt )
+{
+    const float x = pnt.getX();
+    const float y = pnt.getY();
+    const float z = pnt.getZ();
+    printf( "( %f %f %f )\n", x, y, z );
+}
+
+// Same as above, prefixed with "name: ".
+inline void print( const Point3 & pnt, const char * name )
+{
+    const float x = pnt.getX();
+    const float y = pnt.getY();
+    const float z = pnt.getZ();
+    printf( "%s: ( %f %f %f )\n", name, x, y, z );
+}
+
+#endif
+
+} // namespace Aos
+} // namespace Vectormath
+
+#endif
+
diff --git a/Engine/lib/bullet/src/vectormath/neon/vectormath_aos.h b/Engine/lib/bullet/src/vectormath/neon/vectormath_aos.h
new file mode 100644
index 000000000..97bdc278a
--- /dev/null
+++ b/Engine/lib/bullet/src/vectormath/neon/vectormath_aos.h
@@ -0,0 +1,1890 @@
+/*
+   Copyright (C) 2009 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+ 
+This source version has been altered.
+
+*/
+
+#ifndef _VECTORMATH_AOS_CPP_H
+#define _VECTORMATH_AOS_CPP_H
+
+#include <math.h>
+
+#ifdef _VECTORMATH_DEBUG
+#include <stdio.h>
+#endif
+
+namespace Vectormath {
+
+namespace Aos {
+
+//-----------------------------------------------------------------------------
+// Forward Declarations
+//
+
+class Vector3;
+class Vector4;
+class Point3;
+class Quat;
+class Matrix3;
+class Matrix4;
+class Transform3;
+
+// A 3-D vector in array-of-structures format
+// Holds x, y, z as floats; non-GCC builds add a fourth float member, GCC builds request aligned(16) instead.
+class Vector3
+{
+    float mX;
+    float mY;
+    float mZ;
+#ifndef __GNUC__
+    float d;   // extra fourth float, compiled in only when __GNUC__ is not defined
+#endif
+
+public:
+    // Default constructor; does no initialization
+    // The body is empty, so mX, mY, and mZ are left unset.
+    inline Vector3( ) { };
+
+    // Copy a 3-D vector
+    // 
+    inline Vector3( const Vector3 & vec );
+
+    // Construct a 3-D vector from x, y, and z elements
+    // 
+    inline Vector3( float x, float y, float z );
+
+    // Copy elements from a 3-D point into a 3-D vector
+    // Declared explicit, so a Point3 is never converted implicitly.
+    explicit inline Vector3( const Point3 & pnt );
+
+    // Set all elements of a 3-D vector to the same scalar value
+    // Declared explicit, so a float is never converted implicitly.
+    explicit inline Vector3( float scalar );
+
+    // Assign one 3-D vector to another
+    // 
+    inline Vector3 & operator =( const Vector3 & vec );
+
+    // Set the x element of a 3-D vector
+    // 
+    inline Vector3 & setX( float x );
+
+    // Set the y element of a 3-D vector
+    // 
+    inline Vector3 & setY( float y );
+
+    // Set the z element of a 3-D vector
+    // 
+    inline Vector3 & setZ( float z );
+
+    // Get the x element of a 3-D vector
+    // 
+    inline float getX( ) const;
+
+    // Get the y element of a 3-D vector
+    // 
+    inline float getY( ) const;
+
+    // Get the z element of a 3-D vector
+    // 
+    inline float getZ( ) const;
+
+    // Set an x, y, or z element of a 3-D vector by index
+    // 
+    inline Vector3 & setElem( int idx, float value );
+
+    // Get an x, y, or z element of a 3-D vector by index
+    // 
+    inline float getElem( int idx ) const;
+
+    // Subscripting operator to set or get an element
+    // Returns a reference, so the selected element can be assigned through it.
+    inline float & operator []( int idx );
+
+    // Subscripting operator to get an element
+    // Returns the selected element by value.
+    inline float operator []( int idx ) const;
+
+    // Add two 3-D vectors
+    // 
+    inline const Vector3 operator +( const Vector3 & vec ) const;
+
+    // Subtract a 3-D vector from another 3-D vector
+    // 
+    inline const Vector3 operator -( const Vector3 & vec ) const;
+
+    // Add a 3-D vector to a 3-D point
+    // The result type is Point3, not Vector3.
+    inline const Point3 operator +( const Point3 & pnt ) const;
+
+    // Multiply a 3-D vector by a scalar
+    // 
+    inline const Vector3 operator *( float scalar ) const;
+
+    // Divide a 3-D vector by a scalar
+    // 
+    inline const Vector3 operator /( float scalar ) const;
+
+    // Perform compound assignment and addition with a 3-D vector
+    // 
+    inline Vector3 & operator +=( const Vector3 & vec );
+
+    // Perform compound assignment and subtraction by a 3-D vector
+    // 
+    inline Vector3 & operator -=( const Vector3 & vec );
+
+    // Perform compound assignment and multiplication by a scalar
+    // 
+    inline Vector3 & operator *=( float scalar );
+
+    // Perform compound assignment and division by a scalar
+    // 
+    inline Vector3 & operator /=( float scalar );
+
+    // Negate all elements of a 3-D vector
+    // 
+    inline const Vector3 operator -( ) const;
+
+    // Construct x axis
+    // 
+    static inline const Vector3 xAxis( );
+
+    // Construct y axis
+    // 
+    static inline const Vector3 yAxis( );
+
+    // Construct z axis
+    // 
+    static inline const Vector3 zAxis( );
+
+}
+#ifdef __GNUC__
+__attribute__ ((aligned(16)))
+#endif
+;
+
+// Multiply a 3-D vector by a scalar
+// 
+inline const Vector3 operator *( float scalar, const Vector3 & vec );
+
+// Multiply two 3-D vectors per element
+// 
+inline const Vector3 mulPerElem( const Vector3 & vec0, const Vector3 & vec1 );
+
+// Divide two 3-D vectors per element
+// NOTE: 
+// Floating-point behavior matches standard library function divf4.
+// 
+inline const Vector3 divPerElem( const Vector3 & vec0, const Vector3 & vec1 );
+
+// Compute the reciprocal of a 3-D vector per element
+// NOTE: 
+// Floating-point behavior matches standard library function recipf4.
+// 
+inline const Vector3 recipPerElem( const Vector3 & vec );
+
+// Compute the square root of a 3-D vector per element
+// NOTE: 
+// Floating-point behavior matches standard library function sqrtf4.
+// 
+inline const Vector3 sqrtPerElem( const Vector3 & vec );
+
+// Compute the reciprocal square root of a 3-D vector per element
+// NOTE: 
+// Floating-point behavior matches standard library function rsqrtf4.
+// 
+inline const Vector3 rsqrtPerElem( const Vector3 & vec );
+
+// Compute the absolute value of a 3-D vector per element
+// 
+inline const Vector3 absPerElem( const Vector3 & vec );
+
+// Copy sign from one 3-D vector to another, per element
+// 
+inline const Vector3 copySignPerElem( const Vector3 & vec0, const Vector3 & vec1 );
+
+// Maximum of two 3-D vectors per element
+// 
+inline const Vector3 maxPerElem( const Vector3 & vec0, const Vector3 & vec1 );
+
+// Minimum of two 3-D vectors per element
+// 
+inline const Vector3 minPerElem( const Vector3 & vec0, const Vector3 & vec1 );
+
+// Maximum element of a 3-D vector
+// 
+inline float maxElem( const Vector3 & vec );
+
+// Minimum element of a 3-D vector
+// 
+inline float minElem( const Vector3 & vec );
+
+// Compute the sum of all elements of a 3-D vector
+// 
+inline float sum( const Vector3 & vec );
+
+// Compute the dot product of two 3-D vectors
+// 
+inline float dot( const Vector3 & vec0, const Vector3 & vec1 );
+
+// Compute the square of the length of a 3-D vector
+// 
+inline float lengthSqr( const Vector3 & vec );
+
+// Compute the length of a 3-D vector
+// 
+inline float length( const Vector3 & vec );
+
+// Normalize a 3-D vector
+// NOTE: 
+// The result is unpredictable when all elements of vec are at or near zero.
+// 
+inline const Vector3 normalize( const Vector3 & vec );
+
+// Compute cross product of two 3-D vectors
+// 
+inline const Vector3 cross( const Vector3 & vec0, const Vector3 & vec1 );
+
+// Outer product of two 3-D vectors
+// 
+inline const Matrix3 outer( const Vector3 & vec0, const Vector3 & vec1 );
+
+// Pre-multiply a row vector by a 3x3 matrix
+// 
+inline const Vector3 rowMul( const Vector3 & vec, const Matrix3 & mat );
+
+// Cross-product matrix of a 3-D vector
+// 
+inline const Matrix3 crossMatrix( const Vector3 & vec );
+
+// Create cross-product matrix and multiply
+// NOTE: 
+// Faster than separately creating a cross-product matrix and multiplying.
+// 
+inline const Matrix3 crossMatrixMul( const Vector3 & vec, const Matrix3 & mat );
+
+// Linear interpolation between two 3-D vectors
+// NOTE: 
+// Does not clamp t between 0 and 1.
+// 
+inline const Vector3 lerp( float t, const Vector3 & vec0, const Vector3 & vec1 );
+
+// Spherical linear interpolation between two 3-D vectors
+// NOTE: 
+// The result is unpredictable if the vectors point in opposite directions.
+// Does not clamp t between 0 and 1.
+// 
+inline const Vector3 slerp( float t, const Vector3 & unitVec0, const Vector3 & unitVec1 );
+
+// Conditionally select between two 3-D vectors
+// 
+inline const Vector3 select( const Vector3 & vec0, const Vector3 & vec1, bool select1 );
+
+// Load x, y, and z elements from the first three words of a float array.
+// 
+// 
+inline void loadXYZ( Vector3 & vec, const float * fptr );
+
+// Store x, y, and z elements of a 3-D vector in the first three words of a float array.
+// Memory area of previous 16 bytes and next 32 bytes from fptr might be accessed
+// 
+inline void storeXYZ( const Vector3 & vec, float * fptr );
+
+// Load three-half-floats as a 3-D vector
+// NOTE: 
+// This transformation does not support either denormalized numbers or NaNs.
+// 
+inline void loadHalfFloats( Vector3 & vec, const unsigned short * hfptr );
+
+// Store a 3-D vector as half-floats.
+// NOTE: 
+// This transformation does not support either denormalized numbers or NaNs. Memory area of previous 16 bytes and next 32 bytes from hfptr might be accessed.
+// 
+inline void storeHalfFloats( const Vector3 & vec, unsigned short * hfptr );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 3-D vector
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Vector3 & vec );
+
+// Print a 3-D vector and an associated string identifier
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Vector3 & vec, const char * name );
+
+#endif
+
+// A 4-D vector in array-of-structures format
+// Holds x, y, z, w as floats; GCC builds additionally request aligned(16).
+class Vector4
+{
+    float mX;
+    float mY;
+    float mZ;
+    float mW;
+
+public:
+    // Default constructor; does no initialization
+    // The body is empty, so mX, mY, mZ, and mW are left unset.
+    inline Vector4( ) { };
+
+    // Copy a 4-D vector
+    // 
+    inline Vector4( const Vector4 & vec );
+
+    // Construct a 4-D vector from x, y, z, and w elements
+    // 
+    inline Vector4( float x, float y, float z, float w );
+
+    // Construct a 4-D vector from a 3-D vector and a scalar
+    // 
+    inline Vector4( const Vector3 & xyz, float w );
+
+    // Copy x, y, and z from a 3-D vector into a 4-D vector, and set w to 0
+    // Declared explicit, so a Vector3 is never converted implicitly.
+    explicit inline Vector4( const Vector3 & vec );
+
+    // Copy x, y, and z from a 3-D point into a 4-D vector, and set w to 1
+    // Declared explicit, so a Point3 is never converted implicitly.
+    explicit inline Vector4( const Point3 & pnt );
+
+    // Copy elements from a quaternion into a 4-D vector
+    // Declared explicit, so a Quat is never converted implicitly.
+    explicit inline Vector4( const Quat & quat );
+
+    // Set all elements of a 4-D vector to the same scalar value
+    // Declared explicit, so a float is never converted implicitly.
+    explicit inline Vector4( float scalar );
+
+    // Assign one 4-D vector to another
+    // 
+    inline Vector4 & operator =( const Vector4 & vec );
+
+    // Set the x, y, and z elements of a 4-D vector
+    // NOTE: 
+    // This function does not change the w element.
+    // 
+    inline Vector4 & setXYZ( const Vector3 & vec );
+
+    // Get the x, y, and z elements of a 4-D vector
+    // 
+    inline const Vector3 getXYZ( ) const;
+
+    // Set the x element of a 4-D vector
+    // 
+    inline Vector4 & setX( float x );
+
+    // Set the y element of a 4-D vector
+    // 
+    inline Vector4 & setY( float y );
+
+    // Set the z element of a 4-D vector
+    // 
+    inline Vector4 & setZ( float z );
+
+    // Set the w element of a 4-D vector
+    // 
+    inline Vector4 & setW( float w );
+
+    // Get the x element of a 4-D vector
+    // 
+    inline float getX( ) const;
+
+    // Get the y element of a 4-D vector
+    // 
+    inline float getY( ) const;
+
+    // Get the z element of a 4-D vector
+    // 
+    inline float getZ( ) const;
+
+    // Get the w element of a 4-D vector
+    // 
+    inline float getW( ) const;
+
+    // Set an x, y, z, or w element of a 4-D vector by index
+    // 
+    inline Vector4 & setElem( int idx, float value );
+
+    // Get an x, y, z, or w element of a 4-D vector by index
+    // 
+    inline float getElem( int idx ) const;
+
+    // Subscripting operator to set or get an element
+    // Returns a reference, so the selected element can be assigned through it.
+    inline float & operator []( int idx );
+
+    // Subscripting operator to get an element
+    // Returns the selected element by value.
+    inline float operator []( int idx ) const;
+
+    // Add two 4-D vectors
+    // 
+    inline const Vector4 operator +( const Vector4 & vec ) const;
+
+    // Subtract a 4-D vector from another 4-D vector
+    // 
+    inline const Vector4 operator -( const Vector4 & vec ) const;
+
+    // Multiply a 4-D vector by a scalar
+    // 
+    inline const Vector4 operator *( float scalar ) const;
+
+    // Divide a 4-D vector by a scalar
+    // 
+    inline const Vector4 operator /( float scalar ) const;
+
+    // Perform compound assignment and addition with a 4-D vector
+    // 
+    inline Vector4 & operator +=( const Vector4 & vec );
+
+    // Perform compound assignment and subtraction by a 4-D vector
+    // 
+    inline Vector4 & operator -=( const Vector4 & vec );
+
+    // Perform compound assignment and multiplication by a scalar
+    // 
+    inline Vector4 & operator *=( float scalar );
+
+    // Perform compound assignment and division by a scalar
+    // 
+    inline Vector4 & operator /=( float scalar );
+
+    // Negate all elements of a 4-D vector
+    // 
+    inline const Vector4 operator -( ) const;
+
+    // Construct x axis
+    // 
+    static inline const Vector4 xAxis( );
+
+    // Construct y axis
+    // 
+    static inline const Vector4 yAxis( );
+
+    // Construct z axis
+    // 
+    static inline const Vector4 zAxis( );
+
+    // Construct w axis
+    // 
+    static inline const Vector4 wAxis( );
+
+}
+#ifdef __GNUC__
+__attribute__ ((aligned(16)))
+#endif
+;
+
+// Multiply a 4-D vector by a scalar
+// 
+inline const Vector4 operator *( float scalar, const Vector4 & vec );
+
+// Multiply two 4-D vectors per element
+// 
+inline const Vector4 mulPerElem( const Vector4 & vec0, const Vector4 & vec1 );
+
+// Divide two 4-D vectors per element
+// NOTE: 
+// Floating-point behavior matches standard library function divf4.
+// 
+inline const Vector4 divPerElem( const Vector4 & vec0, const Vector4 & vec1 );
+
+// Compute the reciprocal of a 4-D vector per element
+// NOTE: 
+// Floating-point behavior matches standard library function recipf4.
+// 
+inline const Vector4 recipPerElem( const Vector4 & vec );
+
+// Compute the square root of a 4-D vector per element
+// NOTE: 
+// Floating-point behavior matches standard library function sqrtf4.
+// 
+inline const Vector4 sqrtPerElem( const Vector4 & vec );
+
+// Compute the reciprocal square root of a 4-D vector per element
+// NOTE: 
+// Floating-point behavior matches standard library function rsqrtf4.
+// 
+inline const Vector4 rsqrtPerElem( const Vector4 & vec );
+
+// Compute the absolute value of a 4-D vector per element
+// 
+inline const Vector4 absPerElem( const Vector4 & vec );
+
+// Copy sign from one 4-D vector to another, per element
+// 
+inline const Vector4 copySignPerElem( const Vector4 & vec0, const Vector4 & vec1 );
+
+// Maximum of two 4-D vectors per element
+// 
+inline const Vector4 maxPerElem( const Vector4 & vec0, const Vector4 & vec1 );
+
+// Minimum of two 4-D vectors per element
+// 
+inline const Vector4 minPerElem( const Vector4 & vec0, const Vector4 & vec1 );
+
+// Maximum element of a 4-D vector
+// 
+inline float maxElem( const Vector4 & vec );
+
+// Minimum element of a 4-D vector
+// 
+inline float minElem( const Vector4 & vec );
+
+// Compute the sum of all elements of a 4-D vector
+// 
+inline float sum( const Vector4 & vec );
+
+// Compute the dot product of two 4-D vectors
+// 
+inline float dot( const Vector4 & vec0, const Vector4 & vec1 );
+
+// Compute the square of the length of a 4-D vector
+// 
+inline float lengthSqr( const Vector4 & vec );
+
+// Compute the length of a 4-D vector
+// 
+inline float length( const Vector4 & vec );
+
+// Normalize a 4-D vector
+// NOTE: 
+// The result is unpredictable when all elements of vec are at or near zero.
+// 
+inline const Vector4 normalize( const Vector4 & vec );
+
+// Outer product of two 4-D vectors
+// 
+inline const Matrix4 outer( const Vector4 & vec0, const Vector4 & vec1 );
+
+// Linear interpolation between two 4-D vectors
+// NOTE: 
+// Does not clamp t between 0 and 1.
+// 
+inline const Vector4 lerp( float t, const Vector4 & vec0, const Vector4 & vec1 );
+
+// Spherical linear interpolation between two 4-D vectors
+// NOTE: 
+// The result is unpredictable if the vectors point in opposite directions.
+// Does not clamp t between 0 and 1.
+// 
+inline const Vector4 slerp( float t, const Vector4 & unitVec0, const Vector4 & unitVec1 );
+
+// Conditionally select between two 4-D vectors
+// 
+inline const Vector4 select( const Vector4 & vec0, const Vector4 & vec1, bool select1 );
+
+// Load x, y, z, and w elements from the first four words of a float array.
+// 
+// 
+inline void loadXYZW( Vector4 & vec, const float * fptr );
+
+// Store x, y, z, and w elements of a 4-D vector in the first four words of a float array.
+// The 16 bytes preceding fptr and the 32 bytes following it might also be accessed.
+// 
+inline void storeXYZW( const Vector4 & vec, float * fptr );
+
+// Load four-half-floats as a 4-D vector
+// NOTE: 
+// This transformation does not support either denormalized numbers or NaNs.
+// 
+inline void loadHalfFloats( Vector4 & vec, const unsigned short * hfptr );
+
+// Store a 4-D vector as half-floats.
+// NOTE: 
+// This transformation does not support either denormalized numbers or NaNs. The 16 bytes preceding hfptr and the 32 bytes following it might also be accessed.
+// 
+inline void storeHalfFloats( const Vector4 & vec, unsigned short * hfptr );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 4-D vector
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Vector4 & vec );
+
+// Print a 4-D vector and an associated string identifier
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Vector4 & vec, const char * name );
+
+#endif
+
+// A 3-D point in array-of-structures format
+// Holds x, y, and z as three floats; 16-byte aligned through the attribute after the class body when __GNUC__ is defined.
+class Point3
+{
+    float mX;
+    float mY;
+    float mZ;
+#ifndef __GNUC__
+    float d; // NOTE(review): not exposed by any accessor below; presumably padding to 16 bytes where aligned(16) is unavailable - confirm
+#endif
+
+public:
+    // Default constructor; does no initialization
+    // (empty body, so mX, mY, and mZ are left indeterminate)
+    inline Point3( ) { };
+
+    // Copy a 3-D point
+    // 
+    inline Point3( const Point3 & pnt );
+
+    // Construct a 3-D point from x, y, and z elements
+    // 
+    inline Point3( float x, float y, float z );
+
+    // Copy elements from a 3-D vector into a 3-D point
+    // Declared explicit, so a Vector3 never converts to a Point3 implicitly.
+    explicit inline Point3( const Vector3 & vec );
+
+    // Set all elements of a 3-D point to the same scalar value
+    // Declared explicit, so a bare float never converts to a Point3 implicitly.
+    explicit inline Point3( float scalar );
+
+    // Assign one 3-D point to another
+    // 
+    inline Point3 & operator =( const Point3 & pnt );
+
+    // Set the x element of a 3-D point
+    // Returns Point3 & (as do setY, setZ, and setElem); presumably *this so calls can be chained - confirm in the definition.
+    inline Point3 & setX( float x );
+
+    // Set the y element of a 3-D point
+    // 
+    inline Point3 & setY( float y );
+
+    // Set the z element of a 3-D point
+    // 
+    inline Point3 & setZ( float z );
+
+    // Get the x element of a 3-D point
+    // 
+    inline float getX( ) const;
+
+    // Get the y element of a 3-D point
+    // 
+    inline float getY( ) const;
+
+    // Get the z element of a 3-D point
+    // 
+    inline float getZ( ) const;
+
+    // Set an x, y, or z element of a 3-D point by index
+    // 
+    inline Point3 & setElem( int idx, float value );
+
+    // Get an x, y, or z element of a 3-D point by index
+    // 
+    inline float getElem( int idx ) const;
+
+    // Subscripting operator to set or get an element
+    // Returns float &, so the element can be written through the result.
+    inline float & operator []( int idx );
+
+    // Subscripting operator to get an element
+    // Const overload; returns the element by value.
+    inline float operator []( int idx ) const;
+
+    // Subtract a 3-D point from another 3-D point
+    // The difference is returned as a Vector3, not a Point3.
+    inline const Vector3 operator -( const Point3 & pnt ) const;
+
+    // Add a 3-D point to a 3-D vector
+    // The result is a Point3.
+    inline const Point3 operator +( const Vector3 & vec ) const;
+
+    // Subtract a 3-D vector from a 3-D point
+    // The result is a Point3.
+    inline const Point3 operator -( const Vector3 & vec ) const;
+
+    // Perform compound assignment and addition with a 3-D vector
+    // 
+    inline Point3 & operator +=( const Vector3 & vec );
+
+    // Perform compound assignment and subtraction by a 3-D vector
+    // 
+    inline Point3 & operator -=( const Vector3 & vec );
+
+}
+#ifdef __GNUC__
+__attribute__ ((aligned(16)))
+#endif
+;
+
+// Multiply two 3-D points per element
+// 
+inline const Point3 mulPerElem( const Point3 & pnt0, const Point3 & pnt1 );
+
+// Divide two 3-D points per element
+// NOTE: 
+// Floating-point behavior matches standard library function divf4.
+// 
+inline const Point3 divPerElem( const Point3 & pnt0, const Point3 & pnt1 );
+
+// Compute the reciprocal of a 3-D point per element
+// NOTE: 
+// Floating-point behavior matches standard library function recipf4.
+// 
+inline const Point3 recipPerElem( const Point3 & pnt );
+
+// Compute the square root of a 3-D point per element
+// NOTE: 
+// Floating-point behavior matches standard library function sqrtf4.
+// 
+inline const Point3 sqrtPerElem( const Point3 & pnt );
+
+// Compute the reciprocal square root of a 3-D point per element
+// NOTE: 
+// Floating-point behavior matches standard library function rsqrtf4.
+// 
+inline const Point3 rsqrtPerElem( const Point3 & pnt );
+
+// Compute the absolute value of a 3-D point per element
+// 
+inline const Point3 absPerElem( const Point3 & pnt );
+
+// Copy sign from one 3-D point to another, per element
+// 
+inline const Point3 copySignPerElem( const Point3 & pnt0, const Point3 & pnt1 );
+
+// Maximum of two 3-D points per element
+// 
+inline const Point3 maxPerElem( const Point3 & pnt0, const Point3 & pnt1 );
+
+// Minimum of two 3-D points per element
+// 
+inline const Point3 minPerElem( const Point3 & pnt0, const Point3 & pnt1 );
+
+// Maximum element of a 3-D point
+// 
+inline float maxElem( const Point3 & pnt );
+
+// Minimum element of a 3-D point
+// 
+inline float minElem( const Point3 & pnt );
+
+// Compute the sum of all elements of a 3-D point
+// 
+inline float sum( const Point3 & pnt );
+
+// Apply uniform scale to a 3-D point
+// 
+inline const Point3 scale( const Point3 & pnt, float scaleVal );
+
+// Apply non-uniform scale to a 3-D point
+// 
+inline const Point3 scale( const Point3 & pnt, const Vector3 & scaleVec );
+
+// Scalar projection of a 3-D point on a unit-length 3-D vector
+// 
+inline float projection( const Point3 & pnt, const Vector3 & unitVec );
+
+// Compute the square of the distance of a 3-D point from the coordinate-system origin
+// 
+inline float distSqrFromOrigin( const Point3 & pnt );
+
+// Compute the distance of a 3-D point from the coordinate-system origin
+// 
+inline float distFromOrigin( const Point3 & pnt );
+
+// Compute the square of the distance between two 3-D points
+// 
+inline float distSqr( const Point3 & pnt0, const Point3 & pnt1 );
+
+// Compute the distance between two 3-D points
+// 
+inline float dist( const Point3 & pnt0, const Point3 & pnt1 );
+
+// Linear interpolation between two 3-D points
+// NOTE: 
+// Does not clamp t between 0 and 1.
+// 
+inline const Point3 lerp( float t, const Point3 & pnt0, const Point3 & pnt1 );
+
+// Conditionally select between two 3-D points
+// 
+inline const Point3 select( const Point3 & pnt0, const Point3 & pnt1, bool select1 );
+
+// Load x, y, and z elements from the first three words of a float array.
+// 
+// 
+inline void loadXYZ( Point3 & pnt, const float * fptr );
+
+// Store x, y, and z elements of a 3-D point in the first three words of a float array.
+// The 16 bytes preceding fptr and the 32 bytes following it might also be accessed.
+// 
+inline void storeXYZ( const Point3 & pnt, float * fptr );
+
+// Load three-half-floats as a 3-D point
+// NOTE: 
+// This transformation does not support either denormalized numbers or NaNs.
+// 
+inline void loadHalfFloats( Point3 & pnt, const unsigned short * hfptr );
+
+// Store a 3-D point as half-floats.
+// NOTE: 
+// This transformation does not support either denormalized numbers or NaNs. The 16 bytes preceding hfptr and the 32 bytes following it might also be accessed.
+// 
+inline void storeHalfFloats( const Point3 & pnt, unsigned short * hfptr );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 3-D point
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Point3 & pnt );
+
+// Print a 3-D point and an associated string identifier
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Point3 & pnt, const char * name );
+
+#endif
+
+// A quaternion in array-of-structures format
+// Storage is a float32x4_t / float[4] union when __APPLE__ and BT_USE_NEON are both defined, four scalar floats otherwise; 16-byte aligned under __GNUC__.
+class Quat
+{
+#if defined( __APPLE__ ) && defined( BT_USE_NEON )
+    union{
+        float32x4_t vXYZW;
+        float mXYZW[4];
+    };
+#else
+    float mX;
+    float mY;
+    float mZ;
+    float mW;
+#endif
+
+public:
+    // Default constructor; does no initialization
+    // (empty body, so the element storage is left indeterminate)
+    inline Quat( ) { };
+
+    // Copy a quaternion
+    // 
+    inline Quat( const Quat & quat );
+
+    // Construct a quaternion from x, y, z, and w elements
+    // 
+    inline Quat( float x, float y, float z, float w );
+#if defined( __APPLE__ ) && defined( BT_USE_NEON )
+    // Construct a quaternion from a NEON vector of x, y, z, and w elements
+    // Guarded exactly like the float32x4_t storage above: the type is not declared in other configurations.
+    inline Quat( float32x4_t fXYZW );
+#endif
+    // Construct a quaternion from a 3-D vector and a scalar
+    // 
+    inline Quat( const Vector3 & xyz, float w );
+
+    // Copy elements from a 4-D vector into a quaternion
+    // Declared explicit, so a Vector4 never converts to a Quat implicitly.
+    explicit inline Quat( const Vector4 & vec );
+
+    // Convert a rotation matrix to a unit-length quaternion
+    // Declared explicit, so a Matrix3 never converts to a Quat implicitly.
+    explicit inline Quat( const Matrix3 & rotMat );
+
+    // Set all elements of a quaternion to the same scalar value
+    // Declared explicit, so a bare float never converts to a Quat implicitly.
+    explicit inline Quat( float scalar );
+    
+    // Assign one quaternion to another
+    // 
+    inline Quat & operator =( const Quat & quat );
+
+    // Set the x, y, and z elements of a quaternion
+    // NOTE: 
+    // This function does not change the w element.
+    // 
+    inline Quat & setXYZ( const Vector3 & vec );
+
+    // Get the x, y, and z elements of a quaternion
+    // Returned by value as a Vector3.
+    inline const Vector3 getXYZ( ) const;
+
+    // Set the x element of a quaternion
+    // Returns Quat & (as do setY, setZ, setW, and setElem); presumably *this so calls can be chained - confirm in the definition.
+    inline Quat & setX( float x );
+
+    // Set the y element of a quaternion
+    // 
+    inline Quat & setY( float y );
+
+    // Set the z element of a quaternion
+    // 
+    inline Quat & setZ( float z );
+
+    // Set the w element of a quaternion
+    // 
+    inline Quat & setW( float w );
+
+#if defined( __APPLE__ ) && defined( BT_USE_NEON )
+    inline float32x4_t getvXYZW( ) const; // NEON-only accessor; presumably returns the packed vXYZW storage - confirm in the definition
+#endif
+    
+    // Get the x element of a quaternion
+    // 
+    inline float getX( ) const;
+
+    // Get the y element of a quaternion
+    // 
+    inline float getY( ) const;
+
+    // Get the z element of a quaternion
+    // 
+    inline float getZ( ) const;
+
+    // Get the w element of a quaternion
+    // 
+    inline float getW( ) const;
+
+    // Set an x, y, z, or w element of a quaternion by index
+    // 
+    inline Quat & setElem( int idx, float value );
+
+    // Get an x, y, z, or w element of a quaternion by index
+    // 
+    inline float getElem( int idx ) const;
+
+    // Subscripting operator to set or get an element
+    // Returns float &, so the element can be written through the result.
+    inline float & operator []( int idx );
+
+    // Subscripting operator to get an element
+    // Const overload; returns the element by value.
+    inline float operator []( int idx ) const;
+
+    // Add two quaternions
+    // 
+    inline const Quat operator +( const Quat & quat ) const;
+
+    // Subtract a quaternion from another quaternion
+    // 
+    inline const Quat operator -( const Quat & quat ) const;
+
+    // Multiply two quaternions
+    // 
+    inline const Quat operator *( const Quat & quat ) const;
+
+    // Multiply a quaternion by a scalar
+    // 
+    inline const Quat operator *( float scalar ) const;
+
+    // Divide a quaternion by a scalar
+    // 
+    inline const Quat operator /( float scalar ) const;
+
+    // Perform compound assignment and addition with a quaternion
+    // 
+    inline Quat & operator +=( const Quat & quat );
+
+    // Perform compound assignment and subtraction by a quaternion
+    // 
+    inline Quat & operator -=( const Quat & quat );
+
+    // Perform compound assignment and multiplication by a quaternion
+    // 
+    inline Quat & operator *=( const Quat & quat );
+
+    // Perform compound assignment and multiplication by a scalar
+    // 
+    inline Quat & operator *=( float scalar );
+
+    // Perform compound assignment and division by a scalar
+    // 
+    inline Quat & operator /=( float scalar );
+
+    // Negate all elements of a quaternion
+    // 
+    inline const Quat operator -( ) const;
+
+    // Construct an identity quaternion
+    // 
+    static inline const Quat identity( );
+
+    // Construct a quaternion to rotate between two unit-length 3-D vectors
+    // NOTE: 
+    // The result is unpredictable if unitVec0 and unitVec1 point in opposite directions.
+    // 
+    static inline const Quat rotation( const Vector3 & unitVec0, const Vector3 & unitVec1 );
+
+    // Construct a quaternion to rotate around a unit-length 3-D vector
+    // 
+    static inline const Quat rotation( float radians, const Vector3 & unitVec );
+
+    // Construct a quaternion to rotate around the x axis
+    // 
+    static inline const Quat rotationX( float radians );
+
+    // Construct a quaternion to rotate around the y axis
+    // 
+    static inline const Quat rotationY( float radians );
+
+    // Construct a quaternion to rotate around the z axis
+    // 
+    static inline const Quat rotationZ( float radians );
+
+}
+#ifdef __GNUC__
+__attribute__ ((aligned(16)))
+#endif
+;
+
+// Multiply a quaternion by a scalar
+// 
+inline const Quat operator *( float scalar, const Quat & quat );
+
+// Compute the conjugate of a quaternion
+// 
+inline const Quat conj( const Quat & quat );
+
+// Use a unit-length quaternion to rotate a 3-D vector
+// 
+inline const Vector3 rotate( const Quat & unitQuat, const Vector3 & vec );
+
+// Compute the dot product of two quaternions
+// 
+inline float dot( const Quat & quat0, const Quat & quat1 );
+
+// Compute the norm of a quaternion
+// 
+inline float norm( const Quat & quat );
+
+// Compute the length of a quaternion
+// 
+inline float length( const Quat & quat );
+
+// Normalize a quaternion
+// NOTE: 
+// The result is unpredictable when all elements of quat are at or near zero.
+// 
+inline const Quat normalize( const Quat & quat );
+
+// Linear interpolation between two quaternions
+// NOTE: 
+// Does not clamp t between 0 and 1.
+// 
+inline const Quat lerp( float t, const Quat & quat0, const Quat & quat1 );
+
+// Spherical linear interpolation between two quaternions
+// NOTE: 
+// Interpolates along the shortest path between orientations.
+// Does not clamp t between 0 and 1.
+// 
+inline const Quat slerp( float t, const Quat & unitQuat0, const Quat & unitQuat1 );
+
+// Spherical quadrangle interpolation
+// 
+inline const Quat squad( float t, const Quat & unitQuat0, const Quat & unitQuat1, const Quat & unitQuat2, const Quat & unitQuat3 );
+
+// Conditionally select between two quaternions
+// 
+inline const Quat select( const Quat & quat0, const Quat & quat1, bool select1 );
+
+// Load x, y, z, and w elements from the first four words of a float array.
+// 
+// 
+inline void loadXYZW( Quat & quat, const float * fptr );
+
+// Store x, y, z, and w elements of a quaternion in the first four words of a float array.
+// The 16 bytes preceding fptr and the 32 bytes following it might also be accessed.
+// 
+inline void storeXYZW( const Quat & quat, float * fptr );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a quaternion
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Quat & quat );
+
+// Print a quaternion and an associated string identifier
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Quat & quat, const char * name );
+
+#endif
+
+// A 3x3 matrix in array-of-structures format
+// Stored as three Vector3 columns (mCol0, mCol1, mCol2).
+class Matrix3
+{
+    Vector3 mCol0;
+    Vector3 mCol1;
+    Vector3 mCol2;
+
+public:
+    // Default constructor; does no initialization
+    // (empty body; the columns are default-constructed Vector3 objects)
+    inline Matrix3( ) { };
+
+    // Copy a 3x3 matrix
+    // 
+    inline Matrix3( const Matrix3 & mat );
+
+    // Construct a 3x3 matrix containing the specified columns
+    // 
+    inline Matrix3( const Vector3 & col0, const Vector3 & col1, const Vector3 & col2 );
+
+    // Construct a 3x3 rotation matrix from a unit-length quaternion
+    // Declared explicit, so a Quat never converts to a Matrix3 implicitly.
+    explicit inline Matrix3( const Quat & unitQuat );
+
+    // Set all elements of a 3x3 matrix to the same scalar value
+    // Declared explicit, so a bare float never converts to a Matrix3 implicitly.
+    explicit inline Matrix3( float scalar );
+
+    // Assign one 3x3 matrix to another
+    // 
+    inline Matrix3 & operator =( const Matrix3 & mat );
+
+    // Set column 0 of a 3x3 matrix
+    // Returns Matrix3 & (as do the other setters); presumably *this so calls can be chained - confirm in the definition.
+    inline Matrix3 & setCol0( const Vector3 & col0 );
+
+    // Set column 1 of a 3x3 matrix
+    // 
+    inline Matrix3 & setCol1( const Vector3 & col1 );
+
+    // Set column 2 of a 3x3 matrix
+    // 
+    inline Matrix3 & setCol2( const Vector3 & col2 );
+
+    // Get column 0 of a 3x3 matrix
+    // 
+    inline const Vector3 getCol0( ) const;
+
+    // Get column 1 of a 3x3 matrix
+    // 
+    inline const Vector3 getCol1( ) const;
+
+    // Get column 2 of a 3x3 matrix
+    // 
+    inline const Vector3 getCol2( ) const;
+
+    // Set the column of a 3x3 matrix referred to by the specified index
+    // 
+    inline Matrix3 & setCol( int col, const Vector3 & vec );
+
+    // Set the row of a 3x3 matrix referred to by the specified index
+    // 
+    inline Matrix3 & setRow( int row, const Vector3 & vec );
+
+    // Get the column of a 3x3 matrix referred to by the specified index
+    // 
+    inline const Vector3 getCol( int col ) const;
+
+    // Get the row of a 3x3 matrix referred to by the specified index
+    // 
+    inline const Vector3 getRow( int row ) const;
+
+    // Subscripting operator to set or get a column
+    // Returns Vector3 &, so the column can be written through the result.
+    inline Vector3 & operator []( int col );
+
+    // Subscripting operator to get a column
+    // Const overload; returns the column by value.
+    inline const Vector3 operator []( int col ) const;
+
+    // Set the element of a 3x3 matrix referred to by column and row indices
+    // Argument order is column first, then row.
+    inline Matrix3 & setElem( int col, int row, float val );
+
+    // Get the element of a 3x3 matrix referred to by column and row indices
+    // Argument order is column first, then row.
+    inline float getElem( int col, int row ) const;
+
+    // Add two 3x3 matrices
+    // 
+    inline const Matrix3 operator +( const Matrix3 & mat ) const;
+
+    // Subtract a 3x3 matrix from another 3x3 matrix
+    // 
+    inline const Matrix3 operator -( const Matrix3 & mat ) const;
+
+    // Negate all elements of a 3x3 matrix
+    // 
+    inline const Matrix3 operator -( ) const;
+
+    // Multiply a 3x3 matrix by a scalar
+    // 
+    inline const Matrix3 operator *( float scalar ) const;
+
+    // Multiply a 3x3 matrix by a 3-D vector
+    // 
+    inline const Vector3 operator *( const Vector3 & vec ) const;
+
+    // Multiply two 3x3 matrices
+    // 
+    inline const Matrix3 operator *( const Matrix3 & mat ) const;
+
+    // Perform compound assignment and addition with a 3x3 matrix
+    // 
+    inline Matrix3 & operator +=( const Matrix3 & mat );
+
+    // Perform compound assignment and subtraction by a 3x3 matrix
+    // 
+    inline Matrix3 & operator -=( const Matrix3 & mat );
+
+    // Perform compound assignment and multiplication by a scalar
+    // 
+    inline Matrix3 & operator *=( float scalar );
+
+    // Perform compound assignment and multiplication by a 3x3 matrix
+    // 
+    inline Matrix3 & operator *=( const Matrix3 & mat );
+
+    // Construct an identity 3x3 matrix
+    // 
+    static inline const Matrix3 identity( );
+
+    // Construct a 3x3 matrix to rotate around the x axis
+    // 
+    static inline const Matrix3 rotationX( float radians );
+
+    // Construct a 3x3 matrix to rotate around the y axis
+    // 
+    static inline const Matrix3 rotationY( float radians );
+
+    // Construct a 3x3 matrix to rotate around the z axis
+    // 
+    static inline const Matrix3 rotationZ( float radians );
+
+    // Construct a 3x3 matrix to rotate around the x, y, and z axes
+    // NOTE(review): the name suggests the three rotations are composed in Z*Y*X order - confirm in the definition.
+    static inline const Matrix3 rotationZYX( const Vector3 & radiansXYZ );
+
+    // Construct a 3x3 matrix to rotate around a unit-length 3-D vector
+    // 
+    static inline const Matrix3 rotation( float radians, const Vector3 & unitVec );
+
+    // Construct a rotation matrix from a unit-length quaternion
+    // 
+    static inline const Matrix3 rotation( const Quat & unitQuat );
+
+    // Construct a 3x3 matrix to perform scaling
+    // 
+    static inline const Matrix3 scale( const Vector3 & scaleVec );
+
+};
+// Multiply a 3x3 matrix by a scalar
+// 
+inline const Matrix3 operator *( float scalar, const Matrix3 & mat );
+
+// Append (post-multiply) a scale transformation to a 3x3 matrix
+// NOTE: 
+// Faster than creating and multiplying a scale transformation matrix.
+// 
+inline const Matrix3 appendScale( const Matrix3 & mat, const Vector3 & scaleVec );
+
+// Prepend (pre-multiply) a scale transformation to a 3x3 matrix
+// NOTE: 
+// Faster than creating and multiplying a scale transformation matrix.
+// 
+inline const Matrix3 prependScale( const Vector3 & scaleVec, const Matrix3 & mat );
+
+// Multiply two 3x3 matrices per element
+// 
+inline const Matrix3 mulPerElem( const Matrix3 & mat0, const Matrix3 & mat1 );
+
+// Compute the absolute value of a 3x3 matrix per element
+// 
+inline const Matrix3 absPerElem( const Matrix3 & mat );
+
+// Transpose of a 3x3 matrix
+// 
+inline const Matrix3 transpose( const Matrix3 & mat );
+
+// Compute the inverse of a 3x3 matrix
+// NOTE: 
+// Result is unpredictable when the determinant of mat is equal to or near 0.
+// 
+inline const Matrix3 inverse( const Matrix3 & mat );
+
+// Determinant of a 3x3 matrix
+// 
+inline float determinant( const Matrix3 & mat );
+
+// Conditionally select between two 3x3 matrices
+// 
+inline const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, bool select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 3x3 matrix
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Matrix3 & mat );
+
+// Print a 3x3 matrix and an associated string identifier
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Matrix3 & mat, const char * name );
+
+#endif
+
+// A 4x4 matrix in array-of-structures format
+// Stored as four Vector4 columns (mCol0, mCol1, mCol2, mCol3).
+class Matrix4
+{
+    Vector4 mCol0;
+    Vector4 mCol1;
+    Vector4 mCol2;
+    Vector4 mCol3;
+
+public:
+    // Default constructor; does no initialization
+    // (empty body; the columns are default-constructed Vector4 objects)
+    inline Matrix4( ) { };
+
+    // Copy a 4x4 matrix
+    // 
+    inline Matrix4( const Matrix4 & mat );
+
+    // Construct a 4x4 matrix containing the specified columns
+    // 
+    inline Matrix4( const Vector4 & col0, const Vector4 & col1, const Vector4 & col2, const Vector4 & col3 );
+
+    // Construct a 4x4 matrix from a 3x4 transformation matrix
+    // Declared explicit, so a Transform3 never converts to a Matrix4 implicitly.
+    explicit inline Matrix4( const Transform3 & mat );
+
+    // Construct a 4x4 matrix from a 3x3 matrix and a 3-D vector
+    // 
+    inline Matrix4( const Matrix3 & mat, const Vector3 & translateVec );
+
+    // Construct a 4x4 matrix from a unit-length quaternion and a 3-D vector
+    // 
+    inline Matrix4( const Quat & unitQuat, const Vector3 & translateVec );
+
+    // Set all elements of a 4x4 matrix to the same scalar value
+    // Declared explicit, so a bare float never converts to a Matrix4 implicitly.
+    explicit inline Matrix4( float scalar );
+
+    // Assign one 4x4 matrix to another
+    // 
+    inline Matrix4 & operator =( const Matrix4 & mat );
+
+    // Set the upper-left 3x3 submatrix
+    // NOTE: 
+    // This function does not change the bottom row elements.
+    // 
+    inline Matrix4 & setUpper3x3( const Matrix3 & mat3 );
+
+    // Get the upper-left 3x3 submatrix of a 4x4 matrix
+    // Returned by value as a Matrix3.
+    inline const Matrix3 getUpper3x3( ) const;
+
+    // Set translation component
+    // NOTE: 
+    // This function does not change the bottom row elements.
+    // 
+    inline Matrix4 & setTranslation( const Vector3 & translateVec );
+
+    // Get the translation component of a 4x4 matrix
+    // Returned by value as a Vector3.
+    inline const Vector3 getTranslation( ) const;
+
+    // Set column 0 of a 4x4 matrix
+    // Returns Matrix4 & (as do the other setters); presumably *this so calls can be chained - confirm in the definition.
+    inline Matrix4 & setCol0( const Vector4 & col0 );
+
+    // Set column 1 of a 4x4 matrix
+    // 
+    inline Matrix4 & setCol1( const Vector4 & col1 );
+
+    // Set column 2 of a 4x4 matrix
+    // 
+    inline Matrix4 & setCol2( const Vector4 & col2 );
+
+    // Set column 3 of a 4x4 matrix
+    // 
+    inline Matrix4 & setCol3( const Vector4 & col3 );
+
+    // Get column 0 of a 4x4 matrix
+    // 
+    inline const Vector4 getCol0( ) const;
+
+    // Get column 1 of a 4x4 matrix
+    // 
+    inline const Vector4 getCol1( ) const;
+
+    // Get column 2 of a 4x4 matrix
+    // 
+    inline const Vector4 getCol2( ) const;
+
+    // Get column 3 of a 4x4 matrix
+    // 
+    inline const Vector4 getCol3( ) const;
+
+    // Set the column of a 4x4 matrix referred to by the specified index
+    // 
+    inline Matrix4 & setCol( int col, const Vector4 & vec );
+
+    // Set the row of a 4x4 matrix referred to by the specified index
+    // 
+    inline Matrix4 & setRow( int row, const Vector4 & vec );
+
+    // Get the column of a 4x4 matrix referred to by the specified index
+    // 
+    inline const Vector4 getCol( int col ) const;
+
+    // Get the row of a 4x4 matrix referred to by the specified index
+    // 
+    inline const Vector4 getRow( int row ) const;
+
+    // Subscripting operator to set or get a column
+    // Returns Vector4 &, so the column can be written through the result.
+    inline Vector4 & operator []( int col );
+
+    // Subscripting operator to get a column
+    // Const overload; returns the column by value.
+    inline const Vector4 operator []( int col ) const;
+
+    // Set the element of a 4x4 matrix referred to by column and row indices
+    // Argument order is column first, then row.
+    inline Matrix4 & setElem( int col, int row, float val );
+
+    // Get the element of a 4x4 matrix referred to by column and row indices
+    // Argument order is column first, then row.
+    inline float getElem( int col, int row ) const;
+
+    // Add two 4x4 matrices
+    // 
+    inline const Matrix4 operator +( const Matrix4 & mat ) const;
+
+    // Subtract a 4x4 matrix from another 4x4 matrix
+    // 
+    inline const Matrix4 operator -( const Matrix4 & mat ) const;
+
+    // Negate all elements of a 4x4 matrix
+    // 
+    inline const Matrix4 operator -( ) const;
+
+    // Multiply a 4x4 matrix by a scalar
+    // 
+    inline const Matrix4 operator *( float scalar ) const;
+
+    // Multiply a 4x4 matrix by a 4-D vector
+    // 
+    inline const Vector4 operator *( const Vector4 & vec ) const;
+
+    // Multiply a 4x4 matrix by a 3-D vector
+    // Returns a Vector4. NOTE(review): presumably treats vec as having w = 0 - confirm in the definition.
+    inline const Vector4 operator *( const Vector3 & vec ) const;
+
+    // Multiply a 4x4 matrix by a 3-D point
+    // Returns a Vector4. NOTE(review): presumably treats pnt as having w = 1 - confirm in the definition.
+    inline const Vector4 operator *( const Point3 & pnt ) const;
+
+    // Multiply two 4x4 matrices
+    // 
+    inline const Matrix4 operator *( const Matrix4 & mat ) const;
+
+    // Multiply a 4x4 matrix by a 3x4 transformation matrix
+    // 
+    inline const Matrix4 operator *( const Transform3 & tfrm ) const;
+
+    // Perform compound assignment and addition with a 4x4 matrix
+    // 
+    inline Matrix4 & operator +=( const Matrix4 & mat );
+
+    // Perform compound assignment and subtraction by a 4x4 matrix
+    // 
+    inline Matrix4 & operator -=( const Matrix4 & mat );
+
+    // Perform compound assignment and multiplication by a scalar
+    // 
+    inline Matrix4 & operator *=( float scalar );
+
+    // Perform compound assignment and multiplication by a 4x4 matrix
+    // 
+    inline Matrix4 & operator *=( const Matrix4 & mat );
+
+    // Perform compound assignment and multiplication by a 3x4 transformation matrix
+    // 
+    inline Matrix4 & operator *=( const Transform3 & tfrm );
+
+    // Construct an identity 4x4 matrix
+    // 
+    static inline const Matrix4 identity( );
+
+    // Construct a 4x4 matrix to rotate around the x axis
+    // 
+    static inline const Matrix4 rotationX( float radians );
+
+    // Construct a 4x4 matrix to rotate around the y axis
+    // 
+    static inline const Matrix4 rotationY( float radians );
+
+    // Construct a 4x4 matrix to rotate around the z axis
+    // 
+    static inline const Matrix4 rotationZ( float radians );
+
+    // Construct a 4x4 matrix to rotate around the x, y, and z axes
+    // NOTE(review): the name suggests the three rotations are composed in Z*Y*X order - confirm in the definition.
+    static inline const Matrix4 rotationZYX( const Vector3 & radiansXYZ );
+
+    // Construct a 4x4 matrix to rotate around a unit-length 3-D vector
+    // 
+    static inline const Matrix4 rotation( float radians, const Vector3 & unitVec );
+
+    // Construct a rotation matrix from a unit-length quaternion
+    // 
+    static inline const Matrix4 rotation( const Quat & unitQuat );
+
+    // Construct a 4x4 matrix to perform scaling
+    // 
+    static inline const Matrix4 scale( const Vector3 & scaleVec );
+
+    // Construct a 4x4 matrix to perform translation
+    // 
+    static inline const Matrix4 translation( const Vector3 & translateVec );
+
+    // Construct viewing matrix based on eye position, position looked at, and up direction
+    // 
+    static inline const Matrix4 lookAt( const Point3 & eyePos, const Point3 & lookAtPos, const Vector3 & upVec );
+
+    // Construct a perspective projection matrix
+    // The field-of-view argument fovyRadians is given in radians.
+    static inline const Matrix4 perspective( float fovyRadians, float aspect, float zNear, float zFar );
+
+    // Construct a perspective projection matrix based on frustum
+    // 
+    static inline const Matrix4 frustum( float left, float right, float bottom, float top, float zNear, float zFar );
+
+    // Construct an orthographic projection matrix
+    // 
+    static inline const Matrix4 orthographic( float left, float right, float bottom, float top, float zNear, float zFar );
+
+};
+// Multiply a 4x4 matrix by a scalar
+// 
+inline const Matrix4 operator *( float scalar, const Matrix4 & mat );
+
+// Append (post-multiply) a scale transformation to a 4x4 matrix
+// NOTE: 
+// Faster than creating and multiplying a scale transformation matrix.
+// 
+inline const Matrix4 appendScale( const Matrix4 & mat, const Vector3 & scaleVec );
+
+// Prepend (pre-multiply) a scale transformation to a 4x4 matrix
+// NOTE: 
+// Faster than creating and multiplying a scale transformation matrix.
+// 
+inline const Matrix4 prependScale( const Vector3 & scaleVec, const Matrix4 & mat );
+
+// Multiply two 4x4 matrices per element
+// 
+inline const Matrix4 mulPerElem( const Matrix4 & mat0, const Matrix4 & mat1 );
+
+// Compute the absolute value of a 4x4 matrix per element
+// 
+inline const Matrix4 absPerElem( const Matrix4 & mat );
+
+// Transpose of a 4x4 matrix
+// 
+inline const Matrix4 transpose( const Matrix4 & mat );
+
+// Compute the inverse of a 4x4 matrix
+// NOTE: 
+// Result is unpredictable when the determinant of mat is equal to or near 0.
+// 
+inline const Matrix4 inverse( const Matrix4 & mat );
+
+// Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix
+// NOTE: 
+// This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.  The result is unpredictable when the determinant of mat is equal to or near 0.
+// 
+inline const Matrix4 affineInverse( const Matrix4 & mat );
+
+// Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix with an orthogonal upper-left 3x3 submatrix
+// NOTE: 
+// This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.
+// 
+inline const Matrix4 orthoInverse( const Matrix4 & mat );
+
+// Determinant of a 4x4 matrix
+// 
+inline float determinant( const Matrix4 & mat );
+
+// Conditionally select between two 4x4 matrices
+// 
+inline const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, bool select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 4x4 matrix
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Matrix4 & mat );
+
+// Print a 4x4 matrix and an associated string identifier
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+inline void print( const Matrix4 & mat, const char * name );
+
+#endif
+
+// A 3x4 transformation matrix in array-of-structures format
+// Stored as four Vector3 columns (mCol0..mCol3); rows are exchanged with callers as Vector4 values.
+class Transform3
+{
+    Vector3 mCol0;
+    Vector3 mCol1;
+    Vector3 mCol2;
+    Vector3 mCol3;
+
+public:
+    // Default constructor; does no initialization
+    // (empty body and no initializer list: the four columns are left to Vector3's own default constructor)
+    inline Transform3( ) { };
+
+    // Copy a 3x4 transformation matrix
+    // 
+    inline Transform3( const Transform3 & tfrm );
+
+    // Construct a 3x4 transformation matrix containing the specified columns
+    // 
+    inline Transform3( const Vector3 & col0, const Vector3 & col1, const Vector3 & col2, const Vector3 & col3 );
+
+    // Construct a 3x4 transformation matrix from a 3x3 matrix and a 3-D vector
+    // Presumably tfrm becomes the upper-left 3x3 part and translateVec the translation -- confirm in the definition.
+    inline Transform3( const Matrix3 & tfrm, const Vector3 & translateVec );
+
+    // Construct a 3x4 transformation matrix from a unit-length quaternion and a 3-D vector
+    // The quaternion is expected to be unit length; nothing in this declaration indicates that it is normalized for the caller.
+    inline Transform3( const Quat & unitQuat, const Vector3 & translateVec );
+
+    // Set all elements of a 3x4 transformation matrix to the same scalar value
+    // Declared explicit, so a float is never converted to a Transform3 implicitly.
+    explicit inline Transform3( float scalar );
+
+    // Assign one 3x4 transformation matrix to another
+    // Returns a Transform3 reference (presumably *this, allowing chained assignment).
+    inline Transform3 & operator =( const Transform3 & tfrm );
+
+    // Set the upper-left 3x3 submatrix
+    // Like the other setters, returns a Transform3 reference (presumably *this, so calls can be chained).
+    inline Transform3 & setUpper3x3( const Matrix3 & mat3 );
+
+    // Get the upper-left 3x3 submatrix of a 3x4 transformation matrix
+    // Returned by value.
+    inline const Matrix3 getUpper3x3( ) const;
+
+    // Set translation component
+    // 
+    inline Transform3 & setTranslation( const Vector3 & translateVec );
+
+    // Get the translation component of a 3x4 transformation matrix
+    // Returned by value.
+    inline const Vector3 getTranslation( ) const;
+
+    // Set column 0 of a 3x4 transformation matrix
+    // 
+    inline Transform3 & setCol0( const Vector3 & col0 );
+
+    // Set column 1 of a 3x4 transformation matrix
+    // 
+    inline Transform3 & setCol1( const Vector3 & col1 );
+
+    // Set column 2 of a 3x4 transformation matrix
+    // 
+    inline Transform3 & setCol2( const Vector3 & col2 );
+
+    // Set column 3 of a 3x4 transformation matrix
+    // 
+    inline Transform3 & setCol3( const Vector3 & col3 );
+
+    // Get column 0 of a 3x4 transformation matrix
+    // 
+    inline const Vector3 getCol0( ) const;
+
+    // Get column 1 of a 3x4 transformation matrix
+    // 
+    inline const Vector3 getCol1( ) const;
+
+    // Get column 2 of a 3x4 transformation matrix
+    // 
+    inline const Vector3 getCol2( ) const;
+
+    // Get column 3 of a 3x4 transformation matrix
+    // 
+    inline const Vector3 getCol3( ) const;
+
+    // Set the column of a 3x4 transformation matrix referred to by the specified index
+    // There are four columns; whether col is range-checked is not visible here -- confirm in the definition.
+    inline Transform3 & setCol( int col, const Vector3 & vec );
+
+    // Set the row of a 3x4 transformation matrix referred to by the specified index
+    // A row spans all four columns, which is why it is passed as a Vector4.
+    inline Transform3 & setRow( int row, const Vector4 & vec );
+
+    // Get the column of a 3x4 transformation matrix referred to by the specified index
+    // Returned by value.
+    inline const Vector3 getCol( int col ) const;
+
+    // Get the row of a 3x4 transformation matrix referred to by the specified index
+    // Returned by value as a Vector4 (one element per column).
+    inline const Vector4 getRow( int row ) const;
+
+    // Subscripting operator to set or get a column
+    // Returns a mutable reference to the column.
+    inline Vector3 & operator []( int col );
+
+    // Subscripting operator to get a column
+    // Const overload: returns the column by value.
+    inline const Vector3 operator []( int col ) const;
+
+    // Set the element of a 3x4 transformation matrix referred to by column and row indices
+    // Note the argument order: column index first, then row index.
+    inline Transform3 & setElem( int col, int row, float val );
+
+    // Get the element of a 3x4 transformation matrix referred to by column and row indices
+    // Note the argument order: column index first, then row index.
+    inline float getElem( int col, int row ) const;
+
+    // Multiply a 3x4 transformation matrix by a 3-D vector
+    // NOTE(review): presumably the translation column is not applied to a Vector3 -- confirm in the definition.
+    inline const Vector3 operator *( const Vector3 & vec ) const;
+
+    // Multiply a 3x4 transformation matrix by a 3-D point
+    // NOTE(review): presumably the translation column is applied to a Point3 -- confirm in the definition.
+    inline const Point3 operator *( const Point3 & pnt ) const;
+
+    // Multiply two 3x4 transformation matrices
+    // 
+    inline const Transform3 operator *( const Transform3 & tfrm ) const;
+
+    // Perform compound assignment and multiplication by a 3x4 transformation matrix
+    // 
+    inline Transform3 & operator *=( const Transform3 & tfrm );
+
+    // Construct an identity 3x4 transformation matrix
+    // Static factory; the result is returned by value.
+    static inline const Transform3 identity( );
+
+    // Construct a 3x4 transformation matrix to rotate around the x axis
+    // The angle is given in radians.
+    static inline const Transform3 rotationX( float radians );
+
+    // Construct a 3x4 transformation matrix to rotate around the y axis
+    // The angle is given in radians.
+    static inline const Transform3 rotationY( float radians );
+
+    // Construct a 3x4 transformation matrix to rotate around the z axis
+    // The angle is given in radians.
+    static inline const Transform3 rotationZ( float radians );
+
+    // Construct a 3x4 transformation matrix to rotate around the x, y, and z axes
+    // radiansXYZ carries the three angles in radians; the order in which the rotations are composed is set by the definition.
+    static inline const Transform3 rotationZYX( const Vector3 & radiansXYZ );
+
+    // Construct a 3x4 transformation matrix to rotate around a unit-length 3-D vector
+    // The angle is given in radians; unitVec is expected to be unit length.
+    static inline const Transform3 rotation( float radians, const Vector3 & unitVec );
+
+    // Construct a rotation matrix from a unit-length quaternion
+    // 
+    static inline const Transform3 rotation( const Quat & unitQuat );
+
+    // Construct a 3x4 transformation matrix to perform scaling
+    // scaleVec supplies one scale factor per axis.
+    static inline const Transform3 scale( const Vector3 & scaleVec );
+
+    // Construct a 3x4 transformation matrix to perform translation
+    // 
+    static inline const Transform3 translation( const Vector3 & translateVec );
+
+};
+// Append (post-multiply) a scale transformation to a 3x4 transformation matrix
+// NOTE: 
+// Faster than creating and multiplying a scale transformation matrix.
+// Argument order follows the multiplication order: tfrm first here, scaleVec first in prependScale.
+inline const Transform3 appendScale( const Transform3 & tfrm, const Vector3 & scaleVec );
+
+// Prepend (pre-multiply) a scale transformation to a 3x4 transformation matrix
+// NOTE: 
+// Faster than creating and multiplying a scale transformation matrix.
+// Argument order follows the multiplication order: scaleVec first here, tfrm first in appendScale.
+inline const Transform3 prependScale( const Vector3 & scaleVec, const Transform3 & tfrm );
+
+// Multiply two 3x4 transformation matrices per element
+// (an element-by-element product of tfrm0 and tfrm1, as opposed to a matrix product)
+inline const Transform3 mulPerElem( const Transform3 & tfrm0, const Transform3 & tfrm1 );
+
+// Compute the absolute value of a 3x4 transformation matrix per element
+// The argument is taken by const reference; the result is returned by value.
+inline const Transform3 absPerElem( const Transform3 & tfrm );
+
+// Inverse of a 3x4 transformation matrix
+// NOTE: 
+// Result is unpredictable when the determinant of the left 3x3 submatrix is equal to or near 0.
+// The argument is taken by const reference; the result is returned by value.
+inline const Transform3 inverse( const Transform3 & tfrm );
+
+// Compute the inverse of a 3x4 transformation matrix, expected to have an orthogonal upper-left 3x3 submatrix
+// NOTE: 
+// This can be used to achieve better performance than a general inverse when the specified 3x4 transformation matrix meets the given restrictions.
+// Whether tfrm meets these restrictions is presumably not verified by the function -- confirm in the definition.
+inline const Transform3 orthoInverse( const Transform3 & tfrm );
+
+// Conditionally select between two 3x4 transformation matrices
+// Presumably yields tfrm1 when select1 is true and tfrm0 otherwise -- confirm in the definition.
+inline const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, bool select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 3x4 transformation matrix
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// The output destination and layout are decided by the definition, not by this declaration.
+inline void print( const Transform3 & tfrm );
+
+// Print a 3x4 transformation matrix and an associated string identifier
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// name is a C string supplied by the caller; how it is combined with the output is decided by the definition.
+inline void print( const Transform3 & tfrm, const char * name );
+
+#endif // _VECTORMATH_DEBUG
+
+} // namespace Aos
+} // namespace Vectormath
+
+#include "vec_aos.h"
+#include "quat_aos.h"
+#include "mat_aos.h"
+
+#endif
+
diff --git a/Engine/lib/bullet/src/vectormath/scalar/boolInVec.h b/Engine/lib/bullet/src/vectormath/scalar/boolInVec.h
new file mode 100644
index 000000000..c5eeeebd7
--- /dev/null
+++ b/Engine/lib/bullet/src/vectormath/scalar/boolInVec.h
@@ -0,0 +1,225 @@
+/*
+   Copyright (C) 2009 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+
+*/
+
+#ifndef _BOOLINVEC_H
+#define _BOOLINVEC_H
+
+#include <math.h>
+namespace Vectormath {
+
+class floatInVec;
+
+//--------------------------------------------------------------------------------------------------
+// boolInVec class
+// Scalar implementation: a single boolean kept in an unsigned int mask word (mData).
+
+class boolInVec
+{
+private:
+    unsigned int mData; // zero means false; the bool constructor stores all bits set for true
+
+public:
+    // Default constructor; does no initialization
+    // (mData is left indeterminate until something is assigned)
+    inline boolInVec( ) { };
+
+    // Construct from a value converted from float
+    // Not explicit, so a floatInVec converts to boolInVec implicitly.
+    inline boolInVec(floatInVec vec);
+
+    // Explicit cast from bool
+    // Declared explicit, so a plain bool never converts to boolInVec implicitly.
+    explicit inline boolInVec(bool scalar);
+
+    // Explicit cast to bool
+    // Always available, including when _VECTORMATH_NO_SCALAR_CAST removes the implicit cast below.
+    inline bool getAsBool() const;
+
+#ifndef _VECTORMATH_NO_SCALAR_CAST
+    // Implicit cast to bool
+    // Only declared when _VECTORMATH_NO_SCALAR_CAST is not defined.
+    inline operator bool() const;
+#endif
+
+    // Boolean negation operator
+    // Returns a new boolInVec; *this is not modified.
+    inline const boolInVec operator ! () const;
+
+    // Assignment operator
+    // Takes its argument by value and returns a reference for chaining.
+    inline boolInVec& operator = (boolInVec vec);
+
+    // Boolean and assignment operator
+    //
+    inline boolInVec& operator &= (boolInVec vec);
+
+    // Boolean exclusive or assignment operator
+    //
+    inline boolInVec& operator ^= (boolInVec vec);
+
+    // Boolean or assignment operator
+    //
+    inline boolInVec& operator |= (boolInVec vec);
+
+};
+
+// Equal operator
+// Like every operator in this group, takes its operands by value and returns a new boolInVec.
+inline const boolInVec operator == (boolInVec vec0, boolInVec vec1);
+
+// Not equal operator
+//
+inline const boolInVec operator != (boolInVec vec0, boolInVec vec1);
+
+// And operator
+// Acts on the boolean values of the operands.
+inline const boolInVec operator & (boolInVec vec0, boolInVec vec1);
+
+// Exclusive or operator
+// Acts on the boolean values of the operands.
+inline const boolInVec operator ^ (boolInVec vec0, boolInVec vec1);
+
+// Or operator
+// Acts on the boolean values of the operands.
+inline const boolInVec operator | (boolInVec vec0, boolInVec vec1);
+
+// Conditionally select between two values
+// Yields vec1 when select_vec1 is true and vec0 otherwise.
+inline const boolInVec select(boolInVec vec0, boolInVec vec1, boolInVec select_vec1);
+
+
+} // namespace Vectormath
+
+
+//--------------------------------------------------------------------------------------------------
+// boolInVec implementation
+//
+
+#include "floatInVec.h"
+
+namespace Vectormath {
+
+// Build from a floatInVec: the result is true exactly when vec != 0.0f.
+inline boolInVec::boolInVec(floatInVec vec)
+{
+    mData = (vec != floatInVec(0.0f)).mData;
+}
+
+// Store a bool as a mask word: all bits set for true, zero for false.
+inline boolInVec::boolInVec(bool scalar)
+{
+    mData = scalar ? ~0u : 0u;
+}
+
+// Report the stored value as a plain bool: any non-zero mask word is true.
+inline bool boolInVec::getAsBool() const
+{
+    const bool isSet = ( mData != 0u );
+    return isSet;
+}
+
+#ifndef _VECTORMATH_NO_SCALAR_CAST
+// Implicit conversion to bool; compiled out when _VECTORMATH_NO_SCALAR_CAST is defined.
+inline boolInVec::operator bool() const
+{
+    return ( mData != 0u );
+}
+#endif
+
+// Logical negation: yields true when the stored mask word is zero, false otherwise.
+inline const boolInVec boolInVec::operator ! () const
+{
+    const bool negated = ( mData == 0u );
+    return boolInVec(negated);
+}
+
+// Copy the mask word from vec (passed by value) and return *this so
+// assignments can be chained.
+inline boolInVec& boolInVec::operator = (boolInVec vec)
+{
+    this->mData = vec.mData;
+    return *this;
+}
+
+// In-place AND: replaces *this with (*this & vec) and returns *this.
+inline boolInVec& boolInVec::operator &= (boolInVec vec)
+{
+    const boolInVec combined = *this & vec;
+    *this = combined;
+    return *this;
+}
+
+// In-place exclusive OR: replaces *this with (*this ^ vec) and returns *this.
+inline boolInVec& boolInVec::operator ^= (boolInVec vec)
+{
+    const boolInVec combined = *this ^ vec;
+    *this = combined;
+    return *this;
+}
+
+// In-place OR: replaces *this with (*this | vec) and returns *this.
+inline boolInVec& boolInVec::operator |= (boolInVec vec)
+{
+    const boolInVec combined = *this | vec;
+    *this = combined;
+    return *this;
+}
+
+// Equality of the two boolean values (compared as bools, not as raw mask words).
+inline const boolInVec operator == (boolInVec vec0, boolInVec vec1)
+{
+    const bool same = ( vec0.getAsBool() == vec1.getAsBool() );
+    return boolInVec(same);
+}
+
+// Inequality, defined as the negation of operator ==.
+inline const boolInVec operator != (boolInVec vec0, boolInVec vec1)
+{
+    const boolInVec equal = ( vec0 == vec1 );
+    return !equal;
+}
+
+// Logical AND of the two boolean values.
+inline const boolInVec operator & (boolInVec vec0, boolInVec vec1)
+{
+    const bool both = ( vec0.getAsBool() && vec1.getAsBool() );
+    return boolInVec(both);
+}
+
+// Logical OR of the two boolean values.
+inline const boolInVec operator | (boolInVec vec0, boolInVec vec1)
+{
+    const bool either = ( vec0.getAsBool() || vec1.getAsBool() );
+    return boolInVec(either);
+}
+
+// Logical exclusive OR: true when exactly one of the two boolean values is true.
+inline const boolInVec operator ^ (boolInVec vec0, boolInVec vec1)
+{
+    const bool differ = ( vec0.getAsBool() != vec1.getAsBool() );
+    return boolInVec(differ);
+}
+
+// Pick vec1 when select_vec1 is true, otherwise vec0.
+inline const boolInVec select(boolInVec vec0, boolInVec vec1, boolInVec select_vec1)
+{
+    if ( select_vec1.getAsBool() ) return vec1;
+    return vec0;
+}
+
+} // namespace Vectormath
+
+#endif // _BOOLINVEC_H
diff --git a/Engine/lib/bullet/src/vectormath/scalar/floatInVec.h b/Engine/lib/bullet/src/vectormath/scalar/floatInVec.h
new file mode 100644
index 000000000..12d89e43d
--- /dev/null
+++ b/Engine/lib/bullet/src/vectormath/scalar/floatInVec.h
@@ -0,0 +1,343 @@
+/*
+   Copyright (C) 2009 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+
+*/
+#ifndef _FLOATINVEC_H
+#define _FLOATINVEC_H
+
+#include <math.h>
+namespace Vectormath {
+
+class boolInVec;
+
+//--------------------------------------------------------------------------------------------------
+// floatInVec class
+//
+
+// A class representing a scalar float value contained in a vector register
+// This class does not support fastmath
+class floatInVec
+{
+private:
+    float mData; // in this scalar implementation the value is held in a plain float
+
+public:
+    // Default constructor; does no initialization
+    // (mData is left indeterminate until something is assigned)
+    inline floatInVec( ) { };
+
+    // Construct from a value converted from bool
+    // Not explicit, so a boolInVec converts to floatInVec implicitly.
+    inline floatInVec(boolInVec vec);
+
+    // Explicit cast from float
+    // Declared explicit, so a plain float never converts to floatInVec implicitly.
+    explicit inline floatInVec(float scalar);
+
+    // Explicit cast to float
+    // Always available, including when _VECTORMATH_NO_SCALAR_CAST removes the implicit cast below.
+    inline float getAsFloat() const;
+
+#ifndef _VECTORMATH_NO_SCALAR_CAST
+    // Implicit cast to float
+    // Only declared when _VECTORMATH_NO_SCALAR_CAST is not defined.
+    inline operator float() const;
+#endif
+
+    // Post increment (add 1.0f)
+    // Returns the previous value by value.
+    inline const floatInVec operator ++ (int);
+
+    // Post decrement (subtract 1.0f)
+    // Returns the previous value by value.
+    inline const floatInVec operator -- (int);
+
+    // Pre increment (add 1.0f)
+    // Returns a reference to the updated object.
+    inline floatInVec& operator ++ ();
+
+    // Pre decrement (subtract 1.0f)
+    // Returns a reference to the updated object.
+    inline floatInVec& operator -- ();
+
+    // Negation operator
+    // Returns a new floatInVec; *this is not modified.
+    inline const floatInVec operator - () const;
+
+    // Assignment operator
+    // Takes its argument by value and returns a reference for chaining.
+    inline floatInVec& operator = (floatInVec vec);
+
+    // Multiplication assignment operator
+    //
+    inline floatInVec& operator *= (floatInVec vec);
+
+    // Division assignment operator
+    //
+    inline floatInVec& operator /= (floatInVec vec);
+
+    // Addition assignment operator
+    //
+    inline floatInVec& operator += (floatInVec vec);
+
+    // Subtraction assignment operator
+    //
+    inline floatInVec& operator -= (floatInVec vec);
+
+};
+
+// Multiplication operator
+// Like every arithmetic operator in this group, takes its operands by value and returns a new floatInVec.
+inline const floatInVec operator * (floatInVec vec0, floatInVec vec1);
+
+// Division operator
+// vec0 is the numerator and vec1 the denominator.
+inline const floatInVec operator / (floatInVec vec0, floatInVec vec1);
+
+// Addition operator
+//
+inline const floatInVec operator + (floatInVec vec0, floatInVec vec1);
+
+// Subtraction operator
+//
+inline const floatInVec operator - (floatInVec vec0, floatInVec vec1);
+
+// Less than operator
+// Like every comparison in this group, the result is a boolInVec rather than a plain bool.
+inline const boolInVec operator < (floatInVec vec0, floatInVec vec1);
+
+// Less than or equal operator
+//
+inline const boolInVec operator <= (floatInVec vec0, floatInVec vec1);
+
+// Greater than operator
+//
+inline const boolInVec operator > (floatInVec vec0, floatInVec vec1);
+
+// Greater than or equal operator
+//
+inline const boolInVec operator >= (floatInVec vec0, floatInVec vec1);
+
+// Equal operator
+// Exact floating-point comparison; no tolerance is applied.
+inline const boolInVec operator == (floatInVec vec0, floatInVec vec1);
+
+// Not equal operator
+//
+inline const boolInVec operator != (floatInVec vec0, floatInVec vec1);
+
+// Conditionally select between two values
+// Yields vec1 when select_vec1 is true and vec0 otherwise.
+inline const floatInVec select(floatInVec vec0, floatInVec vec1, boolInVec select_vec1);
+
+
+} // namespace Vectormath
+
+
+//--------------------------------------------------------------------------------------------------
+// floatInVec implementation
+//
+
+#include "boolInVec.h"
+
+namespace Vectormath {
+
+// Build from a boolInVec: true becomes 1.0f, false becomes 0.0f.
+inline floatInVec::floatInVec(boolInVec vec)
+{
+    mData = vec.getAsBool() ? 1.0f : 0.0f;
+}
+
+// Wrap a plain float value.
+inline floatInVec::floatInVec(float scalar)
+    : mData(scalar)
+{
+}
+
+// Return the wrapped float by value.
+inline float floatInVec::getAsFloat() const
+{
+    const float value = mData;
+    return value;
+}
+
+#ifndef _VECTORMATH_NO_SCALAR_CAST
+// Implicit conversion to float; compiled out when _VECTORMATH_NO_SCALAR_CAST is defined.
+inline floatInVec::operator float() const
+{
+    return mData;
+}
+#endif
+
+// Post-increment: adds 1.0f to the stored value and returns the value held
+// before the increment.
+inline const floatInVec floatInVec::operator ++ (int)
+{
+    const floatInVec previous(mData);
+    ++(*this);
+    return previous;
+}
+
+// Post-decrement: subtracts 1.0f from the stored value and returns the value
+// held before the decrement.
+inline const floatInVec floatInVec::operator -- (int)
+{
+    const floatInVec previous(mData);
+    --(*this);
+    return previous;
+}
+
+// Pre-increment: adds 1.0f to the stored value and returns *this.
+inline floatInVec& floatInVec::operator ++ ()
+{
+    const floatInVec one(1.0f);
+    *this += one;
+    return *this;
+}
+
+// Pre-decrement: subtracts 1.0f from the stored value and returns *this.
+inline floatInVec& floatInVec::operator -- ()
+{
+    const floatInVec one(1.0f);
+    *this -= one;
+    return *this;
+}
+
+// Unary minus: returns a new floatInVec holding the negated value.
+inline const floatInVec floatInVec::operator - () const
+{
+    const float negated = -mData;
+    return floatInVec(negated);
+}
+
+// Copy the wrapped float from vec (passed by value) and return *this so
+// assignments can be chained.
+inline floatInVec& floatInVec::operator = (floatInVec vec)
+{
+    this->mData = vec.getAsFloat();
+    return *this;
+}
+
+// In-place multiply: replaces *this with (*this * vec) and returns *this.
+inline floatInVec& floatInVec::operator *= (floatInVec vec)
+{
+    const floatInVec product = *this * vec;
+    *this = product;
+    return *this;
+}
+
+// In-place divide: replaces *this with (*this / vec) and returns *this.
+inline floatInVec& floatInVec::operator /= (floatInVec vec)
+{
+    const floatInVec quotient = *this / vec;
+    *this = quotient;
+    return *this;
+}
+
+// In-place add: replaces *this with (*this + vec) and returns *this.
+inline floatInVec& floatInVec::operator += (floatInVec vec)
+{
+    const floatInVec sum = *this + vec;
+    *this = sum;
+    return *this;
+}
+
+// In-place subtract: replaces *this with (*this - vec) and returns *this.
+inline floatInVec& floatInVec::operator -= (floatInVec vec)
+{
+    const floatInVec difference = *this - vec;
+    *this = difference;
+    return *this;
+}
+
+// Product of the two wrapped floats.
+inline const floatInVec operator * (floatInVec vec0, floatInVec vec1)
+{
+    const float product = vec0.getAsFloat() * vec1.getAsFloat();
+    return floatInVec(product);
+}
+
+// Quotient num / den of the two wrapped floats; den is not checked for zero.
+inline const floatInVec operator / (floatInVec num, floatInVec den)
+{
+    const float quotient = num.getAsFloat() / den.getAsFloat();
+    return floatInVec(quotient);
+}
+
+// Sum of the two wrapped floats.
+inline const floatInVec operator + (floatInVec vec0, floatInVec vec1)
+{
+    const float sum = vec0.getAsFloat() + vec1.getAsFloat();
+    return floatInVec(sum);
+}
+
+// Difference vec0 - vec1 of the two wrapped floats.
+inline const floatInVec operator - (floatInVec vec0, floatInVec vec1)
+{
+    const float difference = vec0.getAsFloat() - vec1.getAsFloat();
+    return floatInVec(difference);
+}
+
+// True when vec0's value is strictly less than vec1's.
+inline const boolInVec operator < (floatInVec vec0, floatInVec vec1)
+{
+    const bool isLess = ( vec0.getAsFloat() < vec1.getAsFloat() );
+    return boolInVec(isLess);
+}
+
+// Less-than-or-equal, evaluated directly on the wrapped floats so that a NaN
+// operand yields false; the previous !(vec0 > vec1) form yielded true for NaN.
+inline const boolInVec operator <= (floatInVec vec0, floatInVec vec1)
+{
+    return boolInVec(vec0.getAsFloat() <= vec1.getAsFloat());
+}
+
+// True when vec0's value is strictly greater than vec1's.
+inline const boolInVec operator > (floatInVec vec0, floatInVec vec1)
+{
+    const bool isGreater = ( vec0.getAsFloat() > vec1.getAsFloat() );
+    return boolInVec(isGreater);
+}
+
+// Greater-than-or-equal, evaluated directly on the wrapped floats so that a NaN
+// operand yields false; the previous !(vec0 < vec1) form yielded true for NaN.
+inline const boolInVec operator >= (floatInVec vec0, floatInVec vec1)
+{
+    return boolInVec(vec0.getAsFloat() >= vec1.getAsFloat());
+}
+
+// Exact equality of the two wrapped floats (no tolerance).
+inline const boolInVec operator == (floatInVec vec0, floatInVec vec1)
+{
+    const bool same = ( vec0.getAsFloat() == vec1.getAsFloat() );
+    return boolInVec(same);
+}
+
+// Inequality, defined as the negation of operator ==.
+inline const boolInVec operator != (floatInVec vec0, floatInVec vec1)
+{
+    const boolInVec equal = ( vec0 == vec1 );
+    return !equal;
+}
+
+// Pick vec1 when select_vec1 is true, otherwise vec0.
+inline const floatInVec select(floatInVec vec0, floatInVec vec1, boolInVec select_vec1)
+{
+    if ( select_vec1.getAsBool() ) return vec1;
+    return vec0;
+}
+
+} // namespace Vectormath
+
+#endif // _FLOATINVEC_H
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/vectormath/scalar/cpp/mat_aos.h b/Engine/lib/bullet/src/vectormath/scalar/mat_aos.h
similarity index 96%
rename from Engine/lib/bullet/src/BulletMultiThreaded/vectormath/scalar/cpp/mat_aos.h
rename to Engine/lib/bullet/src/vectormath/scalar/mat_aos.h
index 5d5d012d9..e103243d1 100644
--- a/Engine/lib/bullet/src/BulletMultiThreaded/vectormath/scalar/cpp/mat_aos.h
+++ b/Engine/lib/bullet/src/vectormath/scalar/mat_aos.h
@@ -1,30 +1,17 @@
 /*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   Copyright (C) 2009 Sony Computer Entertainment Inc.
    All rights reserved.
 
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
 
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
 */
 
 #ifndef _VECTORMATH_MAT_AOS_CPP_H
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/vectormath/scalar/cpp/quat_aos.h b/Engine/lib/bullet/src/vectormath/scalar/quat_aos.h
similarity index 85%
rename from Engine/lib/bullet/src/BulletMultiThreaded/vectormath/scalar/cpp/quat_aos.h
rename to Engine/lib/bullet/src/vectormath/scalar/quat_aos.h
index 7f1e8822b..764e01708 100644
--- a/Engine/lib/bullet/src/BulletMultiThreaded/vectormath/scalar/cpp/quat_aos.h
+++ b/Engine/lib/bullet/src/vectormath/scalar/quat_aos.h
@@ -1,34 +1,22 @@
 /*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   Copyright (C) 2009 Sony Computer Entertainment Inc.
    All rights reserved.
 
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
 
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
 */
 
 #ifndef _VECTORMATH_QUAT_AOS_CPP_H
 #define _VECTORMATH_QUAT_AOS_CPP_H
+
 //-----------------------------------------------------------------------------
 // Definitions
 
@@ -119,6 +107,19 @@ inline const Quat squad( float t, const Quat & unitQuat0, const Quat & unitQuat1
     return slerp( ( ( 2.0f * t ) * ( 1.0f - t ) ), tmp0, tmp1 );
 }
 
+inline void loadXYZW( Quat & quat, const float * fptr ) // overwrites quat with a Quat built from fptr[0..3]
+{
+    quat = Quat( fptr[0], fptr[1], fptr[2], fptr[3] ); // reads four consecutive floats; presumably x, y, z, w in that order -- confirm against the Quat constructor
+}
+
+inline void storeXYZW( const Quat & quat, float * fptr ) // writes quat's four components to fptr[0..3]; quat is not modified
+{
+    fptr[0] = quat.getX(); // x first
+    fptr[1] = quat.getY();
+    fptr[2] = quat.getZ();
+    fptr[3] = quat.getW(); // w last; fptr must have room for four floats
+}
+
 inline Quat & Quat::operator =( const Quat & quat )
 {
     mX = quat.mX;
@@ -307,7 +308,7 @@ inline float norm( const Quat & quat )
 
 inline float length( const Quat & quat )
 {
-    return sqrtf( norm( quat ) );
+    return ::sqrtf( norm( quat ) );
 }
 
 inline const Quat normalize( const Quat & quat )
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/vectormath/scalar/cpp/vec_aos.h b/Engine/lib/bullet/src/vectormath/scalar/vec_aos.h
similarity index 77%
rename from Engine/lib/bullet/src/BulletMultiThreaded/vectormath/scalar/cpp/vec_aos.h
rename to Engine/lib/bullet/src/vectormath/scalar/vec_aos.h
index a1a75333a..46d4d6b3e 100644
--- a/Engine/lib/bullet/src/BulletMultiThreaded/vectormath/scalar/cpp/vec_aos.h
+++ b/Engine/lib/bullet/src/vectormath/scalar/vec_aos.h
@@ -1,34 +1,22 @@
 /*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   Copyright (C) 2009 Sony Computer Entertainment Inc.
    All rights reserved.
 
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
 
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
 */
 
 #ifndef _VECTORMATH_VEC_AOS_CPP_H
 #define _VECTORMATH_VEC_AOS_CPP_H
+
 //-----------------------------------------------------------------------------
 // Constants
 
@@ -109,6 +97,94 @@ inline const Vector3 slerp( float t, const Vector3 & unitVec0, const Vector3 & u
     return ( ( unitVec0 * scale0 ) + ( unitVec1 * scale1 ) );
 }
 
+inline void loadXYZ( Vector3 & vec, const float * fptr ) // overwrites vec with a Vector3 built from fptr[0..2]
+{
+    vec = Vector3( fptr[0], fptr[1], fptr[2] ); // reads three consecutive floats; presumably x, y, z in that order -- confirm against the Vector3 constructor
+}
+
+inline void storeXYZ( const Vector3 & vec, float * fptr ) // writes vec's three components to fptr[0..2]; vec is not modified
+{
+    fptr[0] = vec.getX(); // x first
+    fptr[1] = vec.getY();
+    fptr[2] = vec.getZ(); // z last; fptr must have room for three floats
+}
+
+inline void loadHalfFloats( Vector3 & vec, const unsigned short * hfptr ) // expands the 16-bit half floats hfptr[0..2] into vec[0..2]
+{
+    union Data32 {   // used to reinterpret an assembled bit pattern as a float; assumes unsigned int and float are both 32 bits -- TODO confirm for target platforms
+        unsigned int u32;
+        float f32;
+    };
+
+    for (int i = 0; i < 3; i++) {
+        unsigned short fp16 = hfptr[i];
+        unsigned int sign = fp16 >> 15;                          // top bit
+        unsigned int exponent = (fp16 >> 10) & ((1 << 5) - 1);   // next 5 bits
+        unsigned int mantissa = fp16 & ((1 << 10) - 1);          // low 10 bits
+
+        if (exponent == 0) {
+            // zero, or a half value with zero exponent and non-zero mantissa -> zero (sign is kept)
+            mantissa = 0;
+
+        } else if (exponent == 31) {
+            // infinity or nan -> infinity
+            exponent = 255;
+	    mantissa = 0;
+
+        } else {
+            exponent += 127 - 15;   // move the exponent from the half bias (15) to the float bias (127)
+            mantissa <<= 13;        // 10 mantissa bits moved to the top of the 23-bit float mantissa field
+        }
+
+        Data32 d;
+        d.u32 = (sign << 31) | (exponent << 23) | mantissa;   // sign | exponent | mantissa in 32-bit float layout
+        vec[i] = d.f32;
+    }
+}
+
+inline void storeHalfFloats( const Vector3 & vec, unsigned short * hfptr ) // packs vec[0..2] into the 16-bit half floats hfptr[0..2]
+{
+    union Data32 {   // used to read the raw bits of a float; assumes unsigned int and float are both 32 bits -- TODO confirm for target platforms
+        unsigned int u32;
+        float f32;
+    };
+
+    for (int i = 0; i < 3; i++) {
+        Data32 d;
+        d.f32 = vec[i];
+
+        unsigned int sign = d.u32 >> 31;                          // top bit
+        unsigned int exponent = (d.u32 >> 23) & ((1 << 8) - 1);   // next 8 bits
+        unsigned int mantissa = d.u32 & ((1 << 23) - 1);;         // low 23 bits
+
+        if (exponent == 0) {
+            // zero or denorm -> zero
+            mantissa = 0;
+
+        } else if (exponent == 255 && mantissa != 0) {
+            // nan -> infinity (the mantissa is cleared; the sign bit is kept)
+            exponent = 31;
+            mantissa = 0;
+
+        } else if (exponent >= 127 - 15 + 31) {
+            // overflow or infinity -> infinity
+            exponent = 31;
+            mantissa = 0;
+
+        } else if (exponent <= 127 - 15) {
+            // underflow -> zero (a rebiased exponent of 0 or less is flushed to zero; the sign bit is kept)
+            exponent = 0;
+            mantissa = 0;
+
+        } else {
+            exponent -= 127 - 15;   // move the exponent from the float bias (127) to the half bias (15)
+            mantissa >>= 13;        // keep the top 10 mantissa bits; the low 13 bits are dropped without rounding
+        }
+
+        hfptr[i] = (unsigned short)((sign << 15) | (exponent << 10) | mantissa);   // sign | exponent | mantissa in 16-bit half layout
+    }
+}
+
 inline Vector3 & Vector3::operator =( const Vector3 & vec )
 {
     mX = vec.mX;
@@ -379,7 +455,7 @@ inline float lengthSqr( const Vector3 & vec )
 
 inline float length( const Vector3 & vec )
 {
-    return sqrtf( lengthSqr( vec ) );
+    return ::sqrtf( lengthSqr( vec ) );
 }
 
 inline const Vector3 normalize( const Vector3 & vec )
@@ -521,6 +597,95 @@ inline const Vector4 slerp( float t, const Vector4 & unitVec0, const Vector4 & u
     return ( ( unitVec0 * scale0 ) + ( unitVec1 * scale1 ) );
 }
 
+// Set vec to (fptr[0], fptr[1], fptr[2], fptr[3]).
+inline void loadXYZW( Vector4 & vec, const float * fptr ) {
+    vec = Vector4( *fptr, *(fptr + 1), *(fptr + 2), *(fptr + 3) );
+}
+
+// Write the x, y, z and w elements of vec to fptr[0..3], in that order.
+inline void storeXYZW( const Vector4 & vec, float * fptr ) {
+    *fptr++ = vec.getX();
+    *fptr++ = vec.getY();
+    *fptr++ = vec.getZ();
+    *fptr   = vec.getW();
+}
+
+// Expand the four IEEE half-precision values at hfptr[0..3] into the x, y, z and w
+// elements of vec. A half with a zero exponent field (zero or denormal) becomes a
+// signed zero, and one with an all-ones exponent field (infinity or NaN) becomes a
+// signed infinity, so neither denormals nor NaNs survive the conversion.
+inline void loadHalfFloats( Vector4 & vec, const unsigned short * hfptr )
+{
+    union FloatBits {
+        unsigned int bits;
+        float value;
+    };
+
+    const unsigned int kHalfExpMask  = (1 << 5) - 1;    // 5-bit exponent field
+    const unsigned int kHalfFracMask = (1 << 10) - 1;   // 10-bit mantissa field
+
+    for (int slot = 0; slot < 4; ++slot) {
+        const unsigned int half = hfptr[slot];
+        const unsigned int signBit = half >> 15;
+        unsigned int exp32 = (half >> 10) & kHalfExpMask;
+        unsigned int frac32 = 0;
+
+        if (exp32 == 31) {
+            exp32 = 255;                            // infinity or nan -> infinity
+        } else if (exp32 != 0) {
+            exp32 += 127 - 15;                      // re-bias for single precision
+            frac32 = (half & kHalfFracMask) << 13;  // widen 10 -> 23 mantissa bits
+        }                                           // exp32 == 0: zero or denorm -> zero
+
+        FloatBits out;
+        out.bits = (signBit << 31) | (exp32 << 23) | frac32;
+        vec[slot] = out.value;
+    }
+}
+
+// Pack the x, y, z and w elements of vec into four IEEE half-precision values at
+// hfptr[0..3]. The 23-bit mantissa is truncated to 10 bits without rounding.
+// Inputs whose biased exponent is at or above 127 - 15 + 31 (NaN, infinity and
+// values too large for a finite half) are stored as a signed infinity; inputs
+// whose biased exponent is at or below 127 - 15 (zero, denormals and values too
+// small for a normalized half) are stored as a signed zero. Exactly four
+// unsigned shorts are written.
+inline void storeHalfFloats( const Vector4 & vec, unsigned short * hfptr )
+{
+    union FloatBits {
+        unsigned int bits;
+        float value;
+    };
+
+    const unsigned int kExpBias = 127 - 15;              // float bias minus half bias
+    const unsigned int kFloatExpMask = (1 << 8) - 1;     // 8-bit exponent field
+    const unsigned int kFloatFracMask = (1 << 23) - 1;   // 23-bit mantissa field
+
+    for (int slot = 0; slot < 4; ++slot) {
+        FloatBits in;
+        in.value = vec[slot];
+
+        const unsigned int signBit = in.bits >> 31;
+        const unsigned int exp32 = (in.bits >> 23) & kFloatExpMask;
+        unsigned int exp16;
+        unsigned int frac16 = 0;
+
+        if (exp32 >= kExpBias + 31) {
+            // nan, infinity or overflow -> infinity
+            exp16 = 31;
+        } else if (exp32 <= kExpBias) {
+            // zero, denorm or underflow -> zero
+            exp16 = 0;
+        } else {
+            // normal range: re-bias the exponent and keep the top 10 mantissa bits
+            exp16 = exp32 - kExpBias;
+            frac16 = (in.bits & kFloatFracMask) >> 13;
+        }
+
+        hfptr[slot] = (unsigned short)((signBit << 15) | (exp16 << 10) | frac16);
+    }
+}
+
 inline Vector4 & Vector4::operator =( const Vector4 & vec )
 {
     mX = vec.mX;
@@ -826,7 +991,7 @@ inline float lengthSqr( const Vector4 & vec )
 
 inline float length( const Vector4 & vec )
 {
-    return sqrtf( lengthSqr( vec ) );
+    return ::sqrtf( lengthSqr( vec ) );
 }
 
 inline const Vector4 normalize( const Vector4 & vec )
@@ -899,6 +1064,94 @@ inline const Point3 lerp( float t, const Point3 & pnt0, const Point3 & pnt1 )
     return ( pnt0 + ( ( pnt1 - pnt0 ) * t ) );
 }
 
+// Set pnt to (fptr[0], fptr[1], fptr[2]).
+inline void loadXYZ( Point3 & pnt, const float * fptr ) {
+    pnt = Point3( *fptr, *(fptr + 1), *(fptr + 2) );
+}
+
+// Write the x, y and z elements of pnt to fptr[0..2], in that order.
+inline void storeXYZ( const Point3 & pnt, float * fptr ) {
+    *fptr++ = pnt.getX();
+    *fptr++ = pnt.getY();
+    *fptr   = pnt.getZ();
+}
+
+// Expand the three IEEE half-precision values at hfptr[0..2] into the x, y and z
+// elements of vec. A half with a zero exponent field (zero or denormal) becomes a
+// signed zero, and one with an all-ones exponent field (infinity or NaN) becomes a
+// signed infinity, so neither denormals nor NaNs survive the conversion.
+inline void loadHalfFloats( Point3 & vec, const unsigned short * hfptr )
+{
+    union FloatBits {
+        unsigned int bits;
+        float value;
+    };
+
+    const unsigned int kHalfExpMask  = (1 << 5) - 1;    // 5-bit exponent field
+    const unsigned int kHalfFracMask = (1 << 10) - 1;   // 10-bit mantissa field
+
+    for (int slot = 0; slot < 3; ++slot) {
+        const unsigned int half = hfptr[slot];
+        const unsigned int signBit = half >> 15;
+        unsigned int exp32 = (half >> 10) & kHalfExpMask;
+        unsigned int frac32 = 0;
+
+        if (exp32 == 31) {
+            exp32 = 255;                            // infinity or nan -> infinity
+        } else if (exp32 != 0) {
+            exp32 += 127 - 15;                      // re-bias for single precision
+            frac32 = (half & kHalfFracMask) << 13;  // widen 10 -> 23 mantissa bits
+        }                                           // exp32 == 0: zero or denorm -> zero
+
+        FloatBits out;
+        out.bits = (signBit << 31) | (exp32 << 23) | frac32;
+        vec[slot] = out.value;
+    }
+}
+
+// Pack the x, y and z elements of vec into three IEEE half-precision values at
+// hfptr[0..2]. The 23-bit mantissa is truncated to 10 bits without rounding.
+// Inputs whose biased exponent is at or above 127 - 15 + 31 (NaN, infinity and
+// values too large for a finite half) are stored as a signed infinity; inputs
+// whose biased exponent is at or below 127 - 15 (zero, denormals and values too
+// small for a normalized half) are stored as a signed zero. Exactly three
+// unsigned shorts are written.
+inline void storeHalfFloats( const Point3 & vec, unsigned short * hfptr )
+{
+    union FloatBits {
+        unsigned int bits;
+        float value;
+    };
+
+    const unsigned int kExpBias = 127 - 15;              // float bias minus half bias
+    const unsigned int kFloatExpMask = (1 << 8) - 1;     // 8-bit exponent field
+    const unsigned int kFloatFracMask = (1 << 23) - 1;   // 23-bit mantissa field
+
+    for (int slot = 0; slot < 3; ++slot) {
+        FloatBits in;
+        in.value = vec[slot];
+
+        const unsigned int signBit = in.bits >> 31;
+        const unsigned int exp32 = (in.bits >> 23) & kFloatExpMask;
+        unsigned int exp16;
+        unsigned int frac16 = 0;
+
+        if (exp32 >= kExpBias + 31) {
+            // nan, infinity or overflow -> infinity
+            exp16 = 31;
+        } else if (exp32 <= kExpBias) {
+            // zero, denorm or underflow -> zero
+            exp16 = 0;
+        } else {
+            // normal range: re-bias the exponent and keep the top 10 mantissa bits
+            exp16 = exp32 - kExpBias;
+            frac16 = (in.bits & kFloatFracMask) >> 13;
+        }
+
+        hfptr[slot] = (unsigned short)((signBit << 15) | (exp16 << 10) | frac16);
+    }
+}
+
 inline Point3 & Point3::operator =( const Point3 & pnt )
 {
     mX = pnt.mX;
diff --git a/Engine/lib/bullet/src/BulletMultiThreaded/vectormath/scalar/cpp/vectormath_aos.h b/Engine/lib/bullet/src/vectormath/scalar/vectormath_aos.h
similarity index 92%
rename from Engine/lib/bullet/src/BulletMultiThreaded/vectormath/scalar/cpp/vectormath_aos.h
rename to Engine/lib/bullet/src/vectormath/scalar/vectormath_aos.h
index 7913c11ea..d00456dfe 100644
--- a/Engine/lib/bullet/src/BulletMultiThreaded/vectormath/scalar/cpp/vectormath_aos.h
+++ b/Engine/lib/bullet/src/vectormath/scalar/vectormath_aos.h
@@ -1,34 +1,21 @@
 /*
-   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   Copyright (C) 2009 Sony Computer Entertainment Inc.
    All rights reserved.
 
-   Redistribution and use in source and binary forms,
-   with or without modification, are permitted provided that the
-   following conditions are met:
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of the Sony Computer Entertainment Inc nor the names
-      of its contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose, 
+including commercial applications, and to alter it and redistribute it freely, 
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
 
-   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-   POSSIBILITY OF SUCH DAMAGE.
 */
 
-#ifndef _VECTORMATH_AOS_CPP_SCALAR_H
-#define _VECTORMATH_AOS_CPP_SCALAR_H
+#ifndef _VECTORMATH_AOS_CPP_H
+#define _VECTORMATH_AOS_CPP_H
 
 #include <math.h>
 
@@ -303,6 +290,28 @@ inline const Vector3 slerp( float t, const Vector3 & unitVec0, const Vector3 & u
 // 
 inline const Vector3 select( const Vector3 & vec0, const Vector3 & vec1, bool select1 );
 
+// Load x, y, and z elements from the first three words of a float array.
+// 
+// 
+inline void loadXYZ( Vector3 & vec, const float * fptr );
+
+// Store x, y, and z elements of a 3-D vector in the first three words of a float array.
+// Memory area of previous 16 bytes and next 32 bytes from fptr might be accessed
+// 
+inline void storeXYZ( const Vector3 & vec, float * fptr );
+
+// Load three-half-floats as a 3-D vector
+// NOTE: 
+// This transformation does not support either denormalized numbers or NaNs.
+// 
+inline void loadHalfFloats( Vector3 & vec, const unsigned short * hfptr );
+
+// Store a 3-D vector as half-floats.
+// NOTE: 
+// This transformation does not support either denormalized numbers or NaNs. Memory area of previous 16 bytes and next 32 bytes from hfptr might be accessed.
+// 
+inline void storeHalfFloats( const Vector3 & vec, unsigned short * hfptr );
+
 #ifdef _VECTORMATH_DEBUG
 
 // Print a 3-D vector
@@ -580,6 +589,28 @@ inline const Vector4 slerp( float t, const Vector4 & unitVec0, const Vector4 & u
 // 
 inline const Vector4 select( const Vector4 & vec0, const Vector4 & vec1, bool select1 );
 
+// Load x, y, z, and w elements from the first four words of a float array.
+// 
+// 
+inline void loadXYZW( Vector4 & vec, const float * fptr );
+
+// Store x, y, z, and w elements of a 4-D vector in the first four words of a float array.
+// Memory area of previous 16 bytes and next 32 bytes from fptr might be accessed
+// 
+inline void storeXYZW( const Vector4 & vec, float * fptr );
+
+// Load four-half-floats as a 4-D vector
+// NOTE: 
+// This transformation does not support either denormalized numbers or NaNs.
+// 
+inline void loadHalfFloats( Vector4 & vec, const unsigned short * hfptr );
+
+// Store a 4-D vector as half-floats.
+// NOTE: 
+// This transformation does not support either denormalized numbers or NaNs. Memory area of previous 16 bytes and next 32 bytes from hfptr might be accessed.
+// 
+inline void storeHalfFloats( const Vector4 & vec, unsigned short * hfptr );
+
 #ifdef _VECTORMATH_DEBUG
 
 // Print a 4-D vector
@@ -792,6 +823,28 @@ inline const Point3 lerp( float t, const Point3 & pnt0, const Point3 & pnt1 );
 // 
 inline const Point3 select( const Point3 & pnt0, const Point3 & pnt1, bool select1 );
 
+// Load x, y, and z elements from the first three words of a float array.
+// 
+// 
+inline void loadXYZ( Point3 & pnt, const float * fptr );
+
+// Store x, y, and z elements of a 3-D point in the first three words of a float array.
+// Memory area of previous 16 bytes and next 32 bytes from fptr might be accessed
+// 
+inline void storeXYZ( const Point3 & pnt, float * fptr );
+
+// Load three-half-floats as a 3-D point
+// NOTE: 
+// This transformation does not support either denormalized numbers or NaNs.
+// 
+inline void loadHalfFloats( Point3 & pnt, const unsigned short * hfptr );
+
+// Store a 3-D point as half-floats.
+// NOTE: 
+// This transformation does not support either denormalized numbers or NaNs. Memory area of previous 16 bytes and next 32 bytes from hfptr might be accessed.
+// 
+inline void storeHalfFloats( const Point3 & pnt, unsigned short * hfptr );
+
 #ifdef _VECTORMATH_DEBUG
 
 // Print a 3-D point
@@ -1035,6 +1088,16 @@ inline const Quat squad( float t, const Quat & unitQuat0, const Quat & unitQuat1
 // 
 inline const Quat select( const Quat & quat0, const Quat & quat1, bool select1 );
 
+// Load x, y, z, and w elements from the first four words of a float array.
+// 
+// 
+inline void loadXYZW( Quat & quat, const float * fptr );
+
+// Store x, y, z, and w elements of a quaternion in the first four words of a float array.
+// Memory area of previous 16 bytes and next 32 bytes from fptr might be accessed
+// 
+inline void storeXYZW( const Quat & quat, float * fptr );
+
 #ifdef _VECTORMATH_DEBUG
 
 // Print a quaternion
diff --git a/Engine/lib/bullet/src/vectormath/sse/boolInVec.h b/Engine/lib/bullet/src/vectormath/sse/boolInVec.h
new file mode 100644
index 000000000..d21d25cbb
--- /dev/null
+++ b/Engine/lib/bullet/src/vectormath/sse/boolInVec.h
@@ -0,0 +1,247 @@
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _BOOLINVEC_H
+#define _BOOLINVEC_H
+
+#include <math.h>
+#include <xmmintrin.h>
+namespace Vectormath {
+
+class floatInVec;
+
+//--------------------------------------------------------------------------------------------------
+// boolInVec class
+//
+
+class boolInVec  // a bool stored as an SSE mask vector
+{
+    private:
+        __m128 mData;  // mask: each word slot is 0 (false) or -1 (true)
+
+        inline boolInVec(__m128 vec);  // wraps a raw mask vector as-is
+    public:
+        inline boolInVec() {}  // leaves mData uninitialized
+
+        // implicit conversion from floatInVec, matching the standard float -> bool
+        // conversion: the result is true when the value compares not-equal to 0.0f
+        inline boolInVec(const floatInVec &vec);
+
+        // explicit cast from bool
+        // (the value is replicated into all four word slots)
+        explicit inline boolInVec(bool scalar);
+
+#ifdef _VECTORMATH_NO_SCALAR_CAST
+        // explicit cast to bool
+        // (selected when _VECTORMATH_NO_SCALAR_CAST is defined)
+        inline bool getAsBool() const;
+#else
+        // implicit cast to bool
+        // (selected when _VECTORMATH_NO_SCALAR_CAST is not defined)
+        inline operator bool() const;
+#endif
+        
+        // get vector data
+        // bool value is splatted across all word slots of vector as 0 (false) or -1 (true)
+        //
+        inline __m128 get128() const;
+
+        // operators
+        // (the compound assignments are implemented with the binary friend operators below)
+        inline const boolInVec operator ! () const;
+        inline boolInVec& operator = (const boolInVec &vec);
+        inline boolInVec& operator &= (const boolInVec &vec);
+        inline boolInVec& operator ^= (const boolInVec &vec);
+        inline boolInVec& operator |= (const boolInVec &vec);
+
+        // friend functions
+        // (friends because they build their result with the private __m128 constructor)
+        friend inline const boolInVec operator == (const boolInVec &vec0, const boolInVec &vec1);
+        friend inline const boolInVec operator != (const boolInVec &vec0, const boolInVec &vec1);
+        friend inline const boolInVec operator < (const floatInVec &vec0, const floatInVec &vec1);
+        friend inline const boolInVec operator <= (const floatInVec &vec0, const floatInVec &vec1);
+        friend inline const boolInVec operator > (const floatInVec &vec0, const floatInVec &vec1);
+        friend inline const boolInVec operator >= (const floatInVec &vec0, const floatInVec &vec1);
+        friend inline const boolInVec operator == (const floatInVec &vec0, const floatInVec &vec1);
+        friend inline const boolInVec operator != (const floatInVec &vec0, const floatInVec &vec1);
+        friend inline const boolInVec operator & (const boolInVec &vec0, const boolInVec &vec1);
+        friend inline const boolInVec operator ^ (const boolInVec &vec0, const boolInVec &vec1);
+        friend inline const boolInVec operator | (const boolInVec &vec0, const boolInVec &vec1);
+        friend inline const boolInVec select(const boolInVec &vec0, const boolInVec &vec1, const boolInVec &select_vec1);
+};
+
+//--------------------------------------------------------------------------------------------------
+// boolInVec functions
+//
+
+// operators
+//
+inline const boolInVec operator == (const boolInVec &vec0, const boolInVec &vec1);
+inline const boolInVec operator != (const boolInVec &vec0, const boolInVec &vec1);
+inline const boolInVec operator & (const boolInVec &vec0, const boolInVec &vec1);
+inline const boolInVec operator ^ (const boolInVec &vec0, const boolInVec &vec1);
+inline const boolInVec operator | (const boolInVec &vec0, const boolInVec &vec1);
+
+// select between vec0 and vec1 using boolInVec.
+// false selects vec0, true selects vec1
+//
+inline const boolInVec select(const boolInVec &vec0, const boolInVec &vec1, const boolInVec &select_vec1);
+
+} // namespace Vectormath
+
+//--------------------------------------------------------------------------------------------------
+// boolInVec implementation
+//
+
+#include "floatInVec.h"
+
+namespace Vectormath {
+
+// Wrap a raw __m128 mask vector without modifying it.
+inline boolInVec::boolInVec(__m128 vec)
+    : mData(vec)
+{
+}
+
+// Convert from floatInVec: the result is true when vec compares not-equal to 0.0f.
+inline boolInVec::boolInVec(const floatInVec &vec)
+{
+    mData = (vec != floatInVec(0.0f)).get128();
+}
+
+// Splat a scalar bool across all four word slots: all bits set for true, all clear for false.
+inline boolInVec::boolInVec(bool scalar)
+{
+    const __m128 zero = _mm_setzero_ps();
+    mData = scalar ? _mm_cmpeq_ps(zero, zero) : zero; // 0.0f == 0.0f gives all-ones slots, so no int-to-float pointer cast is needed
+}
+
+#ifdef _VECTORMATH_NO_SCALAR_CAST
+inline bool boolInVec::getAsBool() const
+#else
+inline boolInVec::operator bool() const
+#endif
+{
+    // Return the mask as a scalar bool. Every slot is all zeros (false) or all ones
+    // (true), so the sign bit of slot 0 carries the value; reading the first byte
+    // through a bool pointer would yield 0xFF, which is not a valid bool value.
+    return (_mm_movemask_ps(mData) & 1) != 0;
+}
+
+// Raw mask vector: every word slot is 0 (false) or -1 (true).
+inline __m128 boolInVec::get128() const
+{
+    const __m128 mask = mData;
+    return mask;
+}
+
+// Logical NOT, computed as (~mData) & all-ones. The all-ones constant must come from an
+// equality compare of zero with itself; a not-equal compare gives all zeros, i.e. always false.
+inline const boolInVec boolInVec::operator ! () const
+{
+    return boolInVec(_mm_andnot_ps(mData, _mm_cmpeq_ps(_mm_setzero_ps(), _mm_setzero_ps())));
+}
+
+// Copy the mask of vec into this object and return *this for chaining.
+inline boolInVec& boolInVec::operator = (const boolInVec &vec)
+{
+    const __m128 src = vec.get128();
+    mData = src;
+    return *this;
+}
+
+// In-place logical AND with vec; returns *this.
+inline boolInVec& boolInVec::operator &= (const boolInVec &vec)
+{
+    const boolInVec combined = *this & vec;
+    *this = combined;
+    return *this;
+}
+
+// In-place logical XOR with vec; returns *this.
+inline boolInVec& boolInVec::operator ^= (const boolInVec &vec)
+{
+    const boolInVec combined = *this ^ vec;
+    *this = combined;
+    return *this;
+}
+
+// In-place logical OR with vec; returns *this.
+inline boolInVec& boolInVec::operator |= (const boolInVec &vec)
+{
+    const boolInVec combined = *this | vec;
+    *this = combined;
+    return *this;
+}
+
+// Logical equality, computed bitwise as NOT(vec0 XOR vec1): the all-ones "true" mask is a NaN, so a float compare would report true == true as false.
+inline const boolInVec operator == (const boolInVec &vec0, const boolInVec &vec1)
+{
+    const __m128 allOnes = _mm_cmpeq_ps(_mm_setzero_ps(), _mm_setzero_ps());
+    return boolInVec(_mm_andnot_ps(_mm_xor_ps(vec0.get128(), vec1.get128()), allOnes));
+}
+
+// Logical inequality, computed bitwise as vec0 XOR vec1: the all-ones "true" mask is a NaN, so a float compare would report true != true as true.
+inline const boolInVec operator != (const boolInVec &vec0, const boolInVec &vec1)
+{
+    const __m128 differs = _mm_xor_ps(vec0.get128(), vec1.get128());
+    return boolInVec(differs);
+}
+    
+// Logical AND of two masks (bitwise AND of the raw vectors).
+inline const boolInVec operator & (const boolInVec &vec0, const boolInVec &vec1)
+{
+    const __m128 bits = _mm_and_ps(vec0.get128(), vec1.get128());
+    return boolInVec(bits);
+}
+
+// Logical OR of two masks (bitwise OR of the raw vectors).
+inline const boolInVec operator | (const boolInVec &vec0, const boolInVec &vec1)
+{
+    const __m128 bits = _mm_or_ps(vec0.get128(), vec1.get128());
+    return boolInVec(bits);
+}
+
+// Logical XOR of two masks (bitwise XOR of the raw vectors).
+inline const boolInVec operator ^ (const boolInVec &vec0, const boolInVec &vec1)
+{
+    const __m128 bits = _mm_xor_ps(vec0.get128(), vec1.get128());
+    return boolInVec(bits);
+}
+
+// Choose between two masks: false in select_vec1 selects vec0, true selects vec1.
+// The choice is delegated to vec_sel on the raw vectors.
+inline const boolInVec select(const boolInVec &vec0, const boolInVec &vec1, const boolInVec &select_vec1)
+{
+    return boolInVec(vec_sel(vec0.get128(), vec1.get128(), select_vec1.get128()));
+}
+ 
+} // namespace Vectormath
+
+#endif // _BOOLINVEC_H
diff --git a/Engine/lib/bullet/src/vectormath/sse/floatInVec.h b/Engine/lib/bullet/src/vectormath/sse/floatInVec.h
new file mode 100644
index 000000000..e8ac5959e
--- /dev/null
+++ b/Engine/lib/bullet/src/vectormath/sse/floatInVec.h
@@ -0,0 +1,340 @@
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _FLOATINVEC_H
+#define _FLOATINVEC_H
+
+#include <math.h>
+#include <xmmintrin.h>
+
+namespace Vectormath {
+
+class boolInVec;
+
+//--------------------------------------------------------------------------------------------------
+// floatInVec class
+//
+
+class floatInVec  // a float stored in an SSE vector
+{
+    private:
+        __m128 mData;  // documented as holding the value in every word slot
+
+    public:
+        inline floatInVec(__m128 vec);  // wraps a raw vector as-is
+
+        inline floatInVec() {}  // leaves mData uninitialized
+
+        // implicit conversion from boolInVec, matching the standard bool -> float
+        // conversion (selects between 0.0f and 1.0f)
+        inline floatInVec(const boolInVec &vec);
+
+        // construct from a slot of __m128
+        // (the chosen slot is replicated into all four word slots)
+        inline floatInVec(__m128 vec, int slot);
+
+        // explicit cast from float
+        // (the value is replicated into all four word slots)
+        explicit inline floatInVec(float scalar);
+
+#ifdef _VECTORMATH_NO_SCALAR_CAST
+        // explicit cast to float
+        // (selected when _VECTORMATH_NO_SCALAR_CAST is defined)
+        inline float getAsFloat() const;
+#else
+        // implicit cast to float
+        // (selected when _VECTORMATH_NO_SCALAR_CAST is not defined)
+        inline operator float() const;
+#endif
+
+        // get vector data
+        // float value is splatted across all word slots of vector
+        //
+        inline __m128 get128() const;
+
+        // operators
+        // (the compound assignments are implemented with the binary friend operators below)
+        inline const floatInVec operator ++ (int);
+        inline const floatInVec operator -- (int);
+        inline floatInVec& operator ++ ();
+        inline floatInVec& operator -- ();
+        inline const floatInVec operator - () const;
+        inline floatInVec& operator = (const floatInVec &vec);
+        inline floatInVec& operator *= (const floatInVec &vec);
+        inline floatInVec& operator /= (const floatInVec &vec);
+        inline floatInVec& operator += (const floatInVec &vec);
+        inline floatInVec& operator -= (const floatInVec &vec);
+
+        // friend functions
+        // (select must match the namespace-scope declaration, which takes the mask by const reference)
+        friend inline const floatInVec operator * (const floatInVec &vec0, const floatInVec &vec1);
+        friend inline const floatInVec operator / (const floatInVec &vec0, const floatInVec &vec1);
+        friend inline const floatInVec operator + (const floatInVec &vec0, const floatInVec &vec1);
+        friend inline const floatInVec operator - (const floatInVec &vec0, const floatInVec &vec1);
+        friend inline const floatInVec select(const floatInVec &vec0, const floatInVec &vec1, const boolInVec &select_vec1);
+};
+
+//--------------------------------------------------------------------------------------------------
+// floatInVec functions
+//
+
+// operators
+// 
+inline const floatInVec operator * (const floatInVec &vec0, const floatInVec &vec1);
+inline const floatInVec operator / (const floatInVec &vec0, const floatInVec &vec1);
+inline const floatInVec operator + (const floatInVec &vec0, const floatInVec &vec1);
+inline const floatInVec operator - (const floatInVec &vec0, const floatInVec &vec1);
+inline const boolInVec operator < (const floatInVec &vec0, const floatInVec &vec1);
+inline const boolInVec operator <= (const floatInVec &vec0, const floatInVec &vec1);
+inline const boolInVec operator > (const floatInVec &vec0, const floatInVec &vec1);
+inline const boolInVec operator >= (const floatInVec &vec0, const floatInVec &vec1);
+inline const boolInVec operator == (const floatInVec &vec0, const floatInVec &vec1);
+inline const boolInVec operator != (const floatInVec &vec0, const floatInVec &vec1);
+
+// select between vec0 and vec1 using boolInVec.
+// false selects vec0, true selects vec1
+//
+inline const floatInVec select(const floatInVec &vec0, const floatInVec &vec1, const boolInVec &select_vec1);
+
+} // namespace Vectormath
+
+//--------------------------------------------------------------------------------------------------
+// floatInVec implementation
+//
+
+#include "boolInVec.h"
+
+namespace Vectormath {
+
+// Wrap a raw __m128 vector without modifying it.
+inline floatInVec::floatInVec(__m128 vec)
+    : mData(vec)
+{
+}
+
+// Convert from boolInVec by selecting, via vec_sel, between 0.0f and 1.0f with the mask of vec.
+inline floatInVec::floatInVec(const boolInVec &vec)
+{
+    mData = vec_sel(_mm_setzero_ps(), _mm_set1_ps(1.0f), vec.get128());
+}
+
+// Splat word slot `slot` of vec across all four slots (slot is used unchecked as an index into SSEFloat::f).
+inline floatInVec::floatInVec(__m128 vec, int slot)
+{
+    SSEFloat lanes;
+    lanes.m128 = vec;
+    mData = _mm_set1_ps(lanes.f[slot]);
+}
+
+// Splat a scalar float across all four word slots.
+inline floatInVec::floatInVec(float scalar)
+    : mData(_mm_set1_ps(scalar))
+{
+}
+
+// Scalar value of the vector, read from word slot 0.
+#ifdef _VECTORMATH_NO_SCALAR_CAST
+inline float floatInVec::getAsFloat() const
+#else
+inline floatInVec::operator float() const
+#endif
+{
+    float slot0;
+    _mm_store_ss(&slot0, mData);
+    return slot0;
+}
+
+// Raw vector data held by this object.
+inline __m128 floatInVec::get128() const
+{
+    const __m128 data = mData;
+    return data;
+}
+
+// Post-increment: add 1.0f to every slot and return the value held before
+// the increment.
+inline const floatInVec floatInVec::operator ++ (int)
+{
+    const floatInVec previous(mData);
+    ++(*this);
+    return previous;
+}
+
+// Post-decrement: subtract 1.0f from every slot and return the value held
+// before the decrement.
+inline const floatInVec floatInVec::operator -- (int)
+{
+    const floatInVec previous(mData);
+    --(*this);
+    return previous;
+}
+
+// Pre-increment: add 1.0f to every slot and return *this.
+inline floatInVec& floatInVec::operator ++ ()
+{
+    const floatInVec one(_mm_set1_ps(1.0f));
+    *this += one;
+    return *this;
+}
+
+// Pre-decrement: subtract 1.0f from every slot and return *this.
+inline floatInVec& floatInVec::operator -- ()
+{
+    const floatInVec one(_mm_set1_ps(1.0f));
+    *this -= one;
+    return *this;
+}
+
+// Negation, computed as 0.0f - value in every slot.
+inline const floatInVec floatInVec::operator - () const
+{
+    const __m128 negated = _mm_sub_ps(_mm_setzero_ps(), mData);
+    return floatInVec(negated);
+}
+
+// Copy the vector data of vec into this object and return *this for chaining.
+inline floatInVec& floatInVec::operator = (const floatInVec &vec)
+{
+    const __m128 src = vec.get128();
+    mData = src;
+    return *this;
+}
+
+// In-place multiply by vec; returns *this.
+inline floatInVec& floatInVec::operator *= (const floatInVec &vec)
+{
+    const floatInVec product = *this * vec;
+    *this = product;
+    return *this;
+}
+
+// In-place divide by vec; returns *this.
+inline floatInVec& floatInVec::operator /= (const floatInVec &vec)
+{
+    const floatInVec quotient = *this / vec;
+    *this = quotient;
+    return *this;
+}
+
+// In-place add of vec; returns *this.
+inline floatInVec& floatInVec::operator += (const floatInVec &vec)
+{
+    const floatInVec sum = *this + vec;
+    *this = sum;
+    return *this;
+}
+
+// In-place subtract of vec; returns *this.
+inline floatInVec& floatInVec::operator -= (const floatInVec &vec)
+{
+    const floatInVec difference = *this - vec;
+    *this = difference;
+    return *this;
+}
+
+// Slot-wise product of the two vectors.
+inline const floatInVec operator * (const floatInVec &vec0, const floatInVec &vec1)
+{
+    const __m128 product = _mm_mul_ps(vec0.get128(), vec1.get128());
+    return floatInVec(product);
+}
+
+// Slot-wise quotient num / den.
+inline const floatInVec operator / (const floatInVec &num, const floatInVec &den)
+{
+    const __m128 quotient = _mm_div_ps(num.get128(), den.get128());
+    return floatInVec(quotient);
+}
+
+// Slot-wise sum of the two vectors.
+inline const floatInVec operator + (const floatInVec &vec0, const floatInVec &vec1)
+{
+    const __m128 sum = _mm_add_ps(vec0.get128(), vec1.get128());
+    return floatInVec(sum);
+}
+
+// Slot-wise difference vec0 - vec1.
+inline const floatInVec operator - (const floatInVec &vec0, const floatInVec &vec1)
+{
+    const __m128 difference = _mm_sub_ps(vec0.get128(), vec1.get128());
+    return floatInVec(difference);
+}
+
+// Less-than, evaluated as vec1 > vec0; the result is a boolInVec mask.
+inline const boolInVec operator < (const floatInVec &vec0, const floatInVec &vec1)
+{
+    const __m128 mask = _mm_cmpgt_ps(vec1.get128(), vec0.get128());
+    return boolInVec(mask);
+}
+
+// Less-or-equal, evaluated as vec1 >= vec0; the result is a boolInVec mask.
+inline const boolInVec operator <= (const floatInVec &vec0, const floatInVec &vec1)
+{
+    const __m128 mask = _mm_cmpge_ps(vec1.get128(), vec0.get128());
+    return boolInVec(mask);
+}
+
+// Greater-than comparison; the result is a boolInVec mask.
+inline const boolInVec operator > (const floatInVec &vec0, const floatInVec &vec1)
+{
+    const __m128 mask = _mm_cmpgt_ps(vec0.get128(), vec1.get128());
+    return boolInVec(mask);
+}
+
+// Greater-or-equal comparison; the result is a boolInVec mask.
+inline const boolInVec operator >= (const floatInVec &vec0, const floatInVec &vec1)
+{
+    const __m128 mask = _mm_cmpge_ps(vec0.get128(), vec1.get128());
+    return boolInVec(mask);
+}
+
+// Floating-point equality comparison; the result is a boolInVec mask.
+inline const boolInVec operator == (const floatInVec &vec0, const floatInVec &vec1)
+{
+    const __m128 mask = _mm_cmpeq_ps(vec0.get128(), vec1.get128());
+    return boolInVec(mask);
+}
+
+// Floating-point inequality comparison; the result is a boolInVec mask.
+inline const boolInVec operator != (const floatInVec &vec0, const floatInVec &vec1)
+{
+    const __m128 mask = _mm_cmpneq_ps(vec0.get128(), vec1.get128());
+    return boolInVec(mask);
+}
+    
+// Choose between two values: false in select_vec1 selects vec0, true selects vec1.
+// The choice is delegated to vec_sel on the raw vectors.
+inline const floatInVec select(const floatInVec &vec0, const floatInVec &vec1, const boolInVec &select_vec1)
+{
+    return floatInVec(vec_sel(vec0.get128(), vec1.get128(), select_vec1.get128()));
+}
+
+} // namespace Vectormath
+
+#endif // _FLOATINVEC_H
diff --git a/Engine/lib/bullet/src/vectormath/sse/mat_aos.h b/Engine/lib/bullet/src/vectormath/sse/mat_aos.h
new file mode 100644
index 000000000..a2c66cc5f
--- /dev/null
+++ b/Engine/lib/bullet/src/vectormath/sse/mat_aos.h
@@ -0,0 +1,2190 @@
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+
+#ifndef _VECTORMATH_MAT_AOS_CPP_H
+#define _VECTORMATH_MAT_AOS_CPP_H
+
+namespace Vectormath {
+namespace Aos {
+
+//-----------------------------------------------------------------------------
+// Constants
+// For shuffles, words are labeled [x,y,z,w] (first operand) and [a,b,c,d] (second operand);
+// e.g. _VECTORMATH_PERM_ZBWX picks z, b, w, x. NOTE(review): the Matrix3 code below only mentions these masks in commented-out vec_perm calls.
+#define _VECTORMATH_PERM_ZBWX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_B, _VECTORMATH_PERM_W, _VECTORMATH_PERM_X })
+#define _VECTORMATH_PERM_XCYX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_C, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X })
+#define _VECTORMATH_PERM_XYAB ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A, _VECTORMATH_PERM_B })
+#define _VECTORMATH_PERM_ZWCD ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_W, _VECTORMATH_PERM_C, _VECTORMATH_PERM_D })
+#define _VECTORMATH_PERM_XZBX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_B, _VECTORMATH_PERM_X })     
+#define _VECTORMATH_PERM_CXXX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_C, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X })
+#define _VECTORMATH_PERM_YAXX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X })
+#define _VECTORMATH_PERM_XAZC ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_A, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_C })
+#define _VECTORMATH_PERM_YXWZ ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X, _VECTORMATH_PERM_W, _VECTORMATH_PERM_Z })
+#define _VECTORMATH_PERM_YBWD ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_B, _VECTORMATH_PERM_W, _VECTORMATH_PERM_D })
+#define _VECTORMATH_PERM_XYCX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_C, _VECTORMATH_PERM_X })
+#define _VECTORMATH_PERM_YCXY ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_C, _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y })
+#define _VECTORMATH_PERM_CXYC ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_C, _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_C })
+#define _VECTORMATH_PERM_ZAYX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_A, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X })
+#define _VECTORMATH_PERM_BZXX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_B, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X })
+#define _VECTORMATH_PERM_XZYA ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A })
+#define _VECTORMATH_PERM_ZXXB ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X, _VECTORMATH_PERM_B })
+#define _VECTORMATH_PERM_YXXC ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X, _VECTORMATH_PERM_X, _VECTORMATH_PERM_C })
+#define _VECTORMATH_PERM_BBYX ((vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_B, _VECTORMATH_PERM_B, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_X })
+#define _VECTORMATH_PI_OVER_2 1.570796327f	// pi / 2 as a float literal
+
+//-----------------------------------------------------------------------------
+// Definitions
+
+// Copy constructor: duplicates the three columns of mat.
+VECTORMATH_FORCE_INLINE Matrix3::Matrix3( const Matrix3 & mat ) {
+    this->mCol0 = mat.mCol0;
+    this->mCol1 = mat.mCol1;
+    this->mCol2 = mat.mCol2;
+}
+
+VECTORMATH_FORCE_INLINE Matrix3::Matrix3( float scalar ) {
+    const Vector3 fill( scalar );   // the same Vector3( scalar ) is stored in all three columns
+    mCol0 = fill;
+    mCol1 = fill;
+    mCol2 = fill;
+}
+
+VECTORMATH_FORCE_INLINE Matrix3::Matrix3( const floatInVec &scalar ) {
+    const Vector3 fill( scalar );   // the same Vector3( scalar ) is stored in all three columns
+    mCol0 = fill;
+    mCol1 = fill;
+    mCol2 = fill;
+}
+
+VECTORMATH_FORCE_INLINE Matrix3::Matrix3( const Quat &unitQuat )
+{   // Fills mCol0..mCol2 from unitQuat; the per-line comments below give the lane contents.
+    __m128 xyzw_2, wwww, yzxw, zxyw, yzxw_2, zxyw_2;
+    __m128 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
+	VM_ATTRIBUTE_ALIGN16 unsigned int sx[4] = {0xffffffff, 0, 0, 0};	// all bits set in lane x only
+	VM_ATTRIBUTE_ALIGN16 unsigned int sz[4] = {0, 0, 0xffffffff, 0};	// all bits set in lane z only
+	__m128 select_x = _mm_load_ps((float *)sx);
+	__m128 select_z = _mm_load_ps((float *)sz);
+    // xyzw_2 is q + q (i.e. 2q); the remaining temporaries are lane permutations of q and of 2q.
+    xyzw_2 = _mm_add_ps( unitQuat.get128(), unitQuat.get128() );
+    wwww = _mm_shuffle_ps( unitQuat.get128(), unitQuat.get128(), _MM_SHUFFLE(3,3,3,3) );
+	yzxw = _mm_shuffle_ps( unitQuat.get128(), unitQuat.get128(), _MM_SHUFFLE(3,0,2,1) );
+	zxyw = _mm_shuffle_ps( unitQuat.get128(), unitQuat.get128(), _MM_SHUFFLE(3,1,0,2) );
+    yzxw_2 = _mm_shuffle_ps( xyzw_2, xyzw_2, _MM_SHUFFLE(3,0,2,1) );
+    zxyw_2 = _mm_shuffle_ps( xyzw_2, xyzw_2, _MM_SHUFFLE(3,1,0,2) );
+    // Three partial results; "2xy" means 2*x*y and "2w2" means 2*w*w in the comments that follow.
+    tmp0 = _mm_mul_ps( yzxw_2, wwww );									// tmp0 = 2yw, 2zw, 2xw, 2w2
+	tmp1 = _mm_sub_ps( _mm_set1_ps(1.0f), _mm_mul_ps(yzxw, yzxw_2) );	// tmp1 = 1 - 2y2, 1 - 2z2, 1 - 2x2, 1 - 2w2
+    tmp2 = _mm_mul_ps( yzxw, xyzw_2 );									// tmp2 = 2xy, 2yz, 2xz, 2w2
+    tmp0 = _mm_add_ps( _mm_mul_ps(zxyw, xyzw_2), tmp0 );				// tmp0 = 2yw + 2zx, 2zw + 2xy, 2xw + 2yz, 2w2 + 2w2
+    tmp1 = _mm_sub_ps( tmp1, _mm_mul_ps(zxyw, zxyw_2) );				// tmp1 = 1 - 2y2 - 2z2, 1 - 2z2 - 2x2, 1 - 2x2 - 2y2, 1 - 2w2 - 2w2
+    tmp2 = _mm_sub_ps( tmp2, _mm_mul_ps(zxyw_2, wwww) );				// tmp2 = 2xy - 2zw, 2yz - 2xw, 2xz - 2yw, 2w2 -2w2
+    // Each column takes one lane from each of tmp0, tmp1, tmp2 (presumably vec_sel picks its 2nd operand where the mask is set).
+    tmp3 = vec_sel( tmp0, tmp1, select_x );
+    tmp4 = vec_sel( tmp1, tmp2, select_x );
+    tmp5 = vec_sel( tmp2, tmp0, select_x );
+    mCol0 = Vector3( vec_sel( tmp3, tmp2, select_z ) );
+    mCol1 = Vector3( vec_sel( tmp4, tmp0, select_z ) );
+    mCol2 = Vector3( vec_sel( tmp5, tmp1, select_z ) );
+}
+
+// Assemble a matrix from three explicit column vectors.
+VECTORMATH_FORCE_INLINE Matrix3::Matrix3( const Vector3 &_col0, const Vector3 &_col1, const Vector3 &_col2 ) {
+    this->mCol0 = _col0;
+    this->mCol1 = _col1;
+    this->mCol2 = _col2;
+}
+
+// Overwrite column 0; returns *this so calls can be chained.
+VECTORMATH_FORCE_INLINE Matrix3 & Matrix3::setCol0( const Vector3 &_col0 ) {
+    this->mCol0 = _col0;
+    return *this;
+}
+
+// Overwrite column 1; returns *this so calls can be chained.
+VECTORMATH_FORCE_INLINE Matrix3 & Matrix3::setCol1( const Vector3 &_col1 ) {
+    this->mCol1 = _col1;
+    return *this;
+}
+
+// Overwrite column 2; returns *this so calls can be chained.
+VECTORMATH_FORCE_INLINE Matrix3 & Matrix3::setCol2( const Vector3 &_col2 ) {
+    this->mCol2 = _col2;
+    return *this;
+}
+
+// Store vec as column `col`, addressing the columns as an array based at mCol0 (no range check).
+VECTORMATH_FORCE_INLINE Matrix3 & Matrix3::setCol( int col, const Vector3 &vec ) {
+    (&mCol0)[col] = vec;
+    return *this;
+}
+
+// Write vec across row `row`: element i of vec goes into element `row` of column i.
+VECTORMATH_FORCE_INLINE Matrix3 & Matrix3::setRow( int row, const Vector3 &vec ) {
+    this->mCol0.setElem( row, vec.getElem( 0 ) );
+    this->mCol1.setElem( row, vec.getElem( 1 ) );
+    this->mCol2.setElem( row, vec.getElem( 2 ) );
+    return *this;
+}
+
+// Set one element in place: take column `col` through operator [] and overwrite its element `row`.
+VECTORMATH_FORCE_INLINE Matrix3 & Matrix3::setElem( int col, int row, float val ) {
+    this->operator []( col ).setElem( row, val );
+    return *this;
+}
+
+// Set one element through a copy: fetch column `col`, patch element `row`,
+// then store the column back with setCol.
+VECTORMATH_FORCE_INLINE Matrix3 & Matrix3::setElem( int col, int row, const floatInVec &val ) {
+    Vector3 patched = this->getCol( col );
+    patched.setElem( row, val );
+    this->setCol( col, patched );
+    return *this;
+}
+
+// Read element (`col`, `row`): the column is fetched by value, then indexed.
+VECTORMATH_FORCE_INLINE const floatInVec Matrix3::getElem( int col, int row ) const {
+    return getCol( col ).getElem( row );
+}
+
+// Column 0, returned by value.
+VECTORMATH_FORCE_INLINE const Vector3 Matrix3::getCol0( ) const {
+    return this->mCol0;
+}
+
+// Column 1, returned by value.
+VECTORMATH_FORCE_INLINE const Vector3 Matrix3::getCol1( ) const {
+    return this->mCol1;
+}
+
+// Column 2, returned by value.
+VECTORMATH_FORCE_INLINE const Vector3 Matrix3::getCol2( ) const {
+    return this->mCol2;
+}
+
+// Copy of column `col`, indexing from mCol0 (no range check).
+VECTORMATH_FORCE_INLINE const Vector3 Matrix3::getCol( int col ) const {
+    return (&mCol0)[col];
+}
+
+// Gather row `row` by reading element `row` from each column in turn.
+VECTORMATH_FORCE_INLINE const Vector3 Matrix3::getRow( int row ) const {
+    return Vector3( this->mCol0.getElem( row ), this->mCol1.getElem( row ), this->mCol2.getElem( row ) );
+}
+
+// Mutable reference to column `col`, indexing from mCol0 (no range check).
+VECTORMATH_FORCE_INLINE Vector3 & Matrix3::operator []( int col ) {
+    return (&mCol0)[col];
+}
+
+// Const overload: copy of column `col`, indexing from mCol0 (no range check).
+VECTORMATH_FORCE_INLINE const Vector3 Matrix3::operator []( int col ) const {
+    return (&mCol0)[col];
+}
+
+// Column-by-column assignment from mat; returns *this.
+VECTORMATH_FORCE_INLINE Matrix3 & Matrix3::operator =( const Matrix3 & mat ) {
+    this->mCol0 = mat.mCol0;
+    this->mCol1 = mat.mCol1;
+    this->mCol2 = mat.mCol2;
+    return *this;
+}
+
+VECTORMATH_FORCE_INLINE const Matrix3 transpose( const Matrix3 & mat )
+{   // Returns the transpose of mat, built from merges, shuffles and a y-lane blend.
+    __m128 tmp0, tmp1, res0, res1, res2;
+    tmp0 = vec_mergeh( mat.getCol0().get128(), mat.getCol2().get128() );
+    tmp1 = vec_mergel( mat.getCol0().get128(), mat.getCol2().get128() );
+    res0 = vec_mergeh( tmp0, mat.getCol1().get128() );
+    // SSE stand-in for: res1 = vec_perm( tmp0, mat.getCol1().get128(), _VECTORMATH_PERM_ZBWX );
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};	// all bits set in lane y only
+	res1 = _mm_shuffle_ps( tmp0, tmp0, _MM_SHUFFLE(0,3,2,2));	// lanes 2,2,3,0 of tmp0
+	res1 = vec_sel(res1, mat.getCol1().get128(), select_y);	// presumably swaps in lane y of col1 - vec_sel is not visible here
+    // SSE stand-in for: res2 = vec_perm( tmp1, mat.getCol1().get128(), _VECTORMATH_PERM_XCYX );
+	res2 = _mm_shuffle_ps( tmp1, tmp1, _MM_SHUFFLE(0,1,1,0));	// lanes 0,1,1,0 of tmp1
+	res2 = vec_sel(res2, vec_splat(mat.getCol1().get128(), 2), select_y);	// lane y source: element 2 of col1, presumably broadcast
+    return Matrix3(
+        Vector3( res0 ),
+        Vector3( res1 ),
+        Vector3( res2 )
+    );
+}
+
+VECTORMATH_FORCE_INLINE const Matrix3 inverse( const Matrix3 & mat )
+{   // 3x3 inverse from column cross products scaled by recipf4( dot ); no check for a zero determinant is made.
+    __m128 tmp0, tmp1, tmp2, tmp3, tmp4, dot, invdet, inv0, inv1, inv2;
+    tmp2 = _vmathVfCross( mat.getCol0().get128(), mat.getCol1().get128() );	// col0 x col1 (per helper name)
+    tmp0 = _vmathVfCross( mat.getCol1().get128(), mat.getCol2().get128() );	// col1 x col2
+    tmp1 = _vmathVfCross( mat.getCol2().get128(), mat.getCol0().get128() );	// col2 x col0
+    dot = _vmathVfDot3( tmp2, mat.getCol2().get128() );	// same operands as determinant( const Matrix3 & )
+    dot = vec_splat( dot, 0 );
+    invdet = recipf4( dot );	// presumably 1 / dot in every lane
+    tmp3 = vec_mergeh( tmp0, tmp2 );
+    tmp4 = vec_mergel( tmp0, tmp2 );
+    inv0 = vec_mergeh( tmp3, tmp1 );
+    // SSE stand-in for: inv1 = vec_perm( tmp3, tmp1, _VECTORMATH_PERM_ZBWX );
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};	// all bits set in lane y only
+	inv1 = _mm_shuffle_ps( tmp3, tmp3, _MM_SHUFFLE(0,3,2,2));	// lanes 2,2,3,0 of tmp3
+	inv1 = vec_sel(inv1, tmp1, select_y);
+    // SSE stand-in for: inv2 = vec_perm( tmp4, tmp1, _VECTORMATH_PERM_XCYX );
+	inv2 = _mm_shuffle_ps( tmp4, tmp4, _MM_SHUFFLE(0,1,1,0));	// lanes 0,1,1,0 of tmp4
+	inv2 = vec_sel(inv2, vec_splat(tmp1, 2), select_y);
+    inv0 = vec_mul( inv0, invdet );
+    inv1 = vec_mul( inv1, invdet );
+	inv2 = vec_mul( inv2, invdet );
+    return Matrix3(
+        Vector3( inv0 ),
+        Vector3( inv1 ),
+        Vector3( inv2 )
+    );
+}
+
+VECTORMATH_FORCE_INLINE const floatInVec determinant( const Matrix3 & mat ) {
+    const Vector3 c0xc1 = cross( mat.getCol0(), mat.getCol1() );   // col0 x col1
+    return dot( mat.getCol2(), c0xc1 );                            // scalar triple product
+}
+
+// Column-wise sum of *this and mat: each result column is the sum of the
+// corresponding operand columns.
+VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::operator +( const Matrix3 & mat ) const {
+    const Vector3 sum0 = mCol0 + mat.mCol0;
+    const Vector3 sum1 = mCol1 + mat.mCol1;
+    const Vector3 sum2 = mCol2 + mat.mCol2;
+    return Matrix3( sum0, sum1, sum2 );
+}
+
+// Column-wise difference *this - mat: each result column is the difference of
+// the corresponding operand columns.
+VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::operator -( const Matrix3 & mat ) const {
+    const Vector3 diff0 = mCol0 - mat.mCol0;
+    const Vector3 diff1 = mCol1 - mat.mCol1;
+    const Vector3 diff2 = mCol2 - mat.mCol2;
+    return Matrix3( diff0, diff1, diff2 );
+}
+
+// In-place sum, implemented as operator + followed by assignment.
+VECTORMATH_FORCE_INLINE Matrix3 & Matrix3::operator +=( const Matrix3 & mat ) {
+    const Matrix3 summed = *this + mat;
+    return ( *this = summed );
+}
+
+// In-place difference, implemented as operator - followed by assignment.
+VECTORMATH_FORCE_INLINE Matrix3 & Matrix3::operator -=( const Matrix3 & mat ) {
+    const Matrix3 reduced = *this - mat;
+    return ( *this = reduced );
+}
+
+// Unary minus: a new matrix whose columns are the negated columns of *this
+// (*this itself is not modified).
+VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::operator -( ) const {
+    const Vector3 neg0 = -mCol0;
+    const Vector3 neg1 = -mCol1;
+    const Vector3 neg2 = -mCol2;
+    return Matrix3( neg0, neg1, neg2 );
+}
+
+// Applies the Vector3 overload of absPerElem to each column of mat and
+// returns the three results as a new matrix.
+VECTORMATH_FORCE_INLINE const Matrix3 absPerElem( const Matrix3 & mat ) {
+    const Vector3 abs0 = absPerElem( mat.getCol0() );
+    const Vector3 abs1 = absPerElem( mat.getCol1() );
+    const Vector3 abs2 = absPerElem( mat.getCol2() );
+    return Matrix3( abs0, abs1, abs2 );
+}
+
+VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::operator *( float scalar ) const {
+    const floatInVec factor( scalar );   // promote, then reuse the floatInVec overload
+    return *this * factor;
+}
+
+// Scalar product: every column of *this is multiplied by `scalar` and the
+// three products form the returned matrix.
+VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::operator *( const floatInVec &scalar ) const {
+    const Vector3 scaled0 = mCol0 * scalar;
+    const Vector3 scaled1 = mCol1 * scalar;
+    const Vector3 scaled2 = mCol2 * scalar;
+    return Matrix3( scaled0, scaled1, scaled2 );
+}
+
+VECTORMATH_FORCE_INLINE Matrix3 & Matrix3::operator *=( float scalar ) {
+    const floatInVec factor( scalar );   // promote, then reuse the floatInVec overload
+    return *this *= factor;
+}
+
+// In-place scalar product, implemented as operator * followed by assignment.
+VECTORMATH_FORCE_INLINE Matrix3 & Matrix3::operator *=( const floatInVec &scalar ) {
+    const Matrix3 scaled = *this * scalar;
+    return ( *this = scaled );
+}
+
+VECTORMATH_FORCE_INLINE const Matrix3 operator *( float scalar, const Matrix3 & mat ) {
+    const floatInVec factor( scalar );   // promote, then reuse the floatInVec overload
+    return factor * mat;
+}
+
+// scalar * mat, forwarded to the member form mat * scalar.
+VECTORMATH_FORCE_INLINE const Matrix3 operator *( const floatInVec &scalar, const Matrix3 & mat ) {
+    return mat * scalar;
+}
+
+// Matrix * vector as a sum of columns weighted by the elements of vec:
+// mCol0 * vec[0] + mCol1 * vec[1] + mCol2 * vec[2] (assuming vec_madd(a,b,c) is a*b + c).
+VECTORMATH_FORCE_INLINE const Vector3 Matrix3::operator *( const Vector3 &vec ) const {
+    const __m128 v = vec.get128();
+    const __m128 bx = vec_splat( v, 0 );
+    const __m128 by = vec_splat( v, 1 );
+    const __m128 bz = vec_splat( v, 2 );
+    __m128 acc = vec_mul( mCol0.get128(), bx );
+    acc = vec_madd( mCol1.get128(), by, acc );
+    acc = vec_madd( mCol2.get128(), bz, acc );
+    return Vector3( acc );
+}
+
+// Matrix product: column i of the result is *this applied to column i of mat
+// (uses the Matrix3 * Vector3 operator three times).
+VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::operator *( const Matrix3 & mat ) const {
+    const Vector3 prod0 = *this * mat.mCol0;
+    const Vector3 prod1 = *this * mat.mCol1;
+    const Vector3 prod2 = *this * mat.mCol2;
+    return Matrix3( prod0, prod1, prod2 );
+}
+
+// In-place matrix product (*this = *this * mat), returning *this.
+VECTORMATH_FORCE_INLINE Matrix3 & Matrix3::operator *=( const Matrix3 & mat ) {
+    const Matrix3 product = *this * mat;
+    return ( *this = product );
+}
+
+// Applies the Vector3 overload of mulPerElem to matching columns of mat0 and
+// mat1 and returns the three results as a new matrix.
+VECTORMATH_FORCE_INLINE const Matrix3 mulPerElem( const Matrix3 & mat0, const Matrix3 & mat1 ) {
+    const Vector3 prod0 = mulPerElem( mat0.getCol0(), mat1.getCol0() );
+    const Vector3 prod1 = mulPerElem( mat0.getCol1(), mat1.getCol1() );
+    const Vector3 prod2 = mulPerElem( mat0.getCol2(), mat1.getCol2() );
+    return Matrix3( prod0, prod1, prod2 );
+}
+
+// Matrix whose columns are Vector3::xAxis(), Vector3::yAxis() and
+// Vector3::zAxis(), in that order.
+VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::identity( ) {
+    const Vector3 ex = Vector3::xAxis( );
+    const Vector3 ey = Vector3::yAxis( );
+    const Vector3 ez = Vector3::zAxis( );
+    return Matrix3( ex, ey, ez );
+}
+
+VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::rotationX( float radians ) {
+    const floatInVec angle( radians );   // promote, then defer to the floatInVec overload
+    return rotationX( angle );
+}
+
+// Rotation about the x axis by `radians`: column 0 is Vector3::xAxis(); columns 1 and 2
+// are blended from the s / c registers that sincosf4 writes (presumably sine and cosine).
+VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::rotationX( const floatInVec &radians ) {
+    VM_ATTRIBUTE_ALIGN16 unsigned int maskY[4] = {0, 0xffffffff, 0, 0};
+    VM_ATTRIBUTE_ALIGN16 unsigned int maskZ[4] = {0, 0, 0xffffffff, 0};
+    __m128 sinv, cosv;
+    sincosf4( radians.get128(), &sinv, &cosv );
+    const __m128 zeros = _mm_setzero_ps();
+    // column 1: c under the y mask, s under the z mask, zero elsewhere
+    __m128 col1 = vec_sel( zeros, cosv, maskY );
+    col1 = vec_sel( col1, sinv, maskZ );
+    // column 2: -s under the y mask, c under the z mask, zero elsewhere
+    __m128 col2 = vec_sel( zeros, negatef4(sinv), maskY );
+    col2 = vec_sel( col2, cosv, maskZ );
+    return Matrix3( Vector3::xAxis( ),
+                    Vector3( col1 ),
+                    Vector3( col2 ) );
+}
+
+VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::rotationY( float radians ) {
+    const floatInVec angle( radians );   // promote, then defer to the floatInVec overload
+    return rotationY( angle );
+}
+
+// Rotation about the y axis by `radians`: column 1 is Vector3::yAxis(); columns 0 and 2
+// are blended from the s / c registers that sincosf4 writes (presumably sine and cosine).
+VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::rotationY( const floatInVec &radians ) {
+    VM_ATTRIBUTE_ALIGN16 unsigned int maskX[4] = {0xffffffff, 0, 0, 0};
+    VM_ATTRIBUTE_ALIGN16 unsigned int maskZ[4] = {0, 0, 0xffffffff, 0};
+    __m128 sinv, cosv;
+    sincosf4( radians.get128(), &sinv, &cosv );
+    const __m128 zeros = _mm_setzero_ps();
+    // column 0: c under the x mask, -s under the z mask, zero elsewhere
+    __m128 col0 = vec_sel( zeros, cosv, maskX );
+    col0 = vec_sel( col0, negatef4(sinv), maskZ );
+    // column 2: s under the x mask, c under the z mask, zero elsewhere
+    __m128 col2 = vec_sel( zeros, sinv, maskX );
+    col2 = vec_sel( col2, cosv, maskZ );
+    return Matrix3( Vector3( col0 ),
+                    Vector3::yAxis( ),
+                    Vector3( col2 ) );
+}
+
+VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::rotationZ( float radians ) {
+    const floatInVec angle( radians );   // promote, then defer to the floatInVec overload
+    return rotationZ( angle );
+}
+
+// Rotation about the z axis by `radians`: column 2 is Vector3::zAxis(); columns 0 and 1
+// are blended from the s / c registers that sincosf4 writes (presumably sine and cosine).
+VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::rotationZ( const floatInVec &radians ) {
+    VM_ATTRIBUTE_ALIGN16 unsigned int maskX[4] = {0xffffffff, 0, 0, 0};
+    VM_ATTRIBUTE_ALIGN16 unsigned int maskY[4] = {0, 0xffffffff, 0, 0};
+    __m128 sinv, cosv;
+    sincosf4( radians.get128(), &sinv, &cosv );
+    const __m128 zeros = _mm_setzero_ps();
+    // column 0: c under the x mask, s under the y mask, zero elsewhere
+    __m128 col0 = vec_sel( zeros, cosv, maskX );
+    col0 = vec_sel( col0, sinv, maskY );
+    // column 1: -s under the x mask, c under the y mask, zero elsewhere
+    __m128 col1 = vec_sel( zeros, negatef4(sinv), maskX );
+    col1 = vec_sel( col1, cosv, maskY );
+    return Matrix3( Vector3( col0 ),
+                    Vector3( col1 ),
+                    Vector3::zAxis( ) );
+}
+
+VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::rotationZYX( const Vector3 &radiansXYZ )
+{   // Builds the three columns from the s / c registers of radiansXYZ (x, y, z angles per the parameter name).
+    __m128 angles, s, negS, c, X0, X1, Y0, Y1, Z0, Z1, tmp;
+    angles = Vector4( radiansXYZ, 0.0f ).get128();	// the three angles plus a 0.0f fourth argument
+    sincosf4( angles, &s, &c );	// presumably s = sin( angles ), c = cos( angles ) per lane
+    negS = negatef4( s );
+    Z0 = vec_mergel( c, s );
+    Z1 = vec_mergel( negS, c );
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_xyz[4] = {0xffffffff, 0xffffffff, 0xffffffff, 0};	// bits set in lanes x, y, z
+    Z1 = vec_and( Z1, _mm_load_ps( (float *)select_xyz ) );	// masks lane w of Z1 (assuming vec_and is a bitwise AND)
+	Y0 = _mm_shuffle_ps( c, negS, _MM_SHUFFLE(0,1,1,1) );	// ( c.y, c.y, negS.y, negS.x )
+	Y1 = _mm_shuffle_ps( s, c, _MM_SHUFFLE(0,1,1,1) );	// ( s.y, s.y, c.y, c.x )
+    X0 = vec_splat( s, 0 );	// element 0 of s, presumably broadcast to all lanes
+    X1 = vec_splat( c, 0 );	// element 0 of c, presumably broadcast to all lanes
+    tmp = vec_mul( Z0, Y1 );
+    return Matrix3(
+        Vector3( vec_mul( Z0, Y0 ) ),
+        Vector3( vec_madd( Z1, X1, vec_mul( tmp, X0 ) ) ),
+        Vector3( vec_nmsub( Z1, X0, vec_mul( tmp, X1 ) ) )
+    );
+}
+
+VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::rotation( float radians, const Vector3 &unitVec ) {
+    const floatInVec angle( radians );   // promote, then defer to the floatInVec overload
+    return rotation( angle, unitVec );
+}
+
+VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::rotation( const floatInVec &radians, const Vector3 &unitVec )
+{   // Axis-angle rotation: column i is vec_madd( axis * axis[i], 1 - c, tmp_i ), presumably a*b + c.
+    __m128 axis, s, c, oneMinusC, axisS, negAxisS, xxxx, yyyy, zzzz, tmp0, tmp1, tmp2;
+    axis = unitVec.get128();
+    sincosf4( radians.get128(), &s, &c );	// presumably s = sin( radians ), c = cos( radians )
+    xxxx = vec_splat( axis, 0 );
+    yyyy = vec_splat( axis, 1 );
+    zzzz = vec_splat( axis, 2 );
+    oneMinusC = vec_sub( _mm_set1_ps(1.0f), c );
+    axisS = vec_mul( axis, s );
+    negAxisS = negatef4( axisS );
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_x[4] = {0xffffffff, 0, 0, 0};	// all bits set in lane x only
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};	// all bits set in lane y only
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_z[4] = {0, 0, 0xffffffff, 0};	// all bits set in lane z only
+    // SSE stand-in for: tmp0 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_XZBX );
+	tmp0 = _mm_shuffle_ps( axisS, axisS, _MM_SHUFFLE(0,0,2,0) );	// lanes 0,2,0,0 of axisS
+	tmp0 = vec_sel(tmp0, vec_splat(negAxisS, 1), select_z);
+    // SSE stand-in for: tmp1 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_CXXX );
+	tmp1 = vec_sel( vec_splat(axisS, 0), vec_splat(negAxisS, 2), select_x );
+    // SSE stand-in for: tmp2 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_YAXX );
+	tmp2 = _mm_shuffle_ps( axisS, axisS, _MM_SHUFFLE(0,0,0,1) );	// lanes 1,0,0,0 of axisS
+	tmp2 = vec_sel(tmp2, vec_splat(negAxisS, 0), select_y);
+    tmp0 = vec_sel( tmp0, c, select_x );	// c goes under the x mask of tmp0,
+    tmp1 = vec_sel( tmp1, c, select_y );	// under the y mask of tmp1,
+    tmp2 = vec_sel( tmp2, c, select_z );	// and under the z mask of tmp2
+    return Matrix3(
+        Vector3( vec_madd( vec_mul( axis, xxxx ), oneMinusC, tmp0 ) ),
+        Vector3( vec_madd( vec_mul( axis, yyyy ), oneMinusC, tmp1 ) ),
+        Vector3( vec_madd( vec_mul( axis, zzzz ), oneMinusC, tmp2 ) )
+    );
+}
+
+// Named-constructor form of Matrix3( const Quat & ).
+VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::rotation( const Quat &unitQuat ) {
+    return Matrix3( unitQuat );
+}
+
+// Scaling matrix: column i is a zero register with scaleVec blended in under the
+// mask for lane i (presumably vec_sel takes its second operand where the mask is set).
+VECTORMATH_FORCE_INLINE const Matrix3 Matrix3::scale( const Vector3 &scaleVec ) {
+    VM_ATTRIBUTE_ALIGN16 unsigned int maskX[4] = {0xffffffff, 0, 0, 0};
+    VM_ATTRIBUTE_ALIGN16 unsigned int maskY[4] = {0, 0xffffffff, 0, 0};
+    VM_ATTRIBUTE_ALIGN16 unsigned int maskZ[4] = {0, 0, 0xffffffff, 0};
+    const __m128 zeros = _mm_setzero_ps();
+    const __m128 factors = scaleVec.get128();
+    return Matrix3( Vector3( vec_sel( zeros, factors, maskX ) ),
+                    Vector3( vec_sel( zeros, factors, maskY ) ),
+                    Vector3( vec_sel( zeros, factors, maskZ ) ) );
+}
+
+// Scales whole columns: column 0 by scaleVec.getX(), column 1 by getY(),
+// column 2 by getZ().
+VECTORMATH_FORCE_INLINE const Matrix3 appendScale( const Matrix3 & mat, const Vector3 &scaleVec ) {
+    const Vector3 scaled0 = mat.getCol0() * scaleVec.getX( );
+    const Vector3 scaled1 = mat.getCol1() * scaleVec.getY( );
+    const Vector3 scaled2 = mat.getCol2() * scaleVec.getZ( );
+    return Matrix3( scaled0, scaled1, scaled2 );
+}
+
+// Scales within columns: every column of mat is combined with scaleVec through
+// the Vector3 overload of mulPerElem.
+VECTORMATH_FORCE_INLINE const Matrix3 prependScale( const Vector3 &scaleVec, const Matrix3 & mat ) {
+    const Vector3 scaled0 = mulPerElem( mat.getCol0(), scaleVec );
+    const Vector3 scaled1 = mulPerElem( mat.getCol1(), scaleVec );
+    const Vector3 scaled2 = mulPerElem( mat.getCol2(), scaleVec );
+    return Matrix3( scaled0, scaled1, scaled2 );
+}
+
+// Column-wise select: forwards each pair of columns together with the bool
+// flag to the Vector3 overload of select.
+VECTORMATH_FORCE_INLINE const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, bool select1 ) {
+    const Vector3 pick0 = select( mat0.getCol0(), mat1.getCol0(), select1 );
+    const Vector3 pick1 = select( mat0.getCol1(), mat1.getCol1(), select1 );
+    const Vector3 pick2 = select( mat0.getCol2(), mat1.getCol2(), select1 );
+    return Matrix3( pick0, pick1, pick2 );
+}
+
+// Column-wise select: forwards each pair of columns together with the boolInVec
+// flag to the Vector3 overload of select.
+VECTORMATH_FORCE_INLINE const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, const boolInVec &select1 ) {
+    const Vector3 pick0 = select( mat0.getCol0(), mat1.getCol0(), select1 );
+    const Vector3 pick1 = select( mat0.getCol1(), mat1.getCol1(), select1 );
+    const Vector3 pick2 = select( mat0.getCol2(), mat1.getCol2(), select1 );
+    return Matrix3( pick0, pick1, pick2 );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+// Debug helper: prints rows 0, 1 and 2 of mat through the Vector3 print overload.
+VECTORMATH_FORCE_INLINE void print( const Matrix3 & mat ) {
+    for ( int row = 0; row < 3; ++row ) {
+        print( mat.getRow( row ) );
+    }
+}
+
+// Debug helper: writes "<name>:" on its own line, then the matrix via print( mat ).
+VECTORMATH_FORCE_INLINE void print( const Matrix3 & mat, const char * name ) {
+    printf("%s:\n", name);
+    print( mat );
+}
+
+#endif
+
+// Copy constructor: duplicates the four columns of mat.
+VECTORMATH_FORCE_INLINE Matrix4::Matrix4( const Matrix4 & mat ) {
+    this->mCol0 = mat.mCol0;
+    this->mCol1 = mat.mCol1;
+    this->mCol2 = mat.mCol2;
+    this->mCol3 = mat.mCol3;
+}
+
+VECTORMATH_FORCE_INLINE Matrix4::Matrix4( float scalar ) {
+    const Vector4 fill( scalar );   // the same Vector4( scalar ) is stored in all four columns
+    mCol0 = fill;
+    mCol1 = fill;
+    mCol2 = fill;
+    mCol3 = fill;
+}
+
+VECTORMATH_FORCE_INLINE Matrix4::Matrix4( const floatInVec &scalar ) {
+    const Vector4 fill( scalar );   // the same Vector4( scalar ) is stored in all four columns
+    mCol0 = fill;
+    mCol1 = fill;
+    mCol2 = fill;
+    mCol3 = fill;
+}
+
+// Widen a Transform3: columns 0..2 are paired with 0.0f, column 3 with 1.0f, as the second Vector4 argument.
+VECTORMATH_FORCE_INLINE Matrix4::Matrix4( const Transform3 & mat ) {
+    this->mCol0 = Vector4( mat.getCol0(), 0.0f );
+    this->mCol1 = Vector4( mat.getCol1(), 0.0f );
+    this->mCol2 = Vector4( mat.getCol2(), 0.0f );
+    this->mCol3 = Vector4( mat.getCol3(), 1.0f );
+}
+
+// Assemble a matrix from four explicit column vectors.
+VECTORMATH_FORCE_INLINE Matrix4::Matrix4( const Vector4 &_col0, const Vector4 &_col1, const Vector4 &_col2, const Vector4 &_col3 ) {
+    this->mCol0 = _col0;
+    this->mCol1 = _col1;
+    this->mCol2 = _col2;
+    this->mCol3 = _col3;
+}
+
+// From a 3x3 part and a translation: mat's columns are paired with 0.0f, translateVec with 1.0f.
+VECTORMATH_FORCE_INLINE Matrix4::Matrix4( const Matrix3 & mat, const Vector3 &translateVec ) {
+    this->mCol0 = Vector4( mat.getCol0(), 0.0f );
+    this->mCol1 = Vector4( mat.getCol1(), 0.0f );
+    this->mCol2 = Vector4( mat.getCol2(), 0.0f );
+    this->mCol3 = Vector4( translateVec, 1.0f );
+}
+
+// From a unit quaternion and a translation: the quaternion is first converted with
+// Matrix3( unitQuat ); its columns are paired with 0.0f and translateVec with 1.0f.
+VECTORMATH_FORCE_INLINE Matrix4::Matrix4( const Quat &unitQuat, const Vector3 &translateVec ) {
+    const Matrix3 rot( unitQuat );
+    mCol0 = Vector4( rot.getCol0(), 0.0f );
+    mCol1 = Vector4( rot.getCol1(), 0.0f );
+    mCol2 = Vector4( rot.getCol2(), 0.0f );
+    mCol3 = Vector4( translateVec, 1.0f );
+}
+
+// Overwrite column 0; returns *this so calls can be chained.
+VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::setCol0( const Vector4 &_col0 ) {
+    this->mCol0 = _col0;
+    return *this;
+}
+
+// Overwrite column 1; returns *this so calls can be chained.
+VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::setCol1( const Vector4 &_col1 ) {
+    this->mCol1 = _col1;
+    return *this;
+}
+
+// Overwrite column 2; returns *this so calls can be chained.
+VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::setCol2( const Vector4 &_col2 ) {
+    this->mCol2 = _col2;
+    return *this;
+}
+
+// Overwrite column 3; returns *this so calls can be chained.
+VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::setCol3( const Vector4 &_col3 ) {
+    this->mCol3 = _col3;
+    return *this;
+}
+
+// Store vec as column `col`, addressing the columns as an array based at mCol0 (no range check).
+VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::setCol( int col, const Vector4 &vec ) {
+    (&mCol0)[col] = vec;
+    return *this;
+}
+
+// Write vec across row `row`: element i of vec goes into element `row` of column i.
+VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::setRow( int row, const Vector4 &vec ) {
+    this->mCol0.setElem( row, vec.getElem( 0 ) );
+    this->mCol1.setElem( row, vec.getElem( 1 ) );
+    this->mCol2.setElem( row, vec.getElem( 2 ) );
+    this->mCol3.setElem( row, vec.getElem( 3 ) );
+    return *this;
+}
+
+// Set one element in place: take column `col` through operator [] and overwrite its element `row`.
+VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::setElem( int col, int row, float val ) {
+    this->operator []( col ).setElem( row, val );
+    return *this;
+}
+
+// Set one element through a copy: fetch column `col`, patch element `row`,
+// then store the column back with setCol.
+VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::setElem( int col, int row, const floatInVec &val ) {
+    Vector4 patched = this->getCol( col );
+    patched.setElem( row, val );
+    this->setCol( col, patched );
+    return *this;
+}
+
+// Read element (`col`, `row`): the column is fetched by value, then indexed.
+VECTORMATH_FORCE_INLINE const floatInVec Matrix4::getElem( int col, int row ) const {
+    return getCol( col ).getElem( row );
+}
+
+// Column 0, returned by value.
+VECTORMATH_FORCE_INLINE const Vector4 Matrix4::getCol0( ) const {
+    return this->mCol0;
+}
+
+// Column 1, returned by value.
+VECTORMATH_FORCE_INLINE const Vector4 Matrix4::getCol1( ) const {
+    return this->mCol1;
+}
+
+// Column 2, returned by value.
+VECTORMATH_FORCE_INLINE const Vector4 Matrix4::getCol2( ) const {
+    return this->mCol2;
+}
+
+// Column 3, returned by value.
+VECTORMATH_FORCE_INLINE const Vector4 Matrix4::getCol3( ) const {
+    return this->mCol3;
+}
+
+// Copy of column `col`, indexing from mCol0 (no range check).
+VECTORMATH_FORCE_INLINE const Vector4 Matrix4::getCol( int col ) const {
+    return (&mCol0)[col];
+}
+
+// Gather row `row` by reading element `row` from each column in turn.
+VECTORMATH_FORCE_INLINE const Vector4 Matrix4::getRow( int row ) const {
+    return Vector4( this->mCol0.getElem( row ), this->mCol1.getElem( row ), this->mCol2.getElem( row ), this->mCol3.getElem( row ) );
+}
+
+// Mutable reference to column `col`, indexing from mCol0 (no range check).
+VECTORMATH_FORCE_INLINE Vector4 & Matrix4::operator []( int col ) {
+    return (&mCol0)[col];
+}
+
+// Const overload: copy of column `col`, indexing from mCol0 (no range check).
+VECTORMATH_FORCE_INLINE const Vector4 Matrix4::operator []( int col ) const {
+    return (&mCol0)[col];
+}
+
+// Column-by-column assignment from mat; returns *this.
+VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::operator =( const Matrix4 & mat ) {
+    this->mCol0 = mat.mCol0;
+    this->mCol1 = mat.mCol1;
+    this->mCol2 = mat.mCol2;
+    this->mCol3 = mat.mCol3;
+    return *this;
+}
+
+// Transpose through two rounds of vec_mergeh / vec_mergel: columns 0/2 and 1/3 are
+// merged first, then the intermediate pairs are merged again into the result columns.
+VECTORMATH_FORCE_INLINE const Matrix4 transpose( const Matrix4 & mat ) {
+    const __m128 c0 = mat.getCol0().get128();
+    const __m128 c1 = mat.getCol1().get128();
+    const __m128 c2 = mat.getCol2().get128();
+    const __m128 c3 = mat.getCol3().get128();
+    const __m128 mh02 = vec_mergeh( c0, c2 );
+    const __m128 mh13 = vec_mergeh( c1, c3 );
+    const __m128 ml02 = vec_mergel( c0, c2 );
+    const __m128 ml13 = vec_mergel( c1, c3 );
+    return Matrix4(
+        Vector4( vec_mergeh( mh02, mh13 ) ),
+        Vector4( vec_mergel( mh02, mh13 ) ),
+        Vector4( vec_mergeh( ml02, ml13 ) ),
+        Vector4( vec_mergel( ml02, ml13 ) )
+    );
+}
+
+// TODO: Tidy. Constants loaded by inverse( const Matrix4 & ): two XOR sign masks and a float row.
+static VM_ATTRIBUTE_ALIGN16 const unsigned int _vmathPNPN[4] = {0x00000000, 0x80000000, 0x00000000, 0x80000000};	// sign bit set in lanes 1 and 3
+static VM_ATTRIBUTE_ALIGN16 const unsigned int _vmathNPNP[4] = {0x80000000, 0x00000000, 0x80000000, 0x00000000};	// sign bit set in lanes 0 and 2
+static VM_ATTRIBUTE_ALIGN16 const float _vmathZERONE[4] = {1.0f, 0.0f, 0.0f, 1.0f};	// inverse() reads only element 0 (1.0f) via _mm_load_ss
+
+VECTORMATH_FORCE_INLINE const Matrix4 inverse( const Matrix4 & mat )
+{	// Full 4x4 inverse: builds four rows of minterms, scales them by 1/Det and transposes. No zero-determinant check is made.
+	__m128 Va,Vb,Vc;
+	__m128 r1,r2,r3,tt,tt2;
+	__m128 sum,Det,RDet;
+	__m128 trns0,trns1,trns2,trns3;
+	// _L1.._L4 hold columns 0..3 of mat; they are reused for the result columns at the end.
+	__m128 _L1 = mat.getCol0().get128();
+	__m128 _L2 = mat.getCol1().get128();
+	__m128 _L3 = mat.getCol2().get128();
+	__m128 _L4 = mat.getCol3().get128();
+	// Calculating the minterms for the first line.
+
+	// _mm_ror_ps is just a macro using _mm_shuffle_ps().
+	tt = _L4; tt2 = _mm_ror_ps(_L3,1); 
+	Vc = _mm_mul_ps(tt2,_mm_ror_ps(tt,0));					// V3' dot V4
+	Va = _mm_mul_ps(tt2,_mm_ror_ps(tt,2));					// V3' dot V4"
+	Vb = _mm_mul_ps(tt2,_mm_ror_ps(tt,3));					// V3' dot V4^
+
+	r1 = _mm_sub_ps(_mm_ror_ps(Va,1),_mm_ror_ps(Vc,2));		// V3" dot V4^ - V3^ dot V4"
+	r2 = _mm_sub_ps(_mm_ror_ps(Vb,2),_mm_ror_ps(Vb,0));		// V3^ dot V4' - V3' dot V4^
+	r3 = _mm_sub_ps(_mm_ror_ps(Va,0),_mm_ror_ps(Vc,1));		// V3' dot V4" - V3" dot V4'
+
+	tt = _L2;
+	Va = _mm_ror_ps(tt,1);		sum = _mm_mul_ps(Va,r1);
+	Vb = _mm_ror_ps(tt,2);		sum = _mm_add_ps(sum,_mm_mul_ps(Vb,r2));
+	Vc = _mm_ror_ps(tt,3);		sum = _mm_add_ps(sum,_mm_mul_ps(Vc,r3));
+
+	// Calculating the determinant.
+	Det = _mm_mul_ps(sum,_L1);
+	Det = _mm_add_ps(Det,_mm_movehl_ps(Det,Det));
+	// XOR with these masks flips the sign bit of alternating lanes of a minterm row.
+	const __m128 Sign_PNPN = _mm_load_ps((float *)_vmathPNPN);
+	const __m128 Sign_NPNP = _mm_load_ps((float *)_vmathNPNP);
+
+	__m128 mtL1 = _mm_xor_ps(sum,Sign_PNPN);
+
+	// Calculating the minterms of the second line (using previous results).
+	tt = _mm_ror_ps(_L1,1);		sum = _mm_mul_ps(tt,r1);
+	tt = _mm_ror_ps(tt,1);		sum = _mm_add_ps(sum,_mm_mul_ps(tt,r2));
+	tt = _mm_ror_ps(tt,1);		sum = _mm_add_ps(sum,_mm_mul_ps(tt,r3));
+	__m128 mtL2 = _mm_xor_ps(sum,Sign_NPNP);
+
+	// Final step of the determinant: lane 0 becomes lane 0 minus lane 1 (no actual test happens here).
+	Det = _mm_sub_ss(Det,_mm_shuffle_ps(Det,Det,1));
+
+	// Calculating the minterms of the third line.
+	tt = _mm_ror_ps(_L1,1);
+	Va = _mm_mul_ps(tt,Vb);									// V1' dot V2"
+	Vb = _mm_mul_ps(tt,Vc);									// V1' dot V2^
+	Vc = _mm_mul_ps(tt,_L2);								// V1' dot V2
+
+	r1 = _mm_sub_ps(_mm_ror_ps(Va,1),_mm_ror_ps(Vc,2));		// V1" dot V2^ - V1^ dot V2"
+	r2 = _mm_sub_ps(_mm_ror_ps(Vb,2),_mm_ror_ps(Vb,0));		// V1^ dot V2' - V1' dot V2^
+	r3 = _mm_sub_ps(_mm_ror_ps(Va,0),_mm_ror_ps(Vc,1));		// V1' dot V2" - V1" dot V2'
+
+	tt = _mm_ror_ps(_L4,1);		sum = _mm_mul_ps(tt,r1);
+	tt = _mm_ror_ps(tt,1);		sum = _mm_add_ps(sum,_mm_mul_ps(tt,r2));
+	tt = _mm_ror_ps(tt,1);		sum = _mm_add_ps(sum,_mm_mul_ps(tt,r3));
+	__m128 mtL3 = _mm_xor_ps(sum,Sign_PNPN);
+
+	// Dividing is FASTER than rcp_nr! (Because rcp_nr causes many register-memory RWs).
+	RDet = _mm_div_ss(_mm_load_ss((float *)&_vmathZERONE), Det); // TODO: just 1.0f?
+	RDet = _mm_shuffle_ps(RDet,RDet,0x00);	// broadcast lane 0 (1/Det) to all four lanes
+
+	// Divide the first 12 minterms by the determinant.
+	mtL1 = _mm_mul_ps(mtL1, RDet);
+	mtL2 = _mm_mul_ps(mtL2, RDet);
+	mtL3 = _mm_mul_ps(mtL3, RDet);
+
+	// Calculate the minterms of the fourth line and divide by the determinant.
+	tt = _mm_ror_ps(_L3,1);		sum = _mm_mul_ps(tt,r1);
+	tt = _mm_ror_ps(tt,1);		sum = _mm_add_ps(sum,_mm_mul_ps(tt,r2));
+	tt = _mm_ror_ps(tt,1);		sum = _mm_add_ps(sum,_mm_mul_ps(tt,r3));
+	__m128 mtL4 = _mm_xor_ps(sum,Sign_NPNP);
+	mtL4 = _mm_mul_ps(mtL4, RDet);
+
+	// Now we just have to transpose the minterms matrix.
+	trns0 = _mm_unpacklo_ps(mtL1,mtL2);
+	trns1 = _mm_unpacklo_ps(mtL3,mtL4);
+	trns2 = _mm_unpackhi_ps(mtL1,mtL2);
+	trns3 = _mm_unpackhi_ps(mtL3,mtL4);
+	_L1 = _mm_movelh_ps(trns0,trns1);
+	_L2 = _mm_movehl_ps(trns1,trns0);
+	_L3 = _mm_movelh_ps(trns2,trns3);
+	_L4 = _mm_movehl_ps(trns3,trns2);
+
+    return Matrix4(
+        Vector4( _L1 ),
+        Vector4( _L2 ),
+        Vector4( _L3 ),
+        Vector4( _L4 )
+    );
+}
+
+// Copies getXYZ() of each column into a Transform3, calls inverse() on it and widens the result to Matrix4.
+VECTORMATH_FORCE_INLINE const Matrix4 affineInverse( const Matrix4 & mat ) {
+    Transform3 xform;
+    xform.setCol0( mat.getCol0().getXYZ( ) );
+    xform.setCol1( mat.getCol1().getXYZ( ) );
+    xform.setCol2( mat.getCol2().getXYZ( ) );
+    xform.setCol3( mat.getCol3().getXYZ( ) );
+    return Matrix4( inverse( xform ) );
+}
+
+// Copies getXYZ() of each column into a Transform3, calls orthoInverse() on it and widens the result to Matrix4.
+VECTORMATH_FORCE_INLINE const Matrix4 orthoInverse( const Matrix4 & mat ) {
+    Transform3 xform;
+    xform.setCol0( mat.getCol0().getXYZ( ) );
+    xform.setCol1( mat.getCol1().getXYZ( ) );
+    xform.setCol2( mat.getCol2().getXYZ( ) );
+    xform.setCol3( mat.getCol3().getXYZ( ) );
+    return Matrix4( orthoInverse( xform ) );
+}
+
+// Determinant of a 4x4 matrix. The minterms of the first line are formed from
+// columns 1..3, multiplied by column 0 and reduced with alternating signs.
+// Only the terms that feed Det are computed here.
+// The scalar is returned as floatInVec( Det, 0 ).
+VECTORMATH_FORCE_INLINE const floatInVec determinant( const Matrix4 & mat )
+{
+	__m128 Va,Vb,Vc;
+	__m128 r1,r2,r3,tt,tt2;
+	__m128 sum,Det;
+
+	__m128 _L1 = mat.getCol0().get128();
+	__m128 _L2 = mat.getCol1().get128();
+	__m128 _L3 = mat.getCol2().get128();
+	__m128 _L4 = mat.getCol3().get128();
+	// Calculating the minterms for the first line.
+
+	// _mm_ror_ps is just a macro using _mm_shuffle_ps().
+	tt = _L4; tt2 = _mm_ror_ps(_L3,1);
+	Vc = _mm_mul_ps(tt2,_mm_ror_ps(tt,0));					// V3' dot V4
+	Va = _mm_mul_ps(tt2,_mm_ror_ps(tt,2));					// V3' dot V4"
+	Vb = _mm_mul_ps(tt2,_mm_ror_ps(tt,3));					// V3' dot V4^
+
+	r1 = _mm_sub_ps(_mm_ror_ps(Va,1),_mm_ror_ps(Vc,2));		// V3" dot V4^ - V3^ dot V4"
+	r2 = _mm_sub_ps(_mm_ror_ps(Vb,2),_mm_ror_ps(Vb,0));		// V3^ dot V4' - V3' dot V4^
+	r3 = _mm_sub_ps(_mm_ror_ps(Va,0),_mm_ror_ps(Vc,1));		// V3' dot V4" - V3" dot V4'
+
+	tt = _L2;
+	Va = _mm_ror_ps(tt,1);		sum = _mm_mul_ps(Va,r1);
+	Vb = _mm_ror_ps(tt,2);		sum = _mm_add_ps(sum,_mm_mul_ps(Vb,r2));
+	Vc = _mm_ror_ps(tt,3);		sum = _mm_add_ps(sum,_mm_mul_ps(Vc,r3));
+
+	// Multiply by the first column, then add the upper pair of lanes onto the lower pair.
+	Det = _mm_mul_ps(sum,_L1);
+	Det = _mm_add_ps(Det,_mm_movehl_ps(Det,Det));
+
+	// Lane 0 minus lane 1 completes the alternating-sign sum
+	// (this is a subtraction, not a test of the determinant).
+	Det = _mm_sub_ss(Det,_mm_shuffle_ps(Det,Det,1));
+	return floatInVec(Det, 0);
+}
+
+VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::operator +( const Matrix4 & mat ) const
+{
+    // Column-wise sum of the two matrices.
+    const Vector4 sum0 = mCol0 + mat.mCol0;
+    const Vector4 sum1 = mCol1 + mat.mCol1;
+    const Vector4 sum2 = mCol2 + mat.mCol2;
+    const Vector4 sum3 = mCol3 + mat.mCol3;
+    return Matrix4( sum0, sum1, sum2, sum3 );
+}
+
+VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::operator -( const Matrix4 & mat ) const
+{
+    // Column-wise difference: this minus mat.
+    const Vector4 diff0 = mCol0 - mat.mCol0;
+    const Vector4 diff1 = mCol1 - mat.mCol1;
+    const Vector4 diff2 = mCol2 - mat.mCol2;
+    const Vector4 diff3 = mCol3 - mat.mCol3;
+    return Matrix4( diff0, diff1, diff2, diff3 );
+}
+
+VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::operator +=( const Matrix4 & mat )
+{ // In-place add: evaluates operator + and assigns the result back to *this.
+    *this = *this + mat;
+    return *this;
+}
+
+VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::operator -=( const Matrix4 & mat )
+{ // In-place subtract: evaluates binary operator - and assigns the result back to *this.
+    *this = *this - mat;
+    return *this;
+}
+
+VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::operator -( ) const
+{
+    // Negates every column.
+    const Vector4 neg0 = -mCol0;
+    const Vector4 neg1 = -mCol1;
+    const Vector4 neg2 = -mCol2;
+    const Vector4 neg3 = -mCol3;
+    return Matrix4( neg0, neg1, neg2, neg3 );
+}
+
+VECTORMATH_FORCE_INLINE const Matrix4 absPerElem( const Matrix4 & mat )
+{
+    // Applies the Vector4 absPerElem to each of the four columns.
+    const Vector4 abs0 = absPerElem( mat.getCol0() );
+    const Vector4 abs1 = absPerElem( mat.getCol1() );
+    const Vector4 abs2 = absPerElem( mat.getCol2() );
+    const Vector4 abs3 = absPerElem( mat.getCol3() );
+    return Matrix4( abs0, abs1, abs2, abs3 );
+}
+
+VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::operator *( float scalar ) const
+{ // Convenience overload: wraps the scalar in a floatInVec and forwards to that overload.
+    return *this * floatInVec(scalar);
+}
+
+VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::operator *( const floatInVec &scalar ) const
+{
+    // Multiplies every column by the same scalar.
+    const Vector4 scaled0 = mCol0 * scalar;
+    const Vector4 scaled1 = mCol1 * scalar;
+    const Vector4 scaled2 = mCol2 * scalar;
+    const Vector4 scaled3 = mCol3 * scalar;
+    return Matrix4( scaled0, scaled1, scaled2, scaled3 );
+}
+
+VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::operator *=( float scalar )
+{ // Convenience overload: forwards to the floatInVec version of operator *=.
+    return *this *= floatInVec(scalar);
+}
+
+VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::operator *=( const floatInVec &scalar )
+{ // In-place scalar multiply: evaluates operator * and assigns the result back to *this.
+    *this = *this * scalar;
+    return *this;
+}
+
+VECTORMATH_FORCE_INLINE const Matrix4 operator *( float scalar, const Matrix4 & mat )
+{ // scalar * matrix with a plain float: forwards to the floatInVec free operator.
+    return floatInVec(scalar) * mat;
+}
+
+VECTORMATH_FORCE_INLINE const Matrix4 operator *( const floatInVec &scalar, const Matrix4 & mat )
+{ // scalar * matrix: implemented by swapping operands and using the member operator *.
+    return mat * scalar;
+}
+
+VECTORMATH_FORCE_INLINE const Vector4 Matrix4::operator *( const Vector4 &vec ) const
+{ // Matrix * 4-vector: each vec lane is broadcast and scales one column; summed as (col0*x + col1*y) + (col2*z + col3*w).
+    return Vector4(
+		_mm_add_ps(
+			_mm_add_ps(_mm_mul_ps(mCol0.get128(), _mm_shuffle_ps(vec.get128(), vec.get128(), _MM_SHUFFLE(0,0,0,0))), _mm_mul_ps(mCol1.get128(), _mm_shuffle_ps(vec.get128(), vec.get128(), _MM_SHUFFLE(1,1,1,1)))),
+			_mm_add_ps(_mm_mul_ps(mCol2.get128(), _mm_shuffle_ps(vec.get128(), vec.get128(), _MM_SHUFFLE(2,2,2,2))), _mm_mul_ps(mCol3.get128(), _mm_shuffle_ps(vec.get128(), vec.get128(), _MM_SHUFFLE(3,3,3,3)))))
+		);
+}
+
+VECTORMATH_FORCE_INLINE const Vector4 Matrix4::operator *( const Vector3 &vec ) const
+{ // Matrix * 3-vector: (col0*x + col1*y) + col2*z; mCol3 is not read, so no translation is added.
+    return Vector4(
+		_mm_add_ps(
+			_mm_add_ps(_mm_mul_ps(mCol0.get128(), _mm_shuffle_ps(vec.get128(), vec.get128(), _MM_SHUFFLE(0,0,0,0))), _mm_mul_ps(mCol1.get128(), _mm_shuffle_ps(vec.get128(), vec.get128(), _MM_SHUFFLE(1,1,1,1)))),
+			_mm_mul_ps(mCol2.get128(), _mm_shuffle_ps(vec.get128(), vec.get128(), _MM_SHUFFLE(2,2,2,2))))
+		);
+}
+
+VECTORMATH_FORCE_INLINE const Vector4 Matrix4::operator *( const Point3 &pnt ) const
+{ // Matrix * point: (col0*x + col1*y) + (col2*z + col3); mCol3 is added unscaled, i.e. the point acts as if w were 1.
+    return Vector4(
+		_mm_add_ps(
+			_mm_add_ps(_mm_mul_ps(mCol0.get128(), _mm_shuffle_ps(pnt.get128(), pnt.get128(), _MM_SHUFFLE(0,0,0,0))), _mm_mul_ps(mCol1.get128(), _mm_shuffle_ps(pnt.get128(), pnt.get128(), _MM_SHUFFLE(1,1,1,1)))),
+			_mm_add_ps(_mm_mul_ps(mCol2.get128(), _mm_shuffle_ps(pnt.get128(), pnt.get128(), _MM_SHUFFLE(2,2,2,2))), mCol3.get128()))
+		);
+}
+
+VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::operator *( const Matrix4 & mat ) const
+{
+    // Matrix product: each result column is *this applied to the matching column of mat.
+    const Vector4 prod0 = *this * mat.mCol0;
+    const Vector4 prod1 = *this * mat.mCol1;
+    const Vector4 prod2 = *this * mat.mCol2;
+    const Vector4 prod3 = *this * mat.mCol3;
+    return Matrix4( prod0, prod1, prod2, prod3 );
+}
+
+VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::operator *=( const Matrix4 & mat )
+{ // In-place product with mat on the right: *this becomes (*this) * mat.
+    *this = *this * mat;
+    return *this;
+}
+
+VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::operator *( const Transform3 & tfrm ) const
+{
+    // Columns 0..2 of tfrm go through the Vector3 overload, column 3 through the Point3 one.
+    const Vector4 prod0 = *this * tfrm.getCol0();
+    const Vector4 prod1 = *this * tfrm.getCol1();
+    const Vector4 prod2 = *this * tfrm.getCol2();
+    const Vector4 prod3 = *this * Point3( tfrm.getCol3() );
+    return Matrix4( prod0, prod1, prod2, prod3 );
+}
+
+VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::operator *=( const Transform3 & tfrm )
+{ // In-place product with a Transform3 on the right: *this becomes (*this) * tfrm.
+    *this = *this * tfrm;
+    return *this;
+}
+
+VECTORMATH_FORCE_INLINE const Matrix4 mulPerElem( const Matrix4 & mat0, const Matrix4 & mat1 )
+{
+    // Element-wise product, evaluated one column pair at a time.
+    const Vector4 prod0 = mulPerElem( mat0.getCol0(), mat1.getCol0() );
+    const Vector4 prod1 = mulPerElem( mat0.getCol1(), mat1.getCol1() );
+    const Vector4 prod2 = mulPerElem( mat0.getCol2(), mat1.getCol2() );
+    const Vector4 prod3 = mulPerElem( mat0.getCol3(), mat1.getCol3() );
+    return Matrix4( prod0, prod1, prod2, prod3 );
+}
+
+VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::identity( )
+{ // Matrix whose four columns are the x, y, z and w axis vectors, in that order.
+    return Matrix4(
+        Vector4::xAxis( ),
+        Vector4::yAxis( ),
+        Vector4::zAxis( ),
+        Vector4::wAxis( )
+    );
+}
+
+VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::setUpper3x3( const Matrix3 & mat3 )
+{ // Writes mat3's columns into the xyz part of columns 0..2; mCol3 is not touched.
+    mCol0.setXYZ( mat3.getCol0() ); // presumably w is left as is -- setXYZ is defined outside this view
+    mCol1.setXYZ( mat3.getCol1() );
+    mCol2.setXYZ( mat3.getCol2() );
+    return *this;
+}
+
+VECTORMATH_FORCE_INLINE const Matrix3 Matrix4::getUpper3x3( ) const
+{ // Matrix3 built from the xyz part of columns 0..2.
+    return Matrix3(
+        mCol0.getXYZ( ),
+        mCol1.getXYZ( ),
+        mCol2.getXYZ( )
+    );
+}
+
+VECTORMATH_FORCE_INLINE Matrix4 & Matrix4::setTranslation( const Vector3 &translateVec )
+{ // Stores translateVec in the xyz part of column 3; returns *this for chaining.
+    mCol3.setXYZ( translateVec );
+    return *this;
+}
+
+VECTORMATH_FORCE_INLINE const Vector3 Matrix4::getTranslation( ) const
+{ // The xyz part of column 3.
+    return mCol3.getXYZ( );
+}
+
+VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::rotationX( float radians )
+{ // Convenience overload: forwards to the floatInVec version.
+    return rotationX( floatInVec(radians) );
+}
+
+VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::rotationX( const floatInVec &radians )
+{ // Rotation about the x axis: columns 0 and 3 are the x/w axes, columns 1 and 2 are assembled lane by lane.
+    __m128 s, c, res1, res2;
+    __m128 zero;
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0}; // mask with only lane y set
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_z[4] = {0, 0, 0xffffffff, 0}; // mask with only lane z set
+    zero = _mm_setzero_ps();
+    sincosf4( radians.get128(), &s, &c ); // presumably s = sine, c = cosine per lane -- helper is defined outside this view
+    res1 = vec_sel( zero, c, select_y );
+    res1 = vec_sel( res1, s, select_z ); // res1 = ( 0, c, s, 0 ), assuming vec_sel takes masked lanes from its 2nd operand
+    res2 = vec_sel( zero, negatef4(s), select_y );
+    res2 = vec_sel( res2, c, select_z ); // res2 = ( 0, -s, c, 0 ) under the same assumption
+    return Matrix4(
+        Vector4::xAxis( ),
+        Vector4( res1 ),
+        Vector4( res2 ),
+        Vector4::wAxis( )
+    );
+}
+
+VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::rotationY( float radians )
+{ // Convenience overload: forwards to the floatInVec version.
+    return rotationY( floatInVec(radians) );
+}
+
+VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::rotationY( const floatInVec &radians )
+{ // Rotation about the y axis: columns 1 and 3 are the y/w axes, columns 0 and 2 are assembled lane by lane.
+    __m128 s, c, res0, res2;
+    __m128 zero;
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_x[4] = {0xffffffff, 0, 0, 0}; // mask with only lane x set
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_z[4] = {0, 0, 0xffffffff, 0}; // mask with only lane z set
+    zero = _mm_setzero_ps();
+    sincosf4( radians.get128(), &s, &c ); // presumably s = sine, c = cosine per lane -- helper is defined outside this view
+    res0 = vec_sel( zero, c, select_x );
+    res0 = vec_sel( res0, negatef4(s), select_z ); // res0 = ( c, 0, -s, 0 ), assuming vec_sel takes masked lanes from its 2nd operand
+    res2 = vec_sel( zero, s, select_x );
+    res2 = vec_sel( res2, c, select_z ); // res2 = ( s, 0, c, 0 ) under the same assumption
+    return Matrix4(
+        Vector4( res0 ),
+        Vector4::yAxis( ),
+        Vector4( res2 ),
+        Vector4::wAxis( )
+    );
+}
+
+VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::rotationZ( float radians )
+{ // Convenience overload: forwards to the floatInVec version.
+    return rotationZ( floatInVec(radians) );
+}
+
+VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::rotationZ( const floatInVec &radians )
+{ // Rotation about the z axis: columns 2 and 3 are the z/w axes, columns 0 and 1 are assembled lane by lane.
+    __m128 s, c, res0, res1;
+    __m128 zero;
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_x[4] = {0xffffffff, 0, 0, 0}; // mask with only lane x set
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0}; // mask with only lane y set
+    zero = _mm_setzero_ps();
+    sincosf4( radians.get128(), &s, &c ); // presumably s = sine, c = cosine per lane -- helper is defined outside this view
+    res0 = vec_sel( zero, c, select_x );
+    res0 = vec_sel( res0, s, select_y ); // res0 = ( c, s, 0, 0 ), assuming vec_sel takes masked lanes from its 2nd operand
+    res1 = vec_sel( zero, negatef4(s), select_x );
+    res1 = vec_sel( res1, c, select_y ); // res1 = ( -s, c, 0, 0 ) under the same assumption
+    return Matrix4(
+        Vector4( res0 ),
+        Vector4( res1 ),
+        Vector4::zAxis( ),
+        Vector4::wAxis( )
+    );
+}
+
+VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::rotationZYX( const Vector3 &radiansXYZ )
+{ // Rotation built from three angles held in the x, y, z lanes of radiansXYZ; column 3 is the w axis.
+    __m128 angles, s, negS, c, X0, X1, Y0, Y1, Z0, Z1, tmp;
+    angles = Vector4( radiansXYZ, 0.0f ).get128(); // widen to four lanes with w = 0 before taking sin/cos
+    sincosf4( angles, &s, &c ); // presumably per-lane sine and cosine -- helper is defined outside this view
+    negS = negatef4( s );
+    Z0 = vec_mergel( c, s ); // NOTE(review): vec_mergel is a project macro; presumably interleaves the high lanes -- confirm
+    Z1 = vec_mergel( negS, c );
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_xyz[4] = {0xffffffff, 0xffffffff, 0xffffffff, 0};
+    Z1 = vec_and( Z1, _mm_load_ps( (float *)select_xyz ) ); // clear the w lane of Z1
+	Y0 = _mm_shuffle_ps( c, negS, _MM_SHUFFLE(0,1,1,1) ); // Y0 = ( c.y, c.y, negS.y, negS.x )
+	Y1 = _mm_shuffle_ps( s, c, _MM_SHUFFLE(0,1,1,1) ); // Y1 = ( s.y, s.y, c.y, c.x )
+    X0 = vec_splat( s, 0 ); // sine of the x angle in every lane
+    X1 = vec_splat( c, 0 ); // cosine of the x angle in every lane
+    tmp = vec_mul( Z0, Y1 );
+    return Matrix4(
+        Vector4( vec_mul( Z0, Y0 ) ),
+        Vector4( vec_madd( Z1, X1, vec_mul( tmp, X0 ) ) ),
+        Vector4( vec_nmsub( Z1, X0, vec_mul( tmp, X1 ) ) ),
+        Vector4::wAxis( )
+    );
+}
+
+VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::rotation( float radians, const Vector3 &unitVec )
+{ // Convenience overload: forwards to the floatInVec version.
+    return rotation( floatInVec(radians), unitVec );
+}
+
+VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::rotation( const floatInVec &radians, const Vector3 &unitVec )
+{ // Axis-angle rotation: column i = axis * axis[i] * (1 - c) + tmp_i, with tmp_i holding c and +/- axis*s terms; column 3 is the w axis.
+    __m128 axis, s, c, oneMinusC, axisS, negAxisS, xxxx, yyyy, zzzz, tmp0, tmp1, tmp2;
+    axis = unitVec.get128(); // the parameter name says unit length; nothing in this routine normalizes it
+    sincosf4( radians.get128(), &s, &c ); // presumably per-lane sine and cosine -- helper is defined outside this view
+    xxxx = vec_splat( axis, 0 );
+    yyyy = vec_splat( axis, 1 );
+    zzzz = vec_splat( axis, 2 );
+    oneMinusC = vec_sub( _mm_set1_ps(1.0f), c );
+    axisS = vec_mul( axis, s );
+    negAxisS = negatef4( axisS );
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_x[4] = {0xffffffff, 0, 0, 0};
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_z[4] = {0, 0, 0xffffffff, 0};
+    //tmp0 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_XZBX );
+	tmp0 = _mm_shuffle_ps( axisS, axisS, _MM_SHUFFLE(0,0,2,0) ); // ( x, z, x, x ) of axisS
+	tmp0 = vec_sel(tmp0, vec_splat(negAxisS, 1), select_z); // lane z <- -axisS.y (shuffle + select stands in for the vec_perm above)
+    //tmp1 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_CXXX );
+	tmp1 = vec_sel( vec_splat(axisS, 0), vec_splat(negAxisS, 2), select_x ); // lane x <- -axisS.z, other lanes axisS.x
+    //tmp2 = vec_perm( axisS, negAxisS, _VECTORMATH_PERM_YAXX );
+	tmp2 = _mm_shuffle_ps( axisS, axisS, _MM_SHUFFLE(0,0,0,1) ); // ( y, x, x, x ) of axisS
+	tmp2 = vec_sel(tmp2, vec_splat(negAxisS, 0), select_y); // lane y <- -axisS.x
+    tmp0 = vec_sel( tmp0, c, select_x ); // cosine goes on the diagonal lane of each column
+    tmp1 = vec_sel( tmp1, c, select_y );
+    tmp2 = vec_sel( tmp2, c, select_z );
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_xyz[4] = {0xffffffff, 0xffffffff, 0xffffffff, 0};
+    axis = vec_and( axis, _mm_load_ps( (float *)select_xyz ) ); // clear the w lane so the three rotation columns end with w = 0
+    tmp0 = vec_and( tmp0, _mm_load_ps( (float *)select_xyz ) );
+    tmp1 = vec_and( tmp1, _mm_load_ps( (float *)select_xyz ) );
+    tmp2 = vec_and( tmp2, _mm_load_ps( (float *)select_xyz ) );
+    return Matrix4(
+        Vector4( vec_madd( vec_mul( axis, xxxx ), oneMinusC, tmp0 ) ),
+        Vector4( vec_madd( vec_mul( axis, yyyy ), oneMinusC, tmp1 ) ),
+        Vector4( vec_madd( vec_mul( axis, zzzz ), oneMinusC, tmp2 ) ),
+        Vector4::wAxis( )
+    );
+}
+
+VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::rotation( const Quat &unitQuat )
+{ // Builds the rotation as a Transform3 first, then converts it to a Matrix4.
+    return Matrix4( Transform3::rotation( unitQuat ) );
+}
+
+VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::scale( const Vector3 &scaleVec )
+{ // Scale matrix: column i keeps only lane i of scaleVec (all other lanes zero); column 3 is the w axis.
+    __m128 zero = _mm_setzero_ps();
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_x[4] = {0xffffffff, 0, 0, 0};
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_z[4] = {0, 0, 0xffffffff, 0};
+    return Matrix4(
+        Vector4( vec_sel( zero, scaleVec.get128(), select_x ) ), // assumes vec_sel takes masked lanes from its 2nd operand
+        Vector4( vec_sel( zero, scaleVec.get128(), select_y ) ),
+        Vector4( vec_sel( zero, scaleVec.get128(), select_z ) ),
+        Vector4::wAxis( )
+    );
+}
+
+VECTORMATH_FORCE_INLINE const Matrix4 appendScale( const Matrix4 & mat, const Vector3 &scaleVec )
+{
+    // Scales columns 0..2 by scaleVec's x, y and z respectively; column 3 is passed through.
+    const Vector4 scaledCol0 = mat.getCol0() * scaleVec.getX( );
+    const Vector4 scaledCol1 = mat.getCol1() * scaleVec.getY( );
+    const Vector4 scaledCol2 = mat.getCol2() * scaleVec.getZ( );
+    const Vector4 unchangedCol3 = mat.getCol3();
+    return Matrix4( scaledCol0, scaledCol1, scaledCol2, unchangedCol3 );
+}
+
+VECTORMATH_FORCE_INLINE const Matrix4 prependScale( const Vector3 &scaleVec, const Matrix4 & mat )
+{
+    // Every column, including column 3, is multiplied element-wise by
+    // ( scaleVec.x, scaleVec.y, scaleVec.z, 1 ).
+    const Vector4 laneScale = Vector4( scaleVec, 1.0f );
+    const Vector4 scaledCol0 = mulPerElem( mat.getCol0(), laneScale );
+    const Vector4 scaledCol1 = mulPerElem( mat.getCol1(), laneScale );
+    const Vector4 scaledCol2 = mulPerElem( mat.getCol2(), laneScale );
+    const Vector4 scaledCol3 = mulPerElem( mat.getCol3(), laneScale );
+    return Matrix4( scaledCol0, scaledCol1, scaledCol2, scaledCol3 );
+}
+
+VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::translation( const Vector3 &translateVec )
+{ // Columns 0..2 are the x, y, z axes; column 3 is ( translateVec, 1 ).
+    return Matrix4(
+        Vector4::xAxis( ),
+        Vector4::yAxis( ),
+        Vector4::zAxis( ),
+        Vector4( translateVec, 1.0f )
+    );
+}
+
+VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::lookAt( const Point3 &eyePos, const Point3 &lookAtPos, const Vector3 &upVec )
+{
+    // Builds the eye's own frame (x, y, z axes plus position) and returns its orthoInverse.
+    // The z axis points from the look-at position towards the eye; y is re-derived from z and x.
+    const Vector3 upHint = normalize( upVec );
+    const Vector3 zAxis = normalize( ( eyePos - lookAtPos ) );
+    const Vector3 xAxis = normalize( cross( upHint, zAxis ) );
+    const Vector3 yAxis = cross( zAxis, xAxis );
+    const Matrix4 eyeFrame = Matrix4( Vector4( xAxis ), Vector4( yAxis ), Vector4( zAxis ), Vector4( eyePos ) );
+    return orthoInverse( eyeFrame );
+}
+
+VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::perspective( float fovyRadians, float aspect, float zNear, float zFar )
+{
+    // Projection matrix from a vertical field of view, an aspect ratio and near/far distances.
+    union Lanes { __m128 v; float s[4]; };
+    const float cotHalfFovy = tanf( _VECTORMATH_PI_OVER_2 - fovyRadians * 0.5f );
+    const float invRange = 1.0f / ( zNear - zFar );
+    const __m128 allZero = _mm_setzero_ps();
+    Lanes xCol, yCol, zCol, wCol;
+    // Column 0: ( cot / aspect, 0, 0, 0 )
+    xCol.v = allZero;
+    xCol.s[0] = cotHalfFovy / aspect;
+    // Column 1: ( 0, cot, 0, 0 )
+    yCol.v = allZero;
+    yCol.s[1] = cotHalfFovy;
+    // Column 2: ( 0, 0, ( near + far ) / ( near - far ), -1 )
+    zCol.v = allZero;
+    zCol.s[2] = ( zNear + zFar ) * invRange;
+    zCol.s[3] = -1.0f;
+    // Column 3: ( 0, 0, 2 * near * far / ( near - far ), 0 )
+    wCol.v = allZero;
+    wCol.s[2] = zNear * zFar * invRange * 2.0f;
+    return Matrix4(
+        Vector4( xCol.v ),
+        Vector4( yCol.v ),
+        Vector4( zCol.v ),
+        Vector4( wCol.v )
+    );
+}
+
+VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::frustum( float left, float right, float bottom, float top, float zNear, float zFar )
+{ // Builds a projection matrix from the six frustum plane parameters.
+    /* function implementation based on code from STIDC SDK:           */
+    /* --------------------------------------------------------------  */
+    /* PLEASE DO NOT MODIFY THIS SECTION                               */
+    /* This prolog section is automatically generated.                 */
+    /*                                                                 */
+    /* (C)Copyright                                                    */
+    /* Sony Computer Entertainment, Inc.,                              */
+    /* Toshiba Corporation,                                            */
+    /* International Business Machines Corporation,                    */
+    /* 2001,2002.                                                      */
+    /* S/T/I Confidential Information                                  */
+    /* --------------------------------------------------------------  */
+    __m128 lbf, rtn;
+    __m128 diff, sum, inv_diff;
+    __m128 diagonal, column, near2;
+    __m128 zero = _mm_setzero_ps(), one = _mm_set1_ps(1.0f);
+    union { __m128 v; float s[4]; } l, f, r, n, b, t; // TODO: Union?
+    l.v = zero; l.s[0] = left;   // Only lane 0 carries data, but whole registers are merged
+    f.v = zero; f.s[0] = zFar;   // below, so every lane gets a defined value first:
+    r.v = one;  r.s[0] = right;  // lbf inputs are padded with 0 and rtn inputs with 1, which
+    n.v = one;  n.s[0] = zNear;  // makes the unused lane of ( rtn - lbf ) equal 1 and keeps
+    b.v = zero; b.s[0] = bottom; // recipf4 away from indeterminate or zero input there.
+    t.v = one;  t.s[0] = top;    // Those lanes are masked out of the result by vec_sel.
+    lbf = vec_mergeh( l.v, f.v ); // NOTE(review): vec_mergeh is a project macro; presumably interleaves low lanes -- confirm
+    rtn = vec_mergeh( r.v, n.v );
+    lbf = vec_mergeh( lbf, b.v ); // intended layout ( left, bottom, far, pad )
+    rtn = vec_mergeh( rtn, t.v ); // intended layout ( right, top, near, pad )
+    diff = vec_sub( rtn, lbf );
+    sum  = vec_add( rtn, lbf );
+    inv_diff = recipf4( diff );
+    near2 = vec_splat( n.v, 0 );
+    near2 = vec_add( near2, near2 ); // 2 * zNear in every lane
+    diagonal = vec_mul( near2, inv_diff );
+    column = vec_mul( sum, inv_diff );
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_x[4] = {0xffffffff, 0, 0, 0};
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_z[4] = {0, 0, 0xffffffff, 0};
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_w[4] = {0, 0, 0, 0xffffffff};
+    return Matrix4(
+        Vector4( vec_sel( zero, diagonal, select_x ) ),
+        Vector4( vec_sel( zero, diagonal, select_y ) ),
+        Vector4( vec_sel( column, _mm_set1_ps(-1.0f), select_w ) ),
+        Vector4( vec_sel( zero, vec_mul( diagonal, vec_splat( f.v, 0 ) ), select_z ) )
+	);
+}
+
+VECTORMATH_FORCE_INLINE const Matrix4 Matrix4::orthographic( float left, float right, float bottom, float top, float zNear, float zFar )
+{ // Builds a projection matrix from the six box plane parameters; column 3 carries the offsets and w = 1.
+    /* function implementation based on code from STIDC SDK:           */
+    /* --------------------------------------------------------------  */
+    /* PLEASE DO NOT MODIFY THIS SECTION                               */
+    /* This prolog section is automatically generated.                 */
+    /*                                                                 */
+    /* (C)Copyright                                                    */
+    /* Sony Computer Entertainment, Inc.,                              */
+    /* Toshiba Corporation,                                            */
+    /* International Business Machines Corporation,                    */
+    /* 2001,2002.                                                      */
+    /* S/T/I Confidential Information                                  */
+    /* --------------------------------------------------------------  */
+    __m128 lbf, rtn;
+    __m128 diff, sum, inv_diff, neg_inv_diff;
+    __m128 diagonal, column;
+    __m128 zero = _mm_setzero_ps(), one = _mm_set1_ps(1.0f);
+    union { __m128 v; float s[4]; } l, f, r, n, b, t;
+    l.v = zero; l.s[0] = left;   // Only lane 0 carries data, but whole registers are merged
+    f.v = zero; f.s[0] = zFar;   // below, so every lane gets a defined value first:
+    r.v = one;  r.s[0] = right;  // lbf inputs are padded with 0 and rtn inputs with 1, which
+    n.v = one;  n.s[0] = zNear;  // makes the unused lane of ( rtn - lbf ) equal 1 and keeps
+    b.v = zero; b.s[0] = bottom; // recipf4 away from indeterminate or zero input there.
+    t.v = one;  t.s[0] = top;    // That lane is overwritten with 1 in the returned column 3.
+    lbf = vec_mergeh( l.v, f.v ); // NOTE(review): vec_mergeh is a project macro; presumably interleaves low lanes -- confirm
+    rtn = vec_mergeh( r.v, n.v );
+    lbf = vec_mergeh( lbf, b.v ); // intended layout ( left, bottom, far, pad )
+    rtn = vec_mergeh( rtn, t.v ); // intended layout ( right, top, near, pad )
+    diff = vec_sub( rtn, lbf );
+    sum  = vec_add( rtn, lbf );
+    inv_diff = recipf4( diff );
+    neg_inv_diff = negatef4( inv_diff );
+    diagonal = vec_add( inv_diff, inv_diff ); // 2 / diff in every lane
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_x[4] = {0xffffffff, 0, 0, 0};
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_z[4] = {0, 0, 0xffffffff, 0};
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_w[4] = {0, 0, 0, 0xffffffff};
+    column = vec_mul( sum, vec_sel( neg_inv_diff, inv_diff, select_z ) ); // TODO: no madds with zero
+    return Matrix4(
+        Vector4( vec_sel( zero, diagonal, select_x ) ),
+        Vector4( vec_sel( zero, diagonal, select_y ) ),
+        Vector4( vec_sel( zero, diagonal, select_z ) ),
+        Vector4( vec_sel( column, _mm_set1_ps(1.0f), select_w ) )
+    );
+}
+
+VECTORMATH_FORCE_INLINE const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, bool select1 )
+{
+    // Column-wise select: each column pair goes to the Vector4 select with the same flag.
+    const Vector4 picked0 = select( mat0.getCol0(), mat1.getCol0(), select1 );
+    const Vector4 picked1 = select( mat0.getCol1(), mat1.getCol1(), select1 );
+    const Vector4 picked2 = select( mat0.getCol2(), mat1.getCol2(), select1 );
+    const Vector4 picked3 = select( mat0.getCol3(), mat1.getCol3(), select1 );
+    return Matrix4( picked0, picked1, picked2, picked3 );
+}
+
+VECTORMATH_FORCE_INLINE const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, const boolInVec &select1 )
+{
+    // Column-wise select: each column pair goes to the Vector4 select with the same boolInVec.
+    const Vector4 picked0 = select( mat0.getCol0(), mat1.getCol0(), select1 );
+    const Vector4 picked1 = select( mat0.getCol1(), mat1.getCol1(), select1 );
+    const Vector4 picked2 = select( mat0.getCol2(), mat1.getCol2(), select1 );
+    const Vector4 picked3 = select( mat0.getCol3(), mat1.getCol3(), select1 );
+    return Matrix4( picked0, picked1, picked2, picked3 );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+VECTORMATH_FORCE_INLINE void print( const Matrix4 & mat )
+{ // Debug helper (compiled only under _VECTORMATH_DEBUG): prints the matrix one row at a time.
+    print( mat.getRow( 0 ) );
+    print( mat.getRow( 1 ) );
+    print( mat.getRow( 2 ) );
+    print( mat.getRow( 3 ) );
+}
+
+VECTORMATH_FORCE_INLINE void print( const Matrix4 & mat, const char * name )
+{ // Debug helper: prints "<name>:" on its own line, then the matrix rows.
+    printf("%s:\n", name);
+    print( mat );
+}
+
+#endif
+
+VECTORMATH_FORCE_INLINE Transform3::Transform3( const Transform3 & tfrm )
+{ // Copy constructor: assigns the four columns one by one.
+    mCol0 = tfrm.mCol0;
+    mCol1 = tfrm.mCol1;
+    mCol2 = tfrm.mCol2;
+    mCol3 = tfrm.mCol3;
+}
+
+VECTORMATH_FORCE_INLINE Transform3::Transform3( float scalar )
+{ // Sets every column to Vector3( scalar ).
+    mCol0 = Vector3( scalar );
+    mCol1 = Vector3( scalar );
+    mCol2 = Vector3( scalar );
+    mCol3 = Vector3( scalar );
+}
+
+VECTORMATH_FORCE_INLINE Transform3::Transform3( const floatInVec &scalar )
+{ // Sets every column to Vector3( scalar ), taking the scalar as a floatInVec.
+    mCol0 = Vector3( scalar );
+    mCol1 = Vector3( scalar );
+    mCol2 = Vector3( scalar );
+    mCol3 = Vector3( scalar );
+}
+
+VECTORMATH_FORCE_INLINE Transform3::Transform3( const Vector3 &_col0, const Vector3 &_col1, const Vector3 &_col2, const Vector3 &_col3 )
+{ // Builds the transform from four explicit columns; _col3 is stored as the translation column.
+    mCol0 = _col0;
+    mCol1 = _col1;
+    mCol2 = _col2;
+    mCol3 = _col3;
+}
+
+VECTORMATH_FORCE_INLINE Transform3::Transform3( const Matrix3 & tfrm, const Vector3 &translateVec )
+{ // Columns 0..2 come from tfrm, column 3 from translateVec.
+    this->setUpper3x3( tfrm );
+    this->setTranslation( translateVec );
+}
+
+VECTORMATH_FORCE_INLINE Transform3::Transform3( const Quat &unitQuat, const Vector3 &translateVec )
+{ // Columns 0..2 come from Matrix3( unitQuat ), column 3 from translateVec.
+    this->setUpper3x3( Matrix3( unitQuat ) );
+    this->setTranslation( translateVec );
+}
+
+VECTORMATH_FORCE_INLINE Transform3 & Transform3::setCol0( const Vector3 &_col0 )
+{ // Replaces column 0; returns *this so setters can be chained.
+    mCol0 = _col0;
+    return *this;
+}
+
+VECTORMATH_FORCE_INLINE Transform3 & Transform3::setCol1( const Vector3 &_col1 )
+{ // Replaces column 1; returns *this so setters can be chained.
+    mCol1 = _col1;
+    return *this;
+}
+
+VECTORMATH_FORCE_INLINE Transform3 & Transform3::setCol2( const Vector3 &_col2 )
+{ // Replaces column 2; returns *this so setters can be chained.
+    mCol2 = _col2;
+    return *this;
+}
+
+VECTORMATH_FORCE_INLINE Transform3 & Transform3::setCol3( const Vector3 &_col3 )
+{ // Replaces column 3 (the column getTranslation returns); returns *this for chaining.
+    mCol3 = _col3;
+    return *this;
+}
+
+VECTORMATH_FORCE_INLINE Transform3 & Transform3::setCol( int col, const Vector3 &vec )
+{ // Replaces the column at index col; col is not range-checked.
+    *(&mCol0 + col) = vec; // steps from mCol0 by pointer arithmetic -- assumes mCol0..mCol3 are laid out contiguously (class layout not visible here)
+    return *this;
+}
+
+VECTORMATH_FORCE_INLINE Transform3 & Transform3::setRow( int row, const Vector4 &vec )
+{ // Writes a row: element i of vec goes into column i at position row.
+    mCol0.setElem( row, vec.getElem( 0 ) );
+    mCol1.setElem( row, vec.getElem( 1 ) );
+    mCol2.setElem( row, vec.getElem( 2 ) );
+    mCol3.setElem( row, vec.getElem( 3 ) );
+    return *this;
+}
+
+VECTORMATH_FORCE_INLINE Transform3 & Transform3::setElem( int col, int row, float val )
+{ // Sets one element in place through the non-const operator []; indices are not range-checked here.
+    (*this)[col].setElem(row, val);
+    return *this;
+}
+
+VECTORMATH_FORCE_INLINE Transform3 & Transform3::setElem( int col, int row, const floatInVec &val )
+{ // Sets one element by copying the column out, modifying the copy and writing it back.
+    Vector3 tmpV3_0;
+    tmpV3_0 = this->getCol( col );
+    tmpV3_0.setElem( row, val );
+    this->setCol( col, tmpV3_0 );
+    return *this;
+}
+
+VECTORMATH_FORCE_INLINE const floatInVec Transform3::getElem( int col, int row ) const
+{ // Element at (col, row): fetches the column, then the element within it.
+    return this->getCol( col ).getElem( row );
+}
+
+VECTORMATH_FORCE_INLINE const Vector3 Transform3::getCol0( ) const
+{ // Returns column 0 by value.
+    return mCol0;
+}
+
+VECTORMATH_FORCE_INLINE const Vector3 Transform3::getCol1( ) const
+{ // Returns column 1 by value.
+    return mCol1;
+}
+
+VECTORMATH_FORCE_INLINE const Vector3 Transform3::getCol2( ) const
+{ // Returns column 2 by value.
+    return mCol2;
+}
+
+VECTORMATH_FORCE_INLINE const Vector3 Transform3::getCol3( ) const
+{ // Returns column 3 by value.
+    return mCol3;
+}
+
+VECTORMATH_FORCE_INLINE const Vector3 Transform3::getCol( int col ) const
+{ // Column at index col, by value; col is not range-checked.
+    return *(&mCol0 + col); // assumes mCol0..mCol3 are laid out contiguously (class layout not visible here)
+}
+
+VECTORMATH_FORCE_INLINE const Vector4 Transform3::getRow( int row ) const
+{ // Row as a Vector4: element `row` of each of the four columns, in column order.
+    return Vector4( mCol0.getElem( row ), mCol1.getElem( row ), mCol2.getElem( row ), mCol3.getElem( row ) );
+}
+
+VECTORMATH_FORCE_INLINE Vector3 & Transform3::operator []( int col )
+{ // Mutable reference to the column at index col; col is not range-checked.
+    return *(&mCol0 + col); // assumes mCol0..mCol3 are laid out contiguously (class layout not visible here)
+}
+
+VECTORMATH_FORCE_INLINE const Vector3 Transform3::operator []( int col ) const
+{ // Copy of the column at index col; col is not range-checked.
+    return *(&mCol0 + col); // assumes mCol0..mCol3 are laid out contiguously (class layout not visible here)
+}
+
+VECTORMATH_FORCE_INLINE Transform3 & Transform3::operator =( const Transform3 & tfrm )
+{ // Copy assignment: assigns the four columns one by one and returns *this.
+    mCol0 = tfrm.mCol0;
+    mCol1 = tfrm.mCol1;
+    mCol2 = tfrm.mCol2;
+    mCol3 = tfrm.mCol3;
+    return *this;
+}
+
+VECTORMATH_FORCE_INLINE const Transform3 inverse( const Transform3 & tfrm )
+{ // General inverse: cross products of the basis columns are rearranged and scaled by 1/det; no guard against det == 0 appears here.
+    __m128 inv0, inv1, inv2, inv3;
+    __m128 tmp0, tmp1, tmp2, tmp3, tmp4, dot, invdet;
+    __m128 xxxx, yyyy, zzzz;
+    tmp2 = _vmathVfCross( tfrm.getCol0().get128(), tfrm.getCol1().get128() ); // col0 x col1
+    tmp0 = _vmathVfCross( tfrm.getCol1().get128(), tfrm.getCol2().get128() ); // col1 x col2
+    tmp1 = _vmathVfCross( tfrm.getCol2().get128(), tfrm.getCol0().get128() ); // col2 x col0
+    inv3 = negatef4( tfrm.getCol3().get128() ); // negated column 3, splatted per lane below
+    dot = _vmathVfDot3( tmp2, tfrm.getCol2().get128() ); // ( col0 x col1 ) . col2 -- the value that is inverted into invdet
+    dot = vec_splat( dot, 0 );
+    invdet = recipf4( dot );
+    tmp3 = vec_mergeh( tmp0, tmp2 ); // NOTE(review): vec_mergeh / vec_mergel are project macros; presumably low / high lane interleaves -- confirm
+    tmp4 = vec_mergel( tmp0, tmp2 );
+    inv0 = vec_mergeh( tmp3, tmp1 );
+    xxxx = vec_splat( inv3, 0 );
+    //inv1 = vec_perm( tmp3, tmp1, _VECTORMATH_PERM_ZBWX );
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
+	inv1 = _mm_shuffle_ps( tmp3, tmp3, _MM_SHUFFLE(0,3,2,2)); // ( z, z, w, x ) of tmp3
+	inv1 = vec_sel(inv1, tmp1, select_y); // lane y <- tmp1.y (shuffle + select stands in for the vec_perm above)
+    //inv2 = vec_perm( tmp4, tmp1, _VECTORMATH_PERM_XCYX );
+	inv2 = _mm_shuffle_ps( tmp4, tmp4, _MM_SHUFFLE(0,1,1,0)); // ( x, y, y, x ) of tmp4
+	inv2 = vec_sel(inv2, vec_splat(tmp1, 2), select_y); // lane y <- tmp1.z
+    yyyy = vec_splat( inv3, 1 );
+    zzzz = vec_splat( inv3, 2 );
+    inv3 = vec_mul( inv0, xxxx ); // column 3 of the result: inv0 * -t.x + inv1 * -t.y + inv2 * -t.z (before the 1/det scale)
+    inv3 = vec_madd( inv1, yyyy, inv3 );
+    inv3 = vec_madd( inv2, zzzz, inv3 );
+    inv0 = vec_mul( inv0, invdet );
+    inv1 = vec_mul( inv1, invdet );
+    inv2 = vec_mul( inv2, invdet );
+    inv3 = vec_mul( inv3, invdet );
+    return Transform3(
+        Vector3( inv0 ),
+        Vector3( inv1 ),
+        Vector3( inv2 ),
+        Vector3( inv3 )
+    );
+}
+
+VECTORMATH_FORCE_INLINE const Transform3 orthoInverse( const Transform3 & tfrm )
+{ // Cheap inverse with no determinant or division: rearranges columns 0..2 and recombines the negated column 3 with them.
+    __m128 inv0, inv1, inv2, inv3;
+    __m128 tmp0, tmp1;
+    __m128 xxxx, yyyy, zzzz;
+    tmp0 = vec_mergeh( tfrm.getCol0().get128(), tfrm.getCol2().get128() ); // NOTE(review): presumably low / high lane interleaves (project macros) -- confirm
+    tmp1 = vec_mergel( tfrm.getCol0().get128(), tfrm.getCol2().get128() );
+    inv3 = negatef4( tfrm.getCol3().get128() ); // negated column 3, splatted per lane below
+    inv0 = vec_mergeh( tmp0, tfrm.getCol1().get128() );
+    xxxx = vec_splat( inv3, 0 );
+    //inv1 = vec_perm( tmp0, tfrm.getCol1().get128(), _VECTORMATH_PERM_ZBWX );
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
+	inv1 = _mm_shuffle_ps( tmp0, tmp0, _MM_SHUFFLE(0,3,2,2)); // ( z, z, w, x ) of tmp0
+	inv1 = vec_sel(inv1, tfrm.getCol1().get128(), select_y); // lane y <- col1.y (shuffle + select stands in for the vec_perm above)
+    //inv2 = vec_perm( tmp1, tfrm.getCol1().get128(), _VECTORMATH_PERM_XCYX );
+	inv2 = _mm_shuffle_ps( tmp1, tmp1, _MM_SHUFFLE(0,1,1,0)); // ( x, y, y, x ) of tmp1
+	inv2 = vec_sel(inv2, vec_splat(tfrm.getCol1().get128(), 2), select_y); // lane y <- col1.z
+    yyyy = vec_splat( inv3, 1 );
+    zzzz = vec_splat( inv3, 2 );
+    inv3 = vec_mul( inv0, xxxx ); // column 3 of the result: inv0 * -t.x + inv1 * -t.y + inv2 * -t.z
+    inv3 = vec_madd( inv1, yyyy, inv3 );
+    inv3 = vec_madd( inv2, zzzz, inv3 );
+    return Transform3(
+        Vector3( inv0 ),
+        Vector3( inv1 ),
+        Vector3( inv2 ),
+        Vector3( inv3 )
+    );
+}
+
+VECTORMATH_FORCE_INLINE const Transform3 absPerElem( const Transform3 & tfrm )
+{
+    // Applies the Vector3 absPerElem to each of the four columns.
+    const Vector3 abs0 = absPerElem( tfrm.getCol0() );
+    const Vector3 abs1 = absPerElem( tfrm.getCol1() );
+    const Vector3 abs2 = absPerElem( tfrm.getCol2() );
+    const Vector3 abs3 = absPerElem( tfrm.getCol3() );
+    return Transform3( abs0, abs1, abs2, abs3 );
+}
+
+VECTORMATH_FORCE_INLINE const Vector3 Transform3::operator *( const Vector3 &vec ) const
+{
+    // Applies columns 0..2 to vec; mCol3 is not read, so no translation is added.
+    const __m128 v = vec.get128();
+    const __m128 splatX = vec_splat( v, 0 );
+    const __m128 splatY = vec_splat( v, 1 );
+    const __m128 splatZ = vec_splat( v, 2 );
+    __m128 acc = vec_mul( mCol0.get128(), splatX );
+    acc = vec_madd( mCol1.get128(), splatY, acc );
+    acc = vec_madd( mCol2.get128(), splatZ, acc );
+    return Vector3( acc );
+}
+
+VECTORMATH_FORCE_INLINE const Point3 Transform3::operator *( const Point3 &pnt ) const
+{
+    // Transforms a point, translation included. The terms are paired as
+    // ( mCol2*z + mCol0*x ) + ( mCol3 + mCol1*y ).
+    const __m128 p = pnt.get128();
+    const __m128 splatX = vec_splat( p, 0 );
+    const __m128 splatY = vec_splat( p, 1 );
+    const __m128 splatZ = vec_splat( p, 2 );
+    const __m128 xTerm = vec_mul( mCol0.get128(), splatX );
+    const __m128 yTerm = vec_mul( mCol1.get128(), splatY );
+    const __m128 xzTerms = vec_madd( mCol2.get128(), splatZ, xTerm );
+    const __m128 yAndTranslation = vec_add( mCol3.get128(), yTerm );
+    return Point3( vec_add( xzTerms, yAndTranslation ) );
+}
+
+VECTORMATH_FORCE_INLINE const Transform3 Transform3::operator *( const Transform3 & tfrm ) const
+{
+    // Columns 0..2 of tfrm go through the Vector3 overload, column 3 through the Point3 one.
+    const Vector3 prod0 = *this * tfrm.mCol0;
+    const Vector3 prod1 = *this * tfrm.mCol1;
+    const Vector3 prod2 = *this * tfrm.mCol2;
+    const Vector3 prod3 = Vector3( ( *this * Point3( tfrm.mCol3 ) ) );
+    return Transform3( prod0, prod1, prod2, prod3 );
+}
+
+VECTORMATH_FORCE_INLINE Transform3 & Transform3::operator *=( const Transform3 & tfrm )
+{ // In-place product with tfrm on the right: *this becomes (*this) * tfrm.
+    *this = *this * tfrm;
+    return *this;
+}
+
+VECTORMATH_FORCE_INLINE const Transform3 mulPerElem( const Transform3 & tfrm0, const Transform3 & tfrm1 )
+{
+    // Element-wise product, evaluated one column pair at a time.
+    const Vector3 prod0 = mulPerElem( tfrm0.getCol0(), tfrm1.getCol0() );
+    const Vector3 prod1 = mulPerElem( tfrm0.getCol1(), tfrm1.getCol1() );
+    const Vector3 prod2 = mulPerElem( tfrm0.getCol2(), tfrm1.getCol2() );
+    const Vector3 prod3 = mulPerElem( tfrm0.getCol3(), tfrm1.getCol3() );
+    return Transform3( prod0, prod1, prod2, prod3 );
+}
+
+VECTORMATH_FORCE_INLINE const Transform3 Transform3::identity( )
+{ // Columns 0..2 are the x, y, z axis vectors; column 3 is Vector3( 0.0f ).
+    return Transform3(
+        Vector3::xAxis( ),
+        Vector3::yAxis( ),
+        Vector3::zAxis( ),
+        Vector3( 0.0f )
+    );
+}
+
+VECTORMATH_FORCE_INLINE Transform3 & Transform3::setUpper3x3( const Matrix3 & tfrm )
+{ // Replaces columns 0..2 with the columns of tfrm; mCol3 is not touched.
+    mCol0 = tfrm.getCol0();
+    mCol1 = tfrm.getCol1();
+    mCol2 = tfrm.getCol2();
+    return *this;
+}
+
+VECTORMATH_FORCE_INLINE const Matrix3 Transform3::getUpper3x3( ) const
+{ // Matrix3 built from columns 0..2.
+    return Matrix3( mCol0, mCol1, mCol2 );
+}
+
+VECTORMATH_FORCE_INLINE Transform3 & Transform3::setTranslation( const Vector3 &translateVec )
+{ // Stores translateVec as column 3; returns *this for chaining.
+    mCol3 = translateVec;
+    return *this;
+}
+
+VECTORMATH_FORCE_INLINE const Vector3 Transform3::getTranslation( ) const
+{ // Returns column 3 by value.
+    return mCol3;
+}
+
+VECTORMATH_FORCE_INLINE const Transform3 Transform3::rotationX( float radians )
+{ // Convenience overload: forwards to the floatInVec version.
+    return rotationX( floatInVec(radians) );
+}
+
+VECTORMATH_FORCE_INLINE const Transform3 Transform3::rotationX( const floatInVec &radians )
+{ // Rotation about the x axis: column 0 is the x axis, column 3 is all zeros, columns 1 and 2 are assembled lane by lane.
+    __m128 s, c, res1, res2;
+    __m128 zero;
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0}; // mask with only lane y set
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_z[4] = {0, 0, 0xffffffff, 0}; // mask with only lane z set
+    zero = _mm_setzero_ps();
+    sincosf4( radians.get128(), &s, &c ); // presumably s = sine, c = cosine per lane -- helper is defined outside this view
+    res1 = vec_sel( zero, c, select_y );
+    res1 = vec_sel( res1, s, select_z ); // res1 = ( 0, c, s, 0 ), assuming vec_sel takes masked lanes from its 2nd operand
+    res2 = vec_sel( zero, negatef4(s), select_y );
+    res2 = vec_sel( res2, c, select_z ); // res2 = ( 0, -s, c, 0 ) under the same assumption
+    return Transform3(
+        Vector3::xAxis( ),
+        Vector3( res1 ),
+        Vector3( res2 ),
+        Vector3( _mm_setzero_ps() )
+    );
+}
+
+VECTORMATH_FORCE_INLINE const Transform3 Transform3::rotationY( float radians )
+{ // Convenience overload: forwards to the floatInVec version.
+    return rotationY( floatInVec(radians) );
+}
+
+VECTORMATH_FORCE_INLINE const Transform3 Transform3::rotationY( const floatInVec &radians )
+{ // Rotation about the y axis: column 1 is the y axis, column 3 is Vector3( 0.0f ), columns 0 and 2 are assembled lane by lane.
+    __m128 s, c, res0, res2;
+    __m128 zero;
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_x[4] = {0xffffffff, 0, 0, 0}; // mask with only lane x set
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_z[4] = {0, 0, 0xffffffff, 0}; // mask with only lane z set
+    zero = _mm_setzero_ps();
+    sincosf4( radians.get128(), &s, &c ); // presumably s = sine, c = cosine per lane -- helper is defined outside this view
+    res0 = vec_sel( zero, c, select_x );
+    res0 = vec_sel( res0, negatef4(s), select_z ); // res0 = ( c, 0, -s, 0 ), assuming vec_sel takes masked lanes from its 2nd operand
+    res2 = vec_sel( zero, s, select_x );
+    res2 = vec_sel( res2, c, select_z ); // res2 = ( s, 0, c, 0 ) under the same assumption
+    return Transform3(
+        Vector3( res0 ),
+        Vector3::yAxis( ),
+        Vector3( res2 ),
+        Vector3( 0.0f )
+    );
+}
+
+VECTORMATH_FORCE_INLINE const Transform3 Transform3::rotationZ( float radians )
+{   // Scalar convenience overload: wraps radians in a floatInVec and forwards.
+    return rotationZ( floatInVec(radians) );
+}
+
+VECTORMATH_FORCE_INLINE const Transform3 Transform3::rotationZ( const floatInVec &radians )
+{   // Builds columns ( (c, s, 0), (-s, c, 0), z axis, 0 ) with s/c from sincosf4; assumes vec_sel takes its 2nd operand where mask bits are set.
+    __m128 s, c, res0, res1;
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_x[4] = {0xffffffff, 0, 0, 0};
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
+    __m128 zero = _mm_setzero_ps();
+    sincosf4( radians.get128(), &s, &c );
+    res0 = vec_sel( zero, c, select_x );             // x lane <- c
+    res0 = vec_sel( res0, s, select_y );             // y lane <- s
+    res1 = vec_sel( zero, negatef4(s), select_x );   // x lane <- -s
+    res1 = vec_sel( res1, c, select_y );             // y lane <- c
+    return Transform3(
+        Vector3( res0 ),
+        Vector3( res1 ),
+        Vector3::zAxis( ),
+        Vector3( 0.0f )
+    );
+}
+
+VECTORMATH_FORCE_INLINE const Transform3 Transform3::rotationZYX( const Vector3 &radiansXYZ )
+{   // Combined rotation from per-axis angles (x, y, z lanes of radiansXYZ); translation column is zero. Lane notes assume vec_mergel interleaves the two upper lanes.
+    __m128 angles, s, negS, c, X0, X1, Y0, Y1, Z0, Z1, tmp;
+    angles = Vector4( radiansXYZ, 0.0f ).get128();   // w angle is 0
+    sincosf4( angles, &s, &c );
+    negS = negatef4( s );
+    Z0 = vec_mergel( c, s );                         // presumably ( cz, sz, cw, sw )
+    Z1 = vec_mergel( negS, c );                      // presumably ( -sz, cz, -sw, cw )
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_xyz[4] = {0xffffffff, 0xffffffff, 0xffffffff, 0};
+    Z1 = vec_and( Z1, _mm_load_ps( (float *)select_xyz ) );   // clear the w lane of Z1
+	Y0 = _mm_shuffle_ps( c, negS, _MM_SHUFFLE(0,1,1,1) );   // ( cy, cy, -sy, -sx )
+	Y1 = _mm_shuffle_ps( s, c, _MM_SHUFFLE(0,1,1,1) );      // ( sy, sy, cy, cx )
+    X0 = vec_splat( s, 0 );                          // sx in every lane
+    X1 = vec_splat( c, 0 );                          // cx in every lane
+    tmp = vec_mul( Z0, Y1 );
+    return Transform3(
+        Vector3( vec_mul( Z0, Y0 ) ),
+        Vector3( vec_madd( Z1, X1, vec_mul( tmp, X0 ) ) ),
+        Vector3( vec_nmsub( Z1, X0, vec_mul( tmp, X1 ) ) ),
+        Vector3( 0.0f )
+    );
+}
+
+VECTORMATH_FORCE_INLINE const Transform3 Transform3::rotation( float radians, const Vector3 &unitVec )
+{   // Scalar convenience overload: wraps radians in a floatInVec and forwards.
+    return rotation( floatInVec(radians), unitVec );
+}
+
+VECTORMATH_FORCE_INLINE const Transform3 Transform3::rotation( const floatInVec &radians, const Vector3 &unitVec )
+{   // Upper 3x3 comes from Matrix3::rotation( radians, unitVec ); translation is zero.
+    return Transform3( Matrix3::rotation( radians, unitVec ), Vector3( 0.0f ) );
+}
+
+VECTORMATH_FORCE_INLINE const Transform3 Transform3::rotation( const Quat &unitQuat )
+{   // Upper 3x3 comes from the Matrix3( Quat ) constructor; translation is zero.
+    return Transform3( Matrix3( unitQuat ), Vector3( 0.0f ) );
+}
+
+VECTORMATH_FORCE_INLINE const Transform3 Transform3::scale( const Vector3 &scaleVec )
+{   // Diagonal scale: column i keeps only lane i of scaleVec (other lanes zero); translation is zero.
+    __m128 zero = _mm_setzero_ps();
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_x[4] = {0xffffffff, 0, 0, 0};
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_z[4] = {0, 0, 0xffffffff, 0};
+    return Transform3(
+        Vector3( vec_sel( zero, scaleVec.get128(), select_x ) ),
+        Vector3( vec_sel( zero, scaleVec.get128(), select_y ) ),
+        Vector3( vec_sel( zero, scaleVec.get128(), select_z ) ),
+        Vector3( 0.0f )
+    );
+}
+
+VECTORMATH_FORCE_INLINE const Transform3 appendScale( const Transform3 & tfrm, const Vector3 &scaleVec )
+{   // Scales whole columns: col0 by scaleVec.x, col1 by .y, col2 by .z; col3 is copied unchanged.
+    return Transform3(
+        ( tfrm.getCol0() * scaleVec.getX( ) ),
+        ( tfrm.getCol1() * scaleVec.getY( ) ),
+        ( tfrm.getCol2() * scaleVec.getZ( ) ),
+        tfrm.getCol3()
+    );
+}
+
+VECTORMATH_FORCE_INLINE const Transform3 prependScale( const Vector3 &scaleVec, const Transform3 & tfrm )
+{   // Multiplies every column, including col3, element-wise by scaleVec.
+    return Transform3(
+        mulPerElem( tfrm.getCol0(), scaleVec ),
+        mulPerElem( tfrm.getCol1(), scaleVec ),
+        mulPerElem( tfrm.getCol2(), scaleVec ),
+        mulPerElem( tfrm.getCol3(), scaleVec )
+    );
+}
+
+VECTORMATH_FORCE_INLINE const Transform3 Transform3::translation( const Vector3 &translateVec )
+{   // Axis vectors in columns 0-2 with translateVec as the fourth column.
+    return Transform3(
+        Vector3::xAxis( ),
+        Vector3::yAxis( ),
+        Vector3::zAxis( ),
+        translateVec
+    );
+}
+
+VECTORMATH_FORCE_INLINE const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, bool select1 )
+{   // Column-by-column select between tfrm0 and tfrm1, delegating to the Vector3 select with the same flag.
+    return Transform3(
+        select( tfrm0.getCol0(), tfrm1.getCol0(), select1 ),
+        select( tfrm0.getCol1(), tfrm1.getCol1(), select1 ),
+        select( tfrm0.getCol2(), tfrm1.getCol2(), select1 ),
+        select( tfrm0.getCol3(), tfrm1.getCol3(), select1 )
+    );
+}
+
+VECTORMATH_FORCE_INLINE const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, const boolInVec &select1 )
+{   // Same as the bool overload, but the flag is passed as a boolInVec to each per-column select.
+    return Transform3(
+        select( tfrm0.getCol0(), tfrm1.getCol0(), select1 ),
+        select( tfrm0.getCol1(), tfrm1.getCol1(), select1 ),
+        select( tfrm0.getCol2(), tfrm1.getCol2(), select1 ),
+        select( tfrm0.getCol3(), tfrm1.getCol3(), select1 )
+    );
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+VECTORMATH_FORCE_INLINE void print( const Transform3 & tfrm )
+{   // Debug helper (only built under _VECTORMATH_DEBUG): prints rows 0, 1 and 2 in order.
+    print( tfrm.getRow( 0 ) );
+    print( tfrm.getRow( 1 ) );
+    print( tfrm.getRow( 2 ) );
+}
+
+VECTORMATH_FORCE_INLINE void print( const Transform3 & tfrm, const char * name )
+{   // Debug helper: prints "<name>:" on its own line, then the rows via print( tfrm ).
+    printf("%s:\n", name);
+    print( tfrm );
+}
+
+#endif
+
+VECTORMATH_FORCE_INLINE Quat::Quat( const Matrix3 & tfrm )
+{   // Matrix-to-quaternion: computes all four candidate results branch-free, then picks one with lane-wide selects.
+    __m128 res;
+    __m128 col0, col1, col2;
+    __m128 xx_yy, xx_yy_zz_xx, yy_zz_xx_yy, zz_xx_yy_zz, diagSum, diagDiff;
+    __m128 zy_xz_yx, yz_zx_xy, sum, diff;
+    __m128 radicand, invSqrt, scale;
+    __m128 res0, res1, res2, res3;
+    __m128 xx, yy, zz;
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_x[4] = {0xffffffff, 0, 0, 0};
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_z[4] = {0, 0, 0xffffffff, 0};
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_w[4] = {0, 0, 0, 0xffffffff};
+
+    col0 = tfrm.getCol0().get128();
+    col1 = tfrm.getCol1().get128();
+    col2 = tfrm.getCol2().get128();
+
+    /* four cases: */
+    /* trace > 0 */
+    /* else */
+    /*    xx largest diagonal element */
+    /*    yy largest diagonal element */
+    /*    zz largest diagonal element */
+
+    /* compute quaternion for each case */
+
+    xx_yy = vec_sel( col0, col1, select_y );
+    //xx_yy_zz_xx = vec_perm( xx_yy, col2, _VECTORMATH_PERM_XYCX );
+    //yy_zz_xx_yy = vec_perm( xx_yy, col2, _VECTORMATH_PERM_YCXY );
+    //zz_xx_yy_zz = vec_perm( xx_yy, col2, _VECTORMATH_PERM_CXYC );
+    xx_yy_zz_xx = _mm_shuffle_ps( xx_yy, xx_yy, _MM_SHUFFLE(0,0,1,0) );
+    xx_yy_zz_xx = vec_sel( xx_yy_zz_xx, col2, select_z ); // TODO: Ck
+    yy_zz_xx_yy = _mm_shuffle_ps( xx_yy_zz_xx, xx_yy_zz_xx, _MM_SHUFFLE(1,0,2,1) );
+    zz_xx_yy_zz = _mm_shuffle_ps( xx_yy_zz_xx, xx_yy_zz_xx, _MM_SHUFFLE(2,1,0,2) );
+
+    diagSum = vec_add( vec_add( xx_yy_zz_xx, yy_zz_xx_yy ), zz_xx_yy_zz );    // xx + yy + zz in every lane
+    diagDiff = vec_sub( vec_sub( xx_yy_zz_xx, yy_zz_xx_yy ), zz_xx_yy_zz );
+    radicand = vec_add( vec_sel( diagDiff, diagSum, select_w ), _mm_set1_ps(1.0f) );    // lanes x/y/z: 1 + diagDiff, lane w: 1 + diagSum
+ //   invSqrt = rsqrtf4( radicand );
+	invSqrt = newtonrapson_rsqrt4( radicand );
+
+	/* Gather the off-diagonal elements. The two-digit tags in the trailing */
+    /* comments below appear to be <column index><element index>.           */
+    zy_xz_yx = vec_sel( col0, col1, select_z );									// zy_xz_yx = 00 01 12 03
+    //zy_xz_yx = vec_perm( zy_xz_yx, col2, _VECTORMATH_PERM_ZAYX );
+	zy_xz_yx = _mm_shuffle_ps( zy_xz_yx, zy_xz_yx, _MM_SHUFFLE(0,1,2,2) );		// zy_xz_yx = 12 12 01 00
+    zy_xz_yx = vec_sel( zy_xz_yx, vec_splat(col2, 0), select_y );				// zy_xz_yx = 12 20 01 00
+    yz_zx_xy = vec_sel( col0, col1, select_x );									// yz_zx_xy = 10 01 02 03
+    //yz_zx_xy = vec_perm( yz_zx_xy, col2, _VECTORMATH_PERM_BZXX );
+	yz_zx_xy = _mm_shuffle_ps( yz_zx_xy, yz_zx_xy, _MM_SHUFFLE(0,0,2,0) );		// yz_zx_xy = 10 02 10 10
+	yz_zx_xy = vec_sel( yz_zx_xy, vec_splat(col2, 1), select_x );				// yz_zx_xy = 21 02 10 10
+
+    sum = vec_add( zy_xz_yx, yz_zx_xy );
+    diff = vec_sub( zy_xz_yx, yz_zx_xy );
+
+    scale = vec_mul( invSqrt, _mm_set1_ps(0.5f) );    // 0.5 / sqrt( radicand ), per lane
+
+    //res0 = vec_perm( sum, diff, _VECTORMATH_PERM_XZYA );
+	res0 = _mm_shuffle_ps( sum, sum, _MM_SHUFFLE(0,1,2,0) );
+	res0 = vec_sel( res0, vec_splat(diff, 0), select_w );  // TODO: Ck
+    //res1 = vec_perm( sum, diff, _VECTORMATH_PERM_ZXXB );
+	res1 = _mm_shuffle_ps( sum, sum, _MM_SHUFFLE(0,0,0,2) );
+	res1 = vec_sel( res1, vec_splat(diff, 1), select_w );  // TODO: Ck
+    //res2 = vec_perm( sum, diff, _VECTORMATH_PERM_YXXC );
+	res2 = _mm_shuffle_ps( sum, sum, _MM_SHUFFLE(0,0,0,1) );
+	res2 = vec_sel( res2, vec_splat(diff, 2), select_w );  // TODO: Ck
+    res3 = diff;
+    res0 = vec_sel( res0, radicand, select_x );    // each candidate carries the radicand in "its own" lane
+    res1 = vec_sel( res1, radicand, select_y );
+    res2 = vec_sel( res2, radicand, select_z );
+    res3 = vec_sel( res3, radicand, select_w );
+    res0 = vec_mul( res0, vec_splat( scale, 0 ) );    // candidate i is scaled by lane i of scale
+    res1 = vec_mul( res1, vec_splat( scale, 1 ) );
+    res2 = vec_mul( res2, vec_splat( scale, 2 ) );
+    res3 = vec_mul( res3, vec_splat( scale, 3 ) );
+
+    /* determine case and select answer */
+
+    xx = vec_splat( col0, 0 );
+    yy = vec_splat( col1, 1 );
+    zz = vec_splat( col2, 2 );
+    res = vec_sel( res0, res1, vec_cmpgt( yy, xx ) );    // later selects override earlier ones
+    res = vec_sel( res, res2, vec_and( vec_cmpgt( zz, xx ), vec_cmpgt( zz, yy ) ) );
+    res = vec_sel( res, res3, vec_cmpgt( vec_splat( diagSum, 0 ), _mm_setzero_ps() ) );    // trace > 0 wins over all
+    mVec128 = res;
+}
+
+VECTORMATH_FORCE_INLINE const Matrix3 outer( const Vector3 &tfrm0, const Vector3 &tfrm1 )
+{   // Outer product: column j of the result is tfrm0 scaled by element j of tfrm1.
+    return Matrix3(
+        ( tfrm0 * tfrm1.getX( ) ),
+        ( tfrm0 * tfrm1.getY( ) ),
+        ( tfrm0 * tfrm1.getZ( ) )
+    );
+}
+
+VECTORMATH_FORCE_INLINE const Matrix4 outer( const Vector4 &tfrm0, const Vector4 &tfrm1 )
+{   // Outer product: column j of the result is tfrm0 scaled by element j of tfrm1.
+    return Matrix4(
+        ( tfrm0 * tfrm1.getX( ) ),
+        ( tfrm0 * tfrm1.getY( ) ),
+        ( tfrm0 * tfrm1.getZ( ) ),
+        ( tfrm0 * tfrm1.getW( ) )
+    );
+}
+
+VECTORMATH_FORCE_INLINE const Vector3 rowMul( const Vector3 &vec, const Matrix3 & mat )
+{   // Row-vector times matrix: regroups mat into mcol0..2, then accumulates mcol0*vec.x + mcol1*vec.y + mcol2*vec.z.
+    __m128 tmp0, tmp1, mcol0, mcol1, mcol2, res;
+    __m128 xxxx, yyyy, zzzz;
+    tmp0 = vec_mergeh( mat.getCol0().get128(), mat.getCol2().get128() );
+    tmp1 = vec_mergel( mat.getCol0().get128(), mat.getCol2().get128() );
+    xxxx = vec_splat( vec.get128(), 0 );
+    mcol0 = vec_mergeh( tmp0, mat.getCol1().get128() );
+    //mcol1 = vec_perm( tmp0, mat.getCol1().get128(), _VECTORMATH_PERM_ZBWX );
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
+	mcol1 = _mm_shuffle_ps( tmp0, tmp0, _MM_SHUFFLE(0,3,2,2));    // shuffle + select stand in for the vec_perm commented out above
+	mcol1 = vec_sel(mcol1, mat.getCol1().get128(), select_y);
+    //mcol2 = vec_perm( tmp1, mat.getCol1().get128(), _VECTORMATH_PERM_XCYX );
+	mcol2 = _mm_shuffle_ps( tmp1, tmp1, _MM_SHUFFLE(0,1,1,0));
+	mcol2 = vec_sel(mcol2, vec_splat(mat.getCol1().get128(), 2), select_y);
+    yyyy = vec_splat( vec.get128(), 1 );
+    res = vec_mul( mcol0, xxxx );
+    zzzz = vec_splat( vec.get128(), 2 );
+    res = vec_madd( mcol1, yyyy, res );
+    res = vec_madd( mcol2, zzzz, res );
+    return Vector3( res );
+}
+
+VECTORMATH_FORCE_INLINE const Matrix3 crossMatrix( const Vector3 &vec )
+{   // Builds the cross-product matrix of vec from vec and its negation; columns work out to (0, z, -y), (-z, 0, x), (y, -x, 0) if vec_sel takes its 2nd operand where mask bits are set.
+    __m128 neg, res0, res1, res2;
+    neg = negatef4( vec.get128() );
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_x[4] = {0xffffffff, 0, 0, 0};
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_y[4] = {0, 0xffffffff, 0, 0};
+	VM_ATTRIBUTE_ALIGN16 unsigned int select_z[4] = {0, 0, 0xffffffff, 0};
+    //res0 = vec_perm( vec.get128(), neg, _VECTORMATH_PERM_XZBX );
+	res0 = _mm_shuffle_ps( vec.get128(), vec.get128(), _MM_SHUFFLE(0,2,2,0) );    // ( x, z, z, x )
+	res0 = vec_sel(res0, vec_splat(neg, 1), select_z);                              // z lane <- -y
+    //res1 = vec_perm( vec.get128(), neg, _VECTORMATH_PERM_CXXX );
+	res1 = vec_sel(vec_splat(vec.get128(), 0), vec_splat(neg, 2), select_x);       // ( -z, x, x, x )
+    //res2 = vec_perm( vec.get128(), neg, _VECTORMATH_PERM_YAXX );
+	res2 = _mm_shuffle_ps( vec.get128(), vec.get128(), _MM_SHUFFLE(0,0,1,1) );    // ( y, y, x, x )
+	res2 = vec_sel(res2, vec_splat(neg, 0), select_y);                              // y lane <- -x
+	VM_ATTRIBUTE_ALIGN16 unsigned int filter_x[4] = {0, 0xffffffff, 0xffffffff, 0xffffffff};
+	VM_ATTRIBUTE_ALIGN16 unsigned int filter_y[4] = {0xffffffff, 0, 0xffffffff, 0xffffffff};
+	VM_ATTRIBUTE_ALIGN16 unsigned int filter_z[4] = {0xffffffff, 0xffffffff, 0, 0xffffffff};
+    res0 = vec_and( res0, _mm_load_ps((float *)filter_x ) );    // zero the diagonal entry of each column
+    res1 = vec_and( res1, _mm_load_ps((float *)filter_y ) );
+    res2 = vec_and( res2, _mm_load_ps((float *)filter_z ) ); // TODO: Use selects?
+    return Matrix3(
+        Vector3( res0 ),
+        Vector3( res1 ),
+        Vector3( res2 )
+    );
+}
+
+VECTORMATH_FORCE_INLINE const Matrix3 crossMatrixMul( const Vector3 &vec, const Matrix3 & mat )
+{   // Each result column is cross( vec, corresponding column of mat ).
+    return Matrix3( cross( vec, mat.getCol0() ), cross( vec, mat.getCol1() ), cross( vec, mat.getCol2() ) );
+}
+
+} // namespace Aos
+} // namespace Vectormath
+
+#endif
diff --git a/Engine/lib/bullet/src/vectormath/sse/quat_aos.h b/Engine/lib/bullet/src/vectormath/sse/quat_aos.h
new file mode 100644
index 000000000..7eac59fe5
--- /dev/null
+++ b/Engine/lib/bullet/src/vectormath/sse/quat_aos.h
@@ -0,0 +1,579 @@
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+
+#ifndef _VECTORMATH_QUAT_AOS_CPP_H
+#define _VECTORMATH_QUAT_AOS_CPP_H
+
+//-----------------------------------------------------------------------------
+// Definitions
+
+#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
+#define _VECTORMATH_INTERNAL_FUNCTIONS
+
+#endif
+
+namespace Vectormath {
+namespace Aos {
+
+VECTORMATH_FORCE_INLINE void Quat::set128(vec_float4 vec)
+{   // Overwrites the whole 4-lane register with vec.
+    mVec128 = vec;
+}
+
+VECTORMATH_FORCE_INLINE Quat::Quat( const floatInVec &_x, const floatInVec &_y, const floatInVec &_z, const floatInVec &_w )
+{   // Packs lane 0 of each argument into ( x, y, z, w ) with two levels of low-lane interleaving.
+	mVec128 = _mm_unpacklo_ps(
+		_mm_unpacklo_ps( _x.get128(), _z.get128() ),    // ( x0, z0, x1, z1 )
+		_mm_unpacklo_ps( _y.get128(), _w.get128() ) );  // ( y0, w0, y1, w1 )
+}
+
+VECTORMATH_FORCE_INLINE Quat::Quat( const Vector3 &xyz, float _w )
+{   // Copies the Vector3 register, then writes _w into slot 3.
+    mVec128 = xyz.get128();
+    _vmathVfSetElement(mVec128, _w, 3);
+}
+
+
+
+VECTORMATH_FORCE_INLINE  Quat::Quat(const Quat& quat)
+{	// Copy constructor: duplicates the source register.
+	mVec128 = quat.get128();
+}
+
+VECTORMATH_FORCE_INLINE Quat::Quat( float _x, float _y, float _z, float _w )
+{	// _mm_setr_ps stores its arguments in lane order, so lanes 0..3 hold x, y, z, w.
+	mVec128 = _mm_setr_ps(_x, _y, _z, _w);
+}
+
+
+
+
+
+VECTORMATH_FORCE_INLINE Quat::Quat( const Vector3 &xyz, const floatInVec &_w )
+{   // Copies the Vector3 register, then inserts _w at slot 3 via _vmathVfInsert.
+    mVec128 = xyz.get128();
+    mVec128 = _vmathVfInsert(mVec128, _w.get128(), 3);
+}
+
+VECTORMATH_FORCE_INLINE Quat::Quat( const Vector4 &vec )
+{   // Reuses the Vector4's register as-is.
+    mVec128 = vec.get128();
+}
+
+VECTORMATH_FORCE_INLINE Quat::Quat( float scalar )
+{   // Takes the register of floatInVec( scalar ); presumably the scalar replicated in all lanes - confirm in floatInVec.
+    mVec128 = floatInVec(scalar).get128();
+}
+
+VECTORMATH_FORCE_INLINE Quat::Quat( const floatInVec &scalar )
+{   // Reuses the floatInVec's register as-is.
+    mVec128 = scalar.get128();
+}
+
+VECTORMATH_FORCE_INLINE Quat::Quat( __m128 vf4 )
+{   // Adopts the raw register without modification.
+    mVec128 = vf4;
+}
+
+VECTORMATH_FORCE_INLINE const Quat Quat::identity( )
+{   // Built from _VECTORMATH_UNIT_0001 (defined elsewhere; by its name the value (0, 0, 0, 1)).
+    return Quat( _VECTORMATH_UNIT_0001 );
+}
+
+VECTORMATH_FORCE_INLINE const Quat lerp( float t, const Quat &quat0, const Quat &quat1 )
+{   // Scalar convenience overload: wraps t in a floatInVec and forwards.
+    return lerp( floatInVec(t), quat0, quat1 );
+}
+
+VECTORMATH_FORCE_INLINE const Quat lerp( const floatInVec &t, const Quat &quat0, const Quat &quat1 )
+{   // Component-wise linear blend quat0 + (quat1 - quat0) * t; the result is not renormalized here.
+    return ( quat0 + ( ( quat1 - quat0 ) * t ) );
+}
+
+VECTORMATH_FORCE_INLINE const Quat slerp( float t, const Quat &unitQuat0, const Quat &unitQuat1 )
+{   // Scalar convenience overload: wraps t in a floatInVec and forwards.
+    return slerp( floatInVec(t), unitQuat0, unitQuat1 );
+}
+
+VECTORMATH_FORCE_INLINE const Quat slerp( const floatInVec &t, const Quat &unitQuat0, const Quat &unitQuat1 )
+{   // Spherical interpolation; uses plain (1-t, t) weights when |cos(angle)| is not below _VECTORMATH_SLERP_TOL. Lane notes assume vec_* helpers behave like their AltiVec namesakes.
+    Quat start;
+    vec_float4 scales, scale0, scale1, cosAngle, angle, tttt, oneMinusT, angles, sines;
+    __m128 selectMask;
+    cosAngle = _vmathVfDot4( unitQuat0.get128(), unitQuat1.get128() );
+    selectMask = (__m128)vec_cmpgt( _mm_setzero_ps(), cosAngle );    // set where cosAngle < 0
+    cosAngle = vec_sel( cosAngle, negatef4( cosAngle ), selectMask );    // make cosAngle non-negative
+    start = Quat( vec_sel( unitQuat0.get128(), negatef4( unitQuat0.get128() ), selectMask ) );    // negate quat0 in that case as well
+    selectMask = (__m128)vec_cmpgt( _mm_set1_ps(_VECTORMATH_SLERP_TOL), cosAngle );    // set where the sine-based weights are used
+    angle = acosf4( cosAngle );
+    tttt = t.get128();
+    oneMinusT = vec_sub( _mm_set1_ps(1.0f), tttt );
+    angles = vec_mergeh( _mm_set1_ps(1.0f), tttt );
+    angles = vec_mergeh( angles, oneMinusT );    // presumably ( 1, 1-t, t, 1-t )
+    angles = vec_madd( angles, angle, _mm_setzero_ps() );
+    sines = sinf4( angles );
+    scales = _mm_div_ps( sines, vec_splat( sines, 0 ) );    // every lane divided by lane 0 of sines
+    scale0 = vec_sel( oneMinusT, vec_splat( scales, 1 ), selectMask );
+    scale1 = vec_sel( tttt, vec_splat( scales, 2 ), selectMask );
+    return Quat( vec_madd( start.get128(), scale0, vec_mul( unitQuat1.get128(), scale1 ) ) );    // start*scale0 + unitQuat1*scale1
+}
+
+VECTORMATH_FORCE_INLINE const Quat squad( float t, const Quat &unitQuat0, const Quat &unitQuat1, const Quat &unitQuat2, const Quat &unitQuat3 )
+{   // Scalar convenience overload: wraps t in a floatInVec and forwards.
+    return squad( floatInVec(t), unitQuat0, unitQuat1, unitQuat2, unitQuat3 );
+}
+
+VECTORMATH_FORCE_INLINE const Quat squad( const floatInVec &t, const Quat &unitQuat0, const Quat &unitQuat1, const Quat &unitQuat2, const Quat &unitQuat3 )
+{   // Three slerps: slerp(t, q0, q3) and slerp(t, q1, q2) are blended with parameter 2*t*(1-t).
+    return slerp( ( ( floatInVec(2.0f) * t ) * ( floatInVec(1.0f) - t ) ), slerp( t, unitQuat0, unitQuat3 ), slerp( t, unitQuat1, unitQuat2 ) );
+}
+
+VECTORMATH_FORCE_INLINE __m128 Quat::get128( ) const
+{   // Returns the underlying register by value.
+    return mVec128;
+}
+
+VECTORMATH_FORCE_INLINE Quat & Quat::operator =( const Quat &quat )
+{   // Copies the register and returns *this.
+    mVec128 = quat.mVec128;
+    return *this;
+}
+
+VECTORMATH_FORCE_INLINE Quat & Quat::setXYZ( const Vector3 &vec )
+{   // Takes lanes 0-2 from vec and keeps the current lane 3 (mask sw marks only the w lane).
+	VM_ATTRIBUTE_ALIGN16 unsigned int sw[4] = {0, 0, 0, 0xffffffff};
+	mVec128 = vec_sel( vec.get128(), mVec128, sw );
+    return *this;
+}
+
+VECTORMATH_FORCE_INLINE const Vector3 Quat::getXYZ( ) const
+{   // Wraps the full register in a Vector3; lane 3 is passed along unmodified.
+    return Vector3( mVec128 );
+}
+
+VECTORMATH_FORCE_INLINE Quat & Quat::setX( float _x )
+{   // Writes _x into slot 0 via _vmathVfSetElement; returns *this.
+    _vmathVfSetElement(mVec128, _x, 0);
+    return *this;
+}
+
+VECTORMATH_FORCE_INLINE Quat & Quat::setX( const floatInVec &_x )
+{   // Inserts _x at slot 0 via _vmathVfInsert; returns *this.
+    mVec128 = _vmathVfInsert(mVec128, _x.get128(), 0);
+    return *this;
+}
+
+VECTORMATH_FORCE_INLINE const floatInVec Quat::getX( ) const
+{   // Returns a floatInVec constructed from the register and slot index 0.
+    return floatInVec( mVec128, 0 );
+}
+
+VECTORMATH_FORCE_INLINE Quat & Quat::setY( float _y )
+{   // Writes _y into slot 1 via _vmathVfSetElement; returns *this.
+    _vmathVfSetElement(mVec128, _y, 1);
+    return *this;
+}
+
+VECTORMATH_FORCE_INLINE Quat & Quat::setY( const floatInVec &_y )
+{   // Inserts _y at slot 1 via _vmathVfInsert; returns *this.
+    mVec128 = _vmathVfInsert(mVec128, _y.get128(), 1);
+    return *this;
+}
+
+VECTORMATH_FORCE_INLINE const floatInVec Quat::getY( ) const
+{   // Returns a floatInVec constructed from the register and slot index 1.
+    return floatInVec( mVec128, 1 );
+}
+
+VECTORMATH_FORCE_INLINE Quat & Quat::setZ( float _z )
+{   // Writes _z into slot 2 via _vmathVfSetElement; returns *this.
+    _vmathVfSetElement(mVec128, _z, 2);
+    return *this;
+}
+
+VECTORMATH_FORCE_INLINE Quat & Quat::setZ( const floatInVec &_z )
+{   // Inserts _z at slot 2 via _vmathVfInsert; returns *this.
+    mVec128 = _vmathVfInsert(mVec128, _z.get128(), 2);
+    return *this;
+}
+
+VECTORMATH_FORCE_INLINE const floatInVec Quat::getZ( ) const
+{   // Returns a floatInVec constructed from the register and slot index 2.
+    return floatInVec( mVec128, 2 );
+}
+
+VECTORMATH_FORCE_INLINE Quat & Quat::setW( float _w )
+{   // Writes _w into slot 3 via _vmathVfSetElement; returns *this.
+    _vmathVfSetElement(mVec128, _w, 3);
+    return *this;
+}
+
+VECTORMATH_FORCE_INLINE Quat & Quat::setW( const floatInVec &_w )
+{   // Inserts _w at slot 3 via _vmathVfInsert; returns *this.
+    mVec128 = _vmathVfInsert(mVec128, _w.get128(), 3);
+    return *this;
+}
+
+VECTORMATH_FORCE_INLINE const floatInVec Quat::getW( ) const
+{   // Returns a floatInVec constructed from the register and slot index 3.
+    return floatInVec( mVec128, 3 );
+}
+
+VECTORMATH_FORCE_INLINE Quat & Quat::setElem( int idx, float value )
+{   // Writes value into slot idx; idx is not range-checked here (presumably must be 0..3).
+    _vmathVfSetElement(mVec128, value, idx);
+    return *this;
+}
+
+VECTORMATH_FORCE_INLINE Quat & Quat::setElem( int idx, const floatInVec &value )
+{   // Inserts value at slot idx; idx is not range-checked here (presumably must be 0..3).
+    mVec128 = _vmathVfInsert(mVec128, value.get128(), idx);
+    return *this;
+}
+
+VECTORMATH_FORCE_INLINE const floatInVec Quat::getElem( int idx ) const
+{   // Returns a floatInVec constructed from the register and slot idx; idx is not range-checked here.
+    return floatInVec( mVec128, idx );
+}
+
+VECTORMATH_FORCE_INLINE VecIdx Quat::operator []( int idx )
+{   // Non-const indexing returns a VecIdx proxy bound to this register and slot idx.
+    return VecIdx( mVec128, idx );
+}
+
+VECTORMATH_FORCE_INLINE const floatInVec Quat::operator []( int idx ) const
+{   // Const indexing returns the element by value, same expression as getElem( idx ).
+    return floatInVec( mVec128, idx );
+}
+
+VECTORMATH_FORCE_INLINE const Quat Quat::operator +( const Quat &quat ) const
+{   // Lane-wise sum of the two registers.
+    return Quat( _mm_add_ps( mVec128, quat.mVec128 ) );
+}
+
+
+VECTORMATH_FORCE_INLINE const Quat Quat::operator -( const Quat &quat ) const
+{   // Lane-wise difference (this - quat).
+    return Quat( _mm_sub_ps( mVec128, quat.mVec128 ) );
+}
+
+VECTORMATH_FORCE_INLINE const Quat Quat::operator *( float scalar ) const
+{   // Scalar convenience overload: wraps scalar in a floatInVec and forwards.
+    return *this * floatInVec(scalar);
+}
+
+VECTORMATH_FORCE_INLINE const Quat Quat::operator *( const floatInVec &scalar ) const
+{   // Lane-wise product with scalar's register (assumes the scalar is replicated in every lane - confirm in floatInVec).
+    return Quat( _mm_mul_ps( mVec128, scalar.get128() ) );
+}
+
+VECTORMATH_FORCE_INLINE Quat & Quat::operator +=( const Quat &quat )
+{
+    // In-place add: evaluate the binary operator, assign, and hand back this object.
+    return ( *this = *this + quat );
+}
+
+VECTORMATH_FORCE_INLINE Quat & Quat::operator -=( const Quat &quat )
+{
+    // In-place subtract: evaluate the binary operator, assign, and hand back this object.
+    return ( *this = *this - quat );
+}
+
+VECTORMATH_FORCE_INLINE Quat & Quat::operator *=( float scalar )
+{
+    // In-place scale: evaluate the binary operator, assign, and hand back this object.
+    return ( *this = *this * scalar );
+}
+
+VECTORMATH_FORCE_INLINE Quat & Quat::operator *=( const floatInVec &scalar )
+{
+    // In-place scale: evaluate the binary operator, assign, and hand back this object.
+    return ( *this = *this * scalar );
+}
+
+VECTORMATH_FORCE_INLINE const Quat Quat::operator /( float scalar ) const
+{   // Scalar convenience overload: wraps scalar in a floatInVec and forwards.
+    return *this / floatInVec(scalar);
+}
+
+VECTORMATH_FORCE_INLINE const Quat Quat::operator /( const floatInVec &scalar ) const
+{   // Lane-wise true division (_mm_div_ps, not a reciprocal estimate); no zero check is performed.
+    return Quat( _mm_div_ps( mVec128, scalar.get128() ) );
+}
+
+VECTORMATH_FORCE_INLINE Quat & Quat::operator /=( float scalar )
+{
+    // In-place divide: evaluate the binary operator, assign, and hand back this object.
+    return ( *this = *this / scalar );
+}
+
+VECTORMATH_FORCE_INLINE Quat & Quat::operator /=( const floatInVec &scalar )
+{
+    // In-place divide: evaluate the binary operator, assign, and hand back this object.
+    return ( *this = *this / scalar );
+}
+
+VECTORMATH_FORCE_INLINE const Quat Quat::operator -( ) const
+{	// Negates all four lanes by subtracting from zero (so a +0 lane stays +0 rather than becoming -0).
+	return Quat(_mm_sub_ps( _mm_setzero_ps(), mVec128 ) );
+}
+
+VECTORMATH_FORCE_INLINE const Quat operator *( float scalar, const Quat &quat )
+{   // scalar * quat: wraps scalar in a floatInVec and forwards to the floatInVec overload.
+    return floatInVec(scalar) * quat;
+}
+
+VECTORMATH_FORCE_INLINE const Quat operator *( const floatInVec &scalar, const Quat &quat )
+{   // scalar * quat is evaluated as quat * scalar (member operator).
+    return quat * scalar;
+}
+
+VECTORMATH_FORCE_INLINE const floatInVec dot( const Quat &quat0, const Quat &quat1 )
+{   // Slot 0 of _vmathVfDot4( quat0, quat1 ) - by its name, the 4-component dot product.
+    return floatInVec( _vmathVfDot4( quat0.get128(), quat1.get128() ), 0 );
+}
+
+VECTORMATH_FORCE_INLINE const floatInVec norm( const Quat &quat )
+{   // Dot of quat with itself with no square root applied; see length() for the rooted value.
+    return floatInVec(  _vmathVfDot4( quat.get128(), quat.get128() ), 0 );
+}
+
+VECTORMATH_FORCE_INLINE const floatInVec length( const Quat &quat )
+{   // Square root (_mm_sqrt_ps) of the dot of quat with itself, read from slot 0.
+    return floatInVec(  _mm_sqrt_ps(_vmathVfDot4( quat.get128(), quat.get128() )), 0 );
+}
+
+VECTORMATH_FORCE_INLINE const Quat normalize( const Quat &quat )
+{   // Multiplies quat by newtonrapson_rsqrt4 of its self-dot; there is no guard for a zero-length input.
+	vec_float4 dot =_vmathVfDot4( quat.get128(), quat.get128());
+    return Quat( _mm_mul_ps( quat.get128(), newtonrapson_rsqrt4( dot ) ) );
+}
+
+
+VECTORMATH_FORCE_INLINE const Quat Quat::rotation( const Vector3 &unitVec0, const Vector3 &unitVec1 )
+{   // Rotation taking unitVec0 to unitVec1: xyz = cross / (2*cos(h)), w = cos(h), h = half the angle. NOTE(review): opposite inputs give a zero radicand and a non-finite result.
+    Vector3 crossVec;
+    __m128 cosAngle, cosAngleX2Plus2, recipCosHalfAngleX2, cosHalfAngleX2, res;
+    cosAngle = _vmathVfDot3( unitVec0.get128(), unitVec1.get128() );
+    cosAngleX2Plus2 = vec_madd( cosAngle, _mm_set1_ps(2.0f), _mm_set1_ps(2.0f) );    // 2 + 2*cos(angle) = (2*cos(h))^2
+    recipCosHalfAngleX2 = newtonrapson_rsqrt4( cosAngleX2Plus2 );    // refined rsqrt, as normalize() uses; raw _mm_rsqrt_ps is only ~12-bit accurate and left the result measurably non-unit
+    cosHalfAngleX2 = vec_mul( recipCosHalfAngleX2, cosAngleX2Plus2 );    // x * rsqrt(x) = sqrt(x) = 2*cos(h)
+    crossVec = cross( unitVec0, unitVec1 );
+    res = vec_mul( crossVec.get128(), recipCosHalfAngleX2 );
+	VM_ATTRIBUTE_ALIGN16 unsigned int sw[4] = {0, 0, 0, 0xffffffff};
+    res = vec_sel( res, vec_mul( cosHalfAngleX2, _mm_set1_ps(0.5f) ), sw );    // w lane <- cos(h)
+    return Quat( res );
+}
+
+VECTORMATH_FORCE_INLINE const Quat Quat::rotation( float radians, const Vector3 &unitVec )
+{   // Scalar convenience overload: wraps radians in a floatInVec and forwards.
+    return rotation( floatInVec(radians), unitVec );
+}
+
+VECTORMATH_FORCE_INLINE const Quat Quat::rotation( const floatInVec &radians, const Vector3 &unitVec )
+{   // Axis-angle to quaternion: xyz = unitVec * sin(radians/2), w = cos(radians/2).
+    __m128 s, c, angle, res;
+    angle = vec_mul( radians.get128(), _mm_set1_ps(0.5f) );    // half angle
+    sincosf4( angle, &s, &c );
+	VM_ATTRIBUTE_ALIGN16 unsigned int sw[4] = {0, 0, 0, 0xffffffff};
+    res = vec_sel( vec_mul( unitVec.get128(), s ), c, sw );    // w lane <- c, other lanes keep unitVec * s
+    return Quat( res );
+}
+
+VECTORMATH_FORCE_INLINE const Quat Quat::rotationX( float radians )
+{   // Scalar convenience overload: wraps radians in a floatInVec and forwards.
+    return rotationX( floatInVec(radians) );
+}
+
+VECTORMATH_FORCE_INLINE const Quat Quat::rotationX( const floatInVec &radians )
+{   // Result is ( sin(radians/2), 0, 0, cos(radians/2) ).
+    __m128 s, c, angle, res;
+    angle = vec_mul( radians.get128(), _mm_set1_ps(0.5f) );    // half angle
+    sincosf4( angle, &s, &c );
+	VM_ATTRIBUTE_ALIGN16 unsigned int xsw[4] = {0xffffffff, 0, 0, 0};
+	VM_ATTRIBUTE_ALIGN16 unsigned int wsw[4] = {0, 0, 0, 0xffffffff};
+    res = vec_sel( _mm_setzero_ps(), s, xsw );    // x lane <- s
+    res = vec_sel( res, c, wsw );                 // w lane <- c
+    return Quat( res );
+}
+
+VECTORMATH_FORCE_INLINE const Quat Quat::rotationY( float radians )
+{   // Scalar convenience overload: wraps radians in a floatInVec and forwards.
+    return rotationY( floatInVec(radians) );
+}
+
+VECTORMATH_FORCE_INLINE const Quat Quat::rotationY( const floatInVec &radians )
+{   // Result is ( 0, sin(radians/2), 0, cos(radians/2) ).
+    __m128 s, c, angle, res;
+    angle = vec_mul( radians.get128(), _mm_set1_ps(0.5f) );    // half angle
+    sincosf4( angle, &s, &c );
+	VM_ATTRIBUTE_ALIGN16 unsigned int ysw[4] = {0, 0xffffffff, 0, 0};
+	VM_ATTRIBUTE_ALIGN16 unsigned int wsw[4] = {0, 0, 0, 0xffffffff};
+    res = vec_sel( _mm_setzero_ps(), s, ysw );    // y lane <- s
+    res = vec_sel( res, c, wsw );                 // w lane <- c
+    return Quat( res );
+}
+
+VECTORMATH_FORCE_INLINE const Quat Quat::rotationZ( float radians )
+{   // Scalar convenience overload: wraps radians in a floatInVec and forwards.
+    return rotationZ( floatInVec(radians) );
+}
+
+VECTORMATH_FORCE_INLINE const Quat Quat::rotationZ( const floatInVec &radians )
+{   // Result is ( 0, 0, sin(radians/2), cos(radians/2) ).
+    __m128 s, c, angle, res;
+    angle = vec_mul( radians.get128(), _mm_set1_ps(0.5f) );    // half angle
+    sincosf4( angle, &s, &c );
+	VM_ATTRIBUTE_ALIGN16 unsigned int zsw[4] = {0, 0, 0xffffffff, 0};
+	VM_ATTRIBUTE_ALIGN16 unsigned int wsw[4] = {0, 0, 0, 0xffffffff};
+    res = vec_sel( _mm_setzero_ps(), s, zsw );    // z lane <- s
+    res = vec_sel( res, c, wsw );                 // w lane <- c
+    return Quat( res );
+}
+
+VECTORMATH_FORCE_INLINE const Quat Quat::operator *( const Quat &quat ) const
+{   // Quaternion (Hamilton) product this * quat: qv holds the vector part, qw the scalar part, merged at the end.
+    __m128 ldata, rdata, qv, tmp0, tmp1, tmp2, tmp3;
+    __m128 product, l_wxyz, r_wxyz, xy, qw;
+    ldata = mVec128;
+    rdata = quat.mVec128;
+    tmp0 = _mm_shuffle_ps( ldata, ldata, _MM_SHUFFLE(3,0,2,1) );    // l.( y, z, x, w )
+    tmp1 = _mm_shuffle_ps( rdata, rdata, _MM_SHUFFLE(3,1,0,2) );    // r.( z, x, y, w )
+    tmp2 = _mm_shuffle_ps( ldata, ldata, _MM_SHUFFLE(3,1,0,2) );    // l.( z, x, y, w )
+    tmp3 = _mm_shuffle_ps( rdata, rdata, _MM_SHUFFLE(3,0,2,1) );    // r.( y, z, x, w )
+    qv = vec_mul( vec_splat( ldata, 3 ), rdata );                    // l.w * r
+    qv = vec_madd( vec_splat( rdata, 3 ), ldata, qv );               // + r.w * l
+    qv = vec_madd( tmp0, tmp1, qv );                                 // + first half of cross( l, r )
+    qv = vec_nmsub( tmp2, tmp3, qv );                                // - second half of cross( l, r )
+    product = vec_mul( ldata, rdata );
+    l_wxyz = vec_sld( ldata, ldata, 12 );    // presumably rotates lanes to ( w, x, y, z ), per the variable name
+    r_wxyz = vec_sld( rdata, rdata, 12 );
+    qw = vec_nmsub( l_wxyz, r_wxyz, product );
+    xy = vec_madd( l_wxyz, r_wxyz, product );
+    qw = vec_sub( qw, vec_sld( xy, xy, 8 ) );    // only the w lane of qw is used below
+	VM_ATTRIBUTE_ALIGN16 unsigned int sw[4] = {0, 0, 0, 0xffffffff};
+    return Quat( vec_sel( qv, qw, sw ) );
+}
+
+VECTORMATH_FORCE_INLINE Quat & Quat::operator *=( const Quat &quat )
+{
+    // In-place quaternion product (this * quat): evaluate, assign, and hand back this object.
+    return ( *this = *this * quat );
+}
+
+VECTORMATH_FORCE_INLINE const Vector3 rotate( const Quat &quat, const Vector3 &vec )
+{    __m128 qdata, vdata, product, tmp0, tmp1, tmp2, tmp3, wwww, qv, qw, res;    // Rotates vec by quat in two stages: (qv, qw) from quat and vec, then a second combination with quat.
+    qdata = quat.get128();
+    vdata = vec.get128();
+    tmp0 = _mm_shuffle_ps( qdata, qdata, _MM_SHUFFLE(3,0,2,1) );    // q.( y, z, x, w )
+    tmp1 = _mm_shuffle_ps( vdata, vdata, _MM_SHUFFLE(3,1,0,2) );    // v.( z, x, y, w )
+    tmp2 = _mm_shuffle_ps( qdata, qdata, _MM_SHUFFLE(3,1,0,2) );    // q.( z, x, y, w )
+    tmp3 = _mm_shuffle_ps( vdata, vdata, _MM_SHUFFLE(3,0,2,1) );    // v.( y, z, x, w )
+    wwww = vec_splat( qdata, 3 );
+    qv = vec_mul( wwww, vdata );                // q.w * v
+    qv = vec_madd( tmp0, tmp1, qv );            // + first half of cross( q.xyz, v )
+    qv = vec_nmsub( tmp2, tmp3, qv );           // - second half of cross( q.xyz, v )
+    product = vec_mul( qdata, vdata );
+    qw = vec_madd( vec_sld( qdata, qdata, 4 ), vec_sld( vdata, vdata, 4 ), product );
+    qw = vec_add( vec_sld( product, product, 8 ), qw );    // lane 0 presumably holds dot( q.xyz, v ); only lane 0 is used below
+    tmp1 = _mm_shuffle_ps( qv, qv, _MM_SHUFFLE(3,1,0,2) );
+    tmp3 = _mm_shuffle_ps( qv, qv, _MM_SHUFFLE(3,0,2,1) );
+    res = vec_mul( vec_splat( qw, 0 ), qdata );
+    res = vec_madd( wwww, qv, res );
+    res = vec_madd( tmp0, tmp1, res );          // + first half of cross( q.xyz, qv )
+    res = vec_nmsub( tmp2, tmp3, res );         // - second half of cross( q.xyz, qv )
+    return Vector3( res );
+}
+
+VECTORMATH_FORCE_INLINE const Quat conj( const Quat &quat )
+{   // Conjugate: XORs the IEEE sign bit (0x80000000) of lanes x, y, z; lane w is unchanged.
+	VM_ATTRIBUTE_ALIGN16 unsigned int sw[4] = {0x80000000,0x80000000,0x80000000,0};
+    return Quat( vec_xor( quat.get128(), _mm_load_ps((float *)sw) ) );
+}
+
+VECTORMATH_FORCE_INLINE const Quat select( const Quat &quat0, const Quat &quat1, bool select1 )
+{   // Returns quat1 when select1 is true, else quat0. Selects directly rather than forwarding to the boolInVec overload, whose definition is commented out in this file.
+    return Quat( vec_sel( quat0.get128(), quat1.get128(), boolInVec(select1).get128() ) );
+}
+
+//VECTORMATH_FORCE_INLINE const Quat select( const Quat &quat0, const Quat &quat1, const boolInVec &select1 )
+//{
+//    return Quat( vec_sel( quat0.get128(), quat1.get128(), select1.get128() ) );
+//}
+
+VECTORMATH_FORCE_INLINE void loadXYZW(Quat& quat, const float* fptr)
+{   // Loads four consecutive floats fptr[0..3] into quat.
+#ifdef USE_SSE3_LDDQU
+	quat = Quat(	SSEFloat(_mm_lddqu_si128((const __m128i*)((float*)(fptr)))).m128		);    // single unaligned 128-bit load (SSE3)
+#else
+	SSEFloat fl;    // fallback: copy element by element through the SSEFloat union, then take its m128 view
+	fl.f[0] = fptr[0];
+	fl.f[1] = fptr[1];
+	fl.f[2] = fptr[2];
+	fl.f[3] = fptr[3];
+    quat = Quat(	fl.m128);
+#endif
+    
+
+}
+
+VECTORMATH_FORCE_INLINE void storeXYZW(const Quat& quat, float* fptr)
+{
+	// Write the four elements out one scalar at a time, slot i -> fptr[i].
+	for ( int slot = 0; slot < 4; ++slot )
+	{
+		fptr[slot] = quat.getElem( slot );
+	}
+}
+
+
+
+#ifdef _VECTORMATH_DEBUG
+
+VECTORMATH_FORCE_INLINE void print( const Quat &quat )
+{   // Debug helper (only built under _VECTORMATH_DEBUG): prints the four lanes as "( x y z w )".
+    union { __m128 v; float s[4]; } tmp;    // union gives scalar access to the register's lanes
+    tmp.v = quat.get128();
+    printf( "( %f %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2], tmp.s[3] );
+}
+
+VECTORMATH_FORCE_INLINE void print( const Quat &quat, const char * name )
+{   // Debug helper: same as print( quat ) but prefixed with "<name>: " on the same line.
+    union { __m128 v; float s[4]; } tmp;    // union gives scalar access to the register's lanes
+    tmp.v = quat.get128();
+    printf( "%s: ( %f %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2], tmp.s[3] );
+}
+
+#endif
+
+} // namespace Aos
+} // namespace Vectormath
+
+#endif
diff --git a/Engine/lib/bullet/src/vectormath/sse/vec_aos.h b/Engine/lib/bullet/src/vectormath/sse/vec_aos.h
new file mode 100644
index 000000000..35aeeaf16
--- /dev/null
+++ b/Engine/lib/bullet/src/vectormath/sse/vec_aos.h
@@ -0,0 +1,1455 @@
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_VEC_AOS_CPP_H
+#define _VECTORMATH_VEC_AOS_CPP_H
+
+//-----------------------------------------------------------------------------
+// Constants
+// for permutes words are labeled [x,y,z,w] [a,b,c,d]
+
+// Byte-index selectors, one 32-bit word each: X/Y/Z/W name bytes 0x00-0x0f
+// and A/B/C/D name bytes 0x10-0x1f.
+#define _VECTORMATH_PERM_X 0x00010203
+#define _VECTORMATH_PERM_Y 0x04050607
+#define _VECTORMATH_PERM_Z 0x08090a0b
+#define _VECTORMATH_PERM_W 0x0c0d0e0f
+#define _VECTORMATH_PERM_A 0x10111213
+#define _VECTORMATH_PERM_B 0x14151617
+#define _VECTORMATH_PERM_C 0x18191a1b
+#define _VECTORMATH_PERM_D 0x1c1d1e1f
+// Four-word selector combinations assembled from the words above.
+// NOTE(review): these expand to (vec_uchar16)(vec_uint4){...} compound
+// literals; confirm they are still used and compile on the targeted compilers.
+#define _VECTORMATH_PERM_XYZA (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_A }
+#define _VECTORMATH_PERM_ZXYW (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_W }
+#define _VECTORMATH_PERM_YZXW (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_X, _VECTORMATH_PERM_W }
+#define _VECTORMATH_PERM_YZAB (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Y, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_A, _VECTORMATH_PERM_B }
+#define _VECTORMATH_PERM_ZABC (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_Z, _VECTORMATH_PERM_A, _VECTORMATH_PERM_B, _VECTORMATH_PERM_C }
+#define _VECTORMATH_PERM_XYAW (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_Y, _VECTORMATH_PERM_A, _VECTORMATH_PERM_W }
+#define _VECTORMATH_PERM_XAZW (vec_uchar16)(vec_uint4){ _VECTORMATH_PERM_X, _VECTORMATH_PERM_A, _VECTORMATH_PERM_Z, _VECTORMATH_PERM_W }
+// Word masks with exactly one of the four 32-bit words set to all ones.
+#define _VECTORMATH_MASK_0xF000 (vec_uint4){ 0xffffffff, 0, 0, 0 }
+#define _VECTORMATH_MASK_0x0F00 (vec_uint4){ 0, 0xffffffff, 0, 0 }
+#define _VECTORMATH_MASK_0x00F0 (vec_uint4){ 0, 0, 0xffffffff, 0 }
+#define _VECTORMATH_MASK_0x000F (vec_uint4){ 0, 0, 0, 0xffffffff }
+// Unit vectors along x, y, z and w, built with _mm_setr_ps (lane 0 first).
+#define _VECTORMATH_UNIT_1000 _mm_setr_ps(1.0f,0.0f,0.0f,0.0f) // (__m128){ 1.0f, 0.0f, 0.0f, 0.0f }
+#define _VECTORMATH_UNIT_0100 _mm_setr_ps(0.0f,1.0f,0.0f,0.0f) // (__m128){ 0.0f, 1.0f, 0.0f, 0.0f }
+#define _VECTORMATH_UNIT_0010 _mm_setr_ps(0.0f,0.0f,1.0f,0.0f) // (__m128){ 0.0f, 0.0f, 1.0f, 0.0f }
+#define _VECTORMATH_UNIT_0001 _mm_setr_ps(0.0f,0.0f,0.0f,1.0f) // (__m128){ 0.0f, 0.0f, 0.0f, 1.0f }
+// Threshold the slerp() routines compare the dot product of their inputs
+// against when building their weight-selection mask.
+#define _VECTORMATH_SLERP_TOL 0.999f
+//_VECTORMATH_SLERP_TOLF
+
+//-----------------------------------------------------------------------------
+// Definitions
+
+#ifndef _VECTORMATH_INTERNAL_FUNCTIONS
+#define _VECTORMATH_INTERNAL_FUNCTIONS
+
+// Wrapper over _mm_shuffle_ps that takes the four lane selectors in x, y, z, w
+// order and hands them to _MM_SHUFFLE in the reversed order it expects.
+#define     _vmath_shufps(a, b, immx, immy, immz, immw) _mm_shuffle_ps(a, b, _MM_SHUFFLE(immw, immz, immy, immx))
+// Three-component dot product: multiplies the inputs lane-wise, then adds
+// the vec_splat of lane 0 to the sum of the vec_splats of lanes 1 and 2.
+// Lane 3 of the inputs does not contribute.
+static VECTORMATH_FORCE_INLINE __m128 _vmathVfDot3( __m128 vec0, __m128 vec1 )
+{
+    const __m128 prod = _mm_mul_ps( vec0, vec1 );
+    const __m128 xTerm = vec_splat( prod, 0 );
+    const __m128 yTerm = vec_splat( prod, 1 );
+    const __m128 zTerm = vec_splat( prod, 2 );
+    const __m128 yzSum = _mm_add_ps( yTerm, zTerm );
+    return _mm_add_ps( xTerm, yzSum );
+}
+
+// Four-component dot product. Each lane of the lane-wise product is broadcast
+// with _mm_shuffle_ps and the broadcasts are summed as x + (y + (z + w)), so
+// every lane of the result carries the same value.
+static VECTORMATH_FORCE_INLINE __m128 _vmathVfDot4( __m128 vec0, __m128 vec1 )
+{
+    const __m128 prod = _mm_mul_ps( vec0, vec1 );
+    const __m128 xTerm = _mm_shuffle_ps( prod, prod, _MM_SHUFFLE(0,0,0,0) );
+    const __m128 yTerm = _mm_shuffle_ps( prod, prod, _MM_SHUFFLE(1,1,1,1) );
+    const __m128 zTerm = _mm_shuffle_ps( prod, prod, _MM_SHUFFLE(2,2,2,2) );
+    const __m128 wTerm = _mm_shuffle_ps( prod, prod, _MM_SHUFFLE(3,3,3,3) );
+    const __m128 zwSum = _mm_add_ps( zTerm, wTerm );
+    const __m128 yzwSum = _mm_add_ps( yTerm, zwSum );
+    return _mm_add_ps( xTerm, yzwSum );
+}
+
+// Cross product of the xyz parts of two vectors, built from rotated copies:
+//   vec_nmsub( a.zxy, b.yzx, vec_mul( a.yzx, b.zxy ) )
+// (assumes vec_nmsub(a, b, c) evaluates c - a*b - confirm against its definition).
+// Lane 3 of each shuffled operand is the original lane 3.
+static VECTORMATH_FORCE_INLINE __m128 _vmathVfCross( __m128 vec0, __m128 vec1 )
+{
+    const __m128 a_yzx = _mm_shuffle_ps( vec0, vec0, _MM_SHUFFLE(3,0,2,1) );
+    const __m128 b_zxy = _mm_shuffle_ps( vec1, vec1, _MM_SHUFFLE(3,1,0,2) );
+    const __m128 a_zxy = _mm_shuffle_ps( vec0, vec0, _MM_SHUFFLE(3,1,0,2) );
+    const __m128 b_yzx = _mm_shuffle_ps( vec1, vec1, _MM_SHUFFLE(3,0,2,1) );
+    const __m128 firstTerm = vec_mul( a_yzx, b_zxy );
+    return vec_nmsub( a_zxy, b_yzx, firstTerm );
+}
+/*
+static VECTORMATH_FORCE_INLINE vec_uint4 _vmathVfToHalfFloatsUnpacked(__m128 v)
+{
+#if 0
+    vec_int4 bexp;
+    vec_uint4 mant, sign, hfloat;
+    vec_uint4 notZero, isInf;
+    const vec_uint4 hfloatInf = (vec_uint4)(0x00007c00u);
+    const vec_uint4 mergeMant = (vec_uint4)(0x000003ffu);
+    const vec_uint4 mergeSign = (vec_uint4)(0x00008000u);
+
+    sign = vec_sr((vec_uint4)v, (vec_uint4)16);
+    mant = vec_sr((vec_uint4)v, (vec_uint4)13);
+    bexp = vec_and(vec_sr((vec_int4)v, (vec_uint4)23), (vec_int4)0xff);
+
+    notZero = (vec_uint4)vec_cmpgt(bexp, (vec_int4)112);
+    isInf = (vec_uint4)vec_cmpgt(bexp, (vec_int4)142);
+
+    bexp = _mm_add_ps(bexp, (vec_int4)-112);
+    bexp = vec_sl(bexp, (vec_uint4)10);
+
+    hfloat = vec_sel((vec_uint4)bexp, mant, mergeMant);
+    hfloat = vec_sel((vec_uint4)(0), hfloat, notZero);
+    hfloat = vec_sel(hfloat, hfloatInf, isInf);
+    hfloat = vec_sel(hfloat, sign, mergeSign);
+
+    return hfloat;
+#else
+	assert(0);
+	return _mm_setzero_ps();
+#endif
+}
+
+static VECTORMATH_FORCE_INLINE vec_ushort8 _vmath2VfToHalfFloats(__m128 u, __m128 v)
+{
+#if 0
+    vec_uint4 hfloat_u, hfloat_v;
+    const vec_uchar16 pack = (vec_uchar16){2,3,6,7,10,11,14,15,18,19,22,23,26,27,30,31};
+    hfloat_u = _vmathVfToHalfFloatsUnpacked(u);
+    hfloat_v = _vmathVfToHalfFloatsUnpacked(v);
+    return (vec_ushort8)vec_perm(hfloat_u, hfloat_v, pack);
+#else
+	assert(0);
+	return _mm_setzero_si128();
+#endif
+}
+*/
+
+// Returns a copy of dst in which lane `slot` has been replaced by the same
+// lane of src. `slot` is used directly as an array index and is not checked.
+static VECTORMATH_FORCE_INLINE __m128 _vmathVfInsert(__m128 dst, __m128 src, int slot)
+{
+    SSEFloat from;
+    SSEFloat into;
+    from.m128 = src;
+    into.m128 = dst;
+    into.f[slot] = from.f[slot];
+    return into.m128;
+}
+
+// Writes `scalar` into lane `slot` of the lvalue `vec` by addressing it as an
+// array of floats. Every argument and the whole expansion are parenthesised
+// so the macro stays correct when handed compound expressions or used inside
+// a larger expression.
+#define _vmathVfSetElement(vec, scalar, slot) (((float *)&(vec))[(slot)] = (scalar))
+
+// Broadcasts one float into all four lanes of an __m128.
+static VECTORMATH_FORCE_INLINE __m128 _vmathVfSplatScalar(float scalar)
+{
+    const __m128 broadcast = _mm_set1_ps(scalar);
+    return broadcast;
+}
+
+#endif
+
+namespace Vectormath {
+namespace Aos {
+
+	
+// Read access to the lane this VecIdx refers to.
+// With _VECTORMATH_NO_SCALAR_CAST defined, the implicit conversion yields a
+// floatInVec and the raw float is exposed through getAsFloat(); otherwise the
+// implicit conversion itself yields the float. Both float-returning variants
+// share the single function body that follows the #endif.
+#ifdef _VECTORMATH_NO_SCALAR_CAST
+VECTORMATH_FORCE_INLINE VecIdx::operator floatInVec() const
+{
+    return floatInVec(ref, i);
+}
+
+VECTORMATH_FORCE_INLINE float VecIdx::getAsFloat() const
+#else
+VECTORMATH_FORCE_INLINE VecIdx::operator float() const
+#endif
+{
+    // Address the referenced vector as four floats and pick lane i.
+    return ((float *)&ref)[i];
+}
+
+// Stores a float into the referenced lane and returns that same float.
+VECTORMATH_FORCE_INLINE float VecIdx::operator =( float scalar )
+{
+    ((float *)&(ref))[i] = scalar;
+    return scalar;
+}
+
+// Copies lane i of the floatInVec's vector into the referenced lane and
+// returns the floatInVec that was passed in.
+VECTORMATH_FORCE_INLINE floatInVec VecIdx::operator =( const floatInVec &scalar )
+{
+    const __m128 merged = _vmathVfInsert(ref, scalar.get128(), i);
+    ref = merged;
+    return scalar;
+}
+
+// Assigns the lane referenced by another VecIdx to the lane referenced by
+// this one, going through the floatInVec overload.
+VECTORMATH_FORCE_INLINE floatInVec VecIdx::operator =( const VecIdx& scalar )
+{
+    const floatInVec source(scalar.ref, scalar.i);
+    return *this = source;
+}
+
+// Compound assignment on the referenced lane. Each float overload wraps its
+// argument in a floatInVec and forwards to the floatInVec overload, which
+// reads the lane, applies the operation and assigns the result back.
+
+// lane *= float
+VECTORMATH_FORCE_INLINE floatInVec VecIdx::operator *=( float scalar )
+{
+    const floatInVec wide(scalar);
+    return *this *= wide;
+}
+
+// lane *= floatInVec
+VECTORMATH_FORCE_INLINE floatInVec VecIdx::operator *=( const floatInVec &scalar )
+{
+    const floatInVec current(ref, i);
+    return *this = current * scalar;
+}
+
+// lane /= float
+VECTORMATH_FORCE_INLINE floatInVec VecIdx::operator /=( float scalar )
+{
+    const floatInVec wide(scalar);
+    return *this /= wide;
+}
+
+// lane /= floatInVec
+inline floatInVec VecIdx::operator /=( const floatInVec &scalar )
+{
+    const floatInVec current(ref, i);
+    return *this = current / scalar;
+}
+
+// lane += float
+VECTORMATH_FORCE_INLINE floatInVec VecIdx::operator +=( float scalar )
+{
+    const floatInVec wide(scalar);
+    return *this += wide;
+}
+
+// lane += floatInVec
+VECTORMATH_FORCE_INLINE floatInVec VecIdx::operator +=( const floatInVec &scalar )
+{
+    const floatInVec current(ref, i);
+    return *this = current + scalar;
+}
+
+// lane -= float
+VECTORMATH_FORCE_INLINE floatInVec VecIdx::operator -=( float scalar )
+{
+    const floatInVec wide(scalar);
+    return *this -= wide;
+}
+
+// lane -= floatInVec
+VECTORMATH_FORCE_INLINE floatInVec VecIdx::operator -=( const floatInVec &scalar )
+{
+    const floatInVec current(ref, i);
+    return *this = current - scalar;
+}
+
+// Copy constructor: takes over the other vector's 128-bit value.
+VECTORMATH_FORCE_INLINE Vector3::Vector3(const Vector3& vec)
+{
+    this->mVec128 = vec.get128();
+}
+
+// Replaces the whole 128-bit value of this vector.
+VECTORMATH_FORCE_INLINE void Vector3::set128(vec_float4 vec)
+{
+    this->mVec128 = vec;
+}
+
+
+// Builds a vector from three floats; the fourth lane is set to 0.0f.
+VECTORMATH_FORCE_INLINE Vector3::Vector3( float _x, float _y, float _z )
+{
+    const __m128 packed = _mm_setr_ps(_x, _y, _z, 0.0f);
+    mVec128 = packed;
+}
+
+// Builds a vector from three floatInVec values by interleaving low lanes:
+// first (x0 z0 x1 z1), then with y to give (x0 y0 z0 y1).
+VECTORMATH_FORCE_INLINE Vector3::Vector3( const floatInVec &_x, const floatInVec &_y, const floatInVec &_z )
+{
+    const __m128 xzLanes = _mm_unpacklo_ps( _x.get128(), _z.get128() );
+    const __m128 xyzLanes = _mm_unpacklo_ps( xzLanes, _y.get128() );
+    mVec128 = xyzLanes;
+}
+
+// Builds a vector holding the same 128-bit value as a point.
+VECTORMATH_FORCE_INLINE Vector3::Vector3( const Point3 &pnt )
+{
+    const __m128 bits = pnt.get128();
+    mVec128 = bits;
+}
+
+// Builds a vector from a float by way of a floatInVec temporary.
+VECTORMATH_FORCE_INLINE Vector3::Vector3( float scalar )
+{
+    const floatInVec wide(scalar);
+    mVec128 = wide.get128();
+}
+
+// Builds a vector holding the same 128-bit value as a floatInVec.
+VECTORMATH_FORCE_INLINE Vector3::Vector3( const floatInVec &scalar )
+{
+    const __m128 bits = scalar.get128();
+    mVec128 = bits;
+}
+
+// Builds a vector directly from a raw __m128.
+VECTORMATH_FORCE_INLINE Vector3::Vector3( __m128 vf4 )
+{
+    this->mVec128 = vf4;
+}
+
+// Unit vector (1, 0, 0) with a zero fourth lane.
+VECTORMATH_FORCE_INLINE const Vector3 Vector3::xAxis( )
+{
+    const __m128 unitX = _VECTORMATH_UNIT_1000;
+    return Vector3( unitX );
+}
+
+// Unit vector (0, 1, 0) with a zero fourth lane.
+VECTORMATH_FORCE_INLINE const Vector3 Vector3::yAxis( )
+{
+    const __m128 unitY = _VECTORMATH_UNIT_0100;
+    return Vector3( unitY );
+}
+
+// Unit vector (0, 0, 1) with a zero fourth lane.
+VECTORMATH_FORCE_INLINE const Vector3 Vector3::zAxis( )
+{
+    const __m128 unitZ = _VECTORMATH_UNIT_0010;
+    return Vector3( unitZ );
+}
+
+// Linear interpolation with a float parameter; forwards to the floatInVec
+// overload.
+VECTORMATH_FORCE_INLINE const Vector3 lerp( float t, const Vector3 &vec0, const Vector3 &vec1 )
+{
+    const floatInVec wideT(t);
+    return lerp( wideT, vec0, vec1 );
+}
+
+// Linear interpolation: vec0 + (vec1 - vec0) * t.
+VECTORMATH_FORCE_INLINE const Vector3 lerp( const floatInVec &t, const Vector3 &vec0, const Vector3 &vec1 )
+{
+    const Vector3 delta = vec1 - vec0;
+    const Vector3 step = delta * t;
+    return vec0 + step;
+}
+
+// Spherical interpolation with a float parameter; forwards to the floatInVec
+// overload.
+VECTORMATH_FORCE_INLINE const Vector3 slerp( float t, const Vector3 &unitVec0, const Vector3 &unitVec1 )
+{
+    const floatInVec wideT(t);
+    return slerp( wideT, unitVec0, unitVec1 );
+}
+
+// Spherical linear interpolation between two vectors; the parameter names
+// indicate both are expected to be unit length.
+// The result is unitVec0 * scale0 + unitVec1 * scale1 (assuming
+// vec_madd(a, b, c) is a*b + c and vec_sel(a, b, m) takes b where m is set -
+// confirm against their definitions). Where the 3-component dot product is
+// below _VECTORMATH_SLERP_TOL the scales are sin((1-t)*angle)/sin(angle) and
+// sin(t*angle)/sin(angle); elsewhere they are the plain weights (1-t) and t.
+VECTORMATH_FORCE_INLINE const Vector3 slerp( const floatInVec &t, const Vector3 &unitVec0, const Vector3 &unitVec1 )
+{
+    __m128 scales, scale0, scale1, cosAngle, angle, tttt, oneMinusT, angles, sines;
+    cosAngle = _vmathVfDot3( unitVec0.get128(), unitVec1.get128() );
+    // Lanes are all-ones where the tolerance exceeds cosAngle.
+    __m128 selectMask = _mm_cmpgt_ps( _mm_set1_ps(_VECTORMATH_SLERP_TOL), cosAngle );
+    angle = acosf4( cosAngle );
+    tttt = t.get128();
+    oneMinusT = _mm_sub_ps( _mm_set1_ps(1.0f), tttt );
+    angles = _mm_unpacklo_ps( _mm_set1_ps(1.0f), tttt ); // angles = 1, t, 1, t
+    angles = _mm_unpacklo_ps( angles, oneMinusT );		// angles = 1, 1-t, t, 1-t
+    angles = _mm_mul_ps( angles, angle );
+    sines = sinf4( angles );
+    // Divide every sine by the one in lane 0, which is sin(angle).
+    scales = _mm_div_ps( sines, vec_splat( sines, 0 ) );
+    scale0 = vec_sel( oneMinusT, vec_splat( scales, 1 ), selectMask );
+    scale1 = vec_sel( tttt, vec_splat( scales, 2 ), selectMask );
+    return Vector3( vec_madd( unitVec0.get128(), scale0, _mm_mul_ps( unitVec1.get128(), scale1 ) ) );
+}
+
+// Returns the raw 128-bit value held by this vector.
+VECTORMATH_FORCE_INLINE __m128 Vector3::get128( ) const
+{
+    return this->mVec128;
+}
+
+// Loads x, y and z from fptr[0..2] into a point.
+//
+// Default path: exactly three floats are read and the unused fourth lane is
+// set to 0.0f. The previous code also read fptr[3], which lies past the end
+// of a three-float source.
+// USE_SSE3_LDDQU path: one unaligned 16-byte load is issued, so code that
+// enables this macro must guarantee 16 readable bytes at fptr.
+VECTORMATH_FORCE_INLINE void loadXYZ(Point3& vec, const float* fptr)
+{
+#ifdef USE_SSE3_LDDQU
+    vec = Point3( SSEFloat(_mm_lddqu_si128((const __m128i*)((float*)(fptr)))).m128 );
+#else
+    vec = Point3( _mm_setr_ps( fptr[0], fptr[1], fptr[2], 0.0f ) );
+#endif //USE_SSE3_LDDQU
+}
+
+
+
+// Loads x, y and z from fptr[0..2] into a vector.
+//
+// Default path: exactly three floats are read and the unused fourth lane is
+// set to 0.0f. The previous code also read fptr[3], which lies past the end
+// of a three-float source.
+// USE_SSE3_LDDQU path: one unaligned 16-byte load is issued, so code that
+// enables this macro must guarantee 16 readable bytes at fptr.
+VECTORMATH_FORCE_INLINE void loadXYZ(Vector3& vec, const float* fptr)
+{
+#ifdef USE_SSE3_LDDQU
+    vec = Vector3( SSEFloat(_mm_lddqu_si128((const __m128i*)((float*)(fptr)))).m128 );
+#else
+    vec = Vector3( _mm_setr_ps( fptr[0], fptr[1], fptr[2], 0.0f ) );
+#endif //USE_SSE3_LDDQU
+}
+
+// Merges a vector into *quad using a mask whose only set word is the fourth
+// one (assuming vec_sel(a, b, mask) takes b where the mask is set, x, y and z
+// come from vec and the fourth lane of *quad is preserved - confirm).
+VECTORMATH_FORCE_INLINE void storeXYZ( const Vector3 &vec, __m128 * quad )
+{
+    VM_ATTRIBUTE_ALIGN16 unsigned int keepW[4] = {0, 0, 0, 0xffffffff}; // TODO: Centralize
+    const __m128 previous = *quad;
+    *quad = vec_sel(vec.get128(), previous, keepW);
+}
+
+// Writes the x, y and z components of a point to fptr[0..2], in that order.
+VECTORMATH_FORCE_INLINE void storeXYZ(const Point3& vec, float* fptr)
+{
+    float *dst = fptr;
+    *dst = vec.getX();
+    ++dst;
+    *dst = vec.getY();
+    ++dst;
+    *dst = vec.getZ();
+}
+
+// Writes the x, y and z components of a vector to fptr[0..2], in that order.
+VECTORMATH_FORCE_INLINE void storeXYZ(const Vector3& vec, float* fptr)
+{
+    float *dst = fptr;
+    *dst = vec.getX();
+    ++dst;
+    *dst = vec.getY();
+    ++dst;
+    *dst = vec.getZ();
+}
+
+
+// Unpacks four xyz triples stored back to back in three quads
+// (x0 y0 z0 x1 | y1 z1 x2 y2 | z2 x3 y3 z3) into four vectors.
+// The fourth lane of each result is whatever float follows the triple in
+// memory, except for vec3 whose fourth lane is z2.
+//
+// The last triple is taken from threeQuads[2] with a lane rotation. The
+// previous unaligned load at float offset 9 read floats 9..12, one float
+// past the end of the 12-float input. The pointer cast now keeps const.
+VECTORMATH_FORCE_INLINE void loadXYZArray( Vector3 & vec0, Vector3 & vec1, Vector3 & vec2, Vector3 & vec3, const __m128 * threeQuads )
+{
+    const float *quads = (const float *)threeQuads;
+    vec0 = Vector3( _mm_load_ps(quads) );
+    vec1 = Vector3( _mm_loadu_ps(quads + 3) );
+    vec2 = Vector3( _mm_loadu_ps(quads + 6) );
+    // lastQuad = (z2 x3 y3 z3); rotate to (x3 y3 z3 z2).
+    const __m128 lastQuad = threeQuads[2];
+    vec3 = Vector3( _mm_shuffle_ps( lastQuad, lastQuad, _MM_SHUFFLE(0, 3, 2, 1) ) );
+}
+
+// Packs the x, y, z components of four vectors into three consecutive quads:
+//   threeQuads[0] = x0 y0 z0 x1
+//   threeQuads[1] = y1 z1 x2 y2
+//   threeQuads[2] = z2 x3 y3 z3
+// (layout assumes vec_sel(a, b, mask) takes b where the mask word is set -
+// confirm against its definition).
+VECTORMATH_FORCE_INLINE void storeXYZArray( const Vector3 &vec0, const Vector3 &vec1, const Vector3 &vec2, const Vector3 &vec3, __m128 * threeQuads )
+{
+	// Broadcast x of vec1 and z of vec2; they fill the slots that straddle quads.
+	__m128 xxxx = _mm_shuffle_ps( vec1.get128(), vec1.get128(), _MM_SHUFFLE(0, 0, 0, 0) );
+	__m128 zzzz = _mm_shuffle_ps( vec2.get128(), vec2.get128(), _MM_SHUFFLE(2, 2, 2, 2) );
+	// xsw selects the fourth word, zsw the first.
+	VM_ATTRIBUTE_ALIGN16 unsigned int xsw[4] = {0, 0, 0, 0xffffffff};
+	VM_ATTRIBUTE_ALIGN16 unsigned int zsw[4] = {0xffffffff, 0, 0, 0};
+	threeQuads[0] = vec_sel( vec0.get128(), xxxx, xsw );
+    threeQuads[1] = _mm_shuffle_ps( vec1.get128(), vec2.get128(), _MM_SHUFFLE(1, 0, 2, 1) );
+    // vec3 is rotated to (w3 x3 y3 z3) before its first lane is replaced.
+    threeQuads[2] = vec_sel( _mm_shuffle_ps( vec3.get128(), vec3.get128(), _MM_SHUFFLE(2, 1, 0, 3) ), zzzz, zsw );
+}
+/*
+VECTORMATH_FORCE_INLINE void storeHalfFloats( const Vector3 &vec0, const Vector3 &vec1, const Vector3 &vec2, const Vector3 &vec3, const Vector3 &vec4, const Vector3 &vec5, const Vector3 &vec6, const Vector3 &vec7, vec_ushort8 * threeQuads )
+{
+	assert(0);
+#if 0
+    __m128 xyz0[3];
+    __m128 xyz1[3];
+    storeXYZArray( vec0, vec1, vec2, vec3, xyz0 );
+    storeXYZArray( vec4, vec5, vec6, vec7, xyz1 );
+    threeQuads[0] = _vmath2VfToHalfFloats(xyz0[0], xyz0[1]);
+    threeQuads[1] = _vmath2VfToHalfFloats(xyz0[2], xyz1[0]);
+    threeQuads[2] = _vmath2VfToHalfFloats(xyz1[1], xyz1[2]);
+#endif
+}
+*/
+// Copy assignment: takes over the other vector's 128-bit value.
+VECTORMATH_FORCE_INLINE Vector3 & Vector3::operator =( const Vector3 &vec )
+{
+    this->mVec128 = vec.get128();
+    return *this;
+}
+
+// Per-component accessors. Setters return *this so calls can be chained;
+// getters return the component as a floatInVec.
+
+// Sets x (lane 0) from a float.
+VECTORMATH_FORCE_INLINE Vector3 & Vector3::setX( float _x )
+{
+    ((float *)&(mVec128))[0] = _x;
+    return *this;
+}
+
+// Sets x (lane 0) from lane 0 of a floatInVec.
+VECTORMATH_FORCE_INLINE Vector3 & Vector3::setX( const floatInVec &_x )
+{
+    const __m128 updated = _vmathVfInsert(mVec128, _x.get128(), 0);
+    mVec128 = updated;
+    return *this;
+}
+
+// Reads x (lane 0).
+VECTORMATH_FORCE_INLINE const floatInVec Vector3::getX( ) const
+{
+    const floatInVec x( mVec128, 0 );
+    return x;
+}
+
+// Sets y (lane 1) from a float.
+VECTORMATH_FORCE_INLINE Vector3 & Vector3::setY( float _y )
+{
+    ((float *)&(mVec128))[1] = _y;
+    return *this;
+}
+
+// Sets y (lane 1) from lane 1 of a floatInVec.
+VECTORMATH_FORCE_INLINE Vector3 & Vector3::setY( const floatInVec &_y )
+{
+    const __m128 updated = _vmathVfInsert(mVec128, _y.get128(), 1);
+    mVec128 = updated;
+    return *this;
+}
+
+// Reads y (lane 1).
+VECTORMATH_FORCE_INLINE const floatInVec Vector3::getY( ) const
+{
+    const floatInVec y( mVec128, 1 );
+    return y;
+}
+
+// Sets z (lane 2) from a float.
+VECTORMATH_FORCE_INLINE Vector3 & Vector3::setZ( float _z )
+{
+    ((float *)&(mVec128))[2] = _z;
+    return *this;
+}
+
+// Sets z (lane 2) from lane 2 of a floatInVec.
+VECTORMATH_FORCE_INLINE Vector3 & Vector3::setZ( const floatInVec &_z )
+{
+    const __m128 updated = _vmathVfInsert(mVec128, _z.get128(), 2);
+    mVec128 = updated;
+    return *this;
+}
+
+// Reads z (lane 2).
+VECTORMATH_FORCE_INLINE const floatInVec Vector3::getZ( ) const
+{
+    const floatInVec z( mVec128, 2 );
+    return z;
+}
+
+// Indexed element access. idx is used directly as a lane index and is not
+// range-checked here.
+
+// Sets lane idx from a float.
+VECTORMATH_FORCE_INLINE Vector3 & Vector3::setElem( int idx, float value )
+{
+    ((float *)&(mVec128))[idx] = value;
+    return *this;
+}
+
+// Sets lane idx from the same lane of a floatInVec.
+VECTORMATH_FORCE_INLINE Vector3 & Vector3::setElem( int idx, const floatInVec &value )
+{
+    const __m128 updated = _vmathVfInsert(mVec128, value.get128(), idx);
+    mVec128 = updated;
+    return *this;
+}
+
+// Reads lane idx as a floatInVec.
+VECTORMATH_FORCE_INLINE const floatInVec Vector3::getElem( int idx ) const
+{
+    const floatInVec element( mVec128, idx );
+    return element;
+}
+
+// Mutable subscript: returns a VecIdx proxy bound to lane idx.
+VECTORMATH_FORCE_INLINE VecIdx Vector3::operator []( int idx )
+{
+    VecIdx proxy( mVec128, idx );
+    return proxy;
+}
+
+// Const subscript: reads lane idx as a floatInVec.
+VECTORMATH_FORCE_INLINE const floatInVec Vector3::operator []( int idx ) const
+{
+    const floatInVec element( mVec128, idx );
+    return element;
+}
+
+// Lane-wise sum of two vectors.
+VECTORMATH_FORCE_INLINE const Vector3 Vector3::operator +( const Vector3 &vec ) const
+{
+    const __m128 total = _mm_add_ps( mVec128, vec.mVec128 );
+    return Vector3( total );
+}
+
+// Lane-wise difference of two vectors.
+VECTORMATH_FORCE_INLINE const Vector3 Vector3::operator -( const Vector3 &vec ) const
+{
+    const __m128 difference = _mm_sub_ps( mVec128, vec.mVec128 );
+    return Vector3( difference );
+}
+
+// Vector plus point: lane-wise sum, returned as a Point3.
+VECTORMATH_FORCE_INLINE const Point3 Vector3::operator +( const Point3 &pnt ) const
+{
+    const __m128 total = _mm_add_ps( mVec128, pnt.get128() );
+    return Point3( total );
+}
+
+// Scales by a float; forwards to the floatInVec overload.
+VECTORMATH_FORCE_INLINE const Vector3 Vector3::operator *( float scalar ) const
+{
+    const floatInVec wide(scalar);
+    return *this * wide;
+}
+
+// Scales by a floatInVec: lane-wise product with its 128-bit value.
+VECTORMATH_FORCE_INLINE const Vector3 Vector3::operator *( const floatInVec &scalar ) const
+{
+    const __m128 scaled = _mm_mul_ps( mVec128, scalar.get128() );
+    return Vector3( scaled );
+}
+
+// Compound assignments, each expressed through the matching binary operator
+// followed by copy assignment.
+
+// this = this + vec
+VECTORMATH_FORCE_INLINE Vector3 & Vector3::operator +=( const Vector3 &vec )
+{
+    return *this = *this + vec;
+}
+
+// this = this - vec
+VECTORMATH_FORCE_INLINE Vector3 & Vector3::operator -=( const Vector3 &vec )
+{
+    return *this = *this - vec;
+}
+
+// this = this * float
+VECTORMATH_FORCE_INLINE Vector3 & Vector3::operator *=( float scalar )
+{
+    return *this = *this * scalar;
+}
+
+// this = this * floatInVec
+VECTORMATH_FORCE_INLINE Vector3 & Vector3::operator *=( const floatInVec &scalar )
+{
+    return *this = *this * scalar;
+}
+
+// Divides by a float; forwards to the floatInVec overload.
+VECTORMATH_FORCE_INLINE const Vector3 Vector3::operator /( float scalar ) const
+{
+    const floatInVec wide(scalar);
+    return *this / wide;
+}
+
+// Divides by a floatInVec: lane-wise quotient with its 128-bit value.
+VECTORMATH_FORCE_INLINE const Vector3 Vector3::operator /( const floatInVec &scalar ) const
+{
+    const __m128 quotient = _mm_div_ps( mVec128, scalar.get128() );
+    return Vector3( quotient );
+}
+
+// this = this / float
+VECTORMATH_FORCE_INLINE Vector3 & Vector3::operator /=( float scalar )
+{
+    return *this = *this / scalar;
+}
+
+// this = this / floatInVec
+VECTORMATH_FORCE_INLINE Vector3 & Vector3::operator /=( const floatInVec &scalar )
+{
+    return *this = *this / scalar;
+}
+
+// Unary minus: flips the sign bit of all four lanes with an XOR, so zeros
+// change sign as well.
+//
+// The mask is stored as unsigned int because 0x80000000 does not fit in int,
+// and brace-initialising an int array with it is a narrowing conversion that
+// C++11 rejects. It is loaded with _mm_load_ps, the same way conj() loads its
+// sign mask.
+VECTORMATH_FORCE_INLINE const Vector3 Vector3::operator -( ) const
+{
+    VM_ATTRIBUTE_ALIGN16 static const unsigned int signBits[4] = {0x80000000, 0x80000000, 0x80000000, 0x80000000};
+    const __m128 negMask = _mm_load_ps( (const float *)signBits );
+    return Vector3( _mm_xor_ps( get128(), negMask ) );
+}
+
+// float * vector; forwards to the floatInVec * vector overload.
+VECTORMATH_FORCE_INLINE const Vector3 operator *( float scalar, const Vector3 &vec )
+{
+    const floatInVec wide(scalar);
+    return wide * vec;
+}
+
+// floatInVec * vector; same as vector * floatInVec.
+VECTORMATH_FORCE_INLINE const Vector3 operator *( const floatInVec &scalar, const Vector3 &vec )
+{
+    const Vector3 scaled = vec * scalar;
+    return scaled;
+}
+
+// Lane-wise product of two vectors.
+VECTORMATH_FORCE_INLINE const Vector3 mulPerElem( const Vector3 &vec0, const Vector3 &vec1 )
+{
+    const __m128 product = _mm_mul_ps( vec0.get128(), vec1.get128() );
+    return Vector3( product );
+}
+
+// Lane-wise quotient vec0 / vec1.
+VECTORMATH_FORCE_INLINE const Vector3 divPerElem( const Vector3 &vec0, const Vector3 &vec1 )
+{
+    const __m128 quotient = _mm_div_ps( vec0.get128(), vec1.get128() );
+    return Vector3( quotient );
+}
+
+// Lane-wise reciprocal computed with _mm_rcp_ps, which is a reduced-precision
+// approximation rather than an exact 1/x.
+VECTORMATH_FORCE_INLINE const Vector3 recipPerElem( const Vector3 &vec )
+{
+    const __m128 reciprocal = _mm_rcp_ps( vec.get128() );
+    return Vector3( reciprocal );
+}
+
+// Lane-wise absolute value, computed by fabsf4.
+VECTORMATH_FORCE_INLINE const Vector3 absPerElem( const Vector3 &vec )
+{
+    const __m128 magnitude = fabsf4( vec.get128() );
+    return Vector3( magnitude );
+}
+
+// Combines, per lane, the bits of vec0 selected by the mask with the bits of
+// vec1 that the mask excludes. The mask comes from toM128(0x7fffffff)
+// (presumably that pattern in every lane - confirm), which gives the
+// magnitude of vec0 with the sign of vec1.
+VECTORMATH_FORCE_INLINE const Vector3 copySignPerElem( const Vector3 &vec0, const Vector3 &vec1 )
+{
+    const __m128 magnitudeBits = toM128(0x7fffffff);
+    const __m128 magnitude = _mm_and_ps( magnitudeBits, vec0.get128() );
+    const __m128 sign = _mm_andnot_ps( magnitudeBits, vec1.get128() );
+    return Vector3( _mm_or_ps( magnitude, sign ) );
+}
+
+// Lane-wise maximum of two vectors.
+VECTORMATH_FORCE_INLINE const Vector3 maxPerElem( const Vector3 &vec0, const Vector3 &vec1 )
+{
+    const __m128 larger = _mm_max_ps( vec0.get128(), vec1.get128() );
+    return Vector3( larger );
+}
+
+// Largest of x, y and z, evaluated as max(max(x, y), z) on splatted lanes.
+VECTORMATH_FORCE_INLINE const floatInVec maxElem( const Vector3 &vec )
+{
+    const __m128 xy = _mm_max_ps( vec_splat( vec.get128(), 0 ), vec_splat( vec.get128(), 1 ) );
+    const __m128 xyz = _mm_max_ps( xy, vec_splat( vec.get128(), 2 ) );
+    return floatInVec( xyz );
+}
+
+// Lane-wise minimum of two vectors.
+VECTORMATH_FORCE_INLINE const Vector3 minPerElem( const Vector3 &vec0, const Vector3 &vec1 )
+{
+    const __m128 smaller = _mm_min_ps( vec0.get128(), vec1.get128() );
+    return Vector3( smaller );
+}
+
+// Smallest of x, y and z, evaluated as min(min(x, y), z) on splatted lanes.
+VECTORMATH_FORCE_INLINE const floatInVec minElem( const Vector3 &vec )
+{
+    const __m128 xy = _mm_min_ps( vec_splat( vec.get128(), 0 ), vec_splat( vec.get128(), 1 ) );
+    const __m128 xyz = _mm_min_ps( xy, vec_splat( vec.get128(), 2 ) );
+    return floatInVec( xyz );
+}
+
+// Sum of x, y and z, evaluated as (x + y) + z on splatted lanes.
+VECTORMATH_FORCE_INLINE const floatInVec sum( const Vector3 &vec )
+{
+    const __m128 xy = _mm_add_ps( vec_splat( vec.get128(), 0 ), vec_splat( vec.get128(), 1 ) );
+    const __m128 xyz = _mm_add_ps( xy, vec_splat( vec.get128(), 2 ) );
+    return floatInVec( xyz );
+}
+
+// Three-component dot product, returned as a floatInVec taken from lane 0.
+VECTORMATH_FORCE_INLINE const floatInVec dot( const Vector3 &vec0, const Vector3 &vec1 )
+{
+    const __m128 product = _vmathVfDot3( vec0.get128(), vec1.get128() );
+    return floatInVec( product, 0 );
+}
+
+// Squared length: the three-component dot product of the vector with itself.
+VECTORMATH_FORCE_INLINE const floatInVec lengthSqr( const Vector3 &vec )
+{
+    const __m128 squared = _vmathVfDot3( vec.get128(), vec.get128() );
+    return floatInVec( squared, 0 );
+}
+
+// Length: _mm_sqrt_ps of the squared length.
+VECTORMATH_FORCE_INLINE const floatInVec length( const Vector3 &vec )
+{
+    const __m128 squared = _vmathVfDot3( vec.get128(), vec.get128() );
+    const __m128 root = _mm_sqrt_ps( squared );
+    return floatInVec( root, 0 );
+}
+
+
+// Scales the vector by _mm_rsqrt_ps of its squared length; _mm_rsqrt_ps is a
+// reduced-precision estimate, hence "Approx".
+VECTORMATH_FORCE_INLINE const Vector3 normalizeApprox( const Vector3 &vec )
+{
+    const __m128 squared = _vmathVfDot3( vec.get128(), vec.get128() );
+    const __m128 invLength = _mm_rsqrt_ps( squared );
+    return Vector3( _mm_mul_ps( vec.get128(), invLength ) );
+}
+
+// Scales the vector by newtonrapson_rsqrt4 of its squared length
+// (presumably a refined reciprocal square root - confirm against its definition).
+VECTORMATH_FORCE_INLINE const Vector3 normalize( const Vector3 &vec )
+{
+    const __m128 squared = _vmathVfDot3( vec.get128(), vec.get128() );
+    const __m128 invLength = newtonrapson_rsqrt4( squared );
+    return Vector3( _mm_mul_ps( vec.get128(), invLength ) );
+}
+
+// Cross product of two vectors, computed by _vmathVfCross on their raw values.
+VECTORMATH_FORCE_INLINE const Vector3 cross( const Vector3 &vec0, const Vector3 &vec1 )
+{
+    const __m128 product = _vmathVfCross( vec0.get128(), vec1.get128() );
+    return Vector3( product );
+}
+
+// Chooses between two 3-D vectors from a plain bool by wrapping the flag in
+// a boolInVec and forwarding to the boolInVec-taking select().
+// NOTE(review): no select(Vector3, Vector3, boolInVec) definition is visible
+// next to this one - confirm it is provided elsewhere.
+VECTORMATH_FORCE_INLINE const Vector3 select( const Vector3 &vec0, const Vector3 &vec1, bool select1 )
+{
+    const boolInVec pick( select1 );
+    return select( vec0, vec1, pick );
+}
+
+
+// Chooses between two 4-D vectors with vec_sel, using the boolInVec's
+// 128-bit value as the mask.
+VECTORMATH_FORCE_INLINE  const Vector4 select(const Vector4& vec0, const Vector4& vec1, const boolInVec& select1)
+{
+    const __m128 chosen = vec_sel(vec0.get128(), vec1.get128(), select1.get128());
+    return Vector4(chosen);
+}
+
+#ifdef _VECTORMATH_DEBUG
+
+// Debug helper: prints the x, y and z lanes of a vector to stdout.
+VECTORMATH_FORCE_INLINE void print( const Vector3 &vec )
+{
+    float lanes[4];
+    _mm_storeu_ps( lanes, vec.get128() );
+    printf( "( %f %f %f )\n", lanes[0], lanes[1], lanes[2] );
+}
+
+// Debug helper: prints the x, y and z lanes of a vector to stdout, prefixed
+// with the supplied name.
+VECTORMATH_FORCE_INLINE void print( const Vector3 &vec, const char * name )
+{
+    float lanes[4];
+    _mm_storeu_ps( lanes, vec.get128() );
+    printf( "%s: ( %f %f %f )\n", name, lanes[0], lanes[1], lanes[2] );
+}
+
+#endif
+
+// Builds a vector from four floats, lane 0 first.
+VECTORMATH_FORCE_INLINE Vector4::Vector4( float _x, float _y, float _z, float _w )
+{
+    const __m128 packed = _mm_setr_ps(_x, _y, _z, _w);
+    mVec128 = packed;
+}
+
+// Builds a vector from four floatInVec values by interleaving low lanes:
+// (x0 z0 x1 z1) with (y0 w0 y1 w1) gives (x0 y0 z0 w0).
+VECTORMATH_FORCE_INLINE Vector4::Vector4( const floatInVec &_x, const floatInVec &_y, const floatInVec &_z, const floatInVec &_w )
+{
+    const __m128 xzLanes = _mm_unpacklo_ps( _x.get128(), _z.get128() );
+    const __m128 ywLanes = _mm_unpacklo_ps( _y.get128(), _w.get128() );
+    mVec128 = _mm_unpacklo_ps( xzLanes, ywLanes );
+}
+
+// Takes the 128-bit value of a 3-D vector and overwrites lane 3 with _w.
+VECTORMATH_FORCE_INLINE Vector4::Vector4( const Vector3 &xyz, float _w )
+{
+    __m128 packed = xyz.get128();
+    _vmathVfSetElement(packed, _w, 3);
+    mVec128 = packed;
+}
+
+// Takes the 128-bit value of a 3-D vector and overwrites lane 3 with lane 3
+// of a floatInVec.
+VECTORMATH_FORCE_INLINE Vector4::Vector4( const Vector3 &xyz, const floatInVec &_w )
+{
+    const __m128 base = xyz.get128();
+    mVec128 = _vmathVfInsert(base, _w.get128(), 3);
+}
+
+// Takes the 128-bit value of a 3-D vector and sets lane 3 to zero.
+VECTORMATH_FORCE_INLINE Vector4::Vector4( const Vector3 &vec )
+{
+    const __m128 base = vec.get128();
+    mVec128 = _vmathVfInsert(base, _mm_setzero_ps(), 3);
+}
+
+// Takes the 128-bit value of a point and sets lane 3 to 1.0f.
+VECTORMATH_FORCE_INLINE Vector4::Vector4( const Point3 &pnt )
+{
+    const __m128 base = pnt.get128();
+    mVec128 = _vmathVfInsert(base, _mm_set1_ps(1.0f), 3);
+}
+
+// Builds a vector holding the same 128-bit value as a quaternion.
+VECTORMATH_FORCE_INLINE Vector4::Vector4( const Quat &quat )
+{
+    const __m128 bits = quat.get128();
+    mVec128 = bits;
+}
+
+// Builds a vector from a float by way of a floatInVec temporary.
+VECTORMATH_FORCE_INLINE Vector4::Vector4( float scalar )
+{
+    const floatInVec wide(scalar);
+    mVec128 = wide.get128();
+}
+
+// Builds a vector holding the same 128-bit value as a floatInVec.
+VECTORMATH_FORCE_INLINE Vector4::Vector4( const floatInVec &scalar )
+{
+    const __m128 bits = scalar.get128();
+    mVec128 = bits;
+}
+
+// Builds a vector directly from a raw __m128.
+VECTORMATH_FORCE_INLINE Vector4::Vector4( __m128 vf4 )
+{
+    this->mVec128 = vf4;
+}
+
+// Unit vector (1, 0, 0, 0).
+VECTORMATH_FORCE_INLINE const Vector4 Vector4::xAxis( )
+{
+    const __m128 unitX = _VECTORMATH_UNIT_1000;
+    return Vector4( unitX );
+}
+
+// Unit vector (0, 1, 0, 0).
+VECTORMATH_FORCE_INLINE const Vector4 Vector4::yAxis( )
+{
+    const __m128 unitY = _VECTORMATH_UNIT_0100;
+    return Vector4( unitY );
+}
+
+// Unit vector (0, 0, 1, 0).
+VECTORMATH_FORCE_INLINE const Vector4 Vector4::zAxis( )
+{
+    const __m128 unitZ = _VECTORMATH_UNIT_0010;
+    return Vector4( unitZ );
+}
+
+// Unit vector (0, 0, 0, 1).
+VECTORMATH_FORCE_INLINE const Vector4 Vector4::wAxis( )
+{
+    const __m128 unitW = _VECTORMATH_UNIT_0001;
+    return Vector4( unitW );
+}
+
+// Linear interpolation with a float parameter; forwards to the floatInVec
+// overload.
+VECTORMATH_FORCE_INLINE const Vector4 lerp( float t, const Vector4 &vec0, const Vector4 &vec1 )
+{
+    const floatInVec wideT(t);
+    return lerp( wideT, vec0, vec1 );
+}
+
+// Linear interpolation: vec0 + (vec1 - vec0) * t.
+VECTORMATH_FORCE_INLINE const Vector4 lerp( const floatInVec &t, const Vector4 &vec0, const Vector4 &vec1 )
+{
+    const Vector4 delta = vec1 - vec0;
+    const Vector4 step = delta * t;
+    return vec0 + step;
+}
+
+// Spherical interpolation with a float parameter; forwards to the floatInVec
+// overload.
+VECTORMATH_FORCE_INLINE const Vector4 slerp( float t, const Vector4 &unitVec0, const Vector4 &unitVec1 )
+{
+    const floatInVec wideT(t);
+    return slerp( wideT, unitVec0, unitVec1 );
+}
+
+// Spherical linear interpolation between two 4-D vectors; the parameter names
+// indicate both are expected to be unit length.
+// The result is unitVec0 * scale0 + unitVec1 * scale1 (assuming
+// vec_madd(a, b, c) is a*b + c and vec_sel(a, b, m) takes b where m is set -
+// confirm against their definitions). Where the 4-component dot product is
+// below _VECTORMATH_SLERP_TOL the scales are sin((1-t)*angle)/sin(angle) and
+// sin(t*angle)/sin(angle); elsewhere they are the plain weights (1-t) and t.
+VECTORMATH_FORCE_INLINE const Vector4 slerp( const floatInVec &t, const Vector4 &unitVec0, const Vector4 &unitVec1 )
+{
+    __m128 scales, scale0, scale1, cosAngle, angle, tttt, oneMinusT, angles, sines;
+    cosAngle = _vmathVfDot4( unitVec0.get128(), unitVec1.get128() );
+    // Lanes are all-ones where the tolerance exceeds cosAngle.
+    __m128 selectMask = _mm_cmpgt_ps( _mm_set1_ps(_VECTORMATH_SLERP_TOL), cosAngle );
+    angle = acosf4( cosAngle );
+    tttt = t.get128();
+    oneMinusT = _mm_sub_ps( _mm_set1_ps(1.0f), tttt );
+    angles = _mm_unpacklo_ps( _mm_set1_ps(1.0f), tttt ); // angles = 1, t, 1, t
+    angles = _mm_unpacklo_ps( angles, oneMinusT );		// angles = 1, 1-t, t, 1-t
+    angles = _mm_mul_ps( angles, angle );
+    sines = sinf4( angles );
+    // Divide every sine by the one in lane 0, which is sin(angle).
+    scales = _mm_div_ps( sines, vec_splat( sines, 0 ) );
+    scale0 = vec_sel( oneMinusT, vec_splat( scales, 1 ), selectMask );
+    scale1 = vec_sel( tttt, vec_splat( scales, 2 ), selectMask );
+    return Vector4( vec_madd( unitVec0.get128(), scale0, _mm_mul_ps( unitVec1.get128(), scale1 ) ) );
+}
+
+// Returns the raw 128-bit value held by this vector.
+VECTORMATH_FORCE_INLINE __m128 Vector4::get128( ) const
+{
+    return this->mVec128;
+}
+/*
+VECTORMATH_FORCE_INLINE void storeHalfFloats( const Vector4 &vec0, const Vector4 &vec1, const Vector4 &vec2, const Vector4 &vec3, vec_ushort8 * twoQuads )
+{
+    twoQuads[0] = _vmath2VfToHalfFloats(vec0.get128(), vec1.get128());
+    twoQuads[1] = _vmath2VfToHalfFloats(vec2.get128(), vec3.get128());
+}
+*/
+// Copy assignment: takes over the other vector's 128-bit value.
+VECTORMATH_FORCE_INLINE Vector4 & Vector4::operator =( const Vector4 &vec )
+{
+    this->mVec128 = vec.get128();
+    return *this;
+}
+
+// Merges a 3-D vector into this one using a mask whose only set word is the
+// fourth (assuming vec_sel(a, b, mask) takes b where the mask is set, x, y
+// and z come from vec and the current w is kept - confirm).
+VECTORMATH_FORCE_INLINE Vector4 & Vector4::setXYZ( const Vector3 &vec )
+{
+    VM_ATTRIBUTE_ALIGN16 unsigned int keepW[4] = {0, 0, 0, 0xffffffff};
+    const __m128 merged = vec_sel( vec.get128(), mVec128, keepW );
+    mVec128 = merged;
+    return *this;
+}
+
+// Returns a 3-D vector built from this vector's full 128-bit value.
+VECTORMATH_FORCE_INLINE const Vector3 Vector4::getXYZ( ) const
+{
+    const __m128 bits = mVec128;
+    return Vector3( bits );
+}
+
+// Per-component accessors. Setters return *this so calls can be chained;
+// getters return the component as a floatInVec.
+
+// Sets x (lane 0) from a float.
+VECTORMATH_FORCE_INLINE Vector4 & Vector4::setX( float _x )
+{
+    ((float *)&(mVec128))[0] = _x;
+    return *this;
+}
+
+// Sets x (lane 0) from lane 0 of a floatInVec.
+VECTORMATH_FORCE_INLINE Vector4 & Vector4::setX( const floatInVec &_x )
+{
+    const __m128 updated = _vmathVfInsert(mVec128, _x.get128(), 0);
+    mVec128 = updated;
+    return *this;
+}
+
+// Reads x (lane 0).
+VECTORMATH_FORCE_INLINE const floatInVec Vector4::getX( ) const
+{
+    const floatInVec x( mVec128, 0 );
+    return x;
+}
+
+// Sets y (lane 1) from a float.
+VECTORMATH_FORCE_INLINE Vector4 & Vector4::setY( float _y )
+{
+    ((float *)&(mVec128))[1] = _y;
+    return *this;
+}
+
+// Sets y (lane 1) from lane 1 of a floatInVec.
+VECTORMATH_FORCE_INLINE Vector4 & Vector4::setY( const floatInVec &_y )
+{
+    const __m128 updated = _vmathVfInsert(mVec128, _y.get128(), 1);
+    mVec128 = updated;
+    return *this;
+}
+
+// Reads y (lane 1).
+VECTORMATH_FORCE_INLINE const floatInVec Vector4::getY( ) const
+{
+    const floatInVec y( mVec128, 1 );
+    return y;
+}
+
+// Sets z (lane 2) from a float.
+VECTORMATH_FORCE_INLINE Vector4 & Vector4::setZ( float _z )
+{
+    ((float *)&(mVec128))[2] = _z;
+    return *this;
+}
+
+// Sets z (lane 2) from lane 2 of a floatInVec.
+VECTORMATH_FORCE_INLINE Vector4 & Vector4::setZ( const floatInVec &_z )
+{
+    const __m128 updated = _vmathVfInsert(mVec128, _z.get128(), 2);
+    mVec128 = updated;
+    return *this;
+}
+
+// Reads z (lane 2).
+VECTORMATH_FORCE_INLINE const floatInVec Vector4::getZ( ) const
+{
+    const floatInVec z( mVec128, 2 );
+    return z;
+}
+
+// Sets w (lane 3) from a float.
+VECTORMATH_FORCE_INLINE Vector4 & Vector4::setW( float _w )
+{
+    ((float *)&(mVec128))[3] = _w;
+    return *this;
+}
+
+// Sets w (lane 3) from lane 3 of a floatInVec.
+VECTORMATH_FORCE_INLINE Vector4 & Vector4::setW( const floatInVec &_w )
+{
+    const __m128 updated = _vmathVfInsert(mVec128, _w.get128(), 3);
+    mVec128 = updated;
+    return *this;
+}
+
+// Reads w (lane 3).
+VECTORMATH_FORCE_INLINE const floatInVec Vector4::getW( ) const
+{
+    const floatInVec w( mVec128, 3 );
+    return w;
+}
+
+// Indexed element access. idx is used directly as a lane index and is not
+// range-checked here.
+
+// Sets lane idx from a float.
+VECTORMATH_FORCE_INLINE Vector4 & Vector4::setElem( int idx, float value )
+{
+    ((float *)&(mVec128))[idx] = value;
+    return *this;
+}
+
+// Sets lane idx from the same lane of a floatInVec.
+VECTORMATH_FORCE_INLINE Vector4 & Vector4::setElem( int idx, const floatInVec &value )
+{
+    const __m128 updated = _vmathVfInsert(mVec128, value.get128(), idx);
+    mVec128 = updated;
+    return *this;
+}
+
+// Reads lane idx as a floatInVec.
+VECTORMATH_FORCE_INLINE const floatInVec Vector4::getElem( int idx ) const
+{
+    const floatInVec element( mVec128, idx );
+    return element;
+}
+
+// Mutable subscript: returns a VecIdx proxy bound to lane idx.
+VECTORMATH_FORCE_INLINE VecIdx Vector4::operator []( int idx )
+{
+    VecIdx proxy( mVec128, idx );
+    return proxy;
+}
+
+// Const subscript: reads lane idx as a floatInVec.
+VECTORMATH_FORCE_INLINE const floatInVec Vector4::operator []( int idx ) const
+{
+    const floatInVec element( mVec128, idx );
+    return element;
+}
+
+// Lane-wise sum of two vectors.
+VECTORMATH_FORCE_INLINE const Vector4 Vector4::operator +( const Vector4 &vec ) const
+{
+    const __m128 total = _mm_add_ps( mVec128, vec.mVec128 );
+    return Vector4( total );
+}
+
+// Lane-wise difference of two vectors.
+VECTORMATH_FORCE_INLINE const Vector4 Vector4::operator -( const Vector4 &vec ) const
+{
+    const __m128 difference = _mm_sub_ps( mVec128, vec.mVec128 );
+    return Vector4( difference );
+}
+
+// Scales by a float; forwards to the floatInVec overload.
+VECTORMATH_FORCE_INLINE const Vector4 Vector4::operator *( float scalar ) const
+{
+    const floatInVec wide(scalar);
+    return *this * wide;
+}
+
+// Scales by a floatInVec: lane-wise product with its 128-bit value.
+VECTORMATH_FORCE_INLINE const Vector4 Vector4::operator *( const floatInVec &scalar ) const
+{
+    const __m128 scaled = _mm_mul_ps( mVec128, scalar.get128() );
+    return Vector4( scaled );
+}
+
+// Compound assignments, each expressed through the matching binary operator
+// followed by copy assignment.
+
+// this = this + vec
+VECTORMATH_FORCE_INLINE Vector4 & Vector4::operator +=( const Vector4 &vec )
+{
+    return *this = *this + vec;
+}
+
+// this = this - vec
+VECTORMATH_FORCE_INLINE Vector4 & Vector4::operator -=( const Vector4 &vec )
+{
+    return *this = *this - vec;
+}
+
+// this = this * float
+VECTORMATH_FORCE_INLINE Vector4 & Vector4::operator *=( float scalar )
+{
+    return *this = *this * scalar;
+}
+
+// this = this * floatInVec
+VECTORMATH_FORCE_INLINE Vector4 & Vector4::operator *=( const floatInVec &scalar )
+{
+    return *this = *this * scalar;
+}
+
+// Divides by a float; forwards to the floatInVec overload.
+VECTORMATH_FORCE_INLINE const Vector4 Vector4::operator /( float scalar ) const
+{
+    const floatInVec wide(scalar);
+    return *this / wide;
+}
+
+// Divides by a floatInVec: lane-wise quotient with its 128-bit value.
+VECTORMATH_FORCE_INLINE const Vector4 Vector4::operator /( const floatInVec &scalar ) const
+{
+    const __m128 quotient = _mm_div_ps( mVec128, scalar.get128() );
+    return Vector4( quotient );
+}
+
+// this = this / float
+VECTORMATH_FORCE_INLINE Vector4 & Vector4::operator /=( float scalar )
+{
+    return *this = *this / scalar;
+}
+
+// this = this / floatInVec
+VECTORMATH_FORCE_INLINE Vector4 & Vector4::operator /=( const floatInVec &scalar )
+{
+    return *this = *this / scalar;
+}
+
+// Unary minus: flips the sign bit of all four lanes with an XOR, matching
+// Vector3::operator -().
+//
+// The previous form, 0 - v, returned +0 for a +0 lane instead of -0; the
+// sign-bit flip negates zeros as well.
+VECTORMATH_FORCE_INLINE const Vector4 Vector4::operator -( ) const
+{
+    VM_ATTRIBUTE_ALIGN16 static const unsigned int signBits[4] = {0x80000000, 0x80000000, 0x80000000, 0x80000000};
+    const __m128 negMask = _mm_load_ps( (const float *)signBits );
+    return Vector4( _mm_xor_ps( mVec128, negMask ) );
+}
+
+// float * vector; forwards to the floatInVec * vector overload.
+VECTORMATH_FORCE_INLINE const Vector4 operator *( float scalar, const Vector4 &vec )
+{
+    const floatInVec wide(scalar);
+    return wide * vec;
+}
+
+// floatInVec * vector; same as vector * floatInVec.
+VECTORMATH_FORCE_INLINE const Vector4 operator *( const floatInVec &scalar, const Vector4 &vec )
+{
+    const Vector4 scaled = vec * scalar;
+    return scaled;
+}
+
+// Lane-wise product of two vectors.
+VECTORMATH_FORCE_INLINE const Vector4 mulPerElem( const Vector4 &vec0, const Vector4 &vec1 )
+{
+    const __m128 product = _mm_mul_ps( vec0.get128(), vec1.get128() );
+    return Vector4( product );
+}
+
+// Lane-wise quotient vec0 / vec1.
+VECTORMATH_FORCE_INLINE const Vector4 divPerElem( const Vector4 &vec0, const Vector4 &vec1 )
+{
+    const __m128 quotient = _mm_div_ps( vec0.get128(), vec1.get128() );
+    return Vector4( quotient );
+}
+
+// Lane-wise reciprocal computed with _mm_rcp_ps, which is a reduced-precision
+// approximation rather than an exact 1/x.
+VECTORMATH_FORCE_INLINE const Vector4 recipPerElem( const Vector4 &vec )
+{
+    const __m128 reciprocal = _mm_rcp_ps( vec.get128() );
+    return Vector4( reciprocal );
+}
+
+// Lane-wise absolute value, computed by fabsf4.
+VECTORMATH_FORCE_INLINE const Vector4 absPerElem( const Vector4 &vec )
+{
+    const __m128 magnitude = fabsf4( vec.get128() );
+    return Vector4( magnitude );
+}
+
+VECTORMATH_FORCE_INLINE const Vector4 copySignPerElem( const Vector4 &vec0, const Vector4 &vec1 )
+{
+	const __m128 magnitudeBits = toM128(0x7fffffff);	// every bit of a lane except its sign bit
+	const __m128 magnitude = _mm_and_ps   ( magnitudeBits, vec0.get128() );	// vec0 with the sign bits cleared
+	const __m128 signOnly  = _mm_andnot_ps( magnitudeBits, vec1.get128() );	// just the sign bits of vec1
+	return Vector4( _mm_or_ps( magnitude, signOnly ) );	// magnitude of vec0 combined with the sign of vec1
+}
+
+VECTORMATH_FORCE_INLINE const Vector4 maxPerElem( const Vector4 &vec0, const Vector4 &vec1 )
+{
+    return Vector4( _mm_max_ps( vec0.get128(), vec1.get128() ) ); // lane-wise maximum
+}
+
+VECTORMATH_FORCE_INLINE const floatInVec maxElem( const Vector4 &vec )
+{
+    const __m128 maxXY = _mm_max_ps( vec_splat( vec.get128(), 0 ), vec_splat( vec.get128(), 1 ) );
+    const __m128 maxZW = _mm_max_ps( vec_splat( vec.get128(), 2 ), vec_splat( vec.get128(), 3 ) );
+    return floatInVec( _mm_max_ps( maxXY, maxZW ) ); // largest of x, y, z, w, replicated in every lane
+}
+
+VECTORMATH_FORCE_INLINE const Vector4 minPerElem( const Vector4 &vec0, const Vector4 &vec1 )
+{
+    return Vector4( _mm_min_ps( vec0.get128(), vec1.get128() ) ); // lane-wise minimum
+}
+
+VECTORMATH_FORCE_INLINE const floatInVec minElem( const Vector4 &vec )
+{
+    const __m128 minXY = _mm_min_ps( vec_splat( vec.get128(), 0 ), vec_splat( vec.get128(), 1 ) );
+    const __m128 minZW = _mm_min_ps( vec_splat( vec.get128(), 2 ), vec_splat( vec.get128(), 3 ) );
+    return floatInVec( _mm_min_ps( minXY, minZW ) ); // smallest of x, y, z, w, replicated in every lane
+}
+
+VECTORMATH_FORCE_INLINE const floatInVec sum( const Vector4 &vec )
+{
+    const __m128 sumXY = _mm_add_ps( vec_splat( vec.get128(), 0 ), vec_splat( vec.get128(), 1 ) );
+    const __m128 sumZW = _mm_add_ps( vec_splat( vec.get128(), 2 ), vec_splat( vec.get128(), 3 ) );
+    return floatInVec( _mm_add_ps( sumXY, sumZW ) ); // (x + y) + (z + w), replicated in every lane
+}
+
+VECTORMATH_FORCE_INLINE const floatInVec dot( const Vector4 &vec0, const Vector4 &vec1 )
+{
+    return floatInVec( _vmathVfDot4( vec0.get128(), vec1.get128() ), 0 ); // _vmathVfDot4 is defined outside this chunk; lane 0 of its result is wrapped
+}
+
+VECTORMATH_FORCE_INLINE const floatInVec lengthSqr( const Vector4 &vec )
+{
+    return floatInVec(  _vmathVfDot4( vec.get128(), vec.get128() ), 0 ); // dot of the vector with itself, taken from lane 0
+}
+
+VECTORMATH_FORCE_INLINE const floatInVec length( const Vector4 &vec )
+{
+    return floatInVec(  _mm_sqrt_ps(_vmathVfDot4( vec.get128(), vec.get128() )), 0 ); // square root of the self dot product, taken from lane 0
+}
+
+VECTORMATH_FORCE_INLINE const Vector4 normalizeApprox( const Vector4 &vec )
+{
+    return Vector4( _mm_mul_ps( vec.get128(), _mm_rsqrt_ps( _vmathVfDot4( vec.get128(), vec.get128() ) ) ) ); // scales by the raw _mm_rsqrt_ps estimate; no zero-length guard
+}
+
+VECTORMATH_FORCE_INLINE const Vector4 normalize( const Vector4 &vec )
+{
+    return Vector4( _mm_mul_ps( vec.get128(), newtonrapson_rsqrt4( _vmathVfDot4( vec.get128(), vec.get128() ) ) ) ); // same as normalizeApprox but with the refined rsqrt; no zero-length guard
+}
+
+VECTORMATH_FORCE_INLINE const Vector4 select( const Vector4 &vec0, const Vector4 &vec1, bool select1 )
+{
+    return select( vec0, vec1, boolInVec(select1) ); // wrap the bool and forward to the boolInVec overload (defined outside this chunk)
+}
+
+
+#ifdef _VECTORMATH_DEBUG
+
+VECTORMATH_FORCE_INLINE void print( const Vector4 &vec )
+{
+    union { __m128 v; float s[4]; } tmp; // lets the register be read back as four floats for printf
+    tmp.v = vec.get128();
+    printf( "( %f %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2], tmp.s[3] );
+}
+
+VECTORMATH_FORCE_INLINE void print( const Vector4 &vec, const char * name )
+{
+    union { __m128 v; float s[4]; } tmp; // lets the register be read back as four floats for printf
+    tmp.v = vec.get128();
+    printf( "%s: ( %f %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2], tmp.s[3] ); // name is passed to %s unchecked; must not be null
+}
+
+#endif
+
+VECTORMATH_FORCE_INLINE Point3::Point3( float _x, float _y, float _z )
+{
+    mVec128 = _mm_setr_ps(_x, _y, _z, 0.0f); // lanes 0..2 = x, y, z; the unused w lane is set to zero
+}
+
+VECTORMATH_FORCE_INLINE Point3::Point3( const floatInVec &_x, const floatInVec &_y, const floatInVec &_z )
+{
+	mVec128 = _mm_unpacklo_ps( _mm_unpacklo_ps( _x.get128(), _z.get128() ), _y.get128() ); // inner: (x0, z0, x1, z1); outer: (x0, y0, z0, y1), so w ends up as lane 1 of _y
+}
+
+VECTORMATH_FORCE_INLINE Point3::Point3( const Vector3 &vec )
+{
+    mVec128 = vec.get128(); // raw copy of all four lanes, including whatever vec holds in w
+}
+
+VECTORMATH_FORCE_INLINE Point3::Point3( float scalar )
+{
+    mVec128 = floatInVec(scalar).get128(); // presumably floatInVec holds the scalar in every lane (type defined outside this chunk) - confirm
+}
+
+VECTORMATH_FORCE_INLINE Point3::Point3( const floatInVec &scalar )
+{
+    mVec128 = scalar.get128(); // adopt the 128-bit value of scalar unchanged
+}
+
+VECTORMATH_FORCE_INLINE Point3::Point3( __m128 vf4 )
+{
+    mVec128 = vf4; // adopt the register as-is; the w lane is not cleared
+}
+
+VECTORMATH_FORCE_INLINE const Point3 lerp( float t, const Point3 &pnt0, const Point3 &pnt1 )
+{
+    return lerp( floatInVec(t), pnt0, pnt1 ); // wrap the float and forward to the floatInVec overload below
+}
+
+VECTORMATH_FORCE_INLINE const Point3 lerp( const floatInVec &t, const Point3 &pnt0, const Point3 &pnt1 )
+{
+    return ( pnt0 + ( ( pnt1 - pnt0 ) * t ) ); // pnt0 + t * (pnt1 - pnt0); t is not clamped, so values outside [0, 1] extrapolate
+}
+
+VECTORMATH_FORCE_INLINE __m128 Point3::get128( ) const
+{
+    return mVec128; // by-value copy of the underlying register, w lane included
+}
+
+VECTORMATH_FORCE_INLINE void storeXYZ( const Point3 &pnt, __m128 * quad )
+{
+    // Write x, y, z of pnt into *quad while leaving the quad's existing w lane untouched.
+    const __m128 previous = *quad;
+	VM_ATTRIBUTE_ALIGN16 unsigned int keepW[4] = {0, 0, 0, 0xffffffff}; // TODO: Centralize. Set lanes take the second vec_sel argument (previous).
+    *quad = vec_sel(pnt.get128(), previous, keepW);
+}
+
+VECTORMATH_FORCE_INLINE void loadXYZArray( Point3 & pnt0, Point3 & pnt1, Point3 & pnt2, Point3 & pnt3, const __m128 * threeQuads )
+{   // Unpack four points from 12 packed floats (x0 y0 z0 x1 ... z3) stored in three 16-byte-aligned quads.
+	const float *quads = (const float *)threeQuads;
+    pnt0 = Point3( threeQuads[0] );              // (x0, y0, z0, x1): the w lane just carries the neighbouring float
+    pnt1 = Point3( _mm_loadu_ps(quads + 3) );    // (x1, y1, z1, x2)
+    pnt2 = Point3( _mm_loadu_ps(quads + 6) );    // (x2, y2, z2, x3)
+    pnt3 = Point3( _mm_shuffle_ps( threeQuads[2], threeQuads[2], _MM_SHUFFLE(0, 3, 2, 1) ) ); // (x3, y3, z3, z2); a 4-float load at quads + 9 would read past float 11
+}
+
+VECTORMATH_FORCE_INLINE void storeXYZArray( const Point3 &pnt0, const Point3 &pnt1, const Point3 &pnt2, const Point3 &pnt3, __m128 * threeQuads )
+{	// Pack x, y, z of four points into 12 consecutive floats (three quads); the w lanes of the inputs are dropped.
+	__m128 xxxx = _mm_shuffle_ps( pnt1.get128(), pnt1.get128(), _MM_SHUFFLE(0, 0, 0, 0) );	// pnt1.x in every lane
+	__m128 zzzz = _mm_shuffle_ps( pnt2.get128(), pnt2.get128(), _MM_SHUFFLE(2, 2, 2, 2) );	// pnt2.z in every lane
+	VM_ATTRIBUTE_ALIGN16 unsigned int xsw[4] = {0, 0, 0, 0xffffffff};	// mask selecting lane 3 only
+	VM_ATTRIBUTE_ALIGN16 unsigned int zsw[4] = {0xffffffff, 0, 0, 0};	// mask selecting lane 0 only
+	threeQuads[0] = vec_sel( pnt0.get128(), xxxx, xsw );	// (x0, y0, z0, x1)
+    threeQuads[1] = _mm_shuffle_ps( pnt1.get128(), pnt2.get128(), _MM_SHUFFLE(1, 0, 2, 1) );	// (y1, z1, x2, y2)
+    threeQuads[2] = vec_sel( _mm_shuffle_ps( pnt3.get128(), pnt3.get128(), _MM_SHUFFLE(2, 1, 0, 3) ), zzzz, zsw );	// rotate pnt3 to (w3, x3, y3, z3), then put z2 in lane 0
+}
+/*
+VECTORMATH_FORCE_INLINE void storeHalfFloats( const Point3 &pnt0, const Point3 &pnt1, const Point3 &pnt2, const Point3 &pnt3, const Point3 &pnt4, const Point3 &pnt5, const Point3 &pnt6, const Point3 &pnt7, vec_ushort8 * threeQuads )
+{
+#if 0
+    __m128 xyz0[3];
+    __m128 xyz1[3];
+    storeXYZArray( pnt0, pnt1, pnt2, pnt3, xyz0 );
+    storeXYZArray( pnt4, pnt5, pnt6, pnt7, xyz1 );
+    threeQuads[0] = _vmath2VfToHalfFloats(xyz0[0], xyz0[1]);
+    threeQuads[1] = _vmath2VfToHalfFloats(xyz0[2], xyz1[0]);
+    threeQuads[2] = _vmath2VfToHalfFloats(xyz1[1], xyz1[2]);
+#else
+	assert(0);
+#endif
+}
+*/
+VECTORMATH_FORCE_INLINE Point3 & Point3::operator =( const Point3 &pnt )
+{
+    mVec128 = pnt.mVec128; // copies all four lanes; self-assignment is harmless
+    return *this;
+}
+
+VECTORMATH_FORCE_INLINE Point3 & Point3::setX( float _x )
+{
+    _vmathVfSetElement(mVec128, _x, 0); // helper defined outside this chunk; presumably writes _x into lane 0 of mVec128 in place - confirm
+    return *this;
+}
+
+VECTORMATH_FORCE_INLINE Point3 & Point3::setX( const floatInVec &_x )
+{
+    mVec128 = _vmathVfInsert(mVec128, _x.get128(), 0); // helper defined outside this chunk; its result (presumably mVec128 with lane 0 replaced) is stored back
+    return *this;
+}
+
+VECTORMATH_FORCE_INLINE const floatInVec Point3::getX( ) const
+{
+    return floatInVec( mVec128, 0 ); // wrap lane 0 (x)
+}
+
+VECTORMATH_FORCE_INLINE Point3 & Point3::setY( float _y )
+{
+    _vmathVfSetElement(mVec128, _y, 1); // same pattern as setX, targeting lane 1
+    return *this;
+}
+
+VECTORMATH_FORCE_INLINE Point3 & Point3::setY( const floatInVec &_y )
+{
+    mVec128 = _vmathVfInsert(mVec128, _y.get128(), 1); // same pattern as setX, targeting lane 1
+    return *this;
+}
+
+VECTORMATH_FORCE_INLINE const floatInVec Point3::getY( ) const
+{
+    return floatInVec( mVec128, 1 ); // wrap lane 1 (y)
+}
+
+VECTORMATH_FORCE_INLINE Point3 & Point3::setZ( float _z )
+{
+    _vmathVfSetElement(mVec128, _z, 2); // same pattern as setX, targeting lane 2
+    return *this;
+}
+
+VECTORMATH_FORCE_INLINE Point3 & Point3::setZ( const floatInVec &_z )
+{
+    mVec128 = _vmathVfInsert(mVec128, _z.get128(), 2); // same pattern as setX, targeting lane 2
+    return *this;
+}
+
+VECTORMATH_FORCE_INLINE const floatInVec Point3::getZ( ) const
+{
+    return floatInVec( mVec128, 2 ); // wrap lane 2 (z)
+}
+
+VECTORMATH_FORCE_INLINE Point3 & Point3::setElem( int idx, float value )
+{
+    _vmathVfSetElement(mVec128, value, idx); // idx is passed through unchecked; helper defined outside this chunk
+    return *this;
+}
+
+VECTORMATH_FORCE_INLINE Point3 & Point3::setElem( int idx, const floatInVec &value )
+{
+    mVec128 = _vmathVfInsert(mVec128, value.get128(), idx); // idx is passed through unchecked; helper defined outside this chunk
+    return *this;
+}
+
+VECTORMATH_FORCE_INLINE const floatInVec Point3::getElem( int idx ) const
+{
+    return floatInVec( mVec128, idx ); // wrap lane idx; no range check is done here
+}
+
+VECTORMATH_FORCE_INLINE VecIdx Point3::operator []( int idx )
+{
+    return VecIdx( mVec128, idx ); // proxy bound to mVec128 by reference, so it must not outlive this Point3
+}
+
+VECTORMATH_FORCE_INLINE const floatInVec Point3::operator []( int idx ) const
+{
+    return floatInVec( mVec128, idx ); // read-only access: same as getElem
+}
+
+VECTORMATH_FORCE_INLINE const Vector3 Point3::operator -( const Point3 &pnt ) const
+{
+    return Vector3( _mm_sub_ps( mVec128, pnt.mVec128 ) ); // point - point yields the displacement as a Vector3
+}
+
+VECTORMATH_FORCE_INLINE const Point3 Point3::operator +( const Vector3 &vec ) const
+{
+    return Point3( _mm_add_ps( mVec128, vec.get128() ) ); // point + vector yields a translated point
+}
+
+VECTORMATH_FORCE_INLINE const Point3 Point3::operator -( const Vector3 &vec ) const
+{
+    return Point3( _mm_sub_ps( mVec128, vec.get128() ) ); // point - vector yields a translated point
+}
+
+VECTORMATH_FORCE_INLINE Point3 & Point3::operator +=( const Vector3 &vec )
+{
+    // Translate in place; Point3::operator = returns *this, so its result is returned directly.
+    return ( *this = *this + vec );
+}
+
+VECTORMATH_FORCE_INLINE Point3 & Point3::operator -=( const Vector3 &vec )
+{
+    // Translate in place by -vec; Point3::operator = returns *this, so its result is returned directly.
+    return ( *this = *this - vec );
+}
+
+VECTORMATH_FORCE_INLINE const Point3 mulPerElem( const Point3 &pnt0, const Point3 &pnt1 )
+{
+    return Point3( _mm_mul_ps( pnt0.get128(), pnt1.get128() ) ); // lane-wise product (the w lane is multiplied as well)
+}
+
+VECTORMATH_FORCE_INLINE const Point3 divPerElem( const Point3 &pnt0, const Point3 &pnt1 )
+{
+    return Point3( _mm_div_ps( pnt0.get128(), pnt1.get128() ) ); // lane-wise quotient over all four lanes; zero lanes in pnt1 (w included) are not checked
+}
+
+VECTORMATH_FORCE_INLINE const Point3 recipPerElem( const Point3 &pnt )
+{
+    return Point3( _mm_rcp_ps( pnt.get128() ) ); // NOTE: _mm_rcp_ps is the fast reciprocal approximation, not an exact 1/x
+}
+
+VECTORMATH_FORCE_INLINE const Point3 absPerElem( const Point3 &pnt )
+{
+    return Point3( fabsf4( pnt.get128() ) ); // fabsf4 masks off the sign bit of every lane
+}
+
+VECTORMATH_FORCE_INLINE const Point3 copySignPerElem( const Point3 &pnt0, const Point3 &pnt1 )
+{
+	const __m128 magnitudeBits = toM128(0x7fffffff);	// every bit of a lane except its sign bit
+	const __m128 magnitude = _mm_and_ps   ( magnitudeBits, pnt0.get128() );	// pnt0 with the sign bits cleared
+	const __m128 signOnly  = _mm_andnot_ps( magnitudeBits, pnt1.get128() );	// just the sign bits of pnt1
+	return Point3( _mm_or_ps( magnitude, signOnly ) );	// magnitude of pnt0 combined with the sign of pnt1
+}
+
+VECTORMATH_FORCE_INLINE const Point3 maxPerElem( const Point3 &pnt0, const Point3 &pnt1 )
+{
+    return Point3( _mm_max_ps( pnt0.get128(), pnt1.get128() ) ); // lane-wise maximum
+}
+
+VECTORMATH_FORCE_INLINE const floatInVec maxElem( const Point3 &pnt )
+{
+    return floatInVec( _mm_max_ps( _mm_max_ps( vec_splat( pnt.get128(), 0 ), vec_splat( pnt.get128(), 1 ) ), vec_splat( pnt.get128(), 2 ) ) ); // max(max(x, y), z); the w lane is not consulted
+}
+
+VECTORMATH_FORCE_INLINE const Point3 minPerElem( const Point3 &pnt0, const Point3 &pnt1 )
+{
+    return Point3( _mm_min_ps( pnt0.get128(), pnt1.get128() ) ); // lane-wise minimum
+}
+
+VECTORMATH_FORCE_INLINE const floatInVec minElem( const Point3 &pnt )
+{
+    return floatInVec( _mm_min_ps( _mm_min_ps( vec_splat( pnt.get128(), 0 ), vec_splat( pnt.get128(), 1 ) ), vec_splat( pnt.get128(), 2 ) ) ); // min(min(x, y), z); the w lane is not consulted
+}
+
+VECTORMATH_FORCE_INLINE const floatInVec sum( const Point3 &pnt )
+{
+    return floatInVec( _mm_add_ps( _mm_add_ps( vec_splat( pnt.get128(), 0 ), vec_splat( pnt.get128(), 1 ) ), vec_splat( pnt.get128(), 2 ) ) ); // (x + y) + z; the w lane is not consulted
+}
+
+VECTORMATH_FORCE_INLINE const Point3 scale( const Point3 &pnt, float scaleVal )
+{
+    return scale( pnt, floatInVec( scaleVal ) ); // wrap the float and forward to the floatInVec overload below
+}
+
+VECTORMATH_FORCE_INLINE const Point3 scale( const Point3 &pnt, const floatInVec &scaleVal )
+{
+    return mulPerElem( pnt, Point3( scaleVal ) ); // uniform scale: build a Point3 from the scalar, then multiply lane-wise
+}
+
+VECTORMATH_FORCE_INLINE const Point3 scale( const Point3 &pnt, const Vector3 &scaleVec )
+{
+    return mulPerElem( pnt, Point3( scaleVec ) ); // per-axis scale: reinterpret the vector as a Point3, then multiply lane-wise
+}
+
+VECTORMATH_FORCE_INLINE const floatInVec projection( const Point3 &pnt, const Vector3 &unitVec )
+{
+    return floatInVec( _vmathVfDot3( pnt.get128(), unitVec.get128() ), 0 ); // _vmathVfDot3 is defined outside this chunk; unitVec is not normalised here
+}
+
+VECTORMATH_FORCE_INLINE const floatInVec distSqrFromOrigin( const Point3 &pnt )
+{
+    return lengthSqr( Vector3( pnt ) ); // treat the point as a displacement from the origin
+}
+
+VECTORMATH_FORCE_INLINE const floatInVec distFromOrigin( const Point3 &pnt )
+{
+    return length( Vector3( pnt ) ); // treat the point as a displacement from the origin
+}
+
+VECTORMATH_FORCE_INLINE const floatInVec distSqr( const Point3 &pnt0, const Point3 &pnt1 )
+{
+    return lengthSqr( ( pnt1 - pnt0 ) ); // squared length of the displacement between the two points
+}
+
+VECTORMATH_FORCE_INLINE const floatInVec dist( const Point3 &pnt0, const Point3 &pnt1 )
+{
+    return length( ( pnt1 - pnt0 ) ); // length of the displacement between the two points
+}
+
+VECTORMATH_FORCE_INLINE const Point3 select( const Point3 &pnt0, const Point3 &pnt1, bool select1 )
+{
+    return select( pnt0, pnt1, boolInVec(select1) ); // wrap the bool and forward to the boolInVec overload below
+}
+
+VECTORMATH_FORCE_INLINE const Point3 select( const Point3 &pnt0, const Point3 &pnt1, const boolInVec &select1 )
+{
+    return Point3( vec_sel( pnt0.get128(), pnt1.get128(), select1.get128() ) ); // bits set in select1's mask take pnt1, clear bits take pnt0
+}
+
+
+
+#ifdef _VECTORMATH_DEBUG
+
+VECTORMATH_FORCE_INLINE void print( const Point3 &pnt )
+{
+    union { __m128 v; float s[4]; } tmp; // lets the register be read back as floats for printf
+    tmp.v = pnt.get128();
+    printf( "( %f %f %f )\n", tmp.s[0], tmp.s[1], tmp.s[2] ); // only x, y, z are printed; w is skipped
+}
+
+VECTORMATH_FORCE_INLINE void print( const Point3 &pnt, const char * name )
+{
+    union { __m128 v; float s[4]; } tmp; // lets the register be read back as floats for printf
+    tmp.v = pnt.get128();
+    printf( "%s: ( %f %f %f )\n", name, tmp.s[0], tmp.s[1], tmp.s[2] ); // name is passed to %s unchecked; must not be null
+}
+
+#endif
+
+} // namespace Aos
+} // namespace Vectormath
+
+#endif
diff --git a/Engine/lib/bullet/src/vectormath/sse/vecidx_aos.h b/Engine/lib/bullet/src/vectormath/sse/vecidx_aos.h
new file mode 100644
index 000000000..8ba4b1d75
--- /dev/null
+++ b/Engine/lib/bullet/src/vectormath/sse/vecidx_aos.h
@@ -0,0 +1,80 @@
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _VECTORMATH_VECIDX_AOS_H
+#define _VECTORMATH_VECIDX_AOS_H
+
+
+#include "floatInVec.h"
+
+namespace Vectormath {
+namespace Aos {
+
+//-----------------------------------------------------------------------------
+// VecIdx 
+// Used in setting elements of Vector3, Vector4, Point3, or Quat with the 
+// subscripting operator.
+//
+
+VM_ATTRIBUTE_ALIGNED_CLASS16 (class) VecIdx
+{
+private:
+   __m128 &ref; // the vector register being indexed; held by reference, so the proxy must not outlive it
+   int i;       // element index within ref; stored as given, not range-checked here
+public:
+    inline VecIdx( __m128& vec, int idx ): ref(vec) { i = idx; } // bind the proxy to element idx of vec
+
+    // implicitly casts to float unless _VECTORMATH_NO_SCALAR_CAST defined
+    // in which case, implicitly casts to floatInVec, and one must call
+    // getAsFloat to convert to float.
+    //
+#ifdef _VECTORMATH_NO_SCALAR_CAST
+    inline operator floatInVec() const;
+    inline float getAsFloat() const;
+#else
+    inline operator float() const;
+#endif
+
+    // Assignment and compound-assignment operators; they are only declared here,
+    // their definitions live outside this header. Note the float overload of
+    // operator = returns float while every other overload returns floatInVec.
+    inline float operator =( float scalar );
+    inline floatInVec operator =( const floatInVec &scalar );
+    inline floatInVec operator =( const VecIdx& scalar );
+    inline floatInVec operator *=( float scalar );
+    inline floatInVec operator *=( const floatInVec &scalar );
+    inline floatInVec operator /=( float scalar );
+    inline floatInVec operator /=( const floatInVec &scalar );
+    inline floatInVec operator +=( float scalar );
+    inline floatInVec operator +=( const floatInVec &scalar );
+    inline floatInVec operator -=( float scalar );
+    inline floatInVec operator -=( const floatInVec &scalar );
+};
+
+} // namespace Aos
+} // namespace Vectormath
+
+#endif
diff --git a/Engine/lib/bullet/src/vectormath/sse/vectormath_aos.h b/Engine/lib/bullet/src/vectormath/sse/vectormath_aos.h
new file mode 100644
index 000000000..be5ae8c6e
--- /dev/null
+++ b/Engine/lib/bullet/src/vectormath/sse/vectormath_aos.h
@@ -0,0 +1,2547 @@
+/*
+   Copyright (C) 2006, 2007 Sony Computer Entertainment Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms,
+   with or without modification, are permitted provided that the
+   following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the Sony Computer Entertainment Inc nor the names
+      of its contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+
+#ifndef _VECTORMATH_AOS_CPP_SSE_H
+#define _VECTORMATH_AOS_CPP_SSE_H
+
+#include <math.h>
+#include <xmmintrin.h>
+#include <emmintrin.h>
+#include <assert.h>
+
+#define Vector3Ref Vector3&
+#define QuatRef	Quat&
+#define Matrix3Ref Matrix3&
+
+#if (defined (_WIN32) && (_MSC_VER) && _MSC_VER >= 1400)
+	#define USE_SSE3_LDDQU
+
+	#define VM_ATTRIBUTE_ALIGNED_CLASS16(a) __declspec(align(16)) a
+	#define VM_ATTRIBUTE_ALIGN16 __declspec(align(16))
+	#define VECTORMATH_FORCE_INLINE __forceinline 
+#else
+	#define VM_ATTRIBUTE_ALIGNED_CLASS16(a) a __attribute__ ((aligned (16)))	
+	#define VM_ATTRIBUTE_ALIGN16 __attribute__ ((aligned (16)))	
+	#define VECTORMATH_FORCE_INLINE inline __attribute__ ((always_inline))
+	#ifdef __SSE3__
+		#define USE_SSE3_LDDQU
+	#endif //__SSE3__
+#endif//_WIN32
+
+
+#ifdef USE_SSE3_LDDQU
+#include <pmmintrin.h>//_mm_lddqu_si128
+#endif //USE_SSE3_LDDQU
+
+
+// TODO: Tidy. Note that vec_uint4 and vec_int4 alias the float register type __m128, so integer data held in them is carried as raw bits.
+typedef __m128 vec_float4;
+typedef __m128 vec_uint4;
+typedef __m128 vec_int4;
+typedef __m128i vec_uchar16;
+typedef __m128i vec_ushort8;
+
+#define vec_splat(x, e) _mm_shuffle_ps(x, x, _MM_SHUFFLE(e,e,e,e)) // broadcast lane e of x to all four lanes; x is expanded twice
+
+#define _mm_ror_ps(vec,i)	\
+	(((i)%4) ? (_mm_shuffle_ps(vec,vec, _MM_SHUFFLE((unsigned char)((i)+3)%4,(unsigned char)((i)+2)%4,(unsigned char)((i)+1)%4,(unsigned char)((i)+0)%4))) : (vec)) // result lane k = vec lane (k+i)%4; i is parenthesised so expression arguments expand correctly
+#define _mm_rol_ps(vec,i)	\
+	(((i)%4) ? (_mm_shuffle_ps(vec,vec, _MM_SHUFFLE((unsigned char)(7-(i))%4,(unsigned char)(6-(i))%4,(unsigned char)(5-(i))%4,(unsigned char)(4-(i))%4))) : (vec)) // result lane k = vec lane (k-i)%4; i is parenthesised so expression arguments expand correctly
+
+#define vec_sld(vec,vec2,x) _mm_ror_ps(vec, ((x)/4)) // NOTE: vec2 is ignored; this only rotates vec by x/4 lanes, so it matches a two-vector shift only when both arguments are the same vector
+
+#define _mm_abs_ps(vec)		_mm_andnot_ps(_MASKSIGN_,vec) // NOTE(review): _MASKSIGN_ is not defined anywhere in this chunk - confirm it exists before using
+#define _mm_neg_ps(vec)		_mm_xor_ps(_MASKSIGN_,vec) // NOTE(review): depends on the same _MASKSIGN_ symbol
+
+#define vec_madd(a, b, c) _mm_add_ps(c, _mm_mul_ps(a, b) ) // a * b + c as a separate multiply and add (two roundings)
+
+union SSEFloat // overlays one 128-bit SSE value with integer and float element views
+{
+	__m128i vi; // integer register view
+	__m128 m128; // float register view
+	__m128 vf; // second name for the float register view
+	unsigned int	ui[4]; // four 32-bit lanes as unsigned integers
+	unsigned short s[8]; // eight 16-bit lanes
+	float f[4]; // four 32-bit lanes as floats
+	SSEFloat(__m128 v) : m128(v) {}
+    SSEFloat(__m128i v) : vi(v) {}
+	SSEFloat() {}//uninitialized
+};
+
+static VECTORMATH_FORCE_INLINE __m128 vec_sel(__m128 a, __m128 b, __m128 mask)
+{
+	return _mm_or_ps(_mm_and_ps(mask, b), _mm_andnot_ps(mask, a)); // bitwise select: (mask & b) | (~mask & a), so set mask bits take b
+}
+static VECTORMATH_FORCE_INLINE __m128 vec_sel(__m128 a, __m128 b, const unsigned int *_mask)
+{
+	return vec_sel(a, b, _mm_load_ps((float *)_mask)); // _mask must point at four 16-byte-aligned words: _mm_load_ps is an aligned load
+}
+static VECTORMATH_FORCE_INLINE __m128 vec_sel(__m128 a, __m128 b, unsigned int _mask)
+{	// Bitwise select with the same 32-bit mask in every lane: set mask bits take b, clear bits take a.
+	return vec_sel(a, b, _mm_castsi128_ps(_mm_set1_epi32((int)_mask))); // integer splat + register bit-cast instead of reading _mask through a float pointer (strict-aliasing violation)
+}
+
+static VECTORMATH_FORCE_INLINE __m128 toM128(unsigned int x)
+{   // Broadcast the raw 32-bit pattern x to all four lanes; this is a bit mask, not a numeric int-to-float conversion.
+    return _mm_castsi128_ps( _mm_set1_epi32( (int)x ) ); // integer splat + register bit-cast instead of *(float *)&x, which violates strict aliasing
+}
+
+static VECTORMATH_FORCE_INLINE __m128 fabsf4(__m128 x)
+{
+    return _mm_and_ps( x, toM128( 0x7fffffff ) ); // clear bit 31 (the sign bit) of every lane
+}
+/*
+union SSE64
+{
+	__m128 m128;
+	struct
+	{
+		__m64 m01;
+		__m64 m23;
+	} m64;
+};
+
+static VECTORMATH_FORCE_INLINE __m128 vec_cts(__m128 x, int a)
+{
+	assert(a == 0); // Only 2^0 supported
+	(void)a;
+	SSE64 sse64;
+	sse64.m64.m01 = _mm_cvttps_pi32(x);
+	sse64.m64.m23 = _mm_cvttps_pi32(_mm_ror_ps(x,2));
+	_mm_empty();
+    return sse64.m128;
+}
+
+static VECTORMATH_FORCE_INLINE __m128 vec_ctf(__m128 x, int a)
+{
+	assert(a == 0); // Only 2^0 supported
+	(void)a;
+	SSE64 sse64;
+	sse64.m128 = x;
+	__m128 result =_mm_movelh_ps(
+		_mm_cvt_pi2ps(_mm_setzero_ps(), sse64.m64.m01),
+		_mm_cvt_pi2ps(_mm_setzero_ps(), sse64.m64.m23));
+	_mm_empty();
+	return result;
+}
+*/
+static VECTORMATH_FORCE_INLINE __m128 vec_cts(__m128 x, int a)
+{
+	// Float -> int32 per lane; _mm_cvtps_epi32 rounds according to the current MXCSR rounding mode.
+	assert(a == 0); // Only 2^0 supported
+	(void)a;
+    return _mm_castsi128_ps( _mm_cvtps_epi32(x) ); // integer bits handed back in a float register
+}
+
+static VECTORMATH_FORCE_INLINE __m128 vec_ctf(__m128 x, int a)
+{
+	assert(a == 0); // Only 2^0 supported
+	(void)a;
+	return _mm_cvtepi32_ps( _mm_castps_si128(x) ); // int32 -> float per lane; x is read as four integers
+}
+
+#define vec_nmsub(a,b,c) _mm_sub_ps( c, _mm_mul_ps( a, b ) ) // c - a * b
+#define vec_sub(a,b) _mm_sub_ps( a, b )
+#define vec_add(a,b) _mm_add_ps( a, b )
+#define vec_mul(a,b) _mm_mul_ps( a, b )
+#define vec_xor(a,b) _mm_xor_ps( a, b )
+#define vec_and(a,b) _mm_and_ps( a, b )
+#define vec_cmpeq(a,b) _mm_cmpeq_ps( a, b ) // floating-point compare: lanes become all-ones when equal, zero otherwise
+#define vec_cmpgt(a,b) _mm_cmpgt_ps( a, b ) // floating-point compare: lanes become all-ones when a > b, zero otherwise
+
+#define vec_mergeh(a,b) _mm_unpacklo_ps( a, b ) // (a0, b0, a1, b1)
+#define vec_mergel(a,b) _mm_unpackhi_ps( a, b ) // (a2, b2, a3, b3)
+
+#define vec_andc(a,b) _mm_andnot_ps( b, a ) // a & ~b (operands swapped relative to _mm_andnot_ps)
+
+#define sqrtf4(x) _mm_sqrt_ps( x )
+#define rsqrtf4(x) _mm_rsqrt_ps( x ) // fast approximation of 1/sqrt(x)
+#define recipf4(x) _mm_rcp_ps( x ) // fast approximation of 1/x
+#define negatef4(x) _mm_sub_ps( _mm_setzero_ps(), x ) // 0 - x
+
+static VECTORMATH_FORCE_INLINE __m128 newtonrapson_rsqrt4( const __m128 v )
+{   // Per-lane 1/sqrt(v): the _mm_rsqrt_ps estimate refined by one Newton-Raphson step. The two macros below stay defined after this function.
+#define _half4 _mm_setr_ps(.5f,.5f,.5f,.5f) 
+#define _three _mm_setr_ps(3.f,3.f,3.f,3.f)
+const __m128 approx = _mm_rsqrt_ps( v );   // initial low-precision estimate
+const __m128 muls = _mm_mul_ps(_mm_mul_ps(v, approx), approx);   // v * approx^2
+return _mm_mul_ps(_mm_mul_ps(_half4, approx), _mm_sub_ps(_three, muls) ); // 0.5 * approx * (3 - v * approx^2)
+}
+
+static VECTORMATH_FORCE_INLINE __m128 acosf4(__m128 x)
+{
+    __m128 xabs = fabsf4(x);
+	__m128 select = _mm_cmplt_ps( x, _mm_setzero_ps() ); // all-ones in lanes where x is negative
+    __m128 t1 = sqrtf4(vec_sub(_mm_set1_ps(1.0f), xabs)); // sqrt(1 - |x|); the input is not clamped to [-1, 1]
+    
+    /* Instruction counts can be reduced if the polynomial was
+     * computed entirely from nested (dependent) fma's. However, 
+     * to reduce the number of pipeline stalls, the polynomial is evaluated 
+     * in two halves (hi and lo) that are combined as hi * |x|^4 + lo.
+     */
+    __m128 xabs2 = _mm_mul_ps(xabs,  xabs);
+    __m128 xabs4 = _mm_mul_ps(xabs2, xabs2);
+    __m128 hi = vec_madd(vec_madd(vec_madd(_mm_set1_ps(-0.0012624911f),
+		xabs, _mm_set1_ps(0.0066700901f)),
+			xabs, _mm_set1_ps(-0.0170881256f)),
+				xabs, _mm_set1_ps( 0.0308918810f));
+    __m128 lo = vec_madd(vec_madd(vec_madd(_mm_set1_ps(-0.0501743046f),
+		xabs, _mm_set1_ps(0.0889789874f)),
+			xabs, _mm_set1_ps(-0.2145988016f)),
+				xabs, _mm_set1_ps( 1.5707963050f));
+    
+    __m128 result = vec_madd(hi, xabs4, lo);
+    
+    // Adjust the result if x is negative: those lanes get pi - t1 * result.
+    return vec_sel(
+		vec_mul(t1, result),									// Positive
+		vec_nmsub(t1, result, _mm_set1_ps(3.1415926535898f)),	// Negative
+		select);
+}
+
+static VECTORMATH_FORCE_INLINE __m128 sinf4(vec_float4 x)
+{
+// Per-lane sine of x (radians) using quadrant range reduction and polynomial approximations.
+//
+// Common constants used to evaluate sinf4/cosf4/tanf4
+// (these macros stay defined after this function; sincosf4 reuses them)
+#define _SINCOS_CC0  -0.0013602249f
+#define _SINCOS_CC1   0.0416566950f
+#define _SINCOS_CC2  -0.4999990225f
+#define _SINCOS_SC0  -0.0001950727f
+#define _SINCOS_SC1   0.0083320758f
+#define _SINCOS_SC2  -0.1666665247f
+
+#define _SINCOS_KC1  1.57079625129f
+#define _SINCOS_KC2  7.54978995489e-8f
+
+    vec_float4 xl,xl2,xl3,res;
+
+    // Range reduction using : xl = angle * TwoOverPi;
+    //
+    xl = vec_mul(x, _mm_set1_ps(0.63661977236f));
+
+    // Find the quadrant the angle falls in: q = xl converted to an integer.
+    // vec_cts uses _mm_cvtps_epi32, which follows the MXCSR rounding mode
+    // (round-to-nearest by default).
+    vec_int4 q = vec_cts(xl,0);
+
+    // Compute an offset based on the quadrant that the angle falls in
+    // (low two bits of q, kept as raw integer bits in a float register)
+    vec_int4 offset = _mm_and_ps(q,toM128(0x3));
+
+    // Remainder in range [-pi/4..pi/4] when q was rounded to nearest;
+    // pi/2 is subtracted in two parts (KC1 + KC2) to limit rounding error
+    vec_float4 qf = vec_ctf(q,0);
+    xl  = vec_nmsub(qf,_mm_set1_ps(_SINCOS_KC2),vec_nmsub(qf,_mm_set1_ps(_SINCOS_KC1),x));
+
+    // Compute x^2 and x^3
+    //
+    xl2 = vec_mul(xl,xl);
+    xl3 = vec_mul(xl2,xl);
+
+    // Compute both the sin and cos of the angles
+    // using a polynomial expression:
+    //   cx = 1.0f + xl2 * ((C0 * xl2 + C1) * xl2 + C2), and
+    //   sx = xl + xl3 * ((S0 * xl2 + S1) * xl2 + S2)
+    //
+
+    vec_float4 cx =
+		vec_madd(
+			vec_madd(
+				vec_madd(_mm_set1_ps(_SINCOS_CC0),xl2,_mm_set1_ps(_SINCOS_CC1)),xl2,_mm_set1_ps(_SINCOS_CC2)),xl2,_mm_set1_ps(1.0f));
+    vec_float4 sx =
+		vec_madd(
+			vec_madd(
+				vec_madd(_mm_set1_ps(_SINCOS_SC0),xl2,_mm_set1_ps(_SINCOS_SC1)),xl2,_mm_set1_ps(_SINCOS_SC2)),xl3,xl);
+
+    // Use the cosine when the offset is odd and the sin when the offset is
+    // even. The bit tests use integer compares: 0x1 and 0x2 are denormal as
+    // floats, and a float compare treats them as zero when MXCSR.DAZ is set.
+    res = vec_sel(cx,sx,_mm_castsi128_ps(_mm_cmpeq_epi32(
+                            _mm_castps_si128(vec_and(offset,toM128(0x1))),
+                            _mm_setzero_si128())));
+
+    // Flip the sign of the result when (offset mod 4) = 2 or 3,
+    // i.e. when bit 1 of the offset is set
+    return vec_sel(
+		vec_xor(toM128(0x80000000U), res),	// Negative
+		res,								// Positive
+		_mm_castsi128_ps(_mm_cmpeq_epi32(_mm_castps_si128(vec_and(offset,toM128(0x2))),_mm_setzero_si128())));
+}
+
+static VECTORMATH_FORCE_INLINE void sincosf4(vec_float4 x, vec_float4* s, vec_float4* c)
+{   // Per-lane sine and cosine of x (radians), written to *s and *c. Relies on the _SINCOS_* macros defined inside sinf4 above.
+    vec_float4 xl,xl2,xl3;
+    vec_int4   offsetSin, offsetCos;
+
+    // Range reduction using : xl = angle * TwoOverPi;
+    //
+    xl = vec_mul(x, _mm_set1_ps(0.63661977236f));
+
+    // Find the quadrant the angle falls in: q = xl converted to an integer.
+    // vec_cts uses _mm_cvtps_epi32, which follows the MXCSR rounding mode
+    // (round-to-nearest by default), so no explicit +/-0.5 bias is applied;
+    // the biased form was: vec_cts(vec_add(xl,vec_sel(_mm_set1_ps(0.5f),xl,(0x80000000))),0)
+    vec_int4 q = vec_cts(xl,0);
+
+    // Compute the offset based on the quadrant that the angle falls in.
+    // Add 1 to the offset for the cosine (integer add on the raw bits).
+    //
+    offsetSin = vec_and(q,toM128(0x3));
+	const __m128i offsetCosBits = _mm_add_epi32(_mm_set1_epi32(1),_mm_castps_si128(offsetSin));
+	offsetCos = _mm_castsi128_ps(offsetCosBits);
+
+    // Remainder in range [-pi/4..pi/4] when q was rounded to nearest;
+    // pi/2 is subtracted in two parts (KC1 + KC2) to limit rounding error
+    vec_float4 qf = vec_ctf(q,0);
+    xl  = vec_nmsub(qf,_mm_set1_ps(_SINCOS_KC2),vec_nmsub(qf,_mm_set1_ps(_SINCOS_KC1),x));
+
+    // Compute x^2 and x^3
+    //
+    xl2 = vec_mul(xl,xl);
+    xl3 = vec_mul(xl2,xl);
+
+    // Compute both the sin and cos of the angles
+    // using a polynomial expression:
+    //   cx = 1.0f + xl2 * ((C0 * xl2 + C1) * xl2 + C2), and
+    //   sx = xl + xl3 * ((S0 * xl2 + S1) * xl2 + S2)
+    //
+    vec_float4 cx =
+		vec_madd(
+			vec_madd(
+				vec_madd(_mm_set1_ps(_SINCOS_CC0),xl2,_mm_set1_ps(_SINCOS_CC1)),xl2,_mm_set1_ps(_SINCOS_CC2)),xl2,_mm_set1_ps(1.0f));
+    vec_float4 sx =
+		vec_madd(
+			vec_madd(
+				vec_madd(_mm_set1_ps(_SINCOS_SC0),xl2,_mm_set1_ps(_SINCOS_SC1)),xl2,_mm_set1_ps(_SINCOS_SC2)),xl3,xl);
+
+    // Use the cosine when the offset is odd and the sin when the offset is
+    // even. The bit tests use integer compares: 0x1 and 0x2 are denormal as
+    // floats, and a float compare treats them as zero when MXCSR.DAZ is set.
+    vec_uint4 sinMask = _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_castps_si128(vec_and(offsetSin,toM128(0x1))),_mm_setzero_si128()));
+    vec_uint4 cosMask = _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_castps_si128(vec_and(offsetCos,toM128(0x1))),_mm_setzero_si128()));
+    *s = vec_sel(cx,sx,sinMask);
+    *c = vec_sel(cx,sx,cosMask);
+
+    // Flip the sign of the result when bit 1 of the offset is set
+    // (offset mod 4 = 2 or 3)
+    sinMask = _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_castps_si128(vec_and(offsetSin,toM128(0x2))),_mm_setzero_si128()));
+    cosMask = _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_castps_si128(vec_and(offsetCos,toM128(0x2))),_mm_setzero_si128()));
+
+    *s = vec_sel((vec_float4)vec_xor(toM128(0x80000000),(vec_uint4)*s),*s,sinMask);
+    *c = vec_sel((vec_float4)vec_xor(toM128(0x80000000),(vec_uint4)*c),*c,cosMask);
+}
+
+#include "vecidx_aos.h"
+#include "floatInVec.h"
+#include "boolInVec.h"
+
+#ifdef _VECTORMATH_DEBUG
+#include <stdio.h>
+#endif
+namespace Vectormath {
+
+namespace Aos {
+
+//-----------------------------------------------------------------------------
+// Forward Declarations
+//
+
+class Vector3;
+class Vector4;
+class Point3;
+class Quat;
+class Matrix3;
+class Matrix4;
+class Transform3;
+
+// A 3-D vector in array-of-structures format
+//
+class Vector3
+{
+    __m128 mVec128;
+
+	VECTORMATH_FORCE_INLINE void set128(vec_float4 vec);
+	 
+	 VECTORMATH_FORCE_INLINE  vec_float4& get128Ref();
+
+public:
+    // Default constructor; does no initialization
+    // 
+    VECTORMATH_FORCE_INLINE Vector3( ) { };
+
+	// Default copy constructor
+    // 
+	VECTORMATH_FORCE_INLINE Vector3(const Vector3& vec);
+
+    // Construct a 3-D vector from x, y, and z elements
+    // 
+    VECTORMATH_FORCE_INLINE Vector3( float x, float y, float z );
+
+    // Construct a 3-D vector from x, y, and z elements (scalar data contained in vector data type)
+    // 
+    VECTORMATH_FORCE_INLINE Vector3( const floatInVec &x, const floatInVec &y, const floatInVec &z );
+
+    // Copy elements from a 3-D point into a 3-D vector
+    // 
+    explicit VECTORMATH_FORCE_INLINE Vector3( const Point3 &pnt );
+
+    // Set all elements of a 3-D vector to the same scalar value
+    // 
+    explicit VECTORMATH_FORCE_INLINE Vector3( float scalar );
+
+    // Set all elements of a 3-D vector to the same scalar value (scalar data contained in vector data type)
+    // 
+    explicit VECTORMATH_FORCE_INLINE Vector3( const floatInVec &scalar );
+
+    // Set vector float data in a 3-D vector
+    // 
+    explicit VECTORMATH_FORCE_INLINE Vector3( __m128 vf4 );
+
+    // Get vector float data from a 3-D vector
+    // 
+    VECTORMATH_FORCE_INLINE __m128 get128( ) const;
+
+    // Assign one 3-D vector to another
+    // 
+    VECTORMATH_FORCE_INLINE Vector3 & operator =( const Vector3 &vec );
+
+    // Set the x element of a 3-D vector
+    // 
+    VECTORMATH_FORCE_INLINE Vector3 & setX( float x );
+
+    // Set the y element of a 3-D vector
+    // 
+    VECTORMATH_FORCE_INLINE Vector3 & setY( float y );
+
+    // Set the z element of a 3-D vector
+    // 
+    VECTORMATH_FORCE_INLINE Vector3 & setZ( float z );
+
+    // Set the x element of a 3-D vector (scalar data contained in vector data type)
+    // 
+    VECTORMATH_FORCE_INLINE Vector3 & setX( const floatInVec &x );
+
+    // Set the y element of a 3-D vector (scalar data contained in vector data type)
+    // 
+    VECTORMATH_FORCE_INLINE Vector3 & setY( const floatInVec &y );
+
+    // Set the z element of a 3-D vector (scalar data contained in vector data type)
+    // 
+    VECTORMATH_FORCE_INLINE Vector3 & setZ( const floatInVec &z );
+
+    // Get the x element of a 3-D vector
+    // 
+    VECTORMATH_FORCE_INLINE const floatInVec getX( ) const;
+
+    // Get the y element of a 3-D vector
+    // 
+    VECTORMATH_FORCE_INLINE const floatInVec getY( ) const;
+
+    // Get the z element of a 3-D vector
+    // 
+    VECTORMATH_FORCE_INLINE const floatInVec getZ( ) const;
+
+    // Set an x, y, or z element of a 3-D vector by index
+    // 
+    VECTORMATH_FORCE_INLINE Vector3 & setElem( int idx, float value );
+
+    // Set an x, y, or z element of a 3-D vector by index (scalar data contained in vector data type)
+    // 
+    VECTORMATH_FORCE_INLINE Vector3 & setElem( int idx, const floatInVec &value );
+
+    // Get an x, y, or z element of a 3-D vector by index
+    // 
+    VECTORMATH_FORCE_INLINE const floatInVec getElem( int idx ) const;
+
+    // Subscripting operator to set or get an element
+    // 
+    VECTORMATH_FORCE_INLINE VecIdx operator []( int idx );
+
+    // Subscripting operator to get an element
+    // 
+    VECTORMATH_FORCE_INLINE const floatInVec operator []( int idx ) const;
+
+    // Add two 3-D vectors
+    // 
+    VECTORMATH_FORCE_INLINE const Vector3 operator +( const Vector3 &vec ) const;
+
+    // Subtract a 3-D vector from another 3-D vector
+    // 
+    VECTORMATH_FORCE_INLINE const Vector3 operator -( const Vector3 &vec ) const;
+
+    // Add a 3-D vector to a 3-D point
+    // 
+    VECTORMATH_FORCE_INLINE const Point3 operator +( const Point3 &pnt ) const;
+
+    // Multiply a 3-D vector by a scalar
+    // 
+    VECTORMATH_FORCE_INLINE const Vector3 operator *( float scalar ) const;
+
+    // Divide a 3-D vector by a scalar
+    // 
+    VECTORMATH_FORCE_INLINE const Vector3 operator /( float scalar ) const;
+
+    // Multiply a 3-D vector by a scalar (scalar data contained in vector data type)
+    // 
+    VECTORMATH_FORCE_INLINE const Vector3 operator *( const floatInVec &scalar ) const;
+
+    // Divide a 3-D vector by a scalar (scalar data contained in vector data type)
+    // 
+    VECTORMATH_FORCE_INLINE const Vector3 operator /( const floatInVec &scalar ) const;
+
+    // Perform compound assignment and addition with a 3-D vector
+    // 
+    VECTORMATH_FORCE_INLINE Vector3 & operator +=( const Vector3 &vec );
+
+    // Perform compound assignment and subtraction by a 3-D vector
+    // 
+    VECTORMATH_FORCE_INLINE Vector3 & operator -=( const Vector3 &vec );
+
+    // Perform compound assignment and multiplication by a scalar
+    // 
+    VECTORMATH_FORCE_INLINE Vector3 & operator *=( float scalar );
+
+    // Perform compound assignment and division by a scalar
+    // 
+    VECTORMATH_FORCE_INLINE Vector3 & operator /=( float scalar );
+
+    // Perform compound assignment and multiplication by a scalar (scalar data contained in vector data type)
+    // 
+    VECTORMATH_FORCE_INLINE Vector3 & operator *=( const floatInVec &scalar );
+
+    // Perform compound assignment and division by a scalar (scalar data contained in vector data type)
+    // 
+    VECTORMATH_FORCE_INLINE Vector3 & operator /=( const floatInVec &scalar );
+
+    // Negate all elements of a 3-D vector
+    // 
+    VECTORMATH_FORCE_INLINE const Vector3 operator -( ) const;
+
+    // Construct x axis
+    // 
+    static VECTORMATH_FORCE_INLINE const Vector3 xAxis( );
+
+    // Construct y axis
+    // 
+    static VECTORMATH_FORCE_INLINE const Vector3 yAxis( );
+
+    // Construct z axis
+    // 
+    static VECTORMATH_FORCE_INLINE const Vector3 zAxis( );
+
+};
+
+// Multiply a 3-D vector by a scalar
+// (scalar-on-the-left form; Vector3 also declares a member operator * taking the scalar on the right)
+VECTORMATH_FORCE_INLINE const Vector3 operator *( float scalar, const Vector3 &vec );
+
+// Multiply a 3-D vector by a scalar (scalar data contained in vector data type)
+// (scalar-on-the-left form of the floatInVec overload)
+VECTORMATH_FORCE_INLINE const Vector3 operator *( const floatInVec &scalar, const Vector3 &vec );
+
+// Multiply two 3-D vectors per element
+// 
+VECTORMATH_FORCE_INLINE const Vector3 mulPerElem( const Vector3 &vec0, const Vector3 &vec1 );
+
+// Divide two 3-D vectors per element
+// NOTE: 
+// Floating-point behavior matches standard library function divf4.
+// 
+VECTORMATH_FORCE_INLINE const Vector3 divPerElem( const Vector3 &vec0, const Vector3 &vec1 );
+
+// Compute the reciprocal of a 3-D vector per element
+// NOTE: 
+// Floating-point behavior matches standard library function recipf4.
+// 
+VECTORMATH_FORCE_INLINE const Vector3 recipPerElem( const Vector3 &vec );
+
+// Compute the absolute value of a 3-D vector per element
+// 
+VECTORMATH_FORCE_INLINE const Vector3 absPerElem( const Vector3 &vec );
+
+// Copy sign from one 3-D vector to another, per element
+// 
+VECTORMATH_FORCE_INLINE const Vector3 copySignPerElem( const Vector3 &vec0, const Vector3 &vec1 );
+
+// Maximum of two 3-D vectors per element
+// 
+VECTORMATH_FORCE_INLINE const Vector3 maxPerElem( const Vector3 &vec0, const Vector3 &vec1 );
+
+// Minimum of two 3-D vectors per element
+// 
+VECTORMATH_FORCE_INLINE const Vector3 minPerElem( const Vector3 &vec0, const Vector3 &vec1 );
+
+// Maximum element of a 3-D vector
+// The result is a floatInVec (scalar data contained in vector data type), not a plain float.
+VECTORMATH_FORCE_INLINE const floatInVec maxElem( const Vector3 &vec );
+
+// Minimum element of a 3-D vector
+// The result is a floatInVec (scalar data contained in vector data type), not a plain float.
+VECTORMATH_FORCE_INLINE const floatInVec minElem( const Vector3 &vec );
+
+// Compute the sum of all elements of a 3-D vector
+// The result is a floatInVec (scalar data contained in vector data type), not a plain float.
+VECTORMATH_FORCE_INLINE const floatInVec sum( const Vector3 &vec );
+
+// Compute the dot product of two 3-D vectors
+// The result is a floatInVec (scalar data contained in vector data type), not a plain float.
+VECTORMATH_FORCE_INLINE const floatInVec dot( const Vector3 &vec0, const Vector3 &vec1 );
+
+// Compute the square of the length of a 3-D vector
+// The result is a floatInVec (scalar data contained in vector data type), not a plain float.
+VECTORMATH_FORCE_INLINE const floatInVec lengthSqr( const Vector3 &vec );
+
+// Compute the length of a 3-D vector
+// The result is a floatInVec (scalar data contained in vector data type), not a plain float.
+VECTORMATH_FORCE_INLINE const floatInVec length( const Vector3 &vec );
+
+// Normalize a 3-D vector
+// NOTE: 
+// The result is unpredictable when all elements of vec are at or near zero.
+// 
+VECTORMATH_FORCE_INLINE const Vector3 normalize( const Vector3 &vec );
+
+// Compute cross product of two 3-D vectors
+// 
+VECTORMATH_FORCE_INLINE const Vector3 cross( const Vector3 &vec0, const Vector3 &vec1 );
+
+// Outer product of two 3-D vectors
+// The result is a Matrix3.
+VECTORMATH_FORCE_INLINE const Matrix3 outer( const Vector3 &vec0, const Vector3 &vec1 );
+
+// Pre-multiply a row vector by a 3x3 matrix
+// NOTE: 
+// Slower than column post-multiply.
+// 
+VECTORMATH_FORCE_INLINE const Vector3 rowMul( const Vector3 &vec, const Matrix3 & mat );
+
+// Cross-product matrix of a 3-D vector
+// 
+VECTORMATH_FORCE_INLINE const Matrix3 crossMatrix( const Vector3 &vec );
+
+// Create cross-product matrix and multiply
+// NOTE: 
+// Faster than separately creating a cross-product matrix and multiplying.
+// 
+VECTORMATH_FORCE_INLINE const Matrix3 crossMatrixMul( const Vector3 &vec, const Matrix3 & mat );
+
+// Linear interpolation between two 3-D vectors
+// NOTE: 
+// Does not clamp t between 0 and 1.
+// 
+VECTORMATH_FORCE_INLINE const Vector3 lerp( float t, const Vector3 &vec0, const Vector3 &vec1 );
+
+// Linear interpolation between two 3-D vectors (scalar data contained in vector data type)
+// NOTE: 
+// Does not clamp t between 0 and 1.
+// 
+VECTORMATH_FORCE_INLINE const Vector3 lerp( const floatInVec &t, const Vector3 &vec0, const Vector3 &vec1 );
+
+// Spherical linear interpolation between two 3-D vectors
+// NOTE: 
+// The result is unpredictable if the vectors point in opposite directions.
+// Does not clamp t between 0 and 1.
+// NOTE(review): the parameter names (unitVec0, unitVec1) suggest unit-length inputs are expected - confirm.
+VECTORMATH_FORCE_INLINE const Vector3 slerp( float t, const Vector3 &unitVec0, const Vector3 &unitVec1 );
+
+// Spherical linear interpolation between two 3-D vectors (scalar data contained in vector data type)
+// NOTE: 
+// The result is unpredictable if the vectors point in opposite directions.
+// Does not clamp t between 0 and 1.
+// NOTE(review): the parameter names (unitVec0, unitVec1) suggest unit-length inputs are expected - confirm.
+VECTORMATH_FORCE_INLINE const Vector3 slerp( const floatInVec &t, const Vector3 &unitVec0, const Vector3 &unitVec1 );
+
+// Conditionally select between two 3-D vectors
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// However, the transfer of select1 (a plain bool) to a vector register may use more processing time than a branch.
+// Use the boolInVec version for better performance.
+// 
+VECTORMATH_FORCE_INLINE const Vector3 select( const Vector3 &vec0, const Vector3 &vec1, bool select1 );
+
+// Conditionally select between two 3-D vectors (scalar data contained in vector data type)
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// 
+VECTORMATH_FORCE_INLINE const Vector3 select( const Vector3 &vec0, const Vector3 &vec1, const boolInVec &select1 );
+
+// Store x, y, and z elements of 3-D vector in first three words of a quadword, preserving fourth word
+// quad is the destination; it is passed as a non-const __m128 pointer.
+VECTORMATH_FORCE_INLINE void storeXYZ( const Vector3 &vec, __m128 * quad );
+
+// Load four three-float 3-D vectors, stored in three quadwords
+// Outputs: vec0..vec3 (non-const references). Input: threeQuads (pointer to const __m128).
+VECTORMATH_FORCE_INLINE void loadXYZArray( Vector3 & vec0, Vector3 & vec1, Vector3 & vec2, Vector3 & vec3, const __m128 * threeQuads );
+
+// Store four 3-D vectors in three quadwords
+// Inputs: vec0..vec3 (const references). Output: threeQuads (non-const __m128 pointer).
+VECTORMATH_FORCE_INLINE void storeXYZArray( const Vector3 &vec0, const Vector3 &vec1, const Vector3 &vec2, const Vector3 &vec3, __m128 * threeQuads );
+
+// Store eight 3-D vectors as half-floats
+// Inputs: vec0..vec7 (const references). Output: threeQuads (non-const vec_ushort8 pointer).
+VECTORMATH_FORCE_INLINE void storeHalfFloats( const Vector3 &vec0, const Vector3 &vec1, const Vector3 &vec2, const Vector3 &vec3, const Vector3 &vec4, const Vector3 &vec5, const Vector3 &vec6, const Vector3 &vec7, vec_ushort8 * threeQuads );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 3-D vector
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+VECTORMATH_FORCE_INLINE void print( const Vector3 &vec );
+
+// Print a 3-D vector and an associated string identifier
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+VECTORMATH_FORCE_INLINE void print( const Vector3 &vec, const char * name );
+
+#endif
+
+// A 4-D vector in array-of-structures format
+// Storage: a single private __m128 data member (mVec128).
+class Vector4
+{
+    __m128 mVec128;
+
+public:
+    // Default constructor; does no initialization
+    // (the empty body leaves mVec128 uninitialized)
+    VECTORMATH_FORCE_INLINE Vector4( ) { };
+
+    // Construct a 4-D vector from x, y, z, and w elements
+    // 
+    VECTORMATH_FORCE_INLINE Vector4( float x, float y, float z, float w );
+
+    // Construct a 4-D vector from x, y, z, and w elements (scalar data contained in vector data type)
+    // 
+    VECTORMATH_FORCE_INLINE Vector4( const floatInVec &x, const floatInVec &y, const floatInVec &z, const floatInVec &w );
+
+    // Construct a 4-D vector from a 3-D vector and a scalar
+    // 
+    VECTORMATH_FORCE_INLINE Vector4( const Vector3 &xyz, float w );
+
+    // Construct a 4-D vector from a 3-D vector and a scalar (scalar data contained in vector data type)
+    // 
+    VECTORMATH_FORCE_INLINE Vector4( const Vector3 &xyz, const floatInVec &w );
+
+    // Copy x, y, and z from a 3-D vector into a 4-D vector, and set w to 0
+    // explicit: a Vector3 is never converted to a Vector4 implicitly.
+    explicit VECTORMATH_FORCE_INLINE Vector4( const Vector3 &vec );
+
+    // Copy x, y, and z from a 3-D point into a 4-D vector, and set w to 1
+    // explicit: a Point3 is never converted to a Vector4 implicitly.
+    explicit VECTORMATH_FORCE_INLINE Vector4( const Point3 &pnt );
+
+    // Copy elements from a quaternion into a 4-D vector
+    // explicit: a Quat is never converted to a Vector4 implicitly.
+    explicit VECTORMATH_FORCE_INLINE Vector4( const Quat &quat );
+
+    // Set all elements of a 4-D vector to the same scalar value
+    // explicit: a bare float is never converted to a Vector4 implicitly.
+    explicit VECTORMATH_FORCE_INLINE Vector4( float scalar );
+
+    // Set all elements of a 4-D vector to the same scalar value (scalar data contained in vector data type)
+    // explicit: a floatInVec is never converted to a Vector4 implicitly.
+    explicit VECTORMATH_FORCE_INLINE Vector4( const floatInVec &scalar );
+
+    // Set vector float data in a 4-D vector
+    // explicit: a raw __m128 is never converted to a Vector4 implicitly.
+    explicit VECTORMATH_FORCE_INLINE Vector4( __m128 vf4 );
+
+    // Get vector float data from a 4-D vector
+    // The __m128 is returned by value.
+    VECTORMATH_FORCE_INLINE __m128 get128( ) const;
+
+    // Assign one 4-D vector to another
+    // 
+    VECTORMATH_FORCE_INLINE Vector4 & operator =( const Vector4 &vec );
+
+    // Set the x, y, and z elements of a 4-D vector
+    // NOTE: 
+    // This function does not change the w element.
+    // 
+    VECTORMATH_FORCE_INLINE Vector4 & setXYZ( const Vector3 &vec );
+
+    // Get the x, y, and z elements of a 4-D vector
+    // The elements are returned together as a Vector3, by value.
+    VECTORMATH_FORCE_INLINE const Vector3 getXYZ( ) const;
+
+    // Set the x element of a 4-D vector
+    // Returns Vector4 & (presumably *this; the definition is not part of this declaration).
+    VECTORMATH_FORCE_INLINE Vector4 & setX( float x );
+
+    // Set the y element of a 4-D vector
+    // 
+    VECTORMATH_FORCE_INLINE Vector4 & setY( float y );
+
+    // Set the z element of a 4-D vector
+    // 
+    VECTORMATH_FORCE_INLINE Vector4 & setZ( float z );
+
+    // Set the w element of a 4-D vector
+    // 
+    VECTORMATH_FORCE_INLINE Vector4 & setW( float w );
+
+    // Set the x element of a 4-D vector (scalar data contained in vector data type)
+    // 
+    VECTORMATH_FORCE_INLINE Vector4 & setX( const floatInVec &x );
+
+    // Set the y element of a 4-D vector (scalar data contained in vector data type)
+    // 
+    VECTORMATH_FORCE_INLINE Vector4 & setY( const floatInVec &y );
+
+    // Set the z element of a 4-D vector (scalar data contained in vector data type)
+    // 
+    VECTORMATH_FORCE_INLINE Vector4 & setZ( const floatInVec &z );
+
+    // Set the w element of a 4-D vector (scalar data contained in vector data type)
+    // 
+    VECTORMATH_FORCE_INLINE Vector4 & setW( const floatInVec &w );
+
+    // Get the x element of a 4-D vector
+    // The element is returned as a floatInVec, not a plain float.
+    VECTORMATH_FORCE_INLINE const floatInVec getX( ) const;
+
+    // Get the y element of a 4-D vector
+    // The element is returned as a floatInVec, not a plain float.
+    VECTORMATH_FORCE_INLINE const floatInVec getY( ) const;
+
+    // Get the z element of a 4-D vector
+    // The element is returned as a floatInVec, not a plain float.
+    VECTORMATH_FORCE_INLINE const floatInVec getZ( ) const;
+
+    // Get the w element of a 4-D vector
+    // The element is returned as a floatInVec, not a plain float.
+    VECTORMATH_FORCE_INLINE const floatInVec getW( ) const;
+
+    // Set an x, y, z, or w element of a 4-D vector by index
+    // 
+    VECTORMATH_FORCE_INLINE Vector4 & setElem( int idx, float value );
+
+    // Set an x, y, z, or w element of a 4-D vector by index (scalar data contained in vector data type)
+    // 
+    VECTORMATH_FORCE_INLINE Vector4 & setElem( int idx, const floatInVec &value );
+
+    // Get an x, y, z, or w element of a 4-D vector by index
+    // The element is returned as a floatInVec, not a plain float.
+    VECTORMATH_FORCE_INLINE const floatInVec getElem( int idx ) const;
+
+    // Subscripting operator to set or get an element
+    // Non-const overload: returns a VecIdx object by value rather than a float reference.
+    VECTORMATH_FORCE_INLINE VecIdx operator []( int idx );
+
+    // Subscripting operator to get an element
+    // Const overload: returns the element by value as a floatInVec.
+    VECTORMATH_FORCE_INLINE const floatInVec operator []( int idx ) const;
+
+    // Add two 4-D vectors
+    // 
+    VECTORMATH_FORCE_INLINE const Vector4 operator +( const Vector4 &vec ) const;
+
+    // Subtract a 4-D vector from another 4-D vector
+    // 
+    VECTORMATH_FORCE_INLINE const Vector4 operator -( const Vector4 &vec ) const;
+
+    // Multiply a 4-D vector by a scalar
+    // 
+    VECTORMATH_FORCE_INLINE const Vector4 operator *( float scalar ) const;
+
+    // Divide a 4-D vector by a scalar
+    // 
+    VECTORMATH_FORCE_INLINE const Vector4 operator /( float scalar ) const;
+
+    // Multiply a 4-D vector by a scalar (scalar data contained in vector data type)
+    // 
+    VECTORMATH_FORCE_INLINE const Vector4 operator *( const floatInVec &scalar ) const;
+
+    // Divide a 4-D vector by a scalar (scalar data contained in vector data type)
+    // 
+    VECTORMATH_FORCE_INLINE const Vector4 operator /( const floatInVec &scalar ) const;
+
+    // Perform compound assignment and addition with a 4-D vector
+    // 
+    VECTORMATH_FORCE_INLINE Vector4 & operator +=( const Vector4 &vec );
+
+    // Perform compound assignment and subtraction by a 4-D vector
+    // 
+    VECTORMATH_FORCE_INLINE Vector4 & operator -=( const Vector4 &vec );
+
+    // Perform compound assignment and multiplication by a scalar
+    // 
+    VECTORMATH_FORCE_INLINE Vector4 & operator *=( float scalar );
+
+    // Perform compound assignment and division by a scalar
+    // 
+    VECTORMATH_FORCE_INLINE Vector4 & operator /=( float scalar );
+
+    // Perform compound assignment and multiplication by a scalar (scalar data contained in vector data type)
+    // 
+    VECTORMATH_FORCE_INLINE Vector4 & operator *=( const floatInVec &scalar );
+
+    // Perform compound assignment and division by a scalar (scalar data contained in vector data type)
+    // 
+    VECTORMATH_FORCE_INLINE Vector4 & operator /=( const floatInVec &scalar );
+
+    // Negate all elements of a 4-D vector
+    // Unary minus; the result is a new Vector4 returned by value.
+    VECTORMATH_FORCE_INLINE const Vector4 operator -( ) const;
+
+    // Construct x axis
+    // Static member: called as Vector4::xAxis( ), no instance required.
+    static VECTORMATH_FORCE_INLINE const Vector4 xAxis( );
+
+    // Construct y axis
+    // Static member: called as Vector4::yAxis( ), no instance required.
+    static VECTORMATH_FORCE_INLINE const Vector4 yAxis( );
+
+    // Construct z axis
+    // Static member: called as Vector4::zAxis( ), no instance required.
+    static VECTORMATH_FORCE_INLINE const Vector4 zAxis( );
+
+    // Construct w axis
+    // Static member: called as Vector4::wAxis( ), no instance required.
+    static VECTORMATH_FORCE_INLINE const Vector4 wAxis( );
+
+};
+
+// Multiply a 4-D vector by a scalar
+// (scalar-on-the-left form; Vector4 also declares a member operator * taking the scalar on the right)
+VECTORMATH_FORCE_INLINE const Vector4 operator *( float scalar, const Vector4 &vec );
+
+// Multiply a 4-D vector by a scalar (scalar data contained in vector data type)
+// (scalar-on-the-left form of the floatInVec overload)
+VECTORMATH_FORCE_INLINE const Vector4 operator *( const floatInVec &scalar, const Vector4 &vec );
+
+// Multiply two 4-D vectors per element
+// 
+VECTORMATH_FORCE_INLINE const Vector4 mulPerElem( const Vector4 &vec0, const Vector4 &vec1 );
+
+// Divide two 4-D vectors per element
+// NOTE: 
+// Floating-point behavior matches standard library function divf4.
+// 
+VECTORMATH_FORCE_INLINE const Vector4 divPerElem( const Vector4 &vec0, const Vector4 &vec1 );
+
+// Compute the reciprocal of a 4-D vector per element
+// NOTE: 
+// Floating-point behavior matches standard library function recipf4.
+// 
+VECTORMATH_FORCE_INLINE const Vector4 recipPerElem( const Vector4 &vec );
+
+// Compute the absolute value of a 4-D vector per element
+// 
+VECTORMATH_FORCE_INLINE const Vector4 absPerElem( const Vector4 &vec );
+
+// Copy sign from one 4-D vector to another, per element
+// 
+VECTORMATH_FORCE_INLINE const Vector4 copySignPerElem( const Vector4 &vec0, const Vector4 &vec1 );
+
+// Maximum of two 4-D vectors per element
+// 
+VECTORMATH_FORCE_INLINE const Vector4 maxPerElem( const Vector4 &vec0, const Vector4 &vec1 );
+
+// Minimum of two 4-D vectors per element
+// 
+VECTORMATH_FORCE_INLINE const Vector4 minPerElem( const Vector4 &vec0, const Vector4 &vec1 );
+
+// Maximum element of a 4-D vector
+// The result is a floatInVec (scalar data contained in vector data type), not a plain float.
+VECTORMATH_FORCE_INLINE const floatInVec maxElem( const Vector4 &vec );
+
+// Minimum element of a 4-D vector
+// The result is a floatInVec (scalar data contained in vector data type), not a plain float.
+VECTORMATH_FORCE_INLINE const floatInVec minElem( const Vector4 &vec );
+
+// Compute the sum of all elements of a 4-D vector
+// The result is a floatInVec (scalar data contained in vector data type), not a plain float.
+VECTORMATH_FORCE_INLINE const floatInVec sum( const Vector4 &vec );
+
+// Compute the dot product of two 4-D vectors
+// The result is a floatInVec (scalar data contained in vector data type), not a plain float.
+VECTORMATH_FORCE_INLINE const floatInVec dot( const Vector4 &vec0, const Vector4 &vec1 );
+
+// Compute the square of the length of a 4-D vector
+// The result is a floatInVec (scalar data contained in vector data type), not a plain float.
+VECTORMATH_FORCE_INLINE const floatInVec lengthSqr( const Vector4 &vec );
+
+// Compute the length of a 4-D vector
+// The result is a floatInVec (scalar data contained in vector data type), not a plain float.
+VECTORMATH_FORCE_INLINE const floatInVec length( const Vector4 &vec );
+
+// Normalize a 4-D vector
+// NOTE: 
+// The result is unpredictable when all elements of vec are at or near zero.
+// 
+VECTORMATH_FORCE_INLINE const Vector4 normalize( const Vector4 &vec );
+
+// Outer product of two 4-D vectors
+// The result is a Matrix4.
+VECTORMATH_FORCE_INLINE const Matrix4 outer( const Vector4 &vec0, const Vector4 &vec1 );
+
+// Linear interpolation between two 4-D vectors
+// NOTE: 
+// Does not clamp t between 0 and 1.
+// 
+VECTORMATH_FORCE_INLINE const Vector4 lerp( float t, const Vector4 &vec0, const Vector4 &vec1 );
+
+// Linear interpolation between two 4-D vectors (scalar data contained in vector data type)
+// NOTE: 
+// Does not clamp t between 0 and 1.
+// 
+VECTORMATH_FORCE_INLINE const Vector4 lerp( const floatInVec &t, const Vector4 &vec0, const Vector4 &vec1 );
+
+// Spherical linear interpolation between two 4-D vectors
+// NOTE: 
+// The result is unpredictable if the vectors point in opposite directions.
+// Does not clamp t between 0 and 1.
+// NOTE(review): the parameter names (unitVec0, unitVec1) suggest unit-length inputs are expected - confirm.
+VECTORMATH_FORCE_INLINE const Vector4 slerp( float t, const Vector4 &unitVec0, const Vector4 &unitVec1 );
+
+// Spherical linear interpolation between two 4-D vectors (scalar data contained in vector data type)
+// NOTE: 
+// The result is unpredictable if the vectors point in opposite directions.
+// Does not clamp t between 0 and 1.
+// NOTE(review): the parameter names (unitVec0, unitVec1) suggest unit-length inputs are expected - confirm.
+VECTORMATH_FORCE_INLINE const Vector4 slerp( const floatInVec &t, const Vector4 &unitVec0, const Vector4 &unitVec1 );
+
+// Conditionally select between two 4-D vectors
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// However, the transfer of select1 (a plain bool) to a vector register may use more processing time than a branch.
+// Use the boolInVec version for better performance.
+// 
+VECTORMATH_FORCE_INLINE const Vector4 select( const Vector4 &vec0, const Vector4 &vec1, bool select1 );
+
+// Conditionally select between two 4-D vectors (scalar data contained in vector data type)
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// 
+VECTORMATH_FORCE_INLINE const Vector4 select( const Vector4 &vec0, const Vector4 &vec1, const boolInVec &select1 );
+
+// Store four 4-D vectors as half-floats
+// Inputs: vec0..vec3 (const references). Output: twoQuads (non-const vec_ushort8 pointer).
+VECTORMATH_FORCE_INLINE void storeHalfFloats( const Vector4 &vec0, const Vector4 &vec1, const Vector4 &vec2, const Vector4 &vec3, vec_ushort8 * twoQuads );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 4-D vector
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+VECTORMATH_FORCE_INLINE void print( const Vector4 &vec );
+
+// Print a 4-D vector and an associated string identifier
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+VECTORMATH_FORCE_INLINE void print( const Vector4 &vec, const char * name );
+
+#endif
+
+// A 3-D point in array-of-structures format
+// Storage: a single private __m128 data member (mVec128).
+class Point3
+{
+    __m128 mVec128;
+
+public:
+    // Default constructor; does no initialization
+    // (the empty body leaves mVec128 uninitialized)
+    VECTORMATH_FORCE_INLINE Point3( ) { };
+
+    // Construct a 3-D point from x, y, and z elements
+    // 
+    VECTORMATH_FORCE_INLINE Point3( float x, float y, float z );
+
+    // Construct a 3-D point from x, y, and z elements (scalar data contained in vector data type)
+    // 
+    VECTORMATH_FORCE_INLINE Point3( const floatInVec &x, const floatInVec &y, const floatInVec &z );
+
+    // Copy elements from a 3-D vector into a 3-D point
+    // explicit: a Vector3 is never converted to a Point3 implicitly.
+    explicit VECTORMATH_FORCE_INLINE Point3( const Vector3 &vec );
+
+    // Set all elements of a 3-D point to the same scalar value
+    // explicit: a bare float is never converted to a Point3 implicitly.
+    explicit VECTORMATH_FORCE_INLINE Point3( float scalar );
+
+    // Set all elements of a 3-D point to the same scalar value (scalar data contained in vector data type)
+    // explicit: a floatInVec is never converted to a Point3 implicitly.
+    explicit VECTORMATH_FORCE_INLINE Point3( const floatInVec &scalar );
+
+    // Set vector float data in a 3-D point
+    // explicit: a raw __m128 is never converted to a Point3 implicitly.
+    explicit VECTORMATH_FORCE_INLINE Point3( __m128 vf4 );
+
+    // Get vector float data from a 3-D point
+    // The __m128 is returned by value.
+    VECTORMATH_FORCE_INLINE __m128 get128( ) const;
+
+    // Assign one 3-D point to another
+    // 
+    VECTORMATH_FORCE_INLINE Point3 & operator =( const Point3 &pnt );
+
+    // Set the x element of a 3-D point
+    // Returns Point3 & (presumably *this; the definition is not part of this declaration).
+    VECTORMATH_FORCE_INLINE Point3 & setX( float x );
+
+    // Set the y element of a 3-D point
+    // 
+    VECTORMATH_FORCE_INLINE Point3 & setY( float y );
+
+    // Set the z element of a 3-D point
+    // 
+    VECTORMATH_FORCE_INLINE Point3 & setZ( float z );
+
+    // Set the x element of a 3-D point (scalar data contained in vector data type)
+    // 
+    VECTORMATH_FORCE_INLINE Point3 & setX( const floatInVec &x );
+
+    // Set the y element of a 3-D point (scalar data contained in vector data type)
+    // 
+    VECTORMATH_FORCE_INLINE Point3 & setY( const floatInVec &y );
+
+    // Set the z element of a 3-D point (scalar data contained in vector data type)
+    // 
+    VECTORMATH_FORCE_INLINE Point3 & setZ( const floatInVec &z );
+
+    // Get the x element of a 3-D point
+    // The element is returned as a floatInVec, not a plain float.
+    VECTORMATH_FORCE_INLINE const floatInVec getX( ) const;
+
+    // Get the y element of a 3-D point
+    // The element is returned as a floatInVec, not a plain float.
+    VECTORMATH_FORCE_INLINE const floatInVec getY( ) const;
+
+    // Get the z element of a 3-D point
+    // The element is returned as a floatInVec, not a plain float.
+    VECTORMATH_FORCE_INLINE const floatInVec getZ( ) const;
+
+    // Set an x, y, or z element of a 3-D point by index
+    // 
+    VECTORMATH_FORCE_INLINE Point3 & setElem( int idx, float value );
+
+    // Set an x, y, or z element of a 3-D point by index (scalar data contained in vector data type)
+    // 
+    VECTORMATH_FORCE_INLINE Point3 & setElem( int idx, const floatInVec &value );
+
+    // Get an x, y, or z element of a 3-D point by index
+    // The element is returned as a floatInVec, not a plain float.
+    VECTORMATH_FORCE_INLINE const floatInVec getElem( int idx ) const;
+
+    // Subscripting operator to set or get an element
+    // Non-const overload: returns a VecIdx object by value rather than a float reference.
+    VECTORMATH_FORCE_INLINE VecIdx operator []( int idx );
+
+    // Subscripting operator to get an element
+    // Const overload: returns the element by value as a floatInVec.
+    VECTORMATH_FORCE_INLINE const floatInVec operator []( int idx ) const;
+
+    // Subtract a 3-D point from another 3-D point
+    // The result type is Vector3, not Point3.
+    VECTORMATH_FORCE_INLINE const Vector3 operator -( const Point3 &pnt ) const;
+
+    // Add a 3-D point to a 3-D vector
+    // The result type is Point3.
+    VECTORMATH_FORCE_INLINE const Point3 operator +( const Vector3 &vec ) const;
+
+    // Subtract a 3-D vector from a 3-D point
+    // The result type is Point3.
+    VECTORMATH_FORCE_INLINE const Point3 operator -( const Vector3 &vec ) const;
+
+    // Perform compound assignment and addition with a 3-D vector
+    // 
+    VECTORMATH_FORCE_INLINE Point3 & operator +=( const Vector3 &vec );
+
+    // Perform compound assignment and subtraction by a 3-D vector
+    // 
+    VECTORMATH_FORCE_INLINE Point3 & operator -=( const Vector3 &vec );
+
+};
+
+// Multiply two 3-D points per element
+// 
+VECTORMATH_FORCE_INLINE const Point3 mulPerElem( const Point3 &pnt0, const Point3 &pnt1 );
+
+// Divide two 3-D points per element
+// NOTE: 
+// Floating-point behavior matches standard library function divf4.
+// 
+VECTORMATH_FORCE_INLINE const Point3 divPerElem( const Point3 &pnt0, const Point3 &pnt1 );
+
+// Compute the reciprocal of a 3-D point per element
+// NOTE: 
+// Floating-point behavior matches standard library function recipf4.
+// 
+VECTORMATH_FORCE_INLINE const Point3 recipPerElem( const Point3 &pnt );
+
+// Compute the absolute value of a 3-D point per element
+// 
+VECTORMATH_FORCE_INLINE const Point3 absPerElem( const Point3 &pnt );
+
+// Copy sign from one 3-D point to another, per element
+// 
+VECTORMATH_FORCE_INLINE const Point3 copySignPerElem( const Point3 &pnt0, const Point3 &pnt1 );
+
+// Maximum of two 3-D points per element
+// 
+VECTORMATH_FORCE_INLINE const Point3 maxPerElem( const Point3 &pnt0, const Point3 &pnt1 );
+
+// Minimum of two 3-D points per element
+// 
+VECTORMATH_FORCE_INLINE const Point3 minPerElem( const Point3 &pnt0, const Point3 &pnt1 );
+
+// Maximum element of a 3-D point
+// The result is a floatInVec (scalar data contained in vector data type), not a plain float.
+VECTORMATH_FORCE_INLINE const floatInVec maxElem( const Point3 &pnt );
+
+// Minimum element of a 3-D point
+// The result is a floatInVec (scalar data contained in vector data type), not a plain float.
+VECTORMATH_FORCE_INLINE const floatInVec minElem( const Point3 &pnt );
+
+// Compute the sum of all elements of a 3-D point
+// The result is a floatInVec (scalar data contained in vector data type), not a plain float.
+VECTORMATH_FORCE_INLINE const floatInVec sum( const Point3 &pnt );
+
+// Apply uniform scale to a 3-D point
+// 
+VECTORMATH_FORCE_INLINE const Point3 scale( const Point3 &pnt, float scaleVal );
+
+// Apply uniform scale to a 3-D point (scalar data contained in vector data type)
+// 
+VECTORMATH_FORCE_INLINE const Point3 scale( const Point3 &pnt, const floatInVec &scaleVal );
+
+// Apply non-uniform scale to a 3-D point
+// The scale is supplied as a Vector3 (scaleVec) instead of a single scalar.
+VECTORMATH_FORCE_INLINE const Point3 scale( const Point3 &pnt, const Vector3 &scaleVec );
+
+// Scalar projection of a 3-D point on a unit-length 3-D vector
+// The result is a floatInVec (scalar data contained in vector data type), not a plain float.
+VECTORMATH_FORCE_INLINE const floatInVec projection( const Point3 &pnt, const Vector3 &unitVec );
+
+// Compute the square of the distance of a 3-D point from the coordinate-system origin
+// The result is a floatInVec (scalar data contained in vector data type), not a plain float.
+VECTORMATH_FORCE_INLINE const floatInVec distSqrFromOrigin( const Point3 &pnt );
+
+// Compute the distance of a 3-D point from the coordinate-system origin
+// The result is a floatInVec (scalar data contained in vector data type), not a plain float.
+VECTORMATH_FORCE_INLINE const floatInVec distFromOrigin( const Point3 &pnt );
+
+// Compute the square of the distance between two 3-D points
+// The result is a floatInVec (scalar data contained in vector data type), not a plain float.
+VECTORMATH_FORCE_INLINE const floatInVec distSqr( const Point3 &pnt0, const Point3 &pnt1 );
+
+// Compute the distance between two 3-D points
+// The result is a floatInVec (scalar data contained in vector data type), not a plain float.
+VECTORMATH_FORCE_INLINE const floatInVec dist( const Point3 &pnt0, const Point3 &pnt1 );
+
+// Linear interpolation between two 3-D points
+// NOTE: 
+// Does not clamp t between 0 and 1.
+// 
+VECTORMATH_FORCE_INLINE const Point3 lerp( float t, const Point3 &pnt0, const Point3 &pnt1 );
+
+// Linear interpolation between two 3-D points (scalar data contained in vector data type)
+// NOTE: 
+// Does not clamp t between 0 and 1.
+// 
+VECTORMATH_FORCE_INLINE const Point3 lerp( const floatInVec &t, const Point3 &pnt0, const Point3 &pnt1 );
+
+// Conditionally select between two 3-D points
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// However, the transfer of select1 (a plain bool) to a vector register may use more processing time than a branch.
+// Use the boolInVec version for better performance.
+// 
+VECTORMATH_FORCE_INLINE const Point3 select( const Point3 &pnt0, const Point3 &pnt1, bool select1 );
+
+// Conditionally select between two 3-D points (scalar data contained in vector data type)
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// 
+VECTORMATH_FORCE_INLINE const Point3 select( const Point3 &pnt0, const Point3 &pnt1, const boolInVec &select1 );
+
+// Store x, y, and z elements of 3-D point in first three words of a quadword, preserving fourth word
+// quad is the destination; it is passed as a non-const __m128 pointer.
+VECTORMATH_FORCE_INLINE void storeXYZ( const Point3 &pnt, __m128 * quad );
+
+// Load four three-float 3-D points, stored in three quadwords
+// Outputs: pnt0..pnt3 (non-const references). Input: threeQuads (pointer to const __m128).
+VECTORMATH_FORCE_INLINE void loadXYZArray( Point3 & pnt0, Point3 & pnt1, Point3 & pnt2, Point3 & pnt3, const __m128 * threeQuads );
+
+// Store four 3-D points in three quadwords
+// Inputs: pnt0..pnt3 (const references). Output: threeQuads (non-const __m128 pointer).
+VECTORMATH_FORCE_INLINE void storeXYZArray( const Point3 &pnt0, const Point3 &pnt1, const Point3 &pnt2, const Point3 &pnt3, __m128 * threeQuads );
+
+// Store eight 3-D points as half-floats
+// Inputs: pnt0..pnt7 (const references). Output: threeQuads (non-const vec_ushort8 pointer).
+VECTORMATH_FORCE_INLINE void storeHalfFloats( const Point3 &pnt0, const Point3 &pnt1, const Point3 &pnt2, const Point3 &pnt3, const Point3 &pnt4, const Point3 &pnt5, const Point3 &pnt6, const Point3 &pnt7, vec_ushort8 * threeQuads );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 3-D point
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+VECTORMATH_FORCE_INLINE void print( const Point3 &pnt );
+
+// Print a 3-D point and an associated string identifier
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+VECTORMATH_FORCE_INLINE void print( const Point3 &pnt, const char * name );
+
+#endif
+
+// A quaternion in array-of-structures format
+// The only data member is a single __m128 (mVec128).
+class Quat
+{
+    __m128 mVec128;
+
+public:
+    // Default constructor; does no initialization
+    // (the body is empty, so mVec128 is not set here).
+    VECTORMATH_FORCE_INLINE Quat( ) { };
+    // Copy a quaternion
+	VECTORMATH_FORCE_INLINE  Quat(const Quat& quat);
+
+    // Construct a quaternion from x, y, z, and w elements
+    // 
+    VECTORMATH_FORCE_INLINE Quat( float x, float y, float z, float w );
+
+    // Construct a quaternion from x, y, z, and w elements (scalar data contained in vector data type)
+    // 
+    VECTORMATH_FORCE_INLINE Quat( const floatInVec &x, const floatInVec &y, const floatInVec &z, const floatInVec &w );
+
+    // Construct a quaternion from a 3-D vector and a scalar
+    // 
+    VECTORMATH_FORCE_INLINE Quat( const Vector3 &xyz, float w );
+
+    // Construct a quaternion from a 3-D vector and a scalar (scalar data contained in vector data type)
+    // 
+    VECTORMATH_FORCE_INLINE Quat( const Vector3 &xyz, const floatInVec &w );
+
+    // Copy elements from a 4-D vector into a quaternion
+    // Explicit: a Vector4 is never converted to a Quat implicitly.
+    explicit VECTORMATH_FORCE_INLINE Quat( const Vector4 &vec );
+
+    // Convert a rotation matrix to a unit-length quaternion
+    // Explicit: a Matrix3 is never converted to a Quat implicitly.
+    explicit VECTORMATH_FORCE_INLINE Quat( const Matrix3 & rotMat );
+
+    // Set all elements of a quaternion to the same scalar value
+    // Explicit: a bare float is never converted to a Quat implicitly.
+    explicit VECTORMATH_FORCE_INLINE Quat( float scalar );
+
+    // Set all elements of a quaternion to the same scalar value (scalar data contained in vector data type)
+    // Explicit: a floatInVec is never converted to a Quat implicitly.
+    explicit VECTORMATH_FORCE_INLINE Quat( const floatInVec &scalar );
+
+    // Set vector float data in a quaternion
+    // Explicit: a raw __m128 must be wrapped deliberately.
+    explicit VECTORMATH_FORCE_INLINE Quat( __m128 vf4 );
+
+    // Get vector float data from a quaternion
+    // Returned by value as __m128.
+    VECTORMATH_FORCE_INLINE __m128 get128( ) const;
+
+    // Set a quaternion from vector float data
+    // NOTE(review): takes vec_float4 while get128 returns __m128; presumably the same type - confirm.
+	VECTORMATH_FORCE_INLINE void set128(vec_float4 vec);
+
+    // Assign one quaternion to another
+    // 
+    VECTORMATH_FORCE_INLINE Quat & operator =( const Quat &quat );
+
+    // Set the x, y, and z elements of a quaternion
+    // NOTE: 
+    // This function does not change the w element.
+    // 
+    VECTORMATH_FORCE_INLINE Quat & setXYZ( const Vector3 &vec );
+
+    // Get the x, y, and z elements of a quaternion
+    // Returned by value as a Vector3.
+    VECTORMATH_FORCE_INLINE const Vector3 getXYZ( ) const;
+
+    // Set the x element of a quaternion
+    // 
+    VECTORMATH_FORCE_INLINE Quat & setX( float x );
+
+    // Set the y element of a quaternion
+    // 
+    VECTORMATH_FORCE_INLINE Quat & setY( float y );
+
+    // Set the z element of a quaternion
+    // 
+    VECTORMATH_FORCE_INLINE Quat & setZ( float z );
+
+    // Set the w element of a quaternion
+    // 
+    VECTORMATH_FORCE_INLINE Quat & setW( float w );
+
+    // Set the x element of a quaternion (scalar data contained in vector data type)
+    // 
+    VECTORMATH_FORCE_INLINE Quat & setX( const floatInVec &x );
+
+    // Set the y element of a quaternion (scalar data contained in vector data type)
+    // 
+    VECTORMATH_FORCE_INLINE Quat & setY( const floatInVec &y );
+
+    // Set the z element of a quaternion (scalar data contained in vector data type)
+    // 
+    VECTORMATH_FORCE_INLINE Quat & setZ( const floatInVec &z );
+
+    // Set the w element of a quaternion (scalar data contained in vector data type)
+    // 
+    VECTORMATH_FORCE_INLINE Quat & setW( const floatInVec &w );
+
+    // Get the x element of a quaternion
+    // Returned as floatInVec (same for getY, getZ, and getW).
+    VECTORMATH_FORCE_INLINE const floatInVec getX( ) const;
+
+    // Get the y element of a quaternion
+    // 
+    VECTORMATH_FORCE_INLINE const floatInVec getY( ) const;
+
+    // Get the z element of a quaternion
+    // 
+    VECTORMATH_FORCE_INLINE const floatInVec getZ( ) const;
+
+    // Get the w element of a quaternion
+    // 
+    VECTORMATH_FORCE_INLINE const floatInVec getW( ) const;
+
+    // Set an x, y, z, or w element of a quaternion by index
+    // NOTE(review): presumably idx 0..3 selects x, y, z, w; no range check is visible in this declaration.
+    VECTORMATH_FORCE_INLINE Quat & setElem( int idx, float value );
+
+    // Set an x, y, z, or w element of a quaternion by index (scalar data contained in vector data type)
+    // 
+    VECTORMATH_FORCE_INLINE Quat & setElem( int idx, const floatInVec &value );
+
+    // Get an x, y, z, or w element of a quaternion by index
+    // 
+    VECTORMATH_FORCE_INLINE const floatInVec getElem( int idx ) const;
+
+    // Subscripting operator to set or get an element
+    // Returns a VecIdx (declared elsewhere) rather than a float reference.
+    VECTORMATH_FORCE_INLINE VecIdx operator []( int idx );
+
+    // Subscripting operator to get an element
+    // Returns the element by value as floatInVec.
+    VECTORMATH_FORCE_INLINE const floatInVec operator []( int idx ) const;
+
+    // Add two quaternions
+    // 
+    VECTORMATH_FORCE_INLINE const Quat operator +( const Quat &quat ) const;
+
+    // Subtract a quaternion from another quaternion
+    // 
+    VECTORMATH_FORCE_INLINE const Quat operator -( const Quat &quat ) const;
+
+    // Multiply two quaternions
+    // 
+    VECTORMATH_FORCE_INLINE const Quat operator *( const Quat &quat ) const;
+
+    // Multiply a quaternion by a scalar
+    // 
+    VECTORMATH_FORCE_INLINE const Quat operator *( float scalar ) const;
+
+    // Divide a quaternion by a scalar
+    // 
+    VECTORMATH_FORCE_INLINE const Quat operator /( float scalar ) const;
+
+    // Multiply a quaternion by a scalar (scalar data contained in vector data type)
+    // 
+    VECTORMATH_FORCE_INLINE const Quat operator *( const floatInVec &scalar ) const;
+
+    // Divide a quaternion by a scalar (scalar data contained in vector data type)
+    // 
+    VECTORMATH_FORCE_INLINE const Quat operator /( const floatInVec &scalar ) const;
+
+    // Perform compound assignment and addition with a quaternion
+    // 
+    VECTORMATH_FORCE_INLINE Quat & operator +=( const Quat &quat );
+
+    // Perform compound assignment and subtraction by a quaternion
+    // 
+    VECTORMATH_FORCE_INLINE Quat & operator -=( const Quat &quat );
+
+    // Perform compound assignment and multiplication by a quaternion
+    // 
+    VECTORMATH_FORCE_INLINE Quat & operator *=( const Quat &quat );
+
+    // Perform compound assignment and multiplication by a scalar
+    // 
+    VECTORMATH_FORCE_INLINE Quat & operator *=( float scalar );
+
+    // Perform compound assignment and division by a scalar
+    // 
+    VECTORMATH_FORCE_INLINE Quat & operator /=( float scalar );
+
+    // Perform compound assignment and multiplication by a scalar (scalar data contained in vector data type)
+    // 
+    VECTORMATH_FORCE_INLINE Quat & operator *=( const floatInVec &scalar );
+
+    // Perform compound assignment and division by a scalar (scalar data contained in vector data type)
+    // 
+    VECTORMATH_FORCE_INLINE Quat & operator /=( const floatInVec &scalar );
+
+    // Negate all elements of a quaternion
+    // 
+    VECTORMATH_FORCE_INLINE const Quat operator -( ) const;
+
+    // Construct an identity quaternion
+    // 
+    static VECTORMATH_FORCE_INLINE const Quat identity( );
+
+    // Construct a quaternion to rotate between two unit-length 3-D vectors
+    // NOTE: 
+    // The result is unpredictable if unitVec0 and unitVec1 point in opposite directions.
+    // 
+    static VECTORMATH_FORCE_INLINE const Quat rotation( const Vector3 &unitVec0, const Vector3 &unitVec1 );
+
+    // Construct a quaternion to rotate around a unit-length 3-D vector
+    // 
+    static VECTORMATH_FORCE_INLINE const Quat rotation( float radians, const Vector3 &unitVec );
+
+    // Construct a quaternion to rotate around a unit-length 3-D vector (scalar data contained in vector data type)
+    // 
+    static VECTORMATH_FORCE_INLINE const Quat rotation( const floatInVec &radians, const Vector3 &unitVec );
+
+    // Construct a quaternion to rotate around the x axis
+    // 
+    static VECTORMATH_FORCE_INLINE const Quat rotationX( float radians );
+
+    // Construct a quaternion to rotate around the y axis
+    // 
+    static VECTORMATH_FORCE_INLINE const Quat rotationY( float radians );
+
+    // Construct a quaternion to rotate around the z axis
+    // 
+    static VECTORMATH_FORCE_INLINE const Quat rotationZ( float radians );
+
+    // Construct a quaternion to rotate around the x axis (scalar data contained in vector data type)
+    // 
+    static VECTORMATH_FORCE_INLINE const Quat rotationX( const floatInVec &radians );
+
+    // Construct a quaternion to rotate around the y axis (scalar data contained in vector data type)
+    // 
+    static VECTORMATH_FORCE_INLINE const Quat rotationY( const floatInVec &radians );
+
+    // Construct a quaternion to rotate around the z axis (scalar data contained in vector data type)
+    // 
+    static VECTORMATH_FORCE_INLINE const Quat rotationZ( const floatInVec &radians );
+
+};
+
+// Multiply a quaternion by a scalar
+// (overload with the scalar as the left-hand operand)
+VECTORMATH_FORCE_INLINE const Quat operator *( float scalar, const Quat &quat );
+
+// Multiply a quaternion by a scalar (scalar data contained in vector data type)
+// (overload with the scalar as the left-hand operand)
+VECTORMATH_FORCE_INLINE const Quat operator *( const floatInVec &scalar, const Quat &quat );
+
+// Compute the conjugate of a quaternion
+// 
+VECTORMATH_FORCE_INLINE const Quat conj( const Quat &quat );
+
+// Use a unit-length quaternion to rotate a 3-D vector
+// 
+VECTORMATH_FORCE_INLINE const Vector3 rotate( const Quat &unitQuat, const Vector3 &vec );
+
+// Compute the dot product of two quaternions
+// 
+VECTORMATH_FORCE_INLINE const floatInVec dot( const Quat &quat0, const Quat &quat1 );
+
+// Compute the norm of a quaternion
+// NOTE(review): presumably the squared length (dot of quat with itself), as distinct from length() - confirm.
+VECTORMATH_FORCE_INLINE const floatInVec norm( const Quat &quat );
+
+// Compute the length of a quaternion
+// 
+VECTORMATH_FORCE_INLINE const floatInVec length( const Quat &quat );
+
+// Normalize a quaternion
+// NOTE: 
+// The result is unpredictable when all elements of quat are at or near zero.
+// 
+VECTORMATH_FORCE_INLINE const Quat normalize( const Quat &quat );
+
+// Linear interpolation between two quaternions
+// NOTE: 
+// Does not clamp t between 0 and 1.
+// 
+VECTORMATH_FORCE_INLINE const Quat lerp( float t, const Quat &quat0, const Quat &quat1 );
+
+// Linear interpolation between two quaternions (scalar data contained in vector data type)
+// NOTE: 
+// Does not clamp t between 0 and 1.
+// 
+VECTORMATH_FORCE_INLINE const Quat lerp( const floatInVec &t, const Quat &quat0, const Quat &quat1 );
+
+// Spherical linear interpolation between two quaternions
+// NOTE: 
+// Interpolates along the shortest path between orientations.
+// Does not clamp t between 0 and 1.
+// 
+VECTORMATH_FORCE_INLINE const Quat slerp( float t, const Quat &unitQuat0, const Quat &unitQuat1 );
+
+// Spherical linear interpolation between two quaternions (scalar data contained in vector data type)
+// NOTE: 
+// Interpolates along the shortest path between orientations.
+// Does not clamp t between 0 and 1.
+// 
+VECTORMATH_FORCE_INLINE const Quat slerp( const floatInVec &t, const Quat &unitQuat0, const Quat &unitQuat1 );
+
+// Spherical quadrangle interpolation
+// Takes four unit-length quaternions and the interpolation parameter t.
+VECTORMATH_FORCE_INLINE const Quat squad( float t, const Quat &unitQuat0, const Quat &unitQuat1, const Quat &unitQuat2, const Quat &unitQuat3 );
+
+// Spherical quadrangle interpolation (scalar data contained in vector data type)
+// 
+VECTORMATH_FORCE_INLINE const Quat squad( const floatInVec &t, const Quat &unitQuat0, const Quat &unitQuat1, const Quat &unitQuat2, const Quat &unitQuat3 );
+
+// Conditionally select between two quaternions
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// However, the transfer of select1 to a VMX register may use more processing time than a branch.
+// Use the boolInVec version for better performance.
+// NOTE(review): "VMX" wording looks inherited from another port; this header uses __m128 - confirm it applies.
+VECTORMATH_FORCE_INLINE const Quat select( const Quat &quat0, const Quat &quat1, bool select1 );
+
+// Conditionally select between two quaternions (scalar data contained in vector data type)
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// 
+VECTORMATH_FORCE_INLINE const Quat select( const Quat &quat0, const Quat &quat1, const boolInVec &select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a quaternion
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+VECTORMATH_FORCE_INLINE void print( const Quat &quat );
+
+// Print a quaternion and an associated string identifier
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// The identifier string is passed as name.
+VECTORMATH_FORCE_INLINE void print( const Quat &quat, const char * name );
+
+#endif
+
+// A 3x3 matrix in array-of-structures format
+// Stored as three Vector3 columns (mCol0, mCol1, mCol2).
+class Matrix3
+{
+    Vector3 mCol0;
+    Vector3 mCol1;
+    Vector3 mCol2;
+
+public:
+    // Default constructor; does no initialization
+    // (the constructor body is empty).
+    VECTORMATH_FORCE_INLINE Matrix3( ) { };
+
+    // Copy a 3x3 matrix
+    // 
+    VECTORMATH_FORCE_INLINE Matrix3( const Matrix3 & mat );
+
+    // Construct a 3x3 matrix containing the specified columns
+    // 
+    VECTORMATH_FORCE_INLINE Matrix3( const Vector3 &col0, const Vector3 &col1, const Vector3 &col2 );
+
+    // Construct a 3x3 rotation matrix from a unit-length quaternion
+    // Explicit: a Quat is never converted to a Matrix3 implicitly.
+    explicit VECTORMATH_FORCE_INLINE Matrix3( const Quat &unitQuat );
+
+    // Set all elements of a 3x3 matrix to the same scalar value
+    // Explicit: a bare float is never converted to a Matrix3 implicitly.
+    explicit VECTORMATH_FORCE_INLINE Matrix3( float scalar );
+
+    // Set all elements of a 3x3 matrix to the same scalar value (scalar data contained in vector data type)
+    // Explicit: a floatInVec is never converted to a Matrix3 implicitly.
+    explicit VECTORMATH_FORCE_INLINE Matrix3( const floatInVec &scalar );
+
+    // Assign one 3x3 matrix to another
+    // 
+    VECTORMATH_FORCE_INLINE Matrix3 & operator =( const Matrix3 & mat );
+
+    // Set column 0 of a 3x3 matrix
+    // 
+    VECTORMATH_FORCE_INLINE Matrix3 & setCol0( const Vector3 &col0 );
+
+    // Set column 1 of a 3x3 matrix
+    // 
+    VECTORMATH_FORCE_INLINE Matrix3 & setCol1( const Vector3 &col1 );
+
+    // Set column 2 of a 3x3 matrix
+    // 
+    VECTORMATH_FORCE_INLINE Matrix3 & setCol2( const Vector3 &col2 );
+
+    // Get column 0 of a 3x3 matrix
+    // 
+    VECTORMATH_FORCE_INLINE const Vector3 getCol0( ) const;
+
+    // Get column 1 of a 3x3 matrix
+    // 
+    VECTORMATH_FORCE_INLINE const Vector3 getCol1( ) const;
+
+    // Get column 2 of a 3x3 matrix
+    // 
+    VECTORMATH_FORCE_INLINE const Vector3 getCol2( ) const;
+
+    // Set the column of a 3x3 matrix referred to by the specified index
+    // 
+    VECTORMATH_FORCE_INLINE Matrix3 & setCol( int col, const Vector3 &vec );
+
+    // Set the row of a 3x3 matrix referred to by the specified index
+    // 
+    VECTORMATH_FORCE_INLINE Matrix3 & setRow( int row, const Vector3 &vec );
+
+    // Get the column of a 3x3 matrix referred to by the specified index
+    // 
+    VECTORMATH_FORCE_INLINE const Vector3 getCol( int col ) const;
+
+    // Get the row of a 3x3 matrix referred to by the specified index
+    // 
+    VECTORMATH_FORCE_INLINE const Vector3 getRow( int row ) const;
+
+    // Subscripting operator to set or get a column
+    // Returns a non-const Vector3 reference.
+    VECTORMATH_FORCE_INLINE Vector3 & operator []( int col );
+
+    // Subscripting operator to get a column
+    // Returns the column by value.
+    VECTORMATH_FORCE_INLINE const Vector3 operator []( int col ) const;
+
+    // Set the element of a 3x3 matrix referred to by column and row indices
+    // Argument order is column index first, then row index.
+    VECTORMATH_FORCE_INLINE Matrix3 & setElem( int col, int row, float val );
+
+    // Set the element of a 3x3 matrix referred to by column and row indices (scalar data contained in vector data type)
+    // Argument order is column index first, then row index.
+    VECTORMATH_FORCE_INLINE Matrix3 & setElem( int col, int row, const floatInVec &val );
+
+    // Get the element of a 3x3 matrix referred to by column and row indices
+    // Argument order is column index first, then row index.
+    VECTORMATH_FORCE_INLINE const floatInVec getElem( int col, int row ) const;
+
+    // Add two 3x3 matrices
+    // 
+    VECTORMATH_FORCE_INLINE const Matrix3 operator +( const Matrix3 & mat ) const;
+
+    // Subtract a 3x3 matrix from another 3x3 matrix
+    // 
+    VECTORMATH_FORCE_INLINE const Matrix3 operator -( const Matrix3 & mat ) const;
+
+    // Negate all elements of a 3x3 matrix
+    // 
+    VECTORMATH_FORCE_INLINE const Matrix3 operator -( ) const;
+
+    // Multiply a 3x3 matrix by a scalar
+    // 
+    VECTORMATH_FORCE_INLINE const Matrix3 operator *( float scalar ) const;
+
+    // Multiply a 3x3 matrix by a scalar (scalar data contained in vector data type)
+    // 
+    VECTORMATH_FORCE_INLINE const Matrix3 operator *( const floatInVec &scalar ) const;
+
+    // Multiply a 3x3 matrix by a 3-D vector
+    // 
+    VECTORMATH_FORCE_INLINE const Vector3 operator *( const Vector3 &vec ) const;
+
+    // Multiply two 3x3 matrices
+    // 
+    VECTORMATH_FORCE_INLINE const Matrix3 operator *( const Matrix3 & mat ) const;
+
+    // Perform compound assignment and addition with a 3x3 matrix
+    // 
+    VECTORMATH_FORCE_INLINE Matrix3 & operator +=( const Matrix3 & mat );
+
+    // Perform compound assignment and subtraction by a 3x3 matrix
+    // 
+    VECTORMATH_FORCE_INLINE Matrix3 & operator -=( const Matrix3 & mat );
+
+    // Perform compound assignment and multiplication by a scalar
+    // 
+    VECTORMATH_FORCE_INLINE Matrix3 & operator *=( float scalar );
+
+    // Perform compound assignment and multiplication by a scalar (scalar data contained in vector data type)
+    // 
+    VECTORMATH_FORCE_INLINE Matrix3 & operator *=( const floatInVec &scalar );
+
+    // Perform compound assignment and multiplication by a 3x3 matrix
+    // 
+    VECTORMATH_FORCE_INLINE Matrix3 & operator *=( const Matrix3 & mat );
+
+    // Construct an identity 3x3 matrix
+    // 
+    static VECTORMATH_FORCE_INLINE const Matrix3 identity( );
+
+    // Construct a 3x3 matrix to rotate around the x axis
+    // 
+    static VECTORMATH_FORCE_INLINE const Matrix3 rotationX( float radians );
+
+    // Construct a 3x3 matrix to rotate around the y axis
+    // 
+    static VECTORMATH_FORCE_INLINE const Matrix3 rotationY( float radians );
+
+    // Construct a 3x3 matrix to rotate around the z axis
+    // 
+    static VECTORMATH_FORCE_INLINE const Matrix3 rotationZ( float radians );
+
+    // Construct a 3x3 matrix to rotate around the x axis (scalar data contained in vector data type)
+    // 
+    static VECTORMATH_FORCE_INLINE const Matrix3 rotationX( const floatInVec &radians );
+
+    // Construct a 3x3 matrix to rotate around the y axis (scalar data contained in vector data type)
+    // 
+    static VECTORMATH_FORCE_INLINE const Matrix3 rotationY( const floatInVec &radians );
+
+    // Construct a 3x3 matrix to rotate around the z axis (scalar data contained in vector data type)
+    // 
+    static VECTORMATH_FORCE_INLINE const Matrix3 rotationZ( const floatInVec &radians );
+
+    // Construct a 3x3 matrix to rotate around the x, y, and z axes
+    // NOTE(review): the name suggests a Z*Y*X composition of the angles in radiansXYZ - confirm the order.
+    static VECTORMATH_FORCE_INLINE const Matrix3 rotationZYX( const Vector3 &radiansXYZ );
+
+    // Construct a 3x3 matrix to rotate around a unit-length 3-D vector
+    // 
+    static VECTORMATH_FORCE_INLINE const Matrix3 rotation( float radians, const Vector3 &unitVec );
+
+    // Construct a 3x3 matrix to rotate around a unit-length 3-D vector (scalar data contained in vector data type)
+    // 
+    static VECTORMATH_FORCE_INLINE const Matrix3 rotation( const floatInVec &radians, const Vector3 &unitVec );
+
+    // Construct a rotation matrix from a unit-length quaternion
+    // 
+    static VECTORMATH_FORCE_INLINE const Matrix3 rotation( const Quat &unitQuat );
+
+    // Construct a 3x3 matrix to perform scaling
+    // 
+    static VECTORMATH_FORCE_INLINE const Matrix3 scale( const Vector3 &scaleVec );
+
+};
+// Multiply a 3x3 matrix by a scalar
+// (overload with the scalar as the left-hand operand)
+VECTORMATH_FORCE_INLINE const Matrix3 operator *( float scalar, const Matrix3 & mat );
+
+// Multiply a 3x3 matrix by a scalar (scalar data contained in vector data type)
+// (overload with the scalar as the left-hand operand)
+VECTORMATH_FORCE_INLINE const Matrix3 operator *( const floatInVec &scalar, const Matrix3 & mat );
+
+// Append (post-multiply) a scale transformation to a 3x3 matrix
+// NOTE: 
+// Faster than creating and multiplying a scale transformation matrix.
+// 
+VECTORMATH_FORCE_INLINE const Matrix3 appendScale( const Matrix3 & mat, const Vector3 &scaleVec );
+
+// Prepend (pre-multiply) a scale transformation to a 3x3 matrix
+// NOTE: 
+// Faster than creating and multiplying a scale transformation matrix.
+// Note the argument order: scaleVec first, then mat (the reverse of appendScale).
+VECTORMATH_FORCE_INLINE const Matrix3 prependScale( const Vector3 &scaleVec, const Matrix3 & mat );
+
+// Multiply two 3x3 matrices per element
+// 
+VECTORMATH_FORCE_INLINE const Matrix3 mulPerElem( const Matrix3 & mat0, const Matrix3 & mat1 );
+
+// Compute the absolute value of a 3x3 matrix per element
+// 
+VECTORMATH_FORCE_INLINE const Matrix3 absPerElem( const Matrix3 & mat );
+
+// Transpose of a 3x3 matrix
+// 
+VECTORMATH_FORCE_INLINE const Matrix3 transpose( const Matrix3 & mat );
+
+// Compute the inverse of a 3x3 matrix
+// NOTE: 
+// Result is unpredictable when the determinant of mat is equal to or near 0.
+// 
+VECTORMATH_FORCE_INLINE const Matrix3 inverse( const Matrix3 & mat );
+
+// Determinant of a 3x3 matrix
+// Returned as floatInVec.
+VECTORMATH_FORCE_INLINE const floatInVec determinant( const Matrix3 & mat );
+
+// Conditionally select between two 3x3 matrices
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// However, the transfer of select1 to a VMX register may use more processing time than a branch.
+// Use the boolInVec version for better performance.
+// NOTE(review): "VMX" wording looks inherited from another port of this header - confirm it applies here.
+VECTORMATH_FORCE_INLINE const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, bool select1 );
+
+// Conditionally select between two 3x3 matrices (scalar data contained in vector data type)
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// 
+VECTORMATH_FORCE_INLINE const Matrix3 select( const Matrix3 & mat0, const Matrix3 & mat1, const boolInVec &select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 3x3 matrix
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+VECTORMATH_FORCE_INLINE void print( const Matrix3 & mat );
+
+// Print a 3x3 matrix and an associated string identifier
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// The identifier string is passed as name.
+VECTORMATH_FORCE_INLINE void print( const Matrix3 & mat, const char * name );
+
+#endif
+
+// A 4x4 matrix in array-of-structures format
+// Stored as four Vector4 columns (mCol0, mCol1, mCol2, mCol3).
+class Matrix4
+{
+    Vector4 mCol0;
+    Vector4 mCol1;
+    Vector4 mCol2;
+    Vector4 mCol3;
+
+public:
+    // Default constructor; does no initialization
+    // (the constructor body is empty).
+    VECTORMATH_FORCE_INLINE Matrix4( ) { };
+
+    // Copy a 4x4 matrix
+    // 
+    VECTORMATH_FORCE_INLINE Matrix4( const Matrix4 & mat );
+
+    // Construct a 4x4 matrix containing the specified columns
+    // 
+    VECTORMATH_FORCE_INLINE Matrix4( const Vector4 &col0, const Vector4 &col1, const Vector4 &col2, const Vector4 &col3 );
+
+    // Construct a 4x4 matrix from a 3x4 transformation matrix
+    // Explicit: a Transform3 is never converted to a Matrix4 implicitly.
+    explicit VECTORMATH_FORCE_INLINE Matrix4( const Transform3 & mat );
+
+    // Construct a 4x4 matrix from a 3x3 matrix and a 3-D vector
+    // 
+    VECTORMATH_FORCE_INLINE Matrix4( const Matrix3 & mat, const Vector3 &translateVec );
+
+    // Construct a 4x4 matrix from a unit-length quaternion and a 3-D vector
+    // 
+    VECTORMATH_FORCE_INLINE Matrix4( const Quat &unitQuat, const Vector3 &translateVec );
+
+    // Set all elements of a 4x4 matrix to the same scalar value
+    // Explicit: a bare float is never converted to a Matrix4 implicitly.
+    explicit VECTORMATH_FORCE_INLINE Matrix4( float scalar );
+
+    // Set all elements of a 4x4 matrix to the same scalar value (scalar data contained in vector data type)
+    // Explicit: a floatInVec is never converted to a Matrix4 implicitly.
+    explicit VECTORMATH_FORCE_INLINE Matrix4( const floatInVec &scalar );
+
+    // Assign one 4x4 matrix to another
+    // 
+    VECTORMATH_FORCE_INLINE Matrix4 & operator =( const Matrix4 & mat );
+
+    // Set the upper-left 3x3 submatrix
+    // NOTE: 
+    // This function does not change the bottom row elements.
+    // 
+    VECTORMATH_FORCE_INLINE Matrix4 & setUpper3x3( const Matrix3 & mat3 );
+
+    // Get the upper-left 3x3 submatrix of a 4x4 matrix
+    // 
+    VECTORMATH_FORCE_INLINE const Matrix3 getUpper3x3( ) const;
+
+    // Set translation component
+    // NOTE: 
+    // This function does not change the bottom row elements.
+    // 
+    VECTORMATH_FORCE_INLINE Matrix4 & setTranslation( const Vector3 &translateVec );
+
+    // Get the translation component of a 4x4 matrix
+    // Returned by value as a Vector3.
+    VECTORMATH_FORCE_INLINE const Vector3 getTranslation( ) const;
+
+    // Set column 0 of a 4x4 matrix
+    // 
+    VECTORMATH_FORCE_INLINE Matrix4 & setCol0( const Vector4 &col0 );
+
+    // Set column 1 of a 4x4 matrix
+    // 
+    VECTORMATH_FORCE_INLINE Matrix4 & setCol1( const Vector4 &col1 );
+
+    // Set column 2 of a 4x4 matrix
+    // 
+    VECTORMATH_FORCE_INLINE Matrix4 & setCol2( const Vector4 &col2 );
+
+    // Set column 3 of a 4x4 matrix
+    // 
+    VECTORMATH_FORCE_INLINE Matrix4 & setCol3( const Vector4 &col3 );
+
+    // Get column 0 of a 4x4 matrix
+    // 
+    VECTORMATH_FORCE_INLINE const Vector4 getCol0( ) const;
+
+    // Get column 1 of a 4x4 matrix
+    // 
+    VECTORMATH_FORCE_INLINE const Vector4 getCol1( ) const;
+
+    // Get column 2 of a 4x4 matrix
+    // 
+    VECTORMATH_FORCE_INLINE const Vector4 getCol2( ) const;
+
+    // Get column 3 of a 4x4 matrix
+    // 
+    VECTORMATH_FORCE_INLINE const Vector4 getCol3( ) const;
+
+    // Set the column of a 4x4 matrix referred to by the specified index
+    // 
+    VECTORMATH_FORCE_INLINE Matrix4 & setCol( int col, const Vector4 &vec );
+
+    // Set the row of a 4x4 matrix referred to by the specified index
+    // 
+    VECTORMATH_FORCE_INLINE Matrix4 & setRow( int row, const Vector4 &vec );
+
+    // Get the column of a 4x4 matrix referred to by the specified index
+    // 
+    VECTORMATH_FORCE_INLINE const Vector4 getCol( int col ) const;
+
+    // Get the row of a 4x4 matrix referred to by the specified index
+    // 
+    VECTORMATH_FORCE_INLINE const Vector4 getRow( int row ) const;
+
+    // Subscripting operator to set or get a column
+    // Returns a non-const Vector4 reference.
+    VECTORMATH_FORCE_INLINE Vector4 & operator []( int col );
+
+    // Subscripting operator to get a column
+    // Returns the column by value.
+    VECTORMATH_FORCE_INLINE const Vector4 operator []( int col ) const;
+
+    // Set the element of a 4x4 matrix referred to by column and row indices
+    // Argument order is column index first, then row index.
+    VECTORMATH_FORCE_INLINE Matrix4 & setElem( int col, int row, float val );
+
+    // Set the element of a 4x4 matrix referred to by column and row indices (scalar data contained in vector data type)
+    // Argument order is column index first, then row index.
+    VECTORMATH_FORCE_INLINE Matrix4 & setElem( int col, int row, const floatInVec &val );
+
+    // Get the element of a 4x4 matrix referred to by column and row indices
+    // Argument order is column index first, then row index.
+    VECTORMATH_FORCE_INLINE const floatInVec getElem( int col, int row ) const;
+
+    // Add two 4x4 matrices
+    // 
+    VECTORMATH_FORCE_INLINE const Matrix4 operator +( const Matrix4 & mat ) const;
+
+    // Subtract a 4x4 matrix from another 4x4 matrix
+    // 
+    VECTORMATH_FORCE_INLINE const Matrix4 operator -( const Matrix4 & mat ) const;
+
+    // Negate all elements of a 4x4 matrix
+    // 
+    VECTORMATH_FORCE_INLINE const Matrix4 operator -( ) const;
+
+    // Multiply a 4x4 matrix by a scalar
+    // 
+    VECTORMATH_FORCE_INLINE const Matrix4 operator *( float scalar ) const;
+
+    // Multiply a 4x4 matrix by a scalar (scalar data contained in vector data type)
+    // 
+    VECTORMATH_FORCE_INLINE const Matrix4 operator *( const floatInVec &scalar ) const;
+
+    // Multiply a 4x4 matrix by a 4-D vector
+    // 
+    VECTORMATH_FORCE_INLINE const Vector4 operator *( const Vector4 &vec ) const;
+
+    // Multiply a 4x4 matrix by a 3-D vector
+    // The result is a Vector4. NOTE(review): presumably vec is treated as having w = 0 - confirm.
+    VECTORMATH_FORCE_INLINE const Vector4 operator *( const Vector3 &vec ) const;
+
+    // Multiply a 4x4 matrix by a 3-D point
+    // The result is a Vector4. NOTE(review): presumably pnt is treated as having w = 1 - confirm.
+    VECTORMATH_FORCE_INLINE const Vector4 operator *( const Point3 &pnt ) const;
+
+    // Multiply two 4x4 matrices
+    // 
+    VECTORMATH_FORCE_INLINE const Matrix4 operator *( const Matrix4 & mat ) const;
+
+    // Multiply a 4x4 matrix by a 3x4 transformation matrix
+    // 
+    VECTORMATH_FORCE_INLINE const Matrix4 operator *( const Transform3 & tfrm ) const;
+
+    // Perform compound assignment and addition with a 4x4 matrix
+    // 
+    VECTORMATH_FORCE_INLINE Matrix4 & operator +=( const Matrix4 & mat );
+
+    // Perform compound assignment and subtraction by a 4x4 matrix
+    // 
+    VECTORMATH_FORCE_INLINE Matrix4 & operator -=( const Matrix4 & mat );
+
+    // Perform compound assignment and multiplication by a scalar
+    // 
+    VECTORMATH_FORCE_INLINE Matrix4 & operator *=( float scalar );
+
+    // Perform compound assignment and multiplication by a scalar (scalar data contained in vector data type)
+    // 
+    VECTORMATH_FORCE_INLINE Matrix4 & operator *=( const floatInVec &scalar );
+
+    // Perform compound assignment and multiplication by a 4x4 matrix
+    // 
+    VECTORMATH_FORCE_INLINE Matrix4 & operator *=( const Matrix4 & mat );
+
+    // Perform compound assignment and multiplication by a 3x4 transformation matrix
+    // 
+    VECTORMATH_FORCE_INLINE Matrix4 & operator *=( const Transform3 & tfrm );
+
+    // Construct an identity 4x4 matrix
+    // 
+    static VECTORMATH_FORCE_INLINE const Matrix4 identity( );
+
+    // Construct a 4x4 matrix to rotate around the x axis
+    // 
+    static VECTORMATH_FORCE_INLINE const Matrix4 rotationX( float radians );
+
+    // Construct a 4x4 matrix to rotate around the y axis
+    // 
+    static VECTORMATH_FORCE_INLINE const Matrix4 rotationY( float radians );
+
+    // Construct a 4x4 matrix to rotate around the z axis
+    // 
+    static VECTORMATH_FORCE_INLINE const Matrix4 rotationZ( float radians );
+
+    // Construct a 4x4 matrix to rotate around the x axis (scalar data contained in vector data type)
+    // 
+    static VECTORMATH_FORCE_INLINE const Matrix4 rotationX( const floatInVec &radians );
+
+    // Construct a 4x4 matrix to rotate around the y axis (scalar data contained in vector data type)
+    // 
+    static VECTORMATH_FORCE_INLINE const Matrix4 rotationY( const floatInVec &radians );
+
+    // Construct a 4x4 matrix to rotate around the z axis (scalar data contained in vector data type)
+    // 
+    static VECTORMATH_FORCE_INLINE const Matrix4 rotationZ( const floatInVec &radians );
+
+    // Construct a 4x4 matrix to rotate around the x, y, and z axes
+    // NOTE(review): the name suggests a Z*Y*X composition of the angles in radiansXYZ - confirm the order.
+    static VECTORMATH_FORCE_INLINE const Matrix4 rotationZYX( const Vector3 &radiansXYZ );
+
+    // Construct a 4x4 matrix to rotate around a unit-length 3-D vector
+    // 
+    static VECTORMATH_FORCE_INLINE const Matrix4 rotation( float radians, const Vector3 &unitVec );
+
+    // Construct a 4x4 matrix to rotate around a unit-length 3-D vector (scalar data contained in vector data type)
+    // 
+    static VECTORMATH_FORCE_INLINE const Matrix4 rotation( const floatInVec &radians, const Vector3 &unitVec );
+
+    // Construct a rotation matrix from a unit-length quaternion
+    // 
+    static VECTORMATH_FORCE_INLINE const Matrix4 rotation( const Quat &unitQuat );
+
+    // Construct a 4x4 matrix to perform scaling
+    // 
+    static VECTORMATH_FORCE_INLINE const Matrix4 scale( const Vector3 &scaleVec );
+
+    // Construct a 4x4 matrix to perform translation
+    // 
+    static VECTORMATH_FORCE_INLINE const Matrix4 translation( const Vector3 &translateVec );
+
+    // Construct viewing matrix based on eye, position looked at, and up direction
+    // The two positions are Point3 values; the up direction is a Vector3.
+    static VECTORMATH_FORCE_INLINE const Matrix4 lookAt( const Point3 &eyePos, const Point3 &lookAtPos, const Vector3 &upVec );
+
+    // Construct a perspective projection matrix
+    // NOTE(review): fovyRadians is presumably the vertical field of view, in radians - confirm.
+    static VECTORMATH_FORCE_INLINE const Matrix4 perspective( float fovyRadians, float aspect, float zNear, float zFar );
+
+    // Construct a perspective projection matrix based on frustum
+    // 
+    static VECTORMATH_FORCE_INLINE const Matrix4 frustum( float left, float right, float bottom, float top, float zNear, float zFar );
+
+    // Construct an orthographic projection matrix
+    // 
+    static VECTORMATH_FORCE_INLINE const Matrix4 orthographic( float left, float right, float bottom, float top, float zNear, float zFar );
+
+};
+// Multiply a 4x4 matrix by a scalar
+// (overload with the scalar as the left-hand operand)
+VECTORMATH_FORCE_INLINE const Matrix4 operator *( float scalar, const Matrix4 & mat );
+
+// Multiply a 4x4 matrix by a scalar (scalar data contained in vector data type)
+// (overload with the scalar as the left-hand operand)
+VECTORMATH_FORCE_INLINE const Matrix4 operator *( const floatInVec &scalar, const Matrix4 & mat );
+
+// Append (post-multiply) a scale transformation to a 4x4 matrix
+// NOTE: 
+// Faster than creating and multiplying a scale transformation matrix.
+// 
+VECTORMATH_FORCE_INLINE const Matrix4 appendScale( const Matrix4 & mat, const Vector3 &scaleVec );
+
+// Prepend (pre-multiply) a scale transformation to a 4x4 matrix
+// NOTE: 
+// Faster than creating and multiplying a scale transformation matrix.
+// Note the argument order: scaleVec first, then mat (the reverse of appendScale).
+VECTORMATH_FORCE_INLINE const Matrix4 prependScale( const Vector3 &scaleVec, const Matrix4 & mat );
+
+// Multiply two 4x4 matrices per element
+// 
+VECTORMATH_FORCE_INLINE const Matrix4 mulPerElem( const Matrix4 & mat0, const Matrix4 & mat1 );
+
+// Compute the absolute value of a 4x4 matrix per element
+// 
+VECTORMATH_FORCE_INLINE const Matrix4 absPerElem( const Matrix4 & mat );
+
+// Transpose of a 4x4 matrix
+// 
+VECTORMATH_FORCE_INLINE const Matrix4 transpose( const Matrix4 & mat );
+
+// Compute the inverse of a 4x4 matrix
+// NOTE: 
+// Result is unpredictable when the determinant of mat is equal to or near 0.
+// 
+VECTORMATH_FORCE_INLINE const Matrix4 inverse( const Matrix4 & mat );
+
+// Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix
+// NOTE: 
+// This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.  The result is unpredictable when the determinant of mat is equal to or near 0.
+// 
+VECTORMATH_FORCE_INLINE const Matrix4 affineInverse( const Matrix4 & mat );
+
+// Compute the inverse of a 4x4 matrix, which is expected to be an affine matrix with an orthogonal upper-left 3x3 submatrix
+// NOTE: 
+// This can be used to achieve better performance than a general inverse when the specified 4x4 matrix meets the given restrictions.
+// 
+VECTORMATH_FORCE_INLINE const Matrix4 orthoInverse( const Matrix4 & mat );
+
+// Determinant of a 4x4 matrix
+// Returned as floatInVec.
+VECTORMATH_FORCE_INLINE const floatInVec determinant( const Matrix4 & mat );
+
+// Conditionally select between two 4x4 matrices
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// However, the transfer of select1 to a VMX register may use more processing time than a branch.
+// Use the boolInVec version for better performance.
+// NOTE(review): "VMX" wording looks inherited from another port of this header - confirm it applies here.
+VECTORMATH_FORCE_INLINE const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, bool select1 );
+
+// Conditionally select between two 4x4 matrices (scalar data contained in vector data type)
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// 
+VECTORMATH_FORCE_INLINE const Matrix4 select( const Matrix4 & mat0, const Matrix4 & mat1, const boolInVec &select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 4x4 matrix
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+VECTORMATH_FORCE_INLINE void print( const Matrix4 & mat );
+
+// Print a 4x4 matrix and an associated string identifier
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+VECTORMATH_FORCE_INLINE void print( const Matrix4 & mat, const char * name );
+
+#endif
+
+// A 3x4 transformation matrix in array-of-structures format
+//
+class Transform3
+{
+    Vector3 mCol0;
+    Vector3 mCol1;
+    Vector3 mCol2;
+    Vector3 mCol3;
+
+public:
+    // Default constructor; does no initialization
+    // 
+    VECTORMATH_FORCE_INLINE Transform3( ) { };
+
+    // Copy a 3x4 transformation matrix
+    // 
+    VECTORMATH_FORCE_INLINE Transform3( const Transform3 & tfrm );
+
+    // Construct a 3x4 transformation matrix containing the specified columns
+    // 
+    VECTORMATH_FORCE_INLINE Transform3( const Vector3 &col0, const Vector3 &col1, const Vector3 &col2, const Vector3 &col3 );
+
+    // Construct a 3x4 transformation matrix from a 3x3 matrix and a 3-D vector
+    // 
+    VECTORMATH_FORCE_INLINE Transform3( const Matrix3 & tfrm, const Vector3 &translateVec );
+
+    // Construct a 3x4 transformation matrix from a unit-length quaternion and a 3-D vector
+    // 
+    VECTORMATH_FORCE_INLINE Transform3( const Quat &unitQuat, const Vector3 &translateVec );
+
+    // Set all elements of a 3x4 transformation matrix to the same scalar value
+    // 
+    explicit VECTORMATH_FORCE_INLINE Transform3( float scalar );
+
+    // Set all elements of a 3x4 transformation matrix to the same scalar value (scalar data contained in vector data type)
+    // 
+    explicit VECTORMATH_FORCE_INLINE Transform3( const floatInVec &scalar );
+
+    // Assign one 3x4 transformation matrix to another
+    // 
+    VECTORMATH_FORCE_INLINE Transform3 & operator =( const Transform3 & tfrm );
+
+    // Set the upper-left 3x3 submatrix
+    // 
+    VECTORMATH_FORCE_INLINE Transform3 & setUpper3x3( const Matrix3 & mat3 );
+
+    // Get the upper-left 3x3 submatrix of a 3x4 transformation matrix
+    // 
+    VECTORMATH_FORCE_INLINE const Matrix3 getUpper3x3( ) const;
+
+    // Set translation component
+    // 
+    VECTORMATH_FORCE_INLINE Transform3 & setTranslation( const Vector3 &translateVec );
+
+    // Get the translation component of a 3x4 transformation matrix
+    // 
+    VECTORMATH_FORCE_INLINE const Vector3 getTranslation( ) const;
+
+    // Set column 0 of a 3x4 transformation matrix
+    // 
+    VECTORMATH_FORCE_INLINE Transform3 & setCol0( const Vector3 &col0 );
+
+    // Set column 1 of a 3x4 transformation matrix
+    // 
+    VECTORMATH_FORCE_INLINE Transform3 & setCol1( const Vector3 &col1 );
+
+    // Set column 2 of a 3x4 transformation matrix
+    // 
+    VECTORMATH_FORCE_INLINE Transform3 & setCol2( const Vector3 &col2 );
+
+    // Set column 3 of a 3x4 transformation matrix
+    // 
+    VECTORMATH_FORCE_INLINE Transform3 & setCol3( const Vector3 &col3 );
+
+    // Get column 0 of a 3x4 transformation matrix
+    // 
+    VECTORMATH_FORCE_INLINE const Vector3 getCol0( ) const;
+
+    // Get column 1 of a 3x4 transformation matrix
+    // 
+    VECTORMATH_FORCE_INLINE const Vector3 getCol1( ) const;
+
+    // Get column 2 of a 3x4 transformation matrix
+    // 
+    VECTORMATH_FORCE_INLINE const Vector3 getCol2( ) const;
+
+    // Get column 3 of a 3x4 transformation matrix
+    // 
+    VECTORMATH_FORCE_INLINE const Vector3 getCol3( ) const;
+
+    // Set the column of a 3x4 transformation matrix referred to by the specified index
+    // 
+    VECTORMATH_FORCE_INLINE Transform3 & setCol( int col, const Vector3 &vec );
+
+    // Set the row of a 3x4 transformation matrix referred to by the specified index
+    // 
+    VECTORMATH_FORCE_INLINE Transform3 & setRow( int row, const Vector4 &vec );
+
+    // Get the column of a 3x4 transformation matrix referred to by the specified index
+    // 
+    VECTORMATH_FORCE_INLINE const Vector3 getCol( int col ) const;
+
+    // Get the row of a 3x4 transformation matrix referred to by the specified index
+    // 
+    VECTORMATH_FORCE_INLINE const Vector4 getRow( int row ) const;
+
+    // Subscripting operator to set or get a column
+    // 
+    VECTORMATH_FORCE_INLINE Vector3 & operator []( int col );
+
+    // Subscripting operator to get a column
+    // 
+    VECTORMATH_FORCE_INLINE const Vector3 operator []( int col ) const;
+
+    // Set the element of a 3x4 transformation matrix referred to by column and row indices
+    // 
+    VECTORMATH_FORCE_INLINE Transform3 & setElem( int col, int row, float val );
+
+    // Set the element of a 3x4 transformation matrix referred to by column and row indices (scalar data contained in vector data type)
+    // 
+    VECTORMATH_FORCE_INLINE Transform3 & setElem( int col, int row, const floatInVec &val );
+
+    // Get the element of a 3x4 transformation matrix referred to by column and row indices
+    // 
+    VECTORMATH_FORCE_INLINE const floatInVec getElem( int col, int row ) const;
+
+    // Multiply a 3x4 transformation matrix by a 3-D vector
+    // 
+    VECTORMATH_FORCE_INLINE const Vector3 operator *( const Vector3 &vec ) const;
+
+    // Multiply a 3x4 transformation matrix by a 3-D point
+    // 
+    VECTORMATH_FORCE_INLINE const Point3 operator *( const Point3 &pnt ) const;
+
+    // Multiply two 3x4 transformation matrices
+    // 
+    VECTORMATH_FORCE_INLINE const Transform3 operator *( const Transform3 & tfrm ) const;
+
+    // Perform compound assignment and multiplication by a 3x4 transformation matrix
+    // 
+    VECTORMATH_FORCE_INLINE Transform3 & operator *=( const Transform3 & tfrm );
+
+    // Construct an identity 3x4 transformation matrix
+    // 
+    static VECTORMATH_FORCE_INLINE const Transform3 identity( );
+
+    // Construct a 3x4 transformation matrix to rotate around the x axis
+    // 
+    static VECTORMATH_FORCE_INLINE const Transform3 rotationX( float radians );
+
+    // Construct a 3x4 transformation matrix to rotate around the y axis
+    // 
+    static VECTORMATH_FORCE_INLINE const Transform3 rotationY( float radians );
+
+    // Construct a 3x4 transformation matrix to rotate around the z axis
+    // 
+    static VECTORMATH_FORCE_INLINE const Transform3 rotationZ( float radians );
+
+    // Construct a 3x4 transformation matrix to rotate around the x axis (scalar data contained in vector data type)
+    // 
+    static VECTORMATH_FORCE_INLINE const Transform3 rotationX( const floatInVec &radians );
+
+    // Construct a 3x4 transformation matrix to rotate around the y axis (scalar data contained in vector data type)
+    // 
+    static VECTORMATH_FORCE_INLINE const Transform3 rotationY( const floatInVec &radians );
+
+    // Construct a 3x4 transformation matrix to rotate around the z axis (scalar data contained in vector data type)
+    // 
+    static VECTORMATH_FORCE_INLINE const Transform3 rotationZ( const floatInVec &radians );
+
+    // Construct a 3x4 transformation matrix to rotate around the x, y, and z axes
+    // 
+    static VECTORMATH_FORCE_INLINE const Transform3 rotationZYX( const Vector3 &radiansXYZ );
+
+    // Construct a 3x4 transformation matrix to rotate around a unit-length 3-D vector
+    // 
+    static VECTORMATH_FORCE_INLINE const Transform3 rotation( float radians, const Vector3 &unitVec );
+
+    // Construct a 3x4 transformation matrix to rotate around a unit-length 3-D vector (scalar data contained in vector data type)
+    // 
+    static VECTORMATH_FORCE_INLINE const Transform3 rotation( const floatInVec &radians, const Vector3 &unitVec );
+
+    // Construct a 3x4 transformation matrix to rotate by a unit-length quaternion
+    // 
+    static VECTORMATH_FORCE_INLINE const Transform3 rotation( const Quat &unitQuat );
+
+    // Construct a 3x4 transformation matrix to perform scaling
+    // 
+    static VECTORMATH_FORCE_INLINE const Transform3 scale( const Vector3 &scaleVec );
+
+    // Construct a 3x4 transformation matrix to perform translation
+    // 
+    static VECTORMATH_FORCE_INLINE const Transform3 translation( const Vector3 &translateVec );
+
+};
+// Append (post-multiply) a scale transformation to a 3x4 transformation matrix
+// NOTE: 
+// Faster than creating and multiplying a scale transformation matrix.
+// 
+VECTORMATH_FORCE_INLINE const Transform3 appendScale( const Transform3 & tfrm, const Vector3 &scaleVec );
+
+// Prepend (pre-multiply) a scale transformation to a 3x4 transformation matrix
+// NOTE: 
+// Faster than creating and multiplying a scale transformation matrix.
+// 
+VECTORMATH_FORCE_INLINE const Transform3 prependScale( const Vector3 &scaleVec, const Transform3 & tfrm );
+
+// Multiply two 3x4 transformation matrices per element
+// 
+VECTORMATH_FORCE_INLINE const Transform3 mulPerElem( const Transform3 & tfrm0, const Transform3 & tfrm1 );
+
+// Compute the absolute value of a 3x4 transformation matrix per element
+// 
+VECTORMATH_FORCE_INLINE const Transform3 absPerElem( const Transform3 & tfrm );
+
+// Inverse of a 3x4 transformation matrix
+// NOTE: 
+// Result is unpredictable when the determinant of the left 3x3 submatrix is equal to or near 0.
+// 
+VECTORMATH_FORCE_INLINE const Transform3 inverse( const Transform3 & tfrm );
+
+// Compute the inverse of a 3x4 transformation matrix, expected to have an orthogonal upper-left 3x3 submatrix
+// NOTE: 
+// This can be used to achieve better performance than a general inverse when the specified 3x4 transformation matrix meets the given restrictions.
+// 
+VECTORMATH_FORCE_INLINE const Transform3 orthoInverse( const Transform3 & tfrm );
+
+// Conditionally select between two 3x4 transformation matrices
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// However, the transfer of select1 to a VMX register may use more processing time than a branch.
+// Use the boolInVec version for better performance.
+// 
+VECTORMATH_FORCE_INLINE const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, bool select1 );
+
+// Conditionally select between two 3x4 transformation matrices (scalar data contained in vector data type)
+// NOTE: 
+// This function uses a conditional select instruction to avoid a branch.
+// 
+VECTORMATH_FORCE_INLINE const Transform3 select( const Transform3 & tfrm0, const Transform3 & tfrm1, const boolInVec &select1 );
+
+#ifdef _VECTORMATH_DEBUG
+
+// Print a 3x4 transformation matrix
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+VECTORMATH_FORCE_INLINE void print( const Transform3 & tfrm );
+
+// Print a 3x4 transformation matrix and an associated string identifier
+// NOTE: 
+// Function is only defined when _VECTORMATH_DEBUG is defined.
+// 
+VECTORMATH_FORCE_INLINE void print( const Transform3 & tfrm, const char * name );
+
+#endif
+
+} // namespace Aos
+} // namespace Vectormath
+
+#include "vec_aos.h"
+#include "quat_aos.h"
+#include "mat_aos.h"
+
+#endif
diff --git a/Engine/lib/bullet/src/vectormath/vmInclude.h b/Engine/lib/bullet/src/vectormath/vmInclude.h
new file mode 100644
index 000000000..656514e42
--- /dev/null
+++ b/Engine/lib/bullet/src/vectormath/vmInclude.h
@@ -0,0 +1,31 @@
+
+#ifndef __VM_INCLUDE_H
+#define __VM_INCLUDE_H
+
+#include "LinearMath/btScalar.h"
+
+#if defined (USE_SYSTEM_VECTORMATH) || defined (__CELLOS_LV2__)
+	#include <vectormath_aos.h>
+#else //(USE_SYSTEM_VECTORMATH)
+	#if defined (BT_USE_SSE) 
+		#include "sse/vectormath_aos.h"
+	#else //all other platforms
+        #if defined (BT_USE_NEON)
+            #include "neon/vectormath_aos.h"
+        #else
+            #include "scalar/vectormath_aos.h"
+        #endif
+	#endif //(BT_USE_SSE)
+#endif //(USE_SYSTEM_VECTORMATH)
+
+
+
+typedef Vectormath::Aos::Vector3    vmVector3;
+typedef Vectormath::Aos::Quat       vmQuat;
+typedef Vectormath::Aos::Matrix3    vmMatrix3;
+typedef Vectormath::Aos::Transform3 vmTransform3;
+typedef Vectormath::Aos::Point3     vmPoint3;
+
+#endif //__VM_INCLUDE_H
+
+
diff --git a/Engine/lib/recast/CMakeLists.txt b/Engine/lib/recast/CMakeLists.txt
new file mode 100644
index 000000000..8dd69f639
--- /dev/null
+++ b/Engine/lib/recast/CMakeLists.txt
@@ -0,0 +1,27 @@
+CMAKE_MINIMUM_REQUIRED(VERSION 2.6)
+# Top-level build script of the RecastNavigation project; the actual targets are defined by the subdirectories added at the end.
+PROJECT(RecastNavigation)
+#SET(RECAST_VERSION r129)
+# Fall back to an optimized build when no build type was requested.
+IF(NOT CMAKE_BUILD_TYPE)
+#	SET(CMAKE_BUILD_TYPE "Debug")
+	SET(CMAKE_BUILD_TYPE "Release")
+ENDIF(NOT CMAKE_BUILD_TYPE)
+# /fp:fast is a cl.exe switch, so the option is only offered when building with MSVC.
+IF(MSVC)
+	OPTION(USE_MSVC_FAST_FLOATINGPOINT "Use MSVC /fp:fast option" ON)
+	IF(USE_MSVC_FAST_FLOATINGPOINT)
+		ADD_DEFINITIONS(/fp:fast)
+	ENDIF(USE_MSVC_FAST_FLOATINGPOINT)
+ENDIF(MSVC)
+# WIN32 also matches non-MSVC toolchains, so use the portable -D spelling (CMake emits it in the active compiler's syntax) instead of a raw "/D NAME" pair that only cl.exe understands.
+IF(WIN32)
+	ADD_DEFINITIONS(-D_CRT_SECURE_NO_WARNINGS)
+ENDIF(WIN32)
+# Each subdirectory is expected to provide its own CMakeLists.txt.
+ADD_SUBDIRECTORY(DebugUtils)
+ADD_SUBDIRECTORY(Detour)
+ADD_SUBDIRECTORY(DetourCrowd)
+ADD_SUBDIRECTORY(DetourTileCache)
+ADD_SUBDIRECTORY(Recast)
+ADD_SUBDIRECTORY(RecastDemo)
diff --git a/Engine/lib/recast/DebugUtils/CMakeLists.txt b/Engine/lib/recast/DebugUtils/CMakeLists.txt
new file mode 100644
index 000000000..e79364ab2
--- /dev/null
+++ b/Engine/lib/recast/DebugUtils/CMakeLists.txt
@@ -0,0 +1,23 @@
+CMAKE_MINIMUM_REQUIRED(VERSION 2.6)
+# Implementation files of the DebugUtils library.
+SET(debugutils_SRCS 
+	Source/DebugDraw.cpp
+	Source/DetourDebugDraw.cpp
+	Source/RecastDebugDraw.cpp
+	Source/RecastDump.cpp
+)
+# Headers are listed as well and handed to ADD_LIBRARY together with the sources.
+SET(debugutils_HDRS
+	Include/DebugDraw.h
+	Include/DetourDebugDraw.h
+	Include/RecastDebugDraw.h
+	Include/RecastDump.h
+)
+# Header search paths: this library's own Include directory plus the sibling Detour, DetourTileCache and Recast headers.
+INCLUDE_DIRECTORIES(Include 
+	../Detour/Include
+	../DetourTileCache/Include
+	../Recast/Include
+)
+# No library type is given, so it follows BUILD_SHARED_LIBS (static when that variable is unset).
+ADD_LIBRARY(DebugUtils ${debugutils_SRCS} ${debugutils_HDRS})
diff --git a/Engine/lib/recast/DebugUtils/Include/DebugDraw.h b/Engine/lib/recast/DebugUtils/Include/DebugDraw.h
new file mode 100644
index 000000000..b24094fb2
--- /dev/null
+++ b/Engine/lib/recast/DebugUtils/Include/DebugDraw.h
@@ -0,0 +1,216 @@
+//
+// Copyright (c) 2009-2010 Mikko Mononen memon@inside.org
+//
+// This software is provided 'as-is', without any express or implied
+// warranty.  In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source distribution.
+//
+
+#ifndef DEBUGDRAW_H
+#define DEBUGDRAW_H
+
+// Some math headers don't have PI defined.
+static const float DU_PI = 3.14159265f;
+// Primitive kinds that can be passed to duDebugDraw::begin().
+enum duDebugDrawPrimitives
+{
+	DU_DRAW_POINTS,
+	DU_DRAW_LINES,
+	DU_DRAW_TRIS,
+	DU_DRAW_QUADS,	
+};
+
+/// Abstract debug draw interface; every member function is pure virtual.
+struct duDebugDraw
+{
+	virtual ~duDebugDraw() = 0;
+	/// Set the depth mask state (the effect is left to the implementation).
+	virtual void depthMask(bool state) = 0;
+	/// Set the texture state (the effect is left to the implementation).
+	virtual void texture(bool state) = 0;
+
+	/// Begin drawing primitives.
+	///  @param prim [in] primitive type to draw, one of duDebugDrawPrimitives.
+	///  @param size [in] size of a primitive, applies to point size and line width only.
+	virtual void begin(duDebugDrawPrimitives prim, float size = 1.0f) = 0;
+
+	/// Submit a vertex
+	///  @param pos [in] position of the verts.
+	///  @param color [in] color of the verts.
+	virtual void vertex(const float* pos, unsigned int color) = 0;
+
+	/// Submit a vertex
+	///  @param x,y,z [in] position of the verts.
+	///  @param color [in] color of the verts.
+	virtual void vertex(const float x, const float y, const float z, unsigned int color) = 0;
+	/// Submit a vertex with uv coordinates
+	///  @param pos [in] position of the verts.
+	///  @param color [in] color of the verts.
+	///  @param uv [in] uv coordinates of the verts (presumably texture coordinates; the array layout is not visible here).
+	virtual void vertex(const float* pos, unsigned int color, const float* uv) = 0;
+	/// Submit a vertex with uv coordinates
+	///  @param x,y,z [in] position of the verts.
+	///  @param color [in] color of the verts.
+	///  @param u,v [in] uv coordinates of the verts (presumably texture coordinates).
+	virtual void vertex(const float x, const float y, const float z, unsigned int color, const float u, const float v) = 0;
+	
+	/// End drawing primitives.
+	virtual void end() = 0;
+};
+// Pack four components into one value laid out as 0xAABBGGRR. Inputs are not masked, so values outside 0..255 spill into neighbouring channels.
+inline unsigned int duRGBA(int r, int g, int b, int a)
+{
+	return ((unsigned int)r) | ((unsigned int)g << 8) | ((unsigned int)b << 16) | ((unsigned int)a << 24);
+}
+// Convert float components to bytes (scale by 255, truncate) and pack them with duRGBA. Inputs are not clamped; NOTE(review): callers presumably pass values in 0..1 - confirm.
+inline unsigned int duRGBAf(float fr, float fg, float fb, float fa)
+{
+	unsigned char r = (unsigned char)(fr*255.0f);
+	unsigned char g = (unsigned char)(fg*255.0f);
+	unsigned char b = (unsigned char)(fb*255.0f);
+	unsigned char a = (unsigned char)(fa*255.0f);
+	return duRGBA(r,g,b,a);
+}
+
+unsigned int duIntToCol(int i, int a);
+void duIntToCol(int i, float* col);
+// Scale the r, g and b channels of col by d/256 (multiply, then shift right by 8); the alpha channel is passed through unchanged.
+inline unsigned int duMultCol(const unsigned int col, const unsigned int d)
+{
+	const unsigned int r = col & 0xff;
+	const unsigned int g = (col >> 8) & 0xff;
+	const unsigned int b = (col >> 16) & 0xff;
+	const unsigned int a = (col >> 24) & 0xff;
+	return duRGBA((r*d) >> 8, (g*d) >> 8, (b*d) >> 8, a);
+}
+// Halve the r, g and b channels (shift right by one, mask each to 7 bits) while keeping the alpha byte as is.
+inline unsigned int duDarkenCol(unsigned int col)
+{
+	return ((col >> 1) & 0x007f7f7f) | (col & 0xff000000);
+}
+// Blend every channel as (a*(255-u) + b*u)/255, so u == 0 yields ca and u == 255 yields cb. u is not range-checked: 255-u wraps around for u > 255.
+inline unsigned int duLerpCol(unsigned int ca, unsigned int cb, unsigned int u)
+{
+	const unsigned int ra = ca & 0xff;
+	const unsigned int ga = (ca >> 8) & 0xff;
+	const unsigned int ba = (ca >> 16) & 0xff;
+	const unsigned int aa = (ca >> 24) & 0xff;
+	const unsigned int rb = cb & 0xff;
+	const unsigned int gb = (cb >> 8) & 0xff;
+	const unsigned int bb = (cb >> 16) & 0xff;
+	const unsigned int ab = (cb >> 24) & 0xff;
+	
+	unsigned int r = (ra*(255-u) + rb*u)/255;
+	unsigned int g = (ga*(255-u) + gb*u)/255;
+	unsigned int b = (ba*(255-u) + bb*u)/255;
+	unsigned int a = (aa*(255-u) + ab*u)/255;
+	return duRGBA(r,g,b,a);
+}
+// Replace the alpha byte of c with a; a is shifted into place but not masked to 8 bits.
+inline unsigned int duTransCol(unsigned int c, unsigned int a)
+{
+	return (a<<24) | (c & 0x00ffffff);
+}
+
+
+void duCalcBoxColors(unsigned int* colors, unsigned int colTop, unsigned int colSide);
+
+void duDebugDrawCylinderWire(struct duDebugDraw* dd, float minx, float miny, float minz,
+							 float maxx, float maxy, float maxz, unsigned int col, const float lineWidth);
+
+void duDebugDrawBoxWire(struct duDebugDraw* dd, float minx, float miny, float minz,
+						float maxx, float maxy, float maxz, unsigned int col, const float lineWidth);
+
+void duDebugDrawArc(struct duDebugDraw* dd, const float x0, const float y0, const float z0,
+					const float x1, const float y1, const float z1, const float h,
+					const float as0, const float as1, unsigned int col, const float lineWidth);
+
+void duDebugDrawArrow(struct duDebugDraw* dd, const float x0, const float y0, const float z0,
+					  const float x1, const float y1, const float z1,
+					  const float as0, const float as1, unsigned int col, const float lineWidth);
+
+void duDebugDrawCircle(struct duDebugDraw* dd, const float x, const float y, const float z,
+					   const float r, unsigned int col, const float lineWidth);
+
+void duDebugDrawCross(struct duDebugDraw* dd, const float x, const float y, const float z,
+					  const float size, unsigned int col, const float lineWidth);
+
+void duDebugDrawBox(struct duDebugDraw* dd, float minx, float miny, float minz,
+					float maxx, float maxy, float maxz, const unsigned int* fcol);
+
+void duDebugDrawCylinder(struct duDebugDraw* dd, float minx, float miny, float minz,
+						 float maxx, float maxy, float maxz, unsigned int col);
+
+void duDebugDrawGridXZ(struct duDebugDraw* dd, const float ox, const float oy, const float oz,
+					   const int w, const int h, const float size,
+					   const unsigned int col, const float lineWidth);
+
+
+// Versions without begin/end, can be used to draw multiple primitives.
+void duAppendCylinderWire(struct duDebugDraw* dd, float minx, float miny, float minz,
+						  float maxx, float maxy, float maxz, unsigned int col);
+
+void duAppendBoxWire(struct duDebugDraw* dd, float minx, float miny, float minz,
+					 float maxx, float maxy, float maxz, unsigned int col);
+
+void duAppendBoxPoints(struct duDebugDraw* dd, float minx, float miny, float minz,
+					   float maxx, float maxy, float maxz, unsigned int col);
+
+void duAppendArc(struct duDebugDraw* dd, const float x0, const float y0, const float z0,
+				 const float x1, const float y1, const float z1, const float h,
+				 const float as0, const float as1, unsigned int col);
+
+void duAppendArrow(struct duDebugDraw* dd, const float x0, const float y0, const float z0,
+				   const float x1, const float y1, const float z1,
+				   const float as0, const float as1, unsigned int col);
+
+void duAppendCircle(struct duDebugDraw* dd, const float x, const float y, const float z,
+					const float r, unsigned int col);
+
+void duAppendCross(struct duDebugDraw* dd, const float x, const float y, const float z,
+				   const float size, unsigned int col);
+
+void duAppendBox(struct duDebugDraw* dd, float minx, float miny, float minz,
+				 float maxx, float maxy, float maxz, const unsigned int* fcol);
+
+void duAppendCylinder(struct duDebugDraw* dd, float minx, float miny, float minz,
+					  float maxx, float maxy, float maxz, unsigned int col);
+
+// duDebugDraw implementation with position/color buffers (presumably recording vertices for replay through draw()). Overrides every pure virtual of duDebugDraw so the class is instantiable.
+class duDisplayList : public duDebugDraw
+{
+	float* m_pos;
+	unsigned int* m_color;
+	int m_size;
+	int m_cap;
+	bool m_depthMask;
+	duDebugDrawPrimitives m_prim;
+	float m_primSize;
+	void resize(int cap);
+public:
+	duDisplayList(int cap = 512);
+	~duDisplayList();
+	virtual void depthMask(bool state);
+	virtual void texture(bool /*state*/) {} // no texture state member exists, so this is a no-op
+	virtual void begin(duDebugDrawPrimitives prim, float size = 1.0f);
+	virtual void vertex(const float x, const float y, const float z, unsigned int color);
+	virtual void vertex(const float* pos, unsigned int color);
+	virtual void vertex(const float* pos, unsigned int color, const float* /*uv*/) { vertex(pos, color); } // uv is dropped: only position and color buffers exist
+	virtual void vertex(const float x, const float y, const float z, unsigned int color, const float /*u*/, const float /*v*/) { vertex(x, y, z, color); } // uv is dropped
+	virtual void end();
+	void clear();
+	void draw(struct duDebugDraw* dd);
+};
+
+
+#endif // DEBUGDRAW_H
diff --git a/Engine/lib/recast/DebugUtils/Include/DetourDebugDraw.h b/Engine/lib/recast/DebugUtils/Include/DetourDebugDraw.h
new file mode 100644
index 000000000..34d93e1e5
--- /dev/null
+++ b/Engine/lib/recast/DebugUtils/Include/DetourDebugDraw.h
@@ -0,0 +1,48 @@
+//
+// Copyright (c) 2009-2010 Mikko Mononen memon@inside.org
+//
+// This software is provided 'as-is', without any express or implied
+// warranty.  In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source distribution.
+//
+
+#ifndef DETOURDEBUGDRAW_H
+#define DETOURDEBUGDRAW_H
+
+#include "DetourNavMesh.h"
+#include "DetourNavMeshQuery.h"
+#include "DetourTileCacheBuilder.h"
+
+enum DrawNavMeshFlags
+{
+	DU_DRAWNAVMESH_OFFMESHCONS = 0x01,
+	DU_DRAWNAVMESH_CLOSEDLIST = 0x02,
+	DU_DRAWNAVMESH_COLOR_TILES = 0x04,
+};
+
+void duDebugDrawNavMesh(struct duDebugDraw* dd, const dtNavMesh& mesh, unsigned char flags);
+void duDebugDrawNavMeshWithClosedList(struct duDebugDraw* dd, const dtNavMesh& mesh, const dtNavMeshQuery& query, unsigned char flags);
+void duDebugDrawNavMeshNodes(struct duDebugDraw* dd, const dtNavMeshQuery& query);
+void duDebugDrawNavMeshBVTree(struct duDebugDraw* dd, const dtNavMesh& mesh);
+void duDebugDrawNavMeshPortals(struct duDebugDraw* dd, const dtNavMesh& mesh);
+void duDebugDrawNavMeshPolysWithFlags(struct duDebugDraw* dd, const dtNavMesh& mesh, const unsigned short polyFlags, const unsigned int col);
+void duDebugDrawNavMeshPoly(struct duDebugDraw* dd, const dtNavMesh& mesh, dtPolyRef ref, const unsigned int col);
+
+void duDebugDrawTileCacheLayerAreas(struct duDebugDraw* dd, const dtTileCacheLayer& layer, const float cs, const float ch);
+void duDebugDrawTileCacheLayerRegions(struct duDebugDraw* dd, const dtTileCacheLayer& layer, const float cs, const float ch);
+void duDebugDrawTileCacheContours(duDebugDraw* dd, const struct dtTileCacheContourSet& lcset,
+								  const float* orig, const float cs, const float ch);
+void duDebugDrawTileCachePolyMesh(duDebugDraw* dd, const struct dtTileCachePolyMesh& lmesh,
+								  const float* orig, const float cs, const float ch);
+
+#endif // DETOURDEBUGDRAW_H
\ No newline at end of file
diff --git a/Engine/lib/recast/DebugUtils/Include/RecastDebugDraw.h b/Engine/lib/recast/DebugUtils/Include/RecastDebugDraw.h
new file mode 100644
index 000000000..f75802d05
--- /dev/null
+++ b/Engine/lib/recast/DebugUtils/Include/RecastDebugDraw.h
@@ -0,0 +1,46 @@
+//
+// Copyright (c) 2009-2010 Mikko Mononen memon@inside.org
+//
+// This software is provided 'as-is', without any express or implied
+// warranty.  In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source distribution.
+//
+
+#ifndef RECAST_DEBUGDRAW_H
+#define RECAST_DEBUGDRAW_H
+
+void duDebugDrawTriMesh(struct duDebugDraw* dd, const float* verts, int nverts, const int* tris, const float* normals, int ntris, const unsigned char* flags, const float texScale);
+void duDebugDrawTriMeshSlope(struct duDebugDraw* dd, const float* verts, int nverts, const int* tris, const float* normals, int ntris, const float walkableSlopeAngle, const float texScale);
+
+void duDebugDrawHeightfieldSolid(struct duDebugDraw* dd, const struct rcHeightfield& hf);
+void duDebugDrawHeightfieldWalkable(struct duDebugDraw* dd, const struct rcHeightfield& hf);
+
+void duDebugDrawCompactHeightfieldSolid(struct duDebugDraw* dd, const struct rcCompactHeightfield& chf);
+void duDebugDrawCompactHeightfieldRegions(struct duDebugDraw* dd, const struct rcCompactHeightfield& chf);
+void duDebugDrawCompactHeightfieldDistance(struct duDebugDraw* dd, const struct rcCompactHeightfield& chf);
+
+void duDebugDrawHeightfieldLayer(duDebugDraw* dd, const struct rcHeightfieldLayer& layer, const int idx);
+void duDebugDrawHeightfieldLayers(duDebugDraw* dd, const struct rcHeightfieldLayerSet& lset);
+void duDebugDrawHeightfieldLayersRegions(duDebugDraw* dd, const struct rcHeightfieldLayerSet& lset);
+
+void duDebugDrawLayerContours(duDebugDraw* dd, const struct rcLayerContourSet& lcset);
+void duDebugDrawLayerPolyMesh(duDebugDraw* dd, const struct rcLayerPolyMesh& lmesh);
+
+
+void duDebugDrawRegionConnections(struct duDebugDraw* dd, const struct rcContourSet& cset, const float alpha = 1.0f);
+void duDebugDrawRawContours(struct duDebugDraw* dd, const struct rcContourSet& cset, const float alpha = 1.0f);
+void duDebugDrawContours(struct duDebugDraw* dd, const struct rcContourSet& cset, const float alpha = 1.0f);
+void duDebugDrawPolyMesh(struct duDebugDraw* dd, const struct rcPolyMesh& mesh);
+void duDebugDrawPolyMeshDetail(struct duDebugDraw* dd, const struct rcPolyMeshDetail& dmesh);
+
+#endif // RECAST_DEBUGDRAW_H
diff --git a/Engine/lib/recast/DebugUtils/Include/RecastDump.h b/Engine/lib/recast/DebugUtils/Include/RecastDump.h
new file mode 100644
index 000000000..6a722fdae
--- /dev/null
+++ b/Engine/lib/recast/DebugUtils/Include/RecastDump.h
@@ -0,0 +1,43 @@
+//
+// Copyright (c) 2009-2010 Mikko Mononen memon@inside.org
+//
+// This software is provided 'as-is', without any express or implied
+// warranty.  In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source distribution.
+//
+
+#ifndef RECAST_DUMP_H
+#define RECAST_DUMP_H
+
+struct duFileIO
+{
+	virtual ~duFileIO() = 0;
+	virtual bool isWriting() const = 0;
+	virtual bool isReading() const = 0;
+	virtual bool write(const void* ptr, const size_t size) = 0;
+	virtual bool read(void* ptr, const size_t size) = 0;
+};
+
+bool duDumpPolyMeshToObj(struct rcPolyMesh& pmesh, duFileIO* io);
+bool duDumpPolyMeshDetailToObj(struct rcPolyMeshDetail& dmesh, duFileIO* io);
+
+bool duDumpContourSet(struct rcContourSet& cset, duFileIO* io);
+bool duReadContourSet(struct rcContourSet& cset, duFileIO* io);
+
+bool duDumpCompactHeightfield(struct rcCompactHeightfield& chf, duFileIO* io);
+bool duReadCompactHeightfield(struct rcCompactHeightfield& chf, duFileIO* io);
+
+void duLogBuildTimes(rcContext& ctx, const int totalTileUsec);
+
+
+#endif // RECAST_DUMP_H
diff --git a/Engine/lib/recast/DebugUtils/Source/DebugDraw.cpp b/Engine/lib/recast/DebugUtils/Source/DebugDraw.cpp
new file mode 100644
index 000000000..982bdba32
--- /dev/null
+++ b/Engine/lib/recast/DebugUtils/Source/DebugDraw.cpp
@@ -0,0 +1,599 @@
+//
+// Copyright (c) 2009-2010 Mikko Mononen memon@inside.org
+//
+// This software is provided 'as-is', without any express or implied
+// warranty.  In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source distribution.
+//
+
+#define _USE_MATH_DEFINES
+#include <math.h>
+#include <string.h>
+#include "DebugDraw.h"
+
+
+// Out-of-line definition of the duDebugDraw destructor; it has nothing to release.
+duDebugDraw::~duDebugDraw()
+{
+	// Empty
+}
+	
+
+// Extracts bit number 'b' of 'a', yielding 0 or 1 for the small bit
+// indices (0..5) used by the colour helpers in this file.
+inline int bit(int a, int b)
+{
+	const int mask = 1 << b;
+	return (a & mask) >> b;
+}
+
+// Derives a packed colour from the low six bits of 'i', with alpha 'a'.
+// Each channel gets a level in 1..4 built from two bits of 'i' and is
+// scaled by 63 before being handed to duRGBA.
+unsigned int duIntToCol(int i, int a)
+{
+	const int red   = 1 + bit(i, 1) + 2 * bit(i, 3);
+	const int green = 1 + bit(i, 2) + 2 * bit(i, 4);
+	const int blue  = 1 + bit(i, 0) + 2 * bit(i, 5);
+	return duRGBA(red * 63, green * 63, blue * 63, a);
+}
+
+// Float variant: writes three components into col[0..2], each computed as
+// 1 - level*63/255 where level is 1..4 and is built from two bits of 'i'.
+// Note the bit-to-channel assignment (bits 0/1/2 plus 3/4/5) differs from
+// the packed overload, so the two do not produce matching colours.
+void duIntToCol(int i, float* col)
+{
+	const int level[3] =
+	{
+		bit(i, 0) + bit(i, 3) * 2 + 1,
+		bit(i, 1) + bit(i, 4) * 2 + 1,
+		bit(i, 2) + bit(i, 5) * 2 + 1,
+	};
+	for (int c = 0; c < 3; ++c)
+		col[c] = 1 - level[c]*63.0f/255.0f;
+}
+
+// Fills colors[0..5] with six face colours: entry 0 is colTop scaled by 250,
+// entries 1..5 are colSide scaled by a fixed per-face factor (via duMultCol).
+// Does nothing when 'colors' is null.
+void duCalcBoxColors(unsigned int* colors, unsigned int colTop, unsigned int colSide)
+{
+	if (colors == 0)
+		return;
+
+	// Scale factors for entries 1..5, in order.
+	static const unsigned int sideScale[5] = { 140, 165, 217, 165, 217 };
+
+	colors[0] = duMultCol(colTop, 250);
+	for (int f = 0; f < 5; ++f)
+		colors[f+1] = duMultCol(colSide, sideScale[f]);
+}
+
+// Emits a wireframe cylinder for the given bounds as one DU_DRAW_LINES batch
+// of width 'lineWidth'. No-op when 'dd' is null.
+void duDebugDrawCylinderWire(struct duDebugDraw* dd, float minx, float miny, float minz,
+							 float maxx, float maxy, float maxz, unsigned int col, const float lineWidth)
+{
+	if (dd)
+	{
+		dd->begin(DU_DRAW_LINES, lineWidth);
+		duAppendCylinderWire(dd, minx,miny,minz, maxx,maxy,maxz, col);
+		dd->end();
+	}
+}
+
+// Emits a wireframe axis-aligned box as one DU_DRAW_LINES batch of width
+// 'lineWidth'. No-op when 'dd' is null.
+void duDebugDrawBoxWire(struct duDebugDraw* dd, float minx, float miny, float minz,
+						float maxx, float maxy, float maxz, unsigned int col, const float lineWidth)
+{
+	if (dd)
+	{
+		dd->begin(DU_DRAW_LINES, lineWidth);
+		duAppendBoxWire(dd, minx,miny,minz, maxx,maxy,maxz, col);
+		dd->end();
+	}
+}
+
+// Emits an arc from (x0,y0,z0) to (x1,y1,z1) as one DU_DRAW_LINES batch of
+// width 'lineWidth'; h, as0 and as1 are forwarded to duAppendArc.
+// No-op when 'dd' is null.
+void duDebugDrawArc(struct duDebugDraw* dd, const float x0, const float y0, const float z0,
+					const float x1, const float y1, const float z1, const float h,
+					const float as0, const float as1, unsigned int col, const float lineWidth)
+{
+	if (dd)
+	{
+		dd->begin(DU_DRAW_LINES, lineWidth);
+		duAppendArc(dd, x0,y0,z0, x1,y1,z1, h, as0, as1, col);
+		dd->end();
+	}
+}
+
+// Emits a straight arrow from (x0,y0,z0) to (x1,y1,z1) as one DU_DRAW_LINES
+// batch of width 'lineWidth'; as0 and as1 are forwarded to duAppendArrow.
+// No-op when 'dd' is null.
+void duDebugDrawArrow(struct duDebugDraw* dd, const float x0, const float y0, const float z0,
+					  const float x1, const float y1, const float z1,
+					  const float as0, const float as1, unsigned int col, const float lineWidth)
+{
+	if (dd)
+	{
+		dd->begin(DU_DRAW_LINES, lineWidth);
+		duAppendArrow(dd, x0,y0,z0, x1,y1,z1, as0, as1, col);
+		dd->end();
+	}
+}
+
+// Emits a circle of radius 'r' centred at (x,y,z) as one DU_DRAW_LINES batch
+// of width 'lineWidth'. No-op when 'dd' is null.
+void duDebugDrawCircle(struct duDebugDraw* dd, const float x, const float y, const float z,
+					   const float r, unsigned int col, const float lineWidth)
+{
+	if (dd)
+	{
+		dd->begin(DU_DRAW_LINES, lineWidth);
+		duAppendCircle(dd, x,y,z, r, col);
+		dd->end();
+	}
+}
+
+// Emits a three-axis cross of half-extent 'size' at (x,y,z) as one
+// DU_DRAW_LINES batch of width 'lineWidth'. No-op when 'dd' is null.
+void duDebugDrawCross(struct duDebugDraw* dd, const float x, const float y, const float z,
+					  const float size, unsigned int col, const float lineWidth)
+{
+	if (dd)
+	{
+		dd->begin(DU_DRAW_LINES, lineWidth);
+		duAppendCross(dd, x,y,z, size, col);
+		dd->end();
+	}
+}
+
+// Emits a solid axis-aligned box as one DU_DRAW_QUADS batch; 'fcol' supplies
+// the six face colours consumed by duAppendBox. No-op when 'dd' is null.
+void duDebugDrawBox(struct duDebugDraw* dd, float minx, float miny, float minz,
+					float maxx, float maxy, float maxz, const unsigned int* fcol)
+{
+	if (dd)
+	{
+		dd->begin(DU_DRAW_QUADS);
+		duAppendBox(dd, minx,miny,minz, maxx,maxy,maxz, fcol);
+		dd->end();
+	}
+}
+
+// Emits a solid cylinder for the given bounds as one DU_DRAW_TRIS batch.
+// No-op when 'dd' is null.
+void duDebugDrawCylinder(struct duDebugDraw* dd, float minx, float miny, float minz,
+						 float maxx, float maxy, float maxz, unsigned int col)
+{
+	if (dd)
+	{
+		dd->begin(DU_DRAW_TRIS);
+		duAppendCylinder(dd, minx,miny,minz, maxx,maxy,maxz, col);
+		dd->end();
+	}
+}
+
+// Draws a grid of w by h cells of edge 'size' on the plane y = oy, with one
+// corner at (ox,oy,oz): h+1 lines running along X followed by w+1 lines
+// running along Z, all in a single DU_DRAW_LINES batch. No-op when 'dd' is null.
+void duDebugDrawGridXZ(struct duDebugDraw* dd, const float ox, const float oy, const float oz,
+					   const int w, const int h, const float size,
+					   const unsigned int col, const float lineWidth)
+{
+	if (!dd) return;
+
+	dd->begin(DU_DRAW_LINES, lineWidth);
+	// Lines along X, one per row boundary.
+	for (int row = 0; row <= h; ++row)
+	{
+		const float z = oz+row*size;
+		dd->vertex(ox,oy,z, col);
+		dd->vertex(ox+w*size,oy,z, col);
+	}
+	// Lines along Z, one per column boundary.
+	for (int column = 0; column <= w; ++column)
+	{
+		const float x = ox+column*size;
+		dd->vertex(x,oy,oz, col);
+		dd->vertex(x,oy,oz+h*size, col);
+	}
+	dd->end();
+}
+		 
+
+// Appends the line segments of a wireframe cylinder (elliptical when the X
+// and Z extents differ) to an already begun line batch: the two rims at miny
+// and maxy, then four vertical struts. No-op when 'dd' is null.
+void duAppendCylinderWire(struct duDebugDraw* dd, float minx, float miny, float minz,
+						  float maxx, float maxy, float maxz, unsigned int col)
+{
+	if (!dd) return;
+
+	// Unit-circle table (cos, sin pairs), filled on the first call.
+	static const int NUM_SEG = 16;
+	static float dir[NUM_SEG*2];
+	static bool init = false;
+	if (!init)
+	{
+		init = true;
+		for (int s = 0; s < NUM_SEG; ++s)
+		{
+			const float angle = (float)s/(float)NUM_SEG*DU_PI*2;
+			dir[s*2] = cosf(angle);
+			dir[s*2+1] = sinf(angle);
+		}
+	}
+
+	// Centre and radii of the footprint on the XZ plane.
+	const float cx = (maxx + minx)/2;
+	const float cz = (maxz + minz)/2;
+	const float rx = (maxx - minx)/2;
+	const float rz = (maxz - minz)/2;
+
+	// Rim segments: previous point -> current point, lower rim then upper rim.
+	for (int cur = 0, prev = NUM_SEG-1; cur < NUM_SEG; prev = cur++)
+	{
+		const float px = cx+dir[prev*2+0]*rx;
+		const float pz = cz+dir[prev*2+1]*rz;
+		const float qx = cx+dir[cur*2+0]*rx;
+		const float qz = cz+dir[cur*2+1]*rz;
+		dd->vertex(px, miny, pz, col);
+		dd->vertex(qx, miny, qz, col);
+		dd->vertex(px, maxy, pz, col);
+		dd->vertex(qx, maxy, qz, col);
+	}
+	// Vertical struts at every quarter turn.
+	for (int s = 0; s < NUM_SEG; s += NUM_SEG/4)
+	{
+		const float sx = cx+dir[s*2+0]*rx;
+		const float sz = cz+dir[s*2+1]*rz;
+		dd->vertex(sx, miny, sz, col);
+		dd->vertex(sx, maxy, sz, col);
+	}
+}
+
+// Appends the twelve edges of an axis-aligned box to an already begun line
+// batch: the rectangle at miny, the rectangle at maxy, then the four edges
+// joining them. No-op when 'dd' is null.
+void duAppendBoxWire(struct duDebugDraw* dd, float minx, float miny, float minz,
+					 float maxx, float maxy, float maxz, unsigned int col)
+{
+	if (!dd) return;
+
+	// Horizontal rectangles: y = miny first, then y = maxy.
+	const float ringY[2] = { miny, maxy };
+	for (int ring = 0; ring < 2; ++ring)
+	{
+		const float y = ringY[ring];
+		dd->vertex(minx, y, minz, col);
+		dd->vertex(maxx, y, minz, col);
+		dd->vertex(maxx, y, minz, col);
+		dd->vertex(maxx, y, maxz, col);
+		dd->vertex(maxx, y, maxz, col);
+		dd->vertex(minx, y, maxz, col);
+		dd->vertex(minx, y, maxz, col);
+		dd->vertex(minx, y, minz, col);
+	}
+
+	// Edges joining the two rectangles, one per corner.
+	const float cornerX[4] = { minx, maxx, maxx, minx };
+	const float cornerZ[4] = { minz, minz, maxz, maxz };
+	for (int corner = 0; corner < 4; ++corner)
+	{
+		dd->vertex(cornerX[corner], miny, cornerZ[corner], col);
+		dd->vertex(cornerX[corner], maxy, cornerZ[corner], col);
+	}
+}
+
+// Appends the corners of an axis-aligned box as vertices. The sequence is the
+// same as the two horizontal rectangles of duAppendBoxWire, so 16 vertices are
+// emitted and every corner appears twice. No-op when 'dd' is null.
+void duAppendBoxPoints(struct duDebugDraw* dd, float minx, float miny, float minz,
+					   float maxx, float maxy, float maxz, unsigned int col)
+{
+	if (!dd) return;
+
+	// y = miny first, then y = maxy.
+	const float ringY[2] = { miny, maxy };
+	for (int ring = 0; ring < 2; ++ring)
+	{
+		const float y = ringY[ring];
+		dd->vertex(minx, y, minz, col);
+		dd->vertex(maxx, y, minz, col);
+		dd->vertex(maxx, y, minz, col);
+		dd->vertex(maxx, y, maxz, col);
+		dd->vertex(maxx, y, maxz, col);
+		dd->vertex(minx, y, maxz, col);
+		dd->vertex(minx, y, maxz, col);
+		dd->vertex(minx, y, minz, col);
+	}
+}
+
+// Appends the six faces of an axis-aligned box as quads (four vertices per
+// face) to an already begun quad batch. fcol[0..5] gives one colour per face,
+// in the face order of the index table below: maxy face, miny face, maxx,
+// minx, maxz, minz.
+// No-op when 'dd' or 'fcol' is null (the colour array is read unconditionally
+// otherwise, so a null pointer would crash).
+void duAppendBox(struct duDebugDraw* dd, float minx, float miny, float minz,
+				 float maxx, float maxy, float maxz, const unsigned int* fcol)
+{
+	if (!dd || !fcol) return;
+	// The eight corners: indices 0..3 lie on miny, 4..7 on maxy.
+	const float verts[8*3] =
+	{
+		minx, miny, minz,
+		maxx, miny, minz,
+		maxx, miny, maxz,
+		minx, miny, maxz,
+		minx, maxy, minz,
+		maxx, maxy, minz,
+		maxx, maxy, maxz,
+		minx, maxy, maxz,
+	};
+	// Corner indices of each face, four per face.
+	static const unsigned char inds[6*4] =
+	{
+		7, 6, 5, 4,
+		0, 1, 2, 3,
+		1, 5, 6, 2,
+		3, 7, 4, 0,
+		2, 6, 7, 3,
+		0, 4, 5, 1,
+	};
+	
+	const unsigned char* in = inds;
+	for (int face = 0; face < 6; ++face)
+	{
+		for (int corner = 0; corner < 4; ++corner, ++in)
+			dd->vertex(&verts[*in*3], fcol[face]);
+	}
+}
+
+// Appends the triangles of a solid cylinder (elliptical when the X and Z
+// extents differ) to an already begun triangle batch: a fan for the cap at
+// miny, a fan for the cap at maxy, then two triangles per side segment.
+// The miny cap and the lower side vertices use 'col' scaled by 160 through
+// duMultCol; the maxy vertices use 'col' itself. No-op when 'dd' is null.
+void duAppendCylinder(struct duDebugDraw* dd, float minx, float miny, float minz,
+					  float maxx, float maxy, float maxz, unsigned int col)
+{
+	if (!dd) return;
+	
+	// Unit-circle table (cos, sin pairs), filled on the first call.
+	static const int NUM_SEG = 16;
+	static float dir[NUM_SEG*2];
+	static bool init = false;
+	if (!init)
+	{
+		init = true;
+		for (int s = 0; s < NUM_SEG; ++s)
+		{
+			const float angle = (float)s/(float)NUM_SEG*DU_PI*2;
+			dir[s*2] = cosf(angle);
+			dir[s*2+1] = sinf(angle);
+		}
+	}
+	
+	const unsigned int lowCol = duMultCol(col, 160);
+	
+	// Centre and radii of the footprint on the XZ plane.
+	const float cx = (maxx + minx)/2;
+	const float cz = (maxz + minz)/2;
+	const float rx = (maxx - minx)/2;
+	const float rz = (maxz - minz)/2;
+
+	// Cap at miny: fan around table entry 0, visiting (s-1, s).
+	for (int s = 2; s < NUM_SEG; ++s)
+	{
+		const int before = s-1;
+		dd->vertex(cx+dir[0]*rx, miny, cz+dir[1]*rz, lowCol);
+		dd->vertex(cx+dir[before*2+0]*rx, miny, cz+dir[before*2+1]*rz, lowCol);
+		dd->vertex(cx+dir[s*2+0]*rx, miny, cz+dir[s*2+1]*rz, lowCol);
+	}
+	// Cap at maxy: same fan with the last two vertices swapped.
+	for (int s = 2; s < NUM_SEG; ++s)
+	{
+		const int before = s-1;
+		dd->vertex(cx+dir[0]*rx, maxy, cz+dir[1]*rz, col);
+		dd->vertex(cx+dir[s*2+0]*rx, maxy, cz+dir[s*2+1]*rz, col);
+		dd->vertex(cx+dir[before*2+0]*rx, maxy, cz+dir[before*2+1]*rz, col);
+	}
+	// Side wall: two triangles per segment between 'prev' and 'cur'.
+	for (int cur = 0, prev = NUM_SEG-1; cur < NUM_SEG; prev = cur++)
+	{
+		const float curX = cx+dir[cur*2+0]*rx;
+		const float curZ = cz+dir[cur*2+1]*rz;
+		const float prevX = cx+dir[prev*2+0]*rx;
+		const float prevZ = cz+dir[prev*2+1]*rz;
+
+		dd->vertex(curX, miny, curZ, lowCol);
+		dd->vertex(prevX, miny, prevZ, lowCol);
+		dd->vertex(prevX, maxy, prevZ, col);
+
+		dd->vertex(curX, miny, curZ, lowCol);
+		dd->vertex(prevX, maxy, prevZ, col);
+		dd->vertex(curX, maxy, curZ, col);
+	}
+}
+
+
+// Evaluates a point on an arc at parameter 'u' and stores it in res[0..2].
+// X and Z are linear in u along (dx,dz); Y is linear along dy plus a
+// parabolic lift h*(1-(2u-1)^2), which is 0 at u=0 and u=1 and h at u=0.5.
+inline void evalArc(const float x0, const float y0, const float z0,
+					const float dx, const float dy, const float dz,
+					const float h, const float u, float* res)
+{
+	const float centred = u*2-1;
+	const float lift = h * (1-centred*centred);
+	res[0] = x0 + dx * u;
+	res[1] = y0 + dy * u + lift;
+	res[2] = z0 + dz * u;
+}
+
+
+// Stores the cross product v1 x v2 in dest[0..2].
+// dest is written component by component while v1/v2 are still being read,
+// so dest must not alias either input.
+inline void vcross(float* dest, const float* v1, const float* v2)
+{
+	dest[0] = v1[1]*v2[2] - v1[2]*v2[1];
+	dest[1] = v1[2]*v2[0] - v1[0]*v2[2];
+	dest[2] = v1[0]*v2[1] - v1[1]*v2[0]; 
+}
+
+// Scales v[0..2] in place to unit length.
+// A zero-length vector is left untouched: dividing by its length would
+// otherwise turn every component into NaN.
+inline void vnormalize(float* v)
+{
+	const float lenSqr = v[0]*v[0] + v[1]*v[1] + v[2]*v[2];
+	if (lenSqr <= 0.0f)
+		return;
+	float d = 1.0f / sqrtf(lenSqr);
+	v[0] *= d;
+	v[1] *= d;
+	v[2] *= d;
+}
+
+// Stores the component-wise difference v1 - v2 in dest[0..2].
+inline void vsub(float* dest, const float* v1, const float* v2)
+{
+	for (int axis = 0; axis < 3; ++axis)
+		dest[axis] = v1[axis]-v2[axis];
+}
+
+// Returns the squared Euclidean distance between the 3D points v1 and v2.
+inline float vdistSqr(const float* v1, const float* v2)
+{
+	const float delta[3] = { v1[0]-v2[0], v1[1]-v2[1], v1[2]-v2[2] };
+	return delta[0]*delta[0] + delta[1]*delta[1] + delta[2]*delta[2];
+}
+
+
+// Appends a two-stroke arrow head at 'p' for an arrow whose shaft runs from
+// p towards q. Each stroke starts at p and ends 's' along the shaft, offset
+// sideways by +/- s/3 along (0,1,0) x direction.
+// Nothing is emitted when 'dd' is null or p and q are closer than 0.001.
+// The sideways axis is not normalised, so the head narrows as the shaft
+// approaches the Y axis and collapses onto the shaft when parallel to it.
+void appendArrowHead(struct duDebugDraw* dd, const float* p, const float* q,
+					 const float s, unsigned int col)
+{
+	const float eps = 0.001f;
+	if (!dd) return;
+	if (vdistSqr(p,q) < eps*eps) return;
+
+	// az: unit direction of the shaft; ax: sideways axis.
+	// (A second, re-orthogonalised 'up' axis used to be computed here but was
+	// never read, and normalising it divided by zero for vertical shafts.)
+	const float up[3] = {0,1,0};
+	float ax[3], az[3];
+	vsub(az, q, p);
+	vnormalize(az);
+	vcross(ax, up, az);
+
+	dd->vertex(p, col);
+	dd->vertex(p[0]+az[0]*s+ax[0]*s/3, p[1]+az[1]*s+ax[1]*s/3, p[2]+az[2]*s+ax[2]*s/3, col);
+
+	dd->vertex(p, col);
+	dd->vertex(p[0]+az[0]*s-ax[0]*s/3, p[1]+az[1]*s-ax[1]*s/3, p[2]+az[2]*s-ax[2]*s/3, col);
+}
+
+// Appends an arc from (x0,y0,z0) to (x1,y1,z1) as eight line segments to an
+// already begun line batch. The arc's lift is h times the straight-line
+// distance between the end points, and it is sampled over the parameter range
+// [PAD, 1-PAD] rather than [0, 1]. An arrow head of size as0 / as1 is added
+// at the start / end when that size exceeds 0.001. No-op when 'dd' is null.
+void duAppendArc(struct duDebugDraw* dd, const float x0, const float y0, const float z0,
+				 const float x1, const float y1, const float z1, const float h,
+				 const float as0, const float as1, unsigned int col)
+{
+	if (!dd) return;
+	static const int NUM_ARC_PTS = 8;
+	static const float PAD = 0.05f;
+	static const float ARC_PTS_SCALE = (1.0f-PAD*2) / (float)NUM_ARC_PTS;
+	const float dx = x1 - x0;
+	const float dy = y1 - y0;
+	const float dz = z1 - z0;
+	const float len = sqrtf(dx*dx + dy*dy + dz*dz);
+	const float lift = len*h;
+
+	// Walk the samples, joining each one to its predecessor.
+	float last[3];
+	evalArc(x0,y0,z0, dx,dy,dz, lift, PAD, last);
+	for (int step = 1; step <= NUM_ARC_PTS; ++step)
+	{
+		float cur[3];
+		evalArc(x0,y0,z0, dx,dy,dz, lift, PAD + step * ARC_PTS_SCALE, cur);
+		dd->vertex(last[0],last[1],last[2], col);
+		dd->vertex(cur[0],cur[1],cur[2], col);
+		for (int axis = 0; axis < 3; ++axis)
+			last[axis] = cur[axis];
+	}
+	
+	// Arrow head at the start, pointing along the first bit of the arc.
+	if (as0 > 0.001f)
+	{
+		float tip[3], toward[3];
+		evalArc(x0,y0,z0, dx,dy,dz, lift, PAD, tip);
+		evalArc(x0,y0,z0, dx,dy,dz, lift, PAD+0.05f, toward);
+		appendArrowHead(dd, tip, toward, as0, col);
+	}
+
+	// Arrow head at the end, pointing back along the last bit of the arc.
+	if (as1 > 0.001f)
+	{
+		float tip[3], toward[3];
+		evalArc(x0,y0,z0, dx,dy,dz, lift, 1-PAD, tip);
+		evalArc(x0,y0,z0, dx,dy,dz, lift, 1-(PAD+0.05f), toward);
+		appendArrowHead(dd, tip, toward, as1, col);
+	}
+}
+
+// Appends a straight segment from (x0,y0,z0) to (x1,y1,z1) to an already
+// begun line batch, plus an arrow head of size as0 at the first point and
+// as1 at the second whenever that size exceeds 0.001.
+// No-op when 'dd' is null.
+void duAppendArrow(struct duDebugDraw* dd, const float x0, const float y0, const float z0,
+				   const float x1, const float y1, const float z1,
+				   const float as0, const float as1, unsigned int col)
+{
+	if (!dd) return;
+
+	// Shaft.
+	dd->vertex(x0,y0,z0, col);
+	dd->vertex(x1,y1,z1, col);
+	
+	// Heads, each anchored at its own end and aimed at the opposite one.
+	const float from[3] = {x0,y0,z0};
+	const float to[3] = {x1,y1,z1};
+	if (as0 > 0.001f)
+	{
+		appendArrowHead(dd, from, to, as0, col);
+	}
+	if (as1 > 0.001f)
+	{
+		appendArrowHead(dd, to, from, as1, col);
+	}
+}
+
+// Appends a circle of radius 'r' around (x,y,z), lying in the plane of
+// constant y, as NUM_SEG line segments to an already begun line batch.
+// No-op when 'dd' is null.
+void duAppendCircle(struct duDebugDraw* dd, const float x, const float y, const float z,
+					const float r, unsigned int col)
+{
+	if (!dd) return;
+	// Unit-circle table (cos, sin pairs), filled on the first call. It is sized
+	// from NUM_SEG so the two cannot drift apart if the segment count changes.
+	static const int NUM_SEG = 40;
+	static float dir[NUM_SEG*2];
+	static bool init = false;
+	if (!init)
+	{
+		init = true;
+		for (int i = 0; i < NUM_SEG; ++i)
+		{
+			const float a = (float)i/(float)NUM_SEG*DU_PI*2;
+			dir[i*2] = cosf(a);
+			dir[i*2+1] = sinf(a);
+		}
+	}
+	
+	// One segment from the previous table entry (j) to the current one (i).
+	for (int i = 0, j = NUM_SEG-1; i < NUM_SEG; j = i++)
+	{
+		dd->vertex(x+dir[j*2+0]*r, y, z+dir[j*2+1]*r, col);
+		dd->vertex(x+dir[i*2+0]*r, y, z+dir[i*2+1]*r, col);
+	}
+}
+
+// Appends three axis-aligned segments of half-length 's' crossing at (x,y,z)
+// to an already begun line batch, in the order X, Y, Z.
+// No-op when 'dd' is null.
+void duAppendCross(struct duDebugDraw* dd, const float x, const float y, const float z,
+				   const float s, unsigned int col)
+{
+	if (dd)
+	{
+		// Along X.
+		dd->vertex(x-s,y,z, col);
+		dd->vertex(x+s,y,z, col);
+		// Along Y.
+		dd->vertex(x,y-s,z, col);
+		dd->vertex(x,y+s,z, col);
+		// Along Z.
+		dd->vertex(x,y,z-s, col);
+		dd->vertex(x,y,z+s, col);
+	}
+}
+
+// Creates an empty display list with room for at least 'cap' vertices;
+// requests below 8 are raised to 8. Defaults: depth mask on, DU_DRAW_LINES
+// primitive, primitive size 1.
+duDisplayList::duDisplayList(int cap) :
+	m_pos(0),
+	m_color(0),
+	m_size(0),
+	m_cap(0),
+	m_depthMask(true),
+	m_prim(DU_DRAW_LINES),
+	m_primSize(1.0f)
+{
+	// The buffers start out null; resize() performs the first allocation.
+	resize(cap < 8 ? 8 : cap);
+}
+
+// Releases the position and colour arrays allocated by resize().
+duDisplayList::~duDisplayList()
+{
+	delete [] m_pos;
+	delete [] m_color;
+}
+
+// Reallocates both vertex arrays to hold 'cap' vertices, carrying over the
+// m_size vertices stored so far. 'cap' is expected to be at least m_size;
+// that is not checked here.
+void duDisplayList::resize(int cap)
+{
+	// Positions: three floats per vertex.
+	float* grownPos = new float[cap*3];
+	if (m_size != 0)
+	{
+		memcpy(grownPos, m_pos, sizeof(float)*3*m_size);
+	}
+	delete [] m_pos;
+	m_pos = grownPos;
+
+	// Colours: one packed value per vertex.
+	unsigned int* grownColor = new unsigned int[cap];
+	if (m_size != 0)
+	{
+		memcpy(grownColor, m_color, sizeof(unsigned int)*m_size);
+	}
+	delete [] m_color;
+	m_color = grownColor;
+	
+	m_cap = cap;
+}
+
+// Forgets all stored vertices; the allocated capacity is kept.
+void duDisplayList::clear()
+{
+	m_size = 0;
+}
+
+// Records the depth-mask state that draw() later forwards to the target.
+void duDisplayList::depthMask(bool state)
+{
+	m_depthMask = state;
+}
+
+// Starts recording a batch: drops any previously stored vertices and
+// remembers the primitive type and size for draw(). Because of the clear(),
+// the list only ever holds the most recently begun batch.
+void duDisplayList::begin(duDebugDrawPrimitives prim, float size)
+{
+	clear();
+	m_prim = prim;
+	m_primSize = size;
+}
+
+// Stores one vertex (position and colour), doubling the capacity first when
+// the list is about to fill up.
+void duDisplayList::vertex(const float x, const float y, const float z, unsigned int color)
+{
+	if (m_size+1 >= m_cap)
+	{
+		resize(m_cap*2);
+	}
+	const int slot = m_size;
+	m_pos[slot*3+0] = x;
+	m_pos[slot*3+1] = y;
+	m_pos[slot*3+2] = z;
+	m_color[slot] = color;
+	m_size = slot+1;
+}
+
+// Array form: stores the vertex at pos[0..2] by forwarding to the x/y/z overload.
+void duDisplayList::vertex(const float* pos, unsigned int color)
+{
+	vertex(pos[0],pos[1],pos[2],color);
+}
+
+// Ends recording of a batch; nothing needs to be done, the vertices stay stored.
+void duDisplayList::end()
+{
+}
+
+// Replays the recorded batch into 'dd': depth-mask state, begin with the
+// stored primitive type and size, every stored vertex in order, then end.
+// No-op when 'dd' is null or nothing has been recorded.
+void duDisplayList::draw(struct duDebugDraw* dd)
+{
+	if (!dd || !m_size)
+		return;
+
+	dd->depthMask(m_depthMask);
+	dd->begin(m_prim, m_primSize);
+	const float* pos = m_pos;
+	for (int v = 0; v < m_size; ++v, pos += 3)
+	{
+		dd->vertex(pos, m_color[v]);
+	}
+	dd->end();
+}
diff --git a/Engine/lib/recast/DebugUtils/Source/DetourDebugDraw.cpp b/Engine/lib/recast/DebugUtils/Source/DetourDebugDraw.cpp
new file mode 100644
index 000000000..d9b778327
--- /dev/null
+++ b/Engine/lib/recast/DebugUtils/Source/DetourDebugDraw.cpp
@@ -0,0 +1,868 @@
+//
+// Copyright (c) 2009-2010 Mikko Mononen memon@inside.org
+//
+// This software is provided 'as-is', without any express or implied
+// warranty.  In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source distribution.
+//
+
+#include <math.h>
+#include "DebugDraw.h"
+#include "DetourDebugDraw.h"
+#include "DetourNavMesh.h"
+#include "DetourCommon.h"
+#include "DetourNode.h"
+
+
+// Returns the SQUARED distance, measured on the XZ plane (index 1 of every
+// point is ignored), from 'pt' to the infinite line through 'p' and 'q'.
+// The projection parameter is not clamped to the segment. When p and q
+// coincide on XZ the result is the squared distance from pt to p.
+static float distancePtLine2d(const float* pt, const float* p, const float* q)
+{
+	// Line direction and the offset of pt from p.
+	const float dirX = q[0] - p[0];
+	const float dirZ = q[2] - p[2];
+	const float relX = pt[0] - p[0];
+	const float relZ = pt[2] - p[2];
+
+	// Parameter of the projection of pt onto the line.
+	const float dirLenSqr = dirX*dirX + dirZ*dirZ;
+	float t = dirX*relX + dirZ*relZ;
+	if (dirLenSqr != 0) t /= dirLenSqr;
+
+	// Vector from pt to its projection.
+	const float offX = p[0] + t*dirX - pt[0];
+	const float offZ = p[2] + t*dirZ - pt[2];
+	return offX*offX + offZ*offZ;
+}
+
+// Draws polygon edges of one tile as a single DU_DRAW_LINES batch of width 'linew'.
+//   inner == false: only edges whose neis[] entry is 0 are drawn, in 'col'.
+//   inner == true:  only edges whose neis[] entry is non-zero are drawn. Edges
+//                   flagged DT_EXT_LINK are white when a link exists for that
+//                   edge and black otherwise; all other edges use a fixed
+//                   dark colour ('col' is not used in this mode).
+// Rather than drawing the polygon edge itself, the function draws the detail
+// triangle edges that lie on it. Off-mesh connection polygons are skipped.
+// 'dd' is dereferenced unconditionally; callers must pass a valid pointer.
+static void drawPolyBoundaries(duDebugDraw* dd, const dtMeshTile* tile,
+							   const unsigned int col, const float linew,
+							   bool inner)
+{
+	// Squared-distance tolerance (distancePtLine2d returns a squared distance).
+	static const float thr = 0.01f*0.01f;
+
+	dd->begin(DU_DRAW_LINES, linew);
+
+	for (int i = 0; i < tile->header->polyCount; ++i)
+	{
+		const dtPoly* p = &tile->polys[i];
+		
+		if (p->getType() == DT_POLYTYPE_OFFMESH_CONNECTION) continue;
+		
+		const dtPolyDetail* pd = &tile->detailMeshes[i];
+		
+		for (int j = 0, nj = (int)p->vertCount; j < nj; ++j)
+		{
+			// Pick the colour for edge j, or skip the edge altogether.
+			unsigned int c = col;
+			if (inner)
+			{
+				if (p->neis[j] == 0) continue;
+				if (p->neis[j] & DT_EXT_LINK)
+				{
+					// Look for a link attached to this edge.
+					bool con = false;
+					for (unsigned int k = p->firstLink; k != DT_NULL_LINK; k = tile->links[k].next)
+					{
+						if (tile->links[k].edge == j)
+						{
+							con = true;
+							break;
+						}
+					}
+					if (con)
+						c = duRGBA(255,255,255,48);
+					else
+						c = duRGBA(0,0,0,48);
+				}
+				else
+					c = duRGBA(0,48,64,32);
+			}
+			else
+			{
+				if (p->neis[j] != 0) continue;
+			}
+			
+			// End points of polygon edge j (wrapping to vertex 0 after the last one).
+			const float* v0 = &tile->verts[p->verts[j]*3];
+			const float* v1 = &tile->verts[p->verts[(j+1) % nj]*3];
+			
+			// Draw detail mesh edges which align with the actual poly edge.
+			// This is really slow.
+			for (int k = 0; k < pd->triCount; ++k)
+			{
+				// Four bytes per detail triangle: three vertex indices plus edge flags.
+				const unsigned char* t = &tile->detailTris[(pd->triBase+k)*4];
+				const float* tv[3];
+				for (int m = 0; m < 3; ++m)
+				{
+					// Indices below vertCount refer to the polygon's own vertices,
+					// the remainder to this polygon's detail vertices.
+					if (t[m] < p->vertCount)
+						tv[m] = &tile->verts[p->verts[t[m]]*3];
+					else
+						tv[m] = &tile->detailVerts[(pd->vertBase+(t[m]-p->vertCount))*3];
+				}
+				// Visit the triangle's edges as (n, m) = (2,0), (0,1), (1,2).
+				for (int m = 0, n = 2; m < 3; n=m++)
+				{
+					if (((t[3] >> (n*2)) & 0x3) == 0) continue;	// Skip inner detail edges.
+					// Keep the edge only if both end points lie on the polygon edge line (XZ test).
+					if (distancePtLine2d(tv[n],v0,v1) < thr &&
+						distancePtLine2d(tv[m],v0,v1) < thr)
+					{
+						dd->vertex(tv[n], c);
+						dd->vertex(tv[m], c);
+					}
+				}
+			}
+		}
+	}
+	dd->end();
+}
+
+// Draws one navmesh tile with depth writes disabled for the duration:
+//   1. the detail triangles of every regular polygon, filled;
+//   2. inner, then outer polygon boundaries;
+//   3. off-mesh connections, when DU_DRAWNAVMESH_OFFMESHCONS is set in 'flags';
+//   4. every tile vertex as a point.
+// 'query' may be null; when given, polygons reported by isInClosedList() are
+// highlighted. 'dd' is dereferenced unconditionally; callers must pass a
+// valid pointer.
+static void drawMeshTile(duDebugDraw* dd, const dtNavMesh& mesh, const dtNavMeshQuery* query,
+						 const dtMeshTile* tile, unsigned char flags)
+{
+	// Reference of the tile's first polygon; OR-ing in a polygon index gives that polygon's reference.
+	dtPolyRef base = mesh.getPolyRefBase(tile);
+
+	int tileNum = mesh.decodePolyIdTile(base);
+	
+	dd->depthMask(false);
+
+	// 1. Filled polygons.
+	dd->begin(DU_DRAW_TRIS);
+	for (int i = 0; i < tile->header->polyCount; ++i)
+	{
+		const dtPoly* p = &tile->polys[i];
+		if (p->getType() == DT_POLYTYPE_OFFMESH_CONNECTION)	// Skip off-mesh links.
+			continue;
+			
+		const dtPolyDetail* pd = &tile->detailMeshes[i];
+
+		// Colour priority: closed-list highlight, then per-tile colouring, then per-area colouring.
+		unsigned int col;
+		if (query && query->isInClosedList(base | (dtPolyRef)i))
+			col = duRGBA(255,196,0,64);
+		else
+		{
+			if (flags & DU_DRAWNAVMESH_COLOR_TILES)
+			{
+				col = duIntToCol(tileNum, 128);
+			}
+			else
+			{
+				if (p->getArea() == 0) // Treat zero area type as default.
+					col = duRGBA(0,192,255,64);
+				else
+					col = duIntToCol(p->getArea(), 64);
+			}
+		}
+		
+		for (int j = 0; j < pd->triCount; ++j)
+		{
+			// Four bytes per detail triangle; the first three are vertex indices.
+			const unsigned char* t = &tile->detailTris[(pd->triBase+j)*4];
+			for (int k = 0; k < 3; ++k)
+			{
+				// Indices below vertCount refer to the polygon's own vertices,
+				// the remainder to this polygon's detail vertices.
+				if (t[k] < p->vertCount)
+					dd->vertex(&tile->verts[p->verts[t[k]]*3], col);
+				else
+					dd->vertex(&tile->detailVerts[(pd->vertBase+t[k]-p->vertCount)*3], col);
+			}
+		}
+	}
+	dd->end();
+	
+	// Draw inter poly boundaries
+	drawPolyBoundaries(dd, tile, duRGBA(0,48,64,32), 1.5f, true);
+	
+	// Draw outer poly boundaries
+	drawPolyBoundaries(dd, tile, duRGBA(0,48,64,220), 2.5f, false);
+
+	// 3. Off-mesh connections.
+	if (flags & DU_DRAWNAVMESH_OFFMESHCONS)
+	{
+		dd->begin(DU_DRAW_LINES, 2.0f);
+		for (int i = 0; i < tile->header->polyCount; ++i)
+		{
+			const dtPoly* p = &tile->polys[i];
+			if (p->getType() != DT_POLYTYPE_OFFMESH_CONNECTION)	// Skip regular polys.
+				continue;
+			
+			unsigned int col, col2;
+			if (query && query->isInClosedList(base | (dtPolyRef)i))
+				col = duRGBA(255,196,0,220);
+			else
+				col = duDarkenCol(duIntToCol(p->getArea(), 220));
+			
+			// Connection record for this polygon, indexed relative to offMeshBase.
+			const dtOffMeshConnection* con = &tile->offMeshCons[i - tile->header->offMeshBase];
+			const float* va = &tile->verts[p->verts[0]*3];
+			const float* vb = &tile->verts[p->verts[1]*3];
+
+			// Check to see if start and end end-points have links.
+			bool startSet = false;
+			bool endSet = false;
+			for (unsigned int k = p->firstLink; k != DT_NULL_LINK; k = tile->links[k].next)
+			{
+				if (tile->links[k].edge == 0)
+					startSet = true;
+				if (tile->links[k].edge == 1)
+					endSet = true;
+			}
+			
+			// End points and their on-mesh locations.
+			// The circle around an end point turns red when that end has no link.
+			dd->vertex(va[0],va[1],va[2], col);
+			dd->vertex(con->pos[0],con->pos[1],con->pos[2], col);
+			col2 = startSet ? col : duRGBA(220,32,16,196);
+			duAppendCircle(dd, con->pos[0],con->pos[1]+0.1f,con->pos[2], con->rad, col2);
+
+			dd->vertex(vb[0],vb[1],vb[2], col);
+			dd->vertex(con->pos[3],con->pos[4],con->pos[5], col);
+			col2 = endSet ? col : duRGBA(220,32,16,196);
+			duAppendCircle(dd, con->pos[3],con->pos[4]+0.1f,con->pos[5], con->rad, col2);
+			
+			// End point vertices.
+			// Drawn as short vertical ticks of height 0.2 at each end.
+			dd->vertex(con->pos[0],con->pos[1],con->pos[2], duRGBA(0,48,64,196));
+			dd->vertex(con->pos[0],con->pos[1]+0.2f,con->pos[2], duRGBA(0,48,64,196));
+			
+			dd->vertex(con->pos[3],con->pos[4],con->pos[5], duRGBA(0,48,64,196));
+			dd->vertex(con->pos[3],con->pos[4]+0.2f,con->pos[5], duRGBA(0,48,64,196));
+			
+			// Connection arc.
+			// The start arrow head is drawn only when bit 0 of con->flags is set;
+			// the end arrow head is always drawn.
+			duAppendArc(dd, con->pos[0],con->pos[1],con->pos[2], con->pos[3],con->pos[4],con->pos[5], 0.25f,
+						(con->flags & 1) ? 0.6f : 0, 0.6f, col);
+		}
+		dd->end();
+	}
+	
+	// 4. Tile vertices as points.
+	const unsigned int vcol = duRGBA(0,0,0,196);
+	dd->begin(DU_DRAW_POINTS, 3.0f);
+	for (int i = 0; i < tile->header->vertCount; ++i)
+	{
+		const float* v = &tile->verts[i*3];
+		dd->vertex(v[0], v[1], v[2], vcol);
+	}
+	dd->end();
+
+	dd->depthMask(true);
+}
+
+// Draws every tile of 'mesh' that has a header, without any query
+// highlighting. 'flags' is passed through to the per-tile drawing.
+// No-op when 'dd' is null.
+void duDebugDrawNavMesh(duDebugDraw* dd, const dtNavMesh& mesh, unsigned char flags)
+{
+	if (!dd) return;
+	
+	for (int slot = 0; slot < mesh.getMaxTiles(); ++slot)
+	{
+		const dtMeshTile* tile = mesh.getTile(slot);
+		// Tile slots without a header hold no data.
+		if (tile->header)
+			drawMeshTile(dd, mesh, 0, tile, flags);
+	}
+}
+
+// Draws every tile of 'mesh' that has a header. 'query' is consulted for
+// closed-list highlighting only when DU_DRAWNAVMESH_CLOSEDLIST is set in
+// 'flags'; otherwise the tiles are drawn as if no query had been given.
+// No-op when 'dd' is null.
+void duDebugDrawNavMeshWithClosedList(struct duDebugDraw* dd, const dtNavMesh& mesh, const dtNavMeshQuery& query, unsigned char flags)
+{
+	if (!dd) return;
+
+	const dtNavMeshQuery* highlight = 0;
+	if (flags & DU_DRAWNAVMESH_CLOSEDLIST)
+		highlight = &query;
+	
+	for (int slot = 0; slot < mesh.getMaxTiles(); ++slot)
+	{
+		const dtMeshTile* tile = mesh.getTile(slot);
+		// Tile slots without a header hold no data.
+		if (tile->header)
+			drawMeshTile(dd, mesh, highlight, tile, flags);
+	}
+}
+
+// Visualises the nodes in the query's node pool: each node as a point raised
+// by 0.5 on Y, then a line from each node to its parent for nodes that have
+// one. No-op when 'dd' is null or the query has no node pool.
+void duDebugDrawNavMeshNodes(struct duDebugDraw* dd, const dtNavMeshQuery& query)
+{
+	if (!dd) return;
+	
+	const dtNodePool* pool = query.getNodePool();
+	if (!pool) return;
+
+	const float off = 0.5f;
+
+	// Pass 1: the nodes themselves, walking every hash bucket's chain.
+	dd->begin(DU_DRAW_POINTS, 4.0f);
+	for (int bucket = 0; bucket < pool->getHashSize(); ++bucket)
+	{
+		for (dtNodeIndex at = pool->getFirst(bucket); at != DT_NULL_IDX; at = pool->getNext(at))
+		{
+			// The pool is asked for index at+1, matching the original lookup.
+			const dtNode* node = pool->getNodeAtIdx(at+1);
+			if (node)
+				dd->vertex(node->pos[0],node->pos[1]+off,node->pos[2], duRGBA(255,192,0,255));
+		}
+	}
+	dd->end();
+
+	// Pass 2: node-to-parent links.
+	dd->begin(DU_DRAW_LINES, 2.0f);
+	for (int bucket = 0; bucket < pool->getHashSize(); ++bucket)
+	{
+		for (dtNodeIndex at = pool->getFirst(bucket); at != DT_NULL_IDX; at = pool->getNext(at))
+		{
+			const dtNode* node = pool->getNodeAtIdx(at+1);
+			// A zero pidx means the node has no parent.
+			if (!node || !node->pidx) continue;
+			const dtNode* parent = pool->getNodeAtIdx(node->pidx);
+			if (!parent) continue;
+			dd->vertex(node->pos[0],node->pos[1]+off,node->pos[2], duRGBA(255,192,0,128));
+			dd->vertex(parent->pos[0],parent->pos[1]+off,parent->pos[2], duRGBA(255,192,0,128));
+		}
+	}
+	dd->end();
+}
+
+
+// Draws the bounding boxes stored in a tile's BV tree as wire boxes in one
+// line batch. Node bounds are quantised; they are scaled by
+// 1/bvQuantFactor and offset by the tile's bmin. Nodes with a negative
+// index field are skipped. 'dd' must be valid.
+static void drawMeshTileBVTree(duDebugDraw* dd, const dtMeshTile* tile)
+{
+	// Size of one quantisation step.
+	const float cs = 1.0f / tile->header->bvQuantFactor;
+	const unsigned int boxCol = duRGBA(255,255,255,128);
+
+	dd->begin(DU_DRAW_LINES, 1.0f);
+	for (int i = 0; i < tile->header->bvNodeCount; ++i)
+	{
+		const dtBVNode* node = &tile->bvTree[i];
+		if (node->i >= 0) // Leaf indices are positive.
+		{
+			duAppendBoxWire(dd, tile->header->bmin[0] + node->bmin[0]*cs,
+							tile->header->bmin[1] + node->bmin[1]*cs,
+							tile->header->bmin[2] + node->bmin[2]*cs,
+							tile->header->bmin[0] + node->bmax[0]*cs,
+							tile->header->bmin[1] + node->bmax[1]*cs,
+							tile->header->bmin[2] + node->bmax[2]*cs,
+							boxCol);
+		}
+	}
+	dd->end();
+}
+
+// Draws the BV tree of every tile of 'mesh' that has a header.
+// No-op when 'dd' is null.
+void duDebugDrawNavMeshBVTree(duDebugDraw* dd, const dtNavMesh& mesh)
+{
+	if (!dd) return;
+	
+	for (int slot = 0; slot < mesh.getMaxTiles(); ++slot)
+	{
+		const dtMeshTile* tile = mesh.getTile(slot);
+		// Tile slots without a header hold no data.
+		if (tile->header)
+			drawMeshTileBVTree(dd, tile);
+	}
+}
+
+// Draws the portal edges of a tile: for each polygon edge whose neis[] entry
+// equals DT_EXT_LINK | side, a rectangle spanning +/- walkableClimb on Y is
+// drawn along the edge, nudged by 0.04 off the tile border. Only sides 0 and 4
+// (rectangle at constant X) and sides 2 and 6 (rectangle at constant Z) draw
+// anything; the other four side values are matched but emit no geometry.
+// 'dd' is dereferenced unconditionally; callers must pass a valid pointer.
+static void drawMeshTilePortal(duDebugDraw* dd, const dtMeshTile* tile)
+{
+	// Draw portals
+	const float padx = 0.04f;
+	const float pady = tile->header->walkableClimb;
+
+	dd->begin(DU_DRAW_LINES, 2.0f);
+
+	for (int side = 0; side < 8; ++side)
+	{
+		// Neighbour value that marks an external link on this side.
+		unsigned short m = DT_EXT_LINK | (unsigned short)side;
+		
+		for (int i = 0; i < tile->header->polyCount; ++i)
+		{
+			dtPoly* poly = &tile->polys[i];
+			
+			// Walk every edge of the polygon.
+			const int nv = poly->vertCount;
+			for (int j = 0; j < nv; ++j)
+			{
+				// Skip edges which do not point to the right side.
+				if (poly->neis[j] != m)
+					continue;
+				
+				// End points of edge j (wrapping to vertex 0 after the last one).
+				const float* va = &tile->verts[poly->verts[j]*3];
+				const float* vb = &tile->verts[poly->verts[(j+1) % nv]*3];
+				
+				if (side == 0 || side == 4)
+				{
+					unsigned int col = side == 0 ? duRGBA(128,0,0,128) : duRGBA(128,0,128,128);
+
+					// Rectangle at constant x, taken from va and shifted by padx.
+					const float x = va[0] + ((side == 0) ? -padx : padx);
+					
+					dd->vertex(x,va[1]-pady,va[2], col);
+					dd->vertex(x,va[1]+pady,va[2], col);
+
+					dd->vertex(x,va[1]+pady,va[2], col);
+					dd->vertex(x,vb[1]+pady,vb[2], col);
+
+					dd->vertex(x,vb[1]+pady,vb[2], col);
+					dd->vertex(x,vb[1]-pady,vb[2], col);
+
+					dd->vertex(x,vb[1]-pady,vb[2], col);
+					dd->vertex(x,va[1]-pady,va[2], col);
+				}
+				else if (side == 2 || side == 6)
+				{
+					unsigned int col = side == 2 ? duRGBA(0,128,0,128) : duRGBA(0,128,128,128);
+
+					// Rectangle at constant z, taken from va and shifted by padx.
+					const float z = va[2] + ((side == 2) ? -padx : padx);
+					
+					dd->vertex(va[0],va[1]-pady,z, col);
+					dd->vertex(va[0],va[1]+pady,z, col);
+					
+					dd->vertex(va[0],va[1]+pady,z, col);
+					dd->vertex(vb[0],vb[1]+pady,z, col);
+					
+					dd->vertex(vb[0],vb[1]+pady,z, col);
+					dd->vertex(vb[0],vb[1]-pady,z, col);
+					
+					dd->vertex(vb[0],vb[1]-pady,z, col);
+					dd->vertex(va[0],va[1]-pady,z, col);
+				}
+
+			}
+		}
+	}
+	
+	dd->end();
+}
+
+// Draws the portal edges of every tile of 'mesh' that has a header.
+// No-op when 'dd' is null.
+void duDebugDrawNavMeshPortals(duDebugDraw* dd, const dtNavMesh& mesh)
+{
+	if (!dd) return;
+	
+	for (int slot = 0; slot < mesh.getMaxTiles(); ++slot)
+	{
+		const dtMeshTile* tile = mesh.getTile(slot);
+		// Tile slots without a header hold no data.
+		if (tile->header)
+			drawMeshTilePortal(dd, tile);
+	}
+}
+
+// Draws, in colour 'col', every polygon of 'mesh' whose flags share at least
+// one bit with 'polyFlags'. No-op when 'dd' is null.
+void duDebugDrawNavMeshPolysWithFlags(struct duDebugDraw* dd, const dtNavMesh& mesh,
+									  const unsigned short polyFlags, const unsigned int col)
+{
+	if (!dd) return;
+	
+	for (int slot = 0; slot < mesh.getMaxTiles(); ++slot)
+	{
+		const dtMeshTile* tile = mesh.getTile(slot);
+		if (!tile->header) continue;
+
+		// Reference of the tile's first polygon; the polygon index is OR-ed in below.
+		const dtPolyRef base = mesh.getPolyRefBase(tile);
+		for (int polyIdx = 0; polyIdx < tile->header->polyCount; ++polyIdx)
+		{
+			const dtPoly* poly = &tile->polys[polyIdx];
+			if ((poly->flags & polyFlags) != 0)
+				duDebugDrawNavMeshPoly(dd, mesh, base|(dtPolyRef)polyIdx, col);
+		}
+	}
+}
+
+// Draws the single polygon identified by 'ref', with depth writes disabled
+// while drawing. An off-mesh connection is drawn as an arc; any other polygon
+// as its filled detail triangles. The top byte of 'col' is replaced by 64, so
+// the caller's alpha is ignored.
+// No-op when 'dd' is null or 'ref' cannot be resolved to a tile and polygon.
+void duDebugDrawNavMeshPoly(duDebugDraw* dd, const dtNavMesh& mesh, dtPolyRef ref, const unsigned int col)
+{
+	if (!dd) return;
+	
+	const dtMeshTile* tile = 0;
+	const dtPoly* poly = 0;
+	if (dtStatusFailed(mesh.getTileAndPolyByRef(ref, &tile, &poly)))
+		return;
+	
+	dd->depthMask(false);
+	
+	// Keep the low 24 bits of the colour and force the top byte to 64.
+	const unsigned int c = (col & 0x00ffffff) | (64 << 24);
+	// Index of the polygon within its tile.
+	const unsigned int ip = (unsigned int)(poly - tile->polys);
+
+	if (poly->getType() == DT_POLYTYPE_OFFMESH_CONNECTION)
+	{
+		// Connection record for this polygon, indexed relative to offMeshBase.
+		dtOffMeshConnection* con = &tile->offMeshCons[ip - tile->header->offMeshBase];
+
+		dd->begin(DU_DRAW_LINES, 2.0f);
+
+		// Connection arc.
+		// The start arrow head is drawn only when bit 0 of con->flags is set.
+		duAppendArc(dd, con->pos[0],con->pos[1],con->pos[2], con->pos[3],con->pos[4],con->pos[5], 0.25f,
+					(con->flags & 1) ? 0.6f : 0, 0.6f, c);
+		
+		dd->end();
+	}
+	else
+	{
+		const dtPolyDetail* pd = &tile->detailMeshes[ip];
+
+		dd->begin(DU_DRAW_TRIS);
+		for (int i = 0; i < pd->triCount; ++i)
+		{
+			// Four bytes per detail triangle; the first three are vertex indices.
+			const unsigned char* t = &tile->detailTris[(pd->triBase+i)*4];
+			for (int j = 0; j < 3; ++j)
+			{
+				// Indices below vertCount refer to the polygon's own vertices,
+				// the remainder to this polygon's detail vertices.
+				if (t[j] < poly->vertCount)
+					dd->vertex(&tile->verts[poly->verts[t[j]]*3], c);
+				else
+					dd->vertex(&tile->detailVerts[(pd->vertBase+t[j]-poly->vertCount)*3], c);
+			}
+		}
+		dd->end();
+	}
+	
+	dd->depthMask(true);
+
+}
+
+// Draws, in white, the cell borders of a tile-cache layer that are flagged in
+// the upper four bits of layer.cons[] (bit dir+4 for direction dir). Each
+// segment is placed at height (cell height + 2) * ch above the layer's bmin.
+// Cells whose height is 0xff are skipped. 'dd' must be valid.
+static void debugDrawTileCachePortals(struct duDebugDraw* dd, const dtTileCacheLayer& layer, const float cs, const float ch)
+{
+	const int w = (int)layer.header->width;
+	const int h = (int)layer.header->height;
+	const float* bmin = layer.header->bmin;
+
+	const unsigned int portalCol = duRGBA(255,255,255,255);
+	
+	// Per direction: (x,z) cell-corner offsets of the segment's two end points.
+	const int segs[4*4] = {0,0,0,1, 0,1,1,1, 1,1,1,0, 1,0,0,0};
+	
+	dd->begin(DU_DRAW_LINES, 2.0f);
+	for (int y = 0; y < h; ++y)
+	{
+		for (int x = 0; x < w; ++x)
+		{
+			const int cell = x+y*w;
+			const int cellH = (int)layer.heights[cell];
+			if (cellH == 0xff) continue;
+			
+			for (int dir = 0; dir < 4; ++dir)
+			{
+				if (!(layer.cons[cell] & (1<<(dir+4))))
+					continue;
+
+				const int* seg = &segs[dir*4];
+				// Both end points share the same height.
+				const float py = bmin[1] + (cellH+2)*ch;
+				dd->vertex(bmin[0] + (x+seg[0])*cs, py, bmin[2] + (y+seg[1])*cs, portalCol);
+				dd->vertex(bmin[0] + (x+seg[2])*cs, py, bmin[2] + (y+seg[3])*cs, portalCol);
+			}
+		}
+	}
+	dd->end();
+}
+
+// Draws a tile-cache layer coloured by area id: a wire box around the layer's
+// used bounds, one quad per cell at height (cell height + 1) * ch, then the
+// layer's portal edges. 'cs' is the cell size on XZ and 'ch' the height step.
+// Cells whose height is 0xff are skipped.
+// No-op when 'dd' is null, in line with the other public duDebugDraw*
+// functions (previously a null 'dd' was dereferenced here).
+void duDebugDrawTileCacheLayerAreas(struct duDebugDraw* dd, const dtTileCacheLayer& layer, const float cs, const float ch)
+{
+	if (!dd) return;
+
+	const int w = (int)layer.header->width;
+	const int h = (int)layer.header->height;
+	const float* bmin = layer.header->bmin;
+	const float* bmax = layer.header->bmax;
+	const int idx = layer.header->tlayer;
+	
+	// Base colour derived from the layer index.
+	unsigned int color = duIntToCol(idx+1, 255);
+	
+	// Layer bounds
+	float lbmin[3], lbmax[3];
+	lbmin[0] = bmin[0] + layer.header->minx*cs;
+	lbmin[1] = bmin[1];
+	lbmin[2] = bmin[2] + layer.header->miny*cs;
+	lbmax[0] = bmin[0] + (layer.header->maxx+1)*cs;
+	lbmax[1] = bmax[1];
+	lbmax[2] = bmin[2] + (layer.header->maxy+1)*cs;
+	duDebugDrawBoxWire(dd, lbmin[0],lbmin[1],lbmin[2], lbmax[0],lbmax[1],lbmax[2], duTransCol(color,128), 2.0f);
+	
+	// Layer height
+	dd->begin(DU_DRAW_QUADS);
+	for (int y = 0; y < h; ++y)
+	{
+		for (int x = 0; x < w; ++x)
+		{
+			const int lidx = x+y*w;
+			const int lh = (int)layer.heights[lidx];
+			if (lh == 0xff) continue;
+			const unsigned char area = layer.areas[lidx];
+			
+			// Blend the layer colour with a colour chosen by area id;
+			// ids 63 and 0 get fixed colours, the rest come from duIntToCol.
+			unsigned int col;
+			if (area == 63)
+				col = duLerpCol(color, duRGBA(0,192,255,64), 32);
+			else if (area == 0)
+				col = duLerpCol(color, duRGBA(0,0,0,64), 32);
+			else
+				col = duLerpCol(color, duIntToCol(area, 255), 32);
+			
+			// Corner of the cell, raised one height step above the stored height.
+			const float fx = bmin[0] + x*cs;
+			const float fy = bmin[1] + (lh+1)*ch;
+			const float fz = bmin[2] + y*cs;
+			
+			dd->vertex(fx, fy, fz, col);
+			dd->vertex(fx, fy, fz+cs, col);
+			dd->vertex(fx+cs, fy, fz+cs, col);
+			dd->vertex(fx+cs, fy, fz, col);
+		}
+	}
+	dd->end();
+	
+	debugDrawTileCachePortals(dd, layer, cs, ch);
+}
+
+void duDebugDrawTileCacheLayerRegions(struct duDebugDraw* dd, const dtTileCacheLayer& layer, const float cs, const float ch)
+{ // Draws the layer's bounding box, one region-tinted quad per occupied cell, then the layer's portal edges.
+	const int w = (int)layer.header->width;
+	const int h = (int)layer.header->height;
+	const float* bmin = layer.header->bmin;
+	const float* bmax = layer.header->bmax;
+	const int idx = layer.header->tlayer;
+	// Base color is derived from the layer index stored in the header.
+	unsigned int color = duIntToCol(idx+1, 255);
+	
+	// Layer bounds
+	float lbmin[3], lbmax[3];
+	lbmin[0] = bmin[0] + layer.header->minx*cs;
+	lbmin[1] = bmin[1];
+	lbmin[2] = bmin[2] + layer.header->miny*cs;
+	lbmax[0] = bmin[0] + (layer.header->maxx+1)*cs;
+	lbmax[1] = bmax[1];
+	lbmax[2] = bmin[2] + (layer.header->maxy+1)*cs;
+	duDebugDrawBoxWire(dd, lbmin[0],lbmin[1],lbmin[2], lbmax[0],lbmax[1],lbmax[2], duTransCol(color,128), 2.0f);
+	
+	// Layer height
+	dd->begin(DU_DRAW_QUADS);
+	for (int y = 0; y < h; ++y)
+	{
+		for (int x = 0; x < w; ++x)
+		{
+			const int lidx = x+y*w;
+			const int lh = (int)layer.heights[lidx];
+			if (lh == 0xff) continue; // 0xff height: cell is skipped (presumably an empty cell -- confirm)
+			const unsigned char reg = layer.regs[lidx];
+			// Blend the layer color toward a color derived from the cell's region id.
+			unsigned int col = duLerpCol(color, duIntToCol(reg, 255), 192);
+			// Quad corner in world units, placed one height unit above the cell height.
+			const float fx = bmin[0] + x*cs;
+			const float fy = bmin[1] + (lh+1)*ch;
+			const float fz = bmin[2] + y*cs;
+			
+			dd->vertex(fx, fy, fz, col);
+			dd->vertex(fx, fy, fz+cs, col);
+			dd->vertex(fx+cs, fy, fz+cs, col);
+			dd->vertex(fx+cs, fy, fz, col);
+		}
+	}
+	dd->end();
+	
+	debugDrawTileCachePortals(dd, layer, cs, ch);
+}
+
+
+
+
+/*struct dtTileCacheContour
+{
+	int nverts;
+	unsigned char* verts;
+	unsigned char reg;
+	unsigned char area;
+};
+
+struct dtTileCacheContourSet
+{
+	int nconts;
+	dtTileCacheContour* conts;
+};*/
+
+void duDebugDrawTileCacheContours(duDebugDraw* dd, const struct dtTileCacheContourSet& lcset,
+								  const float* orig, const float cs, const float ch)
+{ // Draws every contour of lcset as arrows between consecutive vertices, then draws the vertices as points.
+	if (!dd) return;
+	
+	const unsigned char a = 255;// (unsigned char)(alpha*255.0f);
+	// {dx,dz} step per direction index, used to draw the short portal direction marker.
+	const int offs[2*4] = {-1,0, 0,1, 1,0, 0,-1};
+	
+	dd->begin(DU_DRAW_LINES, 2.0f);
+	
+	for (int i = 0; i < lcset.nconts; ++i)
+	{
+		const dtTileCacheContour& c = lcset.conts[i];
+		unsigned int color = 0;
+		// Contour color is derived from the contour's index in the set.
+		color = duIntToCol(i, a);
+		
+		for (int j = 0; j < c.nverts; ++j)
+		{
+			const int k = (j+1) % c.nverts; // next vertex, wrapping to close the loop
+			const unsigned char* va = &c.verts[j*4];
+			const unsigned char* vb = &c.verts[k*4];
+			const float ax = orig[0] + va[0]*cs;
+			const float ay = orig[1] + (va[1]+1+(i&1))*ch; // (i&1): odd-indexed contours are drawn one height unit higher
+			const float az = orig[2] + va[2]*cs;
+			const float bx = orig[0] + vb[0]*cs;
+			const float by = orig[1] + (vb[1]+1+(i&1))*ch;
+			const float bz = orig[2] + vb[2]*cs;
+			unsigned int col = color;
+			if ((va[3] & 0xf) != 0xf)
+			{
+				// Portal segment
+				col = duRGBA(255,255,255,128);
+				int d = va[3] & 0xf;
+				// Marker starts at the segment midpoint...
+				const float cx = (ax+bx)*0.5f;
+				const float cy = (ay+by)*0.5f;
+				const float cz = (az+bz)*0.5f;
+				// ...and extends two cells along direction d.
+				const float dx = cx + offs[d*2+0]*2*cs;
+				const float dy = cy;
+				const float dz = cz + offs[d*2+1]*2*cs;
+				
+				dd->vertex(cx,cy,cz,duRGBA(255,0,0,255));
+				dd->vertex(dx,dy,dz,duRGBA(255,0,0,255));
+			}
+			
+			duAppendArrow(dd, ax,ay,az, bx,by,bz, 0.0f, cs*0.5f, col);
+		}
+	}
+	dd->end();
+	
+	dd->begin(DU_DRAW_POINTS, 4.0f);	
+	// Second pass: one point per contour vertex.
+	for (int i = 0; i < lcset.nconts; ++i)
+	{
+		const dtTileCacheContour& c = lcset.conts[i];
+		unsigned int color = 0;
+		
+		for (int j = 0; j < c.nverts; ++j)
+		{
+			const unsigned char* va = &c.verts[j*4];
+			
+			color = duDarkenCol(duIntToCol(i, a));
+			if (va[3] & 0x80)
+			{
+				// Border vertex
+				color = duRGBA(255,0,0,255);
+			}
+			
+			float fx = orig[0] + va[0]*cs;
+			float fy = orig[1] + (va[1]+1+(i&1))*ch;
+			float fz = orig[2] + va[2]*cs;
+			dd->vertex(fx,fy,fz, color);
+		}
+	}
+	dd->end();
+}
+
+void duDebugDrawTileCachePolyMesh(duDebugDraw* dd, const struct dtTileCachePolyMesh& lmesh,
+								  const float* orig, const float cs, const float ch)
+{ // Draws the polygon mesh in four passes: filled polygons, neighbour edges, boundary edges, then vertices.
+	if (!dd) return;
+	
+	const int nvp = lmesh.nvp;
+	// {dx,dz} step per direction index, used to draw the short portal direction marker.
+	const int offs[2*4] = {-1,0, 0,1, 1,0, 0,-1};
+	
+	dd->begin(DU_DRAW_TRIS);
+	
+	for (int i = 0; i < lmesh.npolys; ++i)
+	{
+		const unsigned short* p = &lmesh.polys[i*nvp*2]; // each polygon record holds 2*nvp shorts
+		
+		unsigned int color;
+		if (lmesh.areas[i] == DT_TILECACHE_WALKABLE_AREA)
+			color = duRGBA(0,192,255,64);
+		else if (lmesh.areas[i] == DT_TILECACHE_NULL_AREA)
+			color = duRGBA(0,0,0,64);
+		else
+			color = duIntToCol(lmesh.areas[i], 255);
+		// Triangulate as a fan around vertex 0: (p[0], p[j-1], p[j]).
+		unsigned short vi[3];
+		for (int j = 2; j < nvp; ++j)
+		{
+			if (p[j] == DT_TILECACHE_NULL_IDX) break; // end of this polygon's vertex list
+			vi[0] = p[0];
+			vi[1] = p[j-1];
+			vi[2] = p[j];
+			for (int k = 0; k < 3; ++k)
+			{
+				const unsigned short* v = &lmesh.verts[vi[k]*3];
+				const float x = orig[0] + v[0]*cs;
+				const float y = orig[1] + (v[1]+1)*ch;
+				const float z = orig[2] + v[2]*cs;
+				dd->vertex(x,y,z, color);
+			}
+		}
+	}
+	dd->end();
+	
+	// Draw neighbours edges
+	const unsigned int coln = duRGBA(0,48,64,32);
+	dd->begin(DU_DRAW_LINES, 1.5f);
+	for (int i = 0; i < lmesh.npolys; ++i)
+	{
+		const unsigned short* p = &lmesh.polys[i*nvp*2];
+		for (int j = 0; j < nvp; ++j)
+		{
+			if (p[j] == DT_TILECACHE_NULL_IDX) break;
+			if (p[nvp+j] & 0x8000) continue; // second half of the record: entries with bit 0x8000 set are handled by the boundary pass
+			const int nj = (j+1 >= nvp || p[j+1] == DT_TILECACHE_NULL_IDX) ? 0 : j+1; 
+			int vi[2] = {p[j], p[nj]};
+			
+			for (int k = 0; k < 2; ++k)
+			{
+				const unsigned short* v = &lmesh.verts[vi[k]*3];
+				const float x = orig[0] + v[0]*cs;
+				const float y = orig[1] + (v[1]+1)*ch + 0.1f; // +0.1f lifts the line slightly above the filled polygon
+				const float z = orig[2] + v[2]*cs;
+				dd->vertex(x, y, z, coln);
+			}
+		}
+	}
+	dd->end();
+	
+	// Draw boundary edges
+	const unsigned int colb = duRGBA(0,48,64,220);
+	dd->begin(DU_DRAW_LINES, 2.5f);
+	for (int i = 0; i < lmesh.npolys; ++i)
+	{
+		const unsigned short* p = &lmesh.polys[i*nvp*2];
+		for (int j = 0; j < nvp; ++j)
+		{
+			if (p[j] == DT_TILECACHE_NULL_IDX) break;
+			if ((p[nvp+j] & 0x8000) == 0) continue; // only edges flagged with bit 0x8000
+			const int nj = (j+1 >= nvp || p[j+1] == DT_TILECACHE_NULL_IDX) ? 0 : j+1; 
+			int vi[2] = {p[j], p[nj]};
+			
+			unsigned int col = colb;
+			if ((p[nvp+j] & 0xf) != 0xf)
+			{
+				const unsigned short* va = &lmesh.verts[vi[0]*3];
+				const unsigned short* vb = &lmesh.verts[vi[1]*3];
+				// Low nibble other than 0xf: draw a red direction marker from the edge midpoint.
+				const float ax = orig[0] + va[0]*cs;
+				const float ay = orig[1] + (va[1]+1+(i&1))*ch;
+				const float az = orig[2] + va[2]*cs;
+				const float bx = orig[0] + vb[0]*cs;
+				const float by = orig[1] + (vb[1]+1+(i&1))*ch;
+				const float bz = orig[2] + vb[2]*cs;
+				
+				const float cx = (ax+bx)*0.5f;
+				const float cy = (ay+by)*0.5f;
+				const float cz = (az+bz)*0.5f;
+				
+				int d = p[nvp+j] & 0xf;
+				// Marker extends two cells along direction d.
+				const float dx = cx + offs[d*2+0]*2*cs;
+				const float dy = cy;
+				const float dz = cz + offs[d*2+1]*2*cs;
+				
+				dd->vertex(cx,cy,cz,duRGBA(255,0,0,255));
+				dd->vertex(dx,dy,dz,duRGBA(255,0,0,255));
+				
+				col = duRGBA(255,255,255,128);
+			}
+			
+			for (int k = 0; k < 2; ++k)
+			{
+				const unsigned short* v = &lmesh.verts[vi[k]*3];
+				const float x = orig[0] + v[0]*cs;
+				const float y = orig[1] + (v[1]+1)*ch + 0.1f;
+				const float z = orig[2] + v[2]*cs;
+				dd->vertex(x, y, z, col);
+			}
+		}
+	}
+	dd->end();
+	// Final pass: one point per mesh vertex.
+	dd->begin(DU_DRAW_POINTS, 3.0f);
+	const unsigned int colv = duRGBA(0,0,0,220);
+	for (int i = 0; i < lmesh.nverts; ++i)
+	{
+		const unsigned short* v = &lmesh.verts[i*3];
+		const float x = orig[0] + v[0]*cs;
+		const float y = orig[1] + (v[1]+1)*ch + 0.1f;
+		const float z = orig[2] + v[2]*cs;
+		dd->vertex(x,y,z, colv);
+	}
+	dd->end();
+}
+
+
+
diff --git a/Engine/lib/recast/DebugUtils/Source/RecastDebugDraw.cpp b/Engine/lib/recast/DebugUtils/Source/RecastDebugDraw.cpp
new file mode 100644
index 000000000..82050bde0
--- /dev/null
+++ b/Engine/lib/recast/DebugUtils/Source/RecastDebugDraw.cpp
@@ -0,0 +1,1062 @@
+//
+// Copyright (c) 2009-2010 Mikko Mononen memon@inside.org
+//
+// This software is provided 'as-is', without any express or implied
+// warranty.  In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source distribution.
+//
+
+#define _USE_MATH_DEFINES
+#include <math.h>
+#include "DebugDraw.h"
+#include "RecastDebugDraw.h"
+#include "Recast.h"
+
+void duDebugDrawTriMesh(duDebugDraw* dd, const float* verts, int /*nverts*/,
+						const int* tris, const float* normals, int ntris,
+						const unsigned char* flags, const float texScale)
+{ // Draws a textured, normal-shaded triangle mesh; when flags is given, triangles whose flag is 0 are tinted with the unwalkable color.
+	if (!dd) return;
+	if (!verts) return;
+	if (!tris) return;
+	if (!normals) return;
+
+	float uva[2];
+	float uvb[2];
+	float uvc[2];
+
+	const unsigned int unwalkable = duRGBA(192,128,0,255);
+
+	dd->texture(true);
+	// i advances by 3: it indexes tris (3 indices per triangle) and normals (3 floats per triangle) alike.
+	dd->begin(DU_DRAW_TRIS);
+	for (int i = 0; i < ntris*3; i += 3)
+	{
+		const float* norm = &normals[i];
+		unsigned int color;
+		unsigned char a = (unsigned char)(220*(2+norm[0]+norm[1])/4); // grey level computed from the normal's x and y components
+		if (flags && !flags[i/3])
+			color = duLerpCol(duRGBA(a,a,a,255), unwalkable, 64);
+		else
+			color = duRGBA(a,a,a,255);
+
+		const float* va = &verts[tris[i+0]*3];
+		const float* vb = &verts[tris[i+1]*3];
+		const float* vc = &verts[tris[i+2]*3];
+		// Find the normal's dominant axis; texture coordinates then come from the other two axes.
+		int ax = 0, ay = 0;
+		if (rcAbs(norm[1]) > rcAbs(norm[ax]))
+			ax = 1;
+		if (rcAbs(norm[2]) > rcAbs(norm[ax]))
+			ax = 2;
+		ax = (1<<ax)&3; // +1 mod 3
+		ay = (1<<ax)&3; // +1 mod 3
+
+		uva[0] = va[ax]*texScale;
+		uva[1] = va[ay]*texScale;
+		uvb[0] = vb[ax]*texScale;
+		uvb[1] = vb[ay]*texScale;
+		uvc[0] = vc[ax]*texScale;
+		uvc[1] = vc[ay]*texScale;
+		
+		dd->vertex(va, color, uva);
+		dd->vertex(vb, color, uvb);
+		dd->vertex(vc, color, uvc);
+	}
+	dd->end();
+	dd->texture(false);
+}
+
+void duDebugDrawTriMeshSlope(duDebugDraw* dd, const float* verts, int /*nverts*/,
+							 const int* tris, const float* normals, int ntris,
+							 const float walkableSlopeAngle, const float texScale)
+{ // Draws a textured, normal-shaded triangle mesh; triangles steeper than walkableSlopeAngle are tinted with the unwalkable color.
+	if (!dd) return;
+	if (!verts) return;
+	if (!tris) return;
+	if (!normals) return;
+	// The angle is converted from degrees to radians; a normal's y below this cosine means the slope exceeds the angle.
+	const float walkableThr = cosf(walkableSlopeAngle/180.0f*DU_PI);
+	
+	float uva[2];
+	float uvb[2];
+	float uvc[2];
+	
+	dd->texture(true);
+
+	const unsigned int unwalkable = duRGBA(192,128,0,255);
+	// i advances by 3: it indexes tris (3 indices per triangle) and normals (3 floats per triangle) alike.
+	dd->begin(DU_DRAW_TRIS);
+	for (int i = 0; i < ntris*3; i += 3)
+	{
+		const float* norm = &normals[i];
+		unsigned int color;
+		unsigned char a = (unsigned char)(220*(2+norm[0]+norm[1])/4); // grey level computed from the normal's x and y components
+		if (norm[1] < walkableThr)
+			color = duLerpCol(duRGBA(a,a,a,255), unwalkable, 64);
+		else
+			color = duRGBA(a,a,a,255);
+		
+		const float* va = &verts[tris[i+0]*3];
+		const float* vb = &verts[tris[i+1]*3];
+		const float* vc = &verts[tris[i+2]*3];
+		// Find the normal's dominant axis; texture coordinates then come from the other two axes.
+		int ax = 0, ay = 0;
+		if (rcAbs(norm[1]) > rcAbs(norm[ax]))
+			ax = 1;
+		if (rcAbs(norm[2]) > rcAbs(norm[ax]))
+			ax = 2;
+		ax = (1<<ax)&3; // +1 mod 3
+		ay = (1<<ax)&3; // +1 mod 3
+		
+		uva[0] = va[ax]*texScale;
+		uva[1] = va[ay]*texScale;
+		uvb[0] = vb[ax]*texScale;
+		uvb[1] = vb[ay]*texScale;
+		uvc[0] = vc[ax]*texScale;
+		uvc[1] = vc[ay]*texScale;
+		
+		dd->vertex(va, color, uva);
+		dd->vertex(vb, color, uvb);
+		dd->vertex(vc, color, uvc);
+	}
+	dd->end();
+
+	dd->texture(false);
+}
+
+void duDebugDrawHeightfieldSolid(duDebugDraw* dd, const rcHeightfield& hf)
+{ // Draws one box per span of the heightfield, all with the same white-based face colors.
+	if (!dd) return;
+
+	const float* orig = hf.bmin;
+	const float cs = hf.cs;
+	const float ch = hf.ch;
+	
+	const int w = hf.width;
+	const int h = hf.height;
+		
+	unsigned int fcol[6];
+	duCalcBoxColors(fcol, duRGBA(255,255,255,255), duRGBA(255,255,255,255));
+	
+	dd->begin(DU_DRAW_QUADS);
+	// Visit every grid column and walk its linked list of spans.
+	for (int y = 0; y < h; ++y)
+	{
+		for (int x = 0; x < w; ++x)
+		{
+			float fx = orig[0] + x*cs;
+			float fz = orig[2] + y*cs;
+			const rcSpan* s = hf.spans[x + y*w];
+			while (s)
+			{
+				duAppendBox(dd, fx, orig[1]+s->smin*ch, fz, fx+cs, orig[1] + s->smax*ch, fz+cs, fcol); // box spans smin..smax vertically
+				s = s->next;
+			}
+		}
+	}
+	dd->end();
+}
+
+void duDebugDrawHeightfieldWalkable(duDebugDraw* dd, const rcHeightfield& hf)
+{ // Draws one box per span of the heightfield, with fcol[0] recolored according to the span's area id.
+	if (!dd) return;
+
+	const float* orig = hf.bmin;
+	const float cs = hf.cs;
+	const float ch = hf.ch;
+	
+	const int w = hf.width;
+	const int h = hf.height;
+	
+	unsigned int fcol[6];
+	duCalcBoxColors(fcol, duRGBA(255,255,255,255), duRGBA(217,217,217,255));
+
+	dd->begin(DU_DRAW_QUADS);
+	// Visit every grid column and walk its linked list of spans.
+	for (int y = 0; y < h; ++y)
+	{
+		for (int x = 0; x < w; ++x)
+		{
+			float fx = orig[0] + x*cs;
+			float fz = orig[2] + y*cs;
+			const rcSpan* s = hf.spans[x + y*w];
+			while (s)
+			{
+				if (s->area == RC_WALKABLE_AREA) // NOTE(review): fcol[0] is presumably the top face -- confirm against duAppendBox
+					fcol[0] = duRGBA(64,128,160,255);
+				else if (s->area == RC_NULL_AREA)
+					fcol[0] = duRGBA(64,64,64,255);
+				else
+					fcol[0] = duMultCol(duIntToCol(s->area, 255), 200);
+				
+				duAppendBox(dd, fx, orig[1]+s->smin*ch, fz, fx+cs, orig[1] + s->smax*ch, fz+cs, fcol);
+				s = s->next;
+			}
+		}
+	}
+	
+	dd->end();
+}
+
+void duDebugDrawCompactHeightfieldSolid(duDebugDraw* dd, const rcCompactHeightfield& chf)
+{ // Draws one horizontal quad per compact span, colored by the span's area id.
+	if (!dd) return;
+
+	const float cs = chf.cs;
+	const float ch = chf.ch;
+
+	dd->begin(DU_DRAW_QUADS);
+	
+	for (int y = 0; y < chf.height; ++y)
+	{
+		for (int x = 0; x < chf.width; ++x)
+		{
+			const float fx = chf.bmin[0] + x*cs;
+			const float fz = chf.bmin[2] + y*cs;
+			const rcCompactCell& c = chf.cells[x+y*chf.width];
+			// The cell's spans occupy the index range [c.index, c.index+c.count) of chf.spans / chf.areas.
+			for (unsigned i = c.index, ni = c.index+c.count; i < ni; ++i)
+			{
+				const rcCompactSpan& s = chf.spans[i];
+
+				unsigned int color;
+				if (chf.areas[i] == RC_WALKABLE_AREA)
+					color = duRGBA(0,192,255,64);
+				else if (chf.areas[i] == RC_NULL_AREA)
+					color = duRGBA(0,0,0,64);
+				else
+					color = duIntToCol(chf.areas[i], 255);
+				
+				const float fy = chf.bmin[1] + (s.y+1)*ch; // one height unit above the span's y
+				dd->vertex(fx, fy, fz, color);
+				dd->vertex(fx, fy, fz+cs, color);
+				dd->vertex(fx+cs, fy, fz+cs, color);
+				dd->vertex(fx+cs, fy, fz, color);
+			}
+		}
+	}
+	dd->end();
+}
+
+void duDebugDrawCompactHeightfieldRegions(duDebugDraw* dd, const rcCompactHeightfield& chf)
+{ // Draws one horizontal quad per compact span, colored by the span's region id.
+	if (!dd) return;
+
+	const float cs = chf.cs;
+	const float ch = chf.ch;
+
+	dd->begin(DU_DRAW_QUADS);
+
+	for (int y = 0; y < chf.height; ++y)
+	{
+		for (int x = 0; x < chf.width; ++x)
+		{
+			const float fx = chf.bmin[0] + x*cs;
+			const float fz = chf.bmin[2] + y*cs;
+			const rcCompactCell& c = chf.cells[x+y*chf.width];
+			// The cell's spans occupy the index range [c.index, c.index+c.count) of chf.spans.
+			for (unsigned i = c.index, ni = c.index+c.count; i < ni; ++i)
+			{
+				const rcCompactSpan& s = chf.spans[i];
+				const float fy = chf.bmin[1] + (s.y)*ch; // drawn at the span's y, without the +1 the sibling compact-heightfield draws use
+				unsigned int color;
+				if (s.reg)
+					color = duIntToCol(s.reg, 192);
+				else
+					color = duRGBA(0,0,0,64); // region id 0: translucent black
+
+				dd->vertex(fx, fy, fz, color);
+				dd->vertex(fx, fy, fz+cs, color);
+				dd->vertex(fx+cs, fy, fz+cs, color);
+				dd->vertex(fx+cs, fy, fz, color);
+			}
+		}
+	}
+	
+	dd->end();
+}
+
+
+void duDebugDrawCompactHeightfieldDistance(duDebugDraw* dd, const rcCompactHeightfield& chf)
+{ // Draws one horizontal quad per compact span, shaded in grey by the span's entry in chf.dist.
+	if (!dd) return;
+	if (!chf.dist) return; // nothing to draw when the distance array is absent
+		
+	const float cs = chf.cs;
+	const float ch = chf.ch;
+	// Scale so that maxDistance maps to 255; maxd is clamped to at least 1 before dividing.
+	float maxd = chf.maxDistance;
+	if (maxd < 1.0f) maxd = 1;
+	const float dscale = 255.0f / maxd;
+	
+	dd->begin(DU_DRAW_QUADS);
+	
+	for (int y = 0; y < chf.height; ++y)
+	{
+		for (int x = 0; x < chf.width; ++x)
+		{
+			const float fx = chf.bmin[0] + x*cs;
+			const float fz = chf.bmin[2] + y*cs;
+			const rcCompactCell& c = chf.cells[x+y*chf.width];
+			// The cell's spans occupy the index range [c.index, c.index+c.count) of chf.spans / chf.dist.
+			for (unsigned i = c.index, ni = c.index+c.count; i < ni; ++i)
+			{
+				const rcCompactSpan& s = chf.spans[i];
+				const float fy = chf.bmin[1] + (s.y+1)*ch;
+				const unsigned char cd = (unsigned char)(chf.dist[i] * dscale);
+				const unsigned int color = duRGBA(cd,cd,cd,255);
+				dd->vertex(fx, fy, fz, color);
+				dd->vertex(fx, fy, fz+cs, color);
+				dd->vertex(fx+cs, fy, fz+cs, color);
+				dd->vertex(fx+cs, fy, fz, color);
+			}
+		}
+	}
+	dd->end();
+}
+
+static void drawLayerPortals(duDebugDraw* dd, const rcHeightfieldLayer* layer)
+{ // Draws a line along every cell edge of the layer whose portal bit is set in layer->cons.
+	const float cs = layer->cs;
+	const float ch = layer->ch;
+	const int w = layer->width;
+	const int h = layer->height;
+	
+	unsigned int pcol = duRGBA(255,255,255,255);
+	// One row per direction: {x0,z0, x1,z1} cell-corner offsets of the edge drawn for that direction.
+	const int segs[4*4] = {0,0,0,1, 0,1,1,1, 1,1,1,0, 1,0,0,0};
+	
+	// Layer portals
+	dd->begin(DU_DRAW_LINES, 2.0f);
+	for (int y = 0; y < h; ++y)
+	{
+		for (int x = 0; x < w; ++x)
+		{
+			const int idx = x+y*w;
+			const int lh = (int)layer->heights[idx];
+			if (lh == 255) continue; // height 255: cell is skipped (presumably marks an empty cell -- confirm)
+			// Bits 4..7 of cons[idx] are tested, one bit per direction.
+			for (int dir = 0; dir < 4; ++dir)
+			{
+				if (layer->cons[idx] & (1<<(dir+4)))
+				{
+					const int* seg = &segs[dir*4];
+					const float ax = layer->bmin[0] + (x+seg[0])*cs;
+					const float ay = layer->bmin[1] + (lh+2)*ch; // two height units above the cell height
+					const float az = layer->bmin[2] + (y+seg[1])*cs;
+					const float bx = layer->bmin[0] + (x+seg[2])*cs;
+					const float by = layer->bmin[1] + (lh+2)*ch;
+					const float bz = layer->bmin[2] + (y+seg[3])*cs;
+					dd->vertex(ax, ay, az, pcol);
+					dd->vertex(bx, by, bz, pcol);
+				}
+			}
+		}
+	}
+	dd->end();
+}
+
+void duDebugDrawHeightfieldLayer(duDebugDraw* dd, const struct rcHeightfieldLayer& layer, const int idx)
+{ // Draws the layer's bounding box, one area-tinted quad per occupied cell, then the layer's portal edges; idx selects the base color.
+	const float cs = layer.cs;
+	const float ch = layer.ch;
+	const int w = layer.width;
+	const int h = layer.height;
+	
+	unsigned int color = duIntToCol(idx+1, 255);
+	
+	// Layer bounds
+	float bmin[3], bmax[3];
+	bmin[0] = layer.bmin[0] + layer.minx*cs;
+	bmin[1] = layer.bmin[1];
+	bmin[2] = layer.bmin[2] + layer.miny*cs;
+	bmax[0] = layer.bmin[0] + (layer.maxx+1)*cs;
+	bmax[1] = layer.bmax[1];
+	bmax[2] = layer.bmin[2] + (layer.maxy+1)*cs;
+	duDebugDrawBoxWire(dd, bmin[0],bmin[1],bmin[2], bmax[0],bmax[1],bmax[2], duTransCol(color,128), 2.0f);
+	
+	// Layer height
+	dd->begin(DU_DRAW_QUADS);
+	for (int y = 0; y < h; ++y)
+	{
+		for (int x = 0; x < w; ++x)
+		{
+			const int lidx = x+y*w;
+			const int lh = (int)layer.heights[lidx];
+			if (lh == 0xff) continue; // test the cell height, not the grid height h; drawLayerPortals skips the same cells
+			const unsigned char area = layer.areas[lidx];
+			// Blend the layer color toward a per-area color.
+			unsigned int col;
+			if (area == RC_WALKABLE_AREA)
+				col = duLerpCol(color, duRGBA(0,192,255,64), 32);
+			else if (area == RC_NULL_AREA)
+				col = duLerpCol(color, duRGBA(0,0,0,64), 32);
+			else
+				col = duLerpCol(color, duIntToCol(area, 255), 32);
+			// Quad corner in world units, placed one height unit above the cell height.
+			const float fx = layer.bmin[0] + x*cs;
+			const float fy = layer.bmin[1] + (lh+1)*ch;
+			const float fz = layer.bmin[2] + y*cs;
+			
+			dd->vertex(fx, fy, fz, col);
+			dd->vertex(fx, fy, fz+cs, col);
+			dd->vertex(fx+cs, fy, fz+cs, col);
+			dd->vertex(fx+cs, fy, fz, col);
+		}
+	}
+	dd->end();
+	
+	// Portals
+	drawLayerPortals(dd, &layer);
+}
+
+void duDebugDrawHeightfieldLayers(duDebugDraw* dd, const struct rcHeightfieldLayerSet& lset)
+{
+	const int layerCount = dd ? lset.nlayers : 0; // a null draw interface draws nothing
+	for (int layerIdx = 0; layerIdx < layerCount; ++layerIdx)
+		duDebugDrawHeightfieldLayer(dd, lset.layers[layerIdx], layerIdx); // the layer's position in the set is passed as its idx
+}
+
+/*
+void duDebugDrawLayerContours(duDebugDraw* dd, const struct rcLayerContourSet& lcset)
+{
+	if (!dd) return;
+	
+	const float* orig = lcset.bmin;
+	const float cs = lcset.cs;
+	const float ch = lcset.ch;
+	
+	const unsigned char a = 255;// (unsigned char)(alpha*255.0f);
+	
+	const int offs[2*4] = {-1,0, 0,1, 1,0, 0,-1};
+
+	dd->begin(DU_DRAW_LINES, 2.0f);
+	
+	for (int i = 0; i < lcset.nconts; ++i)
+	{
+		const rcLayerContour& c = lcset.conts[i];
+		unsigned int color = 0;
+
+		color = duIntToCol(i, a);
+
+		for (int j = 0; j < c.nverts; ++j)
+		{
+			const int k = (j+1) % c.nverts;
+			const unsigned char* va = &c.verts[j*4];
+			const unsigned char* vb = &c.verts[k*4];
+			const float ax = orig[0] + va[0]*cs;
+			const float ay = orig[1] + (va[1]+1+(i&1))*ch;
+			const float az = orig[2] + va[2]*cs;
+			const float bx = orig[0] + vb[0]*cs;
+			const float by = orig[1] + (vb[1]+1+(i&1))*ch;
+			const float bz = orig[2] + vb[2]*cs;
+			unsigned int col = color;
+			if ((va[3] & 0xf) != 0xf)
+			{
+				col = duRGBA(255,255,255,128);
+				int d = va[3] & 0xf;
+				
+				const float cx = (ax+bx)*0.5f;
+				const float cy = (ay+by)*0.5f;
+				const float cz = (az+bz)*0.5f;
+				
+				const float dx = cx + offs[d*2+0]*2*cs;
+				const float dy = cy;
+				const float dz = cz + offs[d*2+1]*2*cs;
+				
+				dd->vertex(cx,cy,cz,duRGBA(255,0,0,255));
+				dd->vertex(dx,dy,dz,duRGBA(255,0,0,255));
+			}
+			
+			duAppendArrow(dd, ax,ay,az, bx,by,bz, 0.0f, cs*0.5f, col);
+		}
+	}
+	dd->end();
+	
+	dd->begin(DU_DRAW_POINTS, 4.0f);	
+	
+	for (int i = 0; i < lcset.nconts; ++i)
+	{
+		const rcLayerContour& c = lcset.conts[i];
+		unsigned int color = 0;
+		
+		for (int j = 0; j < c.nverts; ++j)
+		{
+			const unsigned char* va = &c.verts[j*4];
+
+			color = duDarkenCol(duIntToCol(i, a));
+			if (va[3] & 0x80)
+				color = duRGBA(255,0,0,255);
+
+			float fx = orig[0] + va[0]*cs;
+			float fy = orig[1] + (va[1]+1+(i&1))*ch;
+			float fz = orig[2] + va[2]*cs;
+			dd->vertex(fx,fy,fz, color);
+		}
+	}
+	dd->end();
+}
+
+void duDebugDrawLayerPolyMesh(duDebugDraw* dd, const struct rcLayerPolyMesh& lmesh)
+{
+	if (!dd) return;
+	
+	const int nvp = lmesh.nvp;
+	const float cs = lmesh.cs;
+	const float ch = lmesh.ch;
+	const float* orig = lmesh.bmin;
+	
+	const int offs[2*4] = {-1,0, 0,1, 1,0, 0,-1};
+
+	dd->begin(DU_DRAW_TRIS);
+	
+	for (int i = 0; i < lmesh.npolys; ++i)
+	{
+		const unsigned short* p = &lmesh.polys[i*nvp*2];
+		
+		unsigned int color;
+		if (lmesh.areas[i] == RC_WALKABLE_AREA)
+			color = duRGBA(0,192,255,64);
+		else if (lmesh.areas[i] == RC_NULL_AREA)
+			color = duRGBA(0,0,0,64);
+		else
+			color = duIntToCol(lmesh.areas[i], 255);
+		
+		unsigned short vi[3];
+		for (int j = 2; j < nvp; ++j)
+		{
+			if (p[j] == RC_MESH_NULL_IDX) break;
+			vi[0] = p[0];
+			vi[1] = p[j-1];
+			vi[2] = p[j];
+			for (int k = 0; k < 3; ++k)
+			{
+				const unsigned short* v = &lmesh.verts[vi[k]*3];
+				const float x = orig[0] + v[0]*cs;
+				const float y = orig[1] + (v[1]+1)*ch;
+				const float z = orig[2] + v[2]*cs;
+				dd->vertex(x,y,z, color);
+			}
+		}
+	}
+	dd->end();
+	
+	// Draw neighbours edges
+	const unsigned int coln = duRGBA(0,48,64,32);
+	dd->begin(DU_DRAW_LINES, 1.5f);
+	for (int i = 0; i < lmesh.npolys; ++i)
+	{
+		const unsigned short* p = &lmesh.polys[i*nvp*2];
+		for (int j = 0; j < nvp; ++j)
+		{
+			if (p[j] == RC_MESH_NULL_IDX) break;
+			if (p[nvp+j] & 0x8000) continue;
+			const int nj = (j+1 >= nvp || p[j+1] == RC_MESH_NULL_IDX) ? 0 : j+1; 
+			int vi[2] = {p[j], p[nj]};
+			
+			for (int k = 0; k < 2; ++k)
+			{
+				const unsigned short* v = &lmesh.verts[vi[k]*3];
+				const float x = orig[0] + v[0]*cs;
+				const float y = orig[1] + (v[1]+1)*ch + 0.1f;
+				const float z = orig[2] + v[2]*cs;
+				dd->vertex(x, y, z, coln);
+			}
+		}
+	}
+	dd->end();
+	
+	// Draw boundary edges
+	const unsigned int colb = duRGBA(0,48,64,220);
+	dd->begin(DU_DRAW_LINES, 2.5f);
+	for (int i = 0; i < lmesh.npolys; ++i)
+	{
+		const unsigned short* p = &lmesh.polys[i*nvp*2];
+		for (int j = 0; j < nvp; ++j)
+		{
+			if (p[j] == RC_MESH_NULL_IDX) break;
+			if ((p[nvp+j] & 0x8000) == 0) continue;
+			const int nj = (j+1 >= nvp || p[j+1] == RC_MESH_NULL_IDX) ? 0 : j+1; 
+			int vi[2] = {p[j], p[nj]};
+			
+			unsigned int col = colb;
+			if ((p[nvp+j] & 0xf) != 0xf)
+			{
+				const unsigned short* va = &lmesh.verts[vi[0]*3];
+				const unsigned short* vb = &lmesh.verts[vi[1]*3];
+
+				const float ax = orig[0] + va[0]*cs;
+				const float ay = orig[1] + (va[1]+1+(i&1))*ch;
+				const float az = orig[2] + va[2]*cs;
+				const float bx = orig[0] + vb[0]*cs;
+				const float by = orig[1] + (vb[1]+1+(i&1))*ch;
+				const float bz = orig[2] + vb[2]*cs;
+				
+				const float cx = (ax+bx)*0.5f;
+				const float cy = (ay+by)*0.5f;
+				const float cz = (az+bz)*0.5f;
+				
+				int d = p[nvp+j] & 0xf;
+				
+				const float dx = cx + offs[d*2+0]*2*cs;
+				const float dy = cy;
+				const float dz = cz + offs[d*2+1]*2*cs;
+				
+				dd->vertex(cx,cy,cz,duRGBA(255,0,0,255));
+				dd->vertex(dx,dy,dz,duRGBA(255,0,0,255));
+				
+				col = duRGBA(255,255,255,128);
+			}
+							 
+			for (int k = 0; k < 2; ++k)
+			{
+				const unsigned short* v = &lmesh.verts[vi[k]*3];
+				const float x = orig[0] + v[0]*cs;
+				const float y = orig[1] + (v[1]+1)*ch + 0.1f;
+				const float z = orig[2] + v[2]*cs;
+				dd->vertex(x, y, z, col);
+			}
+		}
+	}
+	dd->end();
+	
+	dd->begin(DU_DRAW_POINTS, 3.0f);
+	const unsigned int colv = duRGBA(0,0,0,220);
+	for (int i = 0; i < lmesh.nverts; ++i)
+	{
+		const unsigned short* v = &lmesh.verts[i*3];
+		const float x = orig[0] + v[0]*cs;
+		const float y = orig[1] + (v[1]+1)*ch + 0.1f;
+		const float z = orig[2] + v[2]*cs;
+		dd->vertex(x,y,z, colv);
+	}
+	dd->end();
+}
+*/
+
+static void getContourCenter(const rcContour* cont, const float* orig, float cs, float ch, float* center)
+{
+	// The zero vector is both the accumulator start and the result for a contour without vertices.
+	center[0] = center[1] = center[2] = 0;
+	const int vertCount = cont->nverts;
+	if (vertCount == 0) return;
+	// Sum the integer coordinates; each vertex occupies 4 ints, of which the first three are accumulated.
+	for (int n = 0; n < vertCount; ++n)
+		for (int axis = 0; axis < 3; ++axis)
+			center[axis] += (float)cont->verts[n*4 + axis];
+	// Average and convert to world units in one multiply per axis (cs for x/z, ch for y).
+	const float invCount = 1.0f / vertCount;
+	const float scaleXZ = invCount * cs;
+	const float scaleY = invCount * ch;
+	center[0] *= scaleXZ;
+	center[1] *= scaleY;
+	center[2] *= scaleXZ;
+	// Translate by the origin; y is additionally raised by four height units.
+	center[0] += orig[0];
+	center[1] += orig[1] + 4*ch;
+	center[2] += orig[2];
+}
+
+// Returns the first contour of cset whose region id equals reg, or 0 when there is none.
+static const rcContour* findContourFromSet(const rcContourSet& cset, unsigned short reg)
+{
+	int idx = 0;
+	while (idx < cset.nconts && cset.conts[idx].reg != reg)
+		++idx;
+	// Reaching nconts means the scan ran off the end without a match.
+	return (idx < cset.nconts) ? &cset.conts[idx] : 0;
+}
+
+void duDebugDrawRegionConnections(duDebugDraw* dd, const rcContourSet& cset, const float alpha)
+{ // Draws an arc between the centers of contours whose regions are connected, then a point at every contour center.
+	if (!dd) return;
+	
+	const float* orig = cset.bmin;
+	const float cs = cset.cs;
+	const float ch = cset.ch;
+	
+	// Draw centers
+	float pos[3], pos2[3];
+
+	unsigned int color = duRGBA(0,0,0,196);
+
+	dd->begin(DU_DRAW_LINES, 2.0f);
+
+	for (int i = 0; i < cset.nconts; ++i)
+	{
+		const rcContour* cont = &cset.conts[i];
+		getContourCenter(cont, orig, cs, ch, pos);
+		for (int j = 0; j < cont->nverts; ++j)
+		{
+			const int* v = &cont->verts[j*4];
+			if (v[3] == 0 || (unsigned short)v[3] < cont->reg) continue; // skip 0 and lower region ids (presumably so each pair is drawn once -- confirm)
+			const rcContour* cont2 = findContourFromSet(cset, (unsigned short)v[3]);
+			if (cont2)
+			{
+				getContourCenter(cont2, orig, cs, ch, pos2);
+				duAppendArc(dd, pos[0],pos[1],pos[2], pos2[0],pos2[1],pos2[2], 0.25f, 0.6f, 0.6f, color);
+			}
+		}
+	}
+	
+	dd->end();
+	// alpha is converted from a float to a byte for the center-point colors.
+	unsigned char a = (unsigned char)(alpha * 255.0f);
+
+	dd->begin(DU_DRAW_POINTS, 7.0f);
+
+	for (int i = 0; i < cset.nconts; ++i)
+	{
+		const rcContour* cont = &cset.conts[i];
+		unsigned int col = duDarkenCol(duIntToCol(cont->reg,a));
+		getContourCenter(cont, orig, cs, ch, pos);
+		dd->vertex(pos, col);
+	}
+	dd->end();
+}
+
+void duDebugDrawRawContours(duDebugDraw* dd, const rcContourSet& cset, const float alpha)
+{ // Draws the raw (rverts) outline of every contour as a closed line loop, then draws the raw vertices as points.
+	if (!dd) return;
+
+	const float* orig = cset.bmin;
+	const float cs = cset.cs;
+	const float ch = cset.ch;
+	
+	const unsigned char a = (unsigned char)(alpha*255.0f);
+	
+	dd->begin(DU_DRAW_LINES, 2.0f);
+			
+	for (int i = 0; i < cset.nconts; ++i)
+	{
+		const rcContour& c = cset.conts[i];
+		unsigned int color = duIntToCol(c.reg, a);
+		if (!c.nrverts) continue; // empty contour: avoid reading rverts[0] and emitting an unpaired line vertex
+		for (int j = 0; j < c.nrverts; ++j)
+		{
+			const int* v = &c.rverts[j*4];
+			float fx = orig[0] + v[0]*cs;
+			float fy = orig[1] + (v[1]+1+(i&1))*ch; // (i&1): odd-indexed contours are drawn one height unit higher
+			float fz = orig[2] + v[2]*cs;
+			dd->vertex(fx,fy,fz,color);
+			if (j > 0)
+				dd->vertex(fx,fy,fz,color); // every vertex after the first both ends one segment and starts the next
+		}
+		// Loop last segment.
+		const int* v = &c.rverts[0];
+		float fx = orig[0] + v[0]*cs;
+		float fy = orig[1] + (v[1]+1+(i&1))*ch;
+		float fz = orig[2] + v[2]*cs;
+		dd->vertex(fx,fy,fz,color);
+	}
+	dd->end();
+
+	dd->begin(DU_DRAW_POINTS, 2.0f);	
+
+	for (int i = 0; i < cset.nconts; ++i)
+	{
+		const rcContour& c = cset.conts[i];
+		unsigned int color = duDarkenCol(duIntToCol(c.reg, a));
+		
+		for (int j = 0; j < c.nrverts; ++j)
+		{
+			const int* v = &c.rverts[j*4];
+			float off = 0;
+			unsigned int colv = color;
+			if (v[3] & RC_BORDER_VERTEX)
+			{
+				colv = duRGBA(255,255,255,a); // border vertices are drawn white and raised by two height units
+				off = ch*2;
+			}
+			
+			float fx = orig[0] + v[0]*cs;
+			float fy = orig[1] + (v[1]+1+(i&1))*ch + off;
+			float fz = orig[2] + v[2]*cs;
+			dd->vertex(fx,fy,fz, colv);
+		}
+	}
+	dd->end();
+}
+
+void duDebugDrawContours(duDebugDraw* dd, const rcContourSet& cset, const float alpha)
+{ // Draws the verts outline of every contour as line segments, then draws the vertices as points.
+	if (!dd) return;
+
+	const float* orig = cset.bmin;
+	const float cs = cset.cs;
+	const float ch = cset.ch;
+	
+	const unsigned char a = (unsigned char)(alpha*255.0f);
+	
+	dd->begin(DU_DRAW_LINES, 2.5f);
+	
+	for (int i = 0; i < cset.nconts; ++i)
+	{
+		const rcContour& c = cset.conts[i];
+		if (!c.nverts)
+			continue;
+		const unsigned int color = duIntToCol(c.reg, a);
+		const unsigned int bcolor = duLerpCol(color,duRGBA(255,255,255,a),128);
+		for (int j = 0, k = c.nverts-1; j < c.nverts; k=j++) // k trails j, starting at the last vertex, so the loop closes
+		{
+			const int* va = &c.verts[k*4];
+			const int* vb = &c.verts[j*4];
+			unsigned int col = (va[3] & RC_AREA_BORDER) ? bcolor : color; 
+			float fx,fy,fz;
+			fx = orig[0] + va[0]*cs;
+			fy = orig[1] + (va[1]+1+(i&1))*ch; // (i&1): odd-indexed contours are drawn one height unit higher
+			fz = orig[2] + va[2]*cs;
+			dd->vertex(fx,fy,fz, col);
+			fx = orig[0] + vb[0]*cs;
+			fy = orig[1] + (vb[1]+1+(i&1))*ch;
+			fz = orig[2] + vb[2]*cs;
+			dd->vertex(fx,fy,fz, col);
+		}
+	}
+	dd->end();
+
+	dd->begin(DU_DRAW_POINTS, 3.0f);
+	
+	for (int i = 0; i < cset.nconts; ++i)
+	{
+		const rcContour& c = cset.conts[i];
+		unsigned int color = duDarkenCol(duIntToCol(c.reg, a));
+		for (int j = 0; j < c.nverts; ++j)
+		{
+			const int* v = &c.verts[j*4];
+			float off = 0;
+			unsigned int colv = color;
+			if (v[3] & RC_BORDER_VERTEX)
+			{
+				colv = duRGBA(255,255,255,a); // border vertices are drawn white and raised by two height units
+				off = ch*2;
+			}
+
+			float fx = orig[0] + v[0]*cs;
+			float fy = orig[1] + (v[1]+1+(i&1))*ch + off;
+			float fz = orig[2] + v[2]*cs;
+			dd->vertex(fx,fy,fz, colv);
+		}
+	}
+	dd->end();
+}
+
+void duDebugDrawPolyMesh(duDebugDraw* dd, const struct rcPolyMesh& mesh)
+{
+	if (!dd) return;
+	// Draws the mesh in four passes: filled polygons, neighbour edges, boundary edges, then vertices.
+	const int nvp = mesh.nvp;
+	const float cs = mesh.cs;
+	const float ch = mesh.ch;
+	const float* orig = mesh.bmin;
+	
+	dd->begin(DU_DRAW_TRIS);
+	
+	for (int i = 0; i < mesh.npolys; ++i)
+	{
+		const unsigned short* p = &mesh.polys[i*nvp*2];	// nvp*2 entries per polygon; p[0..nvp) are the vertex indices
+		
+		unsigned int color;
+		if (mesh.areas[i] == RC_WALKABLE_AREA)
+			color = duRGBA(0,192,255,64);
+		else if (mesh.areas[i] == RC_NULL_AREA)
+			color = duRGBA(0,0,0,64);
+		else
+			color = duIntToCol(mesh.areas[i], 255);
+		
+		unsigned short vi[3];
+		for (int j = 2; j < nvp; ++j)	// triangle fan around p[0]
+		{
+			if (p[j] == RC_MESH_NULL_IDX) break;	// polygon has fewer than nvp vertices
+			vi[0] = p[0];
+			vi[1] = p[j-1];
+			vi[2] = p[j];
+			for (int k = 0; k < 3; ++k)
+			{
+				const unsigned short* v = &mesh.verts[vi[k]*3];
+				const float x = orig[0] + v[0]*cs;
+				const float y = orig[1] + (v[1]+1)*ch;
+				const float z = orig[2] + v[2]*cs;
+				dd->vertex(x,y,z, color);
+			}
+		}
+	}
+	dd->end();
+
+	// Draw neighbours edges
+	const unsigned int coln = duRGBA(0,48,64,32);
+	dd->begin(DU_DRAW_LINES, 1.5f);
+	for (int i = 0; i < mesh.npolys; ++i)
+	{
+		const unsigned short* p = &mesh.polys[i*nvp*2];
+		for (int j = 0; j < nvp; ++j)
+		{
+			if (p[j] == RC_MESH_NULL_IDX) break;
+			if (p[nvp+j] & 0x8000) continue;	// high bit set: left for the boundary pass below
+			const int nj = (j+1 >= nvp || p[j+1] == RC_MESH_NULL_IDX) ? 0 : j+1; // wrap to the first vertex after the last used slot
+			const int vi[2] = {p[j], p[nj]};
+			
+			for (int k = 0; k < 2; ++k)
+			{
+				const unsigned short* v = &mesh.verts[vi[k]*3];
+				const float x = orig[0] + v[0]*cs;
+				const float y = orig[1] + (v[1]+1)*ch + 0.1f;
+				const float z = orig[2] + v[2]*cs;
+				dd->vertex(x, y, z, coln);
+			}
+		}
+	}
+	dd->end();
+	
+	// Draw boundary edges
+	const unsigned int colb = duRGBA(0,48,64,220);
+	dd->begin(DU_DRAW_LINES, 2.5f);
+	for (int i = 0; i < mesh.npolys; ++i)
+	{
+		const unsigned short* p = &mesh.polys[i*nvp*2];
+		for (int j = 0; j < nvp; ++j)
+		{
+			if (p[j] == RC_MESH_NULL_IDX) break;
+			if ((p[nvp+j] & 0x8000) == 0) continue;	// high bit clear: already drawn by the neighbour pass
+			const int nj = (j+1 >= nvp || p[j+1] == RC_MESH_NULL_IDX) ? 0 : j+1; 
+			const int vi[2] = {p[j], p[nj]};
+			
+			unsigned int col = colb;
+			if ((p[nvp+j] & 0xf) != 0xf)	// NOTE(review): low nibble != 0xf presumably marks a tile-portal edge - confirm against Recast.h
+				col = duRGBA(255,255,255,128);
+			for (int k = 0; k < 2; ++k)
+			{
+				const unsigned short* v = &mesh.verts[vi[k]*3];
+				const float x = orig[0] + v[0]*cs;
+				const float y = orig[1] + (v[1]+1)*ch + 0.1f;
+				const float z = orig[2] + v[2]*cs;
+				dd->vertex(x, y, z, col);
+			}
+		}
+	}
+	dd->end();
+	
+	dd->begin(DU_DRAW_POINTS, 3.0f);
+	const unsigned int colv = duRGBA(0,0,0,220);
+	for (int i = 0; i < mesh.nverts; ++i)
+	{
+		const unsigned short* v = &mesh.verts[i*3];
+		const float x = orig[0] + v[0]*cs;
+		const float y = orig[1] + (v[1]+1)*ch + 0.1f;
+		const float z = orig[2] + v[2]*cs;
+		dd->vertex(x,y,z, colv);
+	}
+	dd->end();
+}
+
+void duDebugDrawPolyMeshDetail(duDebugDraw* dd, const struct rcPolyMeshDetail& dmesh)
+{
+	if (!dd) return;
+	// Draws the detail mesh in four passes: triangles, internal edges, external edges, then vertices.
+	dd->begin(DU_DRAW_TRIS);
+	
+	for (int i = 0; i < dmesh.nmeshes; ++i)
+	{
+		const unsigned int* m = &dmesh.meshes[i*4];	// 4 values per sub-mesh: vertex base, vertex count, triangle base, triangle count
+		const unsigned int bverts = m[0];
+		const unsigned int btris = m[2];
+		const int ntris = (int)m[3];
+		const float* verts = &dmesh.verts[bverts*3];
+		const unsigned char* tris = &dmesh.tris[btris*4];	// 4 bytes per triangle: three indices relative to bverts, then edge flags
+
+		unsigned int color = duIntToCol(i, 192);
+
+		for (int j = 0; j < ntris; ++j)
+		{
+			dd->vertex(&verts[tris[j*4+0]*3], color);
+			dd->vertex(&verts[tris[j*4+1]*3], color);
+			dd->vertex(&verts[tris[j*4+2]*3], color);
+		}
+	}
+	dd->end();
+
+	// Internal edges.
+	dd->begin(DU_DRAW_LINES, 1.0f);
+	const unsigned int coli = duRGBA(0,0,0,64);
+	for (int i = 0; i < dmesh.nmeshes; ++i)
+	{
+		const unsigned int* m = &dmesh.meshes[i*4];
+		const unsigned int bverts = m[0];
+		const unsigned int btris = m[2];
+		const int ntris = (int)m[3];
+		const float* verts = &dmesh.verts[bverts*3];
+		const unsigned char* tris = &dmesh.tris[btris*4];
+		
+		for (int j = 0; j < ntris; ++j)
+		{
+			const unsigned char* t = &tris[j*4];
+			for (int k = 0, kp = 2; k < 3; kp=k++)	// kp trails k, so (kp,k) visits edges 2-0, 0-1, 1-2
+			{
+				unsigned char ef = (t[3] >> (kp*2)) & 0x3;	// two flag bits per edge, selected by kp
+				if (ef == 0)
+				{
+					// Internal edge
+					if (t[kp] < t[k])	// only the ascending-index direction is drawn - presumably so a shared edge is not drawn twice
+					{
+						dd->vertex(&verts[t[kp]*3], coli);
+						dd->vertex(&verts[t[k]*3], coli);
+					}
+				}
+			}
+		}
+	}
+	dd->end();
+	
+	// External edges.
+	dd->begin(DU_DRAW_LINES, 2.0f);
+	const unsigned int cole = duRGBA(0,0,0,64);
+	for (int i = 0; i < dmesh.nmeshes; ++i)
+	{
+		const unsigned int* m = &dmesh.meshes[i*4];
+		const unsigned int bverts = m[0];
+		const unsigned int btris = m[2];
+		const int ntris = (int)m[3];
+		const float* verts = &dmesh.verts[bverts*3];
+		const unsigned char* tris = &dmesh.tris[btris*4];
+		
+		for (int j = 0; j < ntris; ++j)
+		{
+			const unsigned char* t = &tris[j*4];
+			for (int k = 0, kp = 2; k < 3; kp=k++)
+			{
+				unsigned char ef = (t[3] >> (kp*2)) & 0x3;
+				if (ef != 0)
+				{
+					// Ext edge
+					dd->vertex(&verts[t[kp]*3], cole);
+					dd->vertex(&verts[t[k]*3], cole);
+				}
+			}
+		}
+	}
+	dd->end();
+	
+	dd->begin(DU_DRAW_POINTS, 3.0f);
+	const unsigned int colv = duRGBA(0,0,0,64);
+	for (int i = 0; i < dmesh.nmeshes; ++i)
+	{
+		const unsigned int* m = &dmesh.meshes[i*4];
+		const unsigned int bverts = m[0];
+		const int nverts = (int)m[1];
+		const float* verts = &dmesh.verts[bverts*3];
+		for (int j = 0; j < nverts; ++j)
+			dd->vertex(&verts[j*3], colv);
+	}
+	dd->end();
+}
diff --git a/Engine/lib/recast/DebugUtils/Source/RecastDump.cpp b/Engine/lib/recast/DebugUtils/Source/RecastDump.cpp
new file mode 100644
index 000000000..7663fc755
--- /dev/null
+++ b/Engine/lib/recast/DebugUtils/Source/RecastDump.cpp
@@ -0,0 +1,451 @@
+//
+// Copyright (c) 2009-2010 Mikko Mononen memon@inside.org
+//
+// This software is provided 'as-is', without any express or implied
+// warranty.  In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source distribution.
+//
+
+#define _USE_MATH_DEFINES
+#include <math.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <string.h>
+#include "Recast.h"
+#include "RecastAlloc.h"
+#include "RecastDump.h"
+
+
+duFileIO::~duFileIO()
+{
+	// Intentionally empty: the destructor is only defined out of line here.
+}
+	
+static void ioprintf(duFileIO* io, const char* format, ...)	// printf-style formatting, forwarded to io->write()
+{
+	char line[256];	// formatted output longer than this buffer is truncated
+	va_list ap;
+	va_start(ap, format);
+	int n = vsnprintf(line, sizeof(line), format, ap);	// C99: returns the length the full output would have had
+	va_end(ap);
+	if (n >= (int)sizeof(line)) n = (int)sizeof(line)-1;	// truncated: only sizeof(line)-1 characters were stored, never write past them
+	if (n > 0) io->write(line, sizeof(char)*n);
+}
+
+bool duDumpPolyMeshToObj(rcPolyMesh& pmesh, duFileIO* io)
+{
+	if (!io)
+	{
+		printf("duDumpPolyMeshToObj: input IO is null.\n"); 
+		return false;
+	}
+	if (!io->isWriting())
+	{
+		printf("duDumpPolyMeshToObj: input IO not writing.\n"); 
+		return false;
+	}
+	// Writes pmesh as OBJ-style text: a header, one "v" line per vertex, then "f" lines; returns true once written.
+	const int nvp = pmesh.nvp;
+	const float cs = pmesh.cs;
+	const float ch = pmesh.ch;
+	const float* orig = pmesh.bmin;
+	
+	ioprintf(io, "# Recast Navmesh\n");
+	ioprintf(io, "o NavMesh\n");
+
+	ioprintf(io, "\n");
+	
+	for (int i = 0; i < pmesh.nverts; ++i)
+	{
+		const unsigned short* v = &pmesh.verts[i*3];
+		const float x = orig[0] + v[0]*cs;	// grid coordinate scaled by the cell size and offset by bmin
+		const float y = orig[1] + (v[1]+1)*ch + 0.1f;
+		const float z = orig[2] + v[2]*cs;
+		ioprintf(io, "v %f %f %f\n", x,y,z);
+	}
+
+	ioprintf(io, "\n");
+
+	for (int i = 0; i < pmesh.npolys; ++i)
+	{
+		const unsigned short* p = &pmesh.polys[i*nvp*2];
+		for (int j = 2; j < nvp; ++j)	// each polygon is emitted as a triangle fan around p[0]
+		{
+			if (p[j] == RC_MESH_NULL_IDX) break;
+			ioprintf(io, "f %d %d %d\n", p[0]+1, p[j-1]+1, p[j]+1); // +1: face indices are written 1-based
+		}
+	}
+	
+	return true;
+}
+
+bool duDumpPolyMeshDetailToObj(rcPolyMeshDetail& dmesh, duFileIO* io)
+{
+	if (!io)
+	{
+		printf("duDumpPolyMeshDetailToObj: input IO is null.\n"); 
+		return false;
+	}
+	if (!io->isWriting())
+	{
+		printf("duDumpPolyMeshDetailToObj: input IO not writing.\n"); 
+		return false;
+	}
+	// Writes dmesh as OBJ-style text: a header, one "v" line per vertex, then one "f" line per triangle.
+	ioprintf(io, "# Recast Navmesh\n");
+	ioprintf(io, "o NavMesh\n");
+	
+	ioprintf(io, "\n");
+
+	for (int i = 0; i < dmesh.nverts; ++i)
+	{
+		const float* v = &dmesh.verts[i*3];	// vertices are already floats and are written unchanged
+		ioprintf(io, "v %f %f %f\n", v[0],v[1],v[2]);
+	}
+	
+	ioprintf(io, "\n");
+	
+	for (int i = 0; i < dmesh.nmeshes; ++i)
+	{
+		const unsigned int* m = &dmesh.meshes[i*4];
+		const unsigned int bverts = m[0];
+		const unsigned int btris = m[2];
+		const unsigned int ntris = m[3];
+		const unsigned char* tris = &dmesh.tris[btris*4];
+		for (unsigned int j = 0; j < ntris; ++j)
+		{
+			ioprintf(io, "f %d %d %d\n",	// triangle indices are relative to bverts; +1 makes the output 1-based
+					(int)(bverts+tris[j*4+0])+1,
+					(int)(bverts+tris[j*4+1])+1,
+					(int)(bverts+tris[j*4+2])+1);
+		}
+	}
+	
+	return true;
+}
+
+static const int CSET_MAGIC = ('c' << 24) | ('s' << 16) | ('e' << 8) | 't';	// the characters "cset" packed into one int; first value in a contour-set dump
+static const int CSET_VERSION = 2;	// duReadContourSet rejects any other version
+
+bool duDumpContourSet(struct rcContourSet& cset, duFileIO* io)
+{
+	if (!io)
+	{
+		printf("duDumpContourSet: input IO is null.\n"); 
+		return false;
+	}
+	if (!io->isWriting())
+	{
+		printf("duDumpContourSet: input IO not writing.\n"); 
+		return false;
+	}
+	// Layout: magic, version, nconts, bmin, bmax, cs, ch, width, height, borderSize, then each contour. Values are raw in-memory bytes.
+	io->write(&CSET_MAGIC, sizeof(CSET_MAGIC));
+	io->write(&CSET_VERSION, sizeof(CSET_VERSION));
+
+	io->write(&cset.nconts, sizeof(cset.nconts));
+	
+	io->write(cset.bmin, sizeof(cset.bmin));
+	io->write(cset.bmax, sizeof(cset.bmax));
+	
+	io->write(&cset.cs, sizeof(cset.cs));
+	io->write(&cset.ch, sizeof(cset.ch));
+
+	io->write(&cset.width, sizeof(cset.width));
+	io->write(&cset.height, sizeof(cset.height));
+	io->write(&cset.borderSize, sizeof(cset.borderSize));
+
+	for (int i = 0; i < cset.nconts; ++i)
+	{
+		const rcContour& cont = cset.conts[i];
+		io->write(&cont.nverts, sizeof(cont.nverts));
+		io->write(&cont.nrverts, sizeof(cont.nrverts));
+		io->write(&cont.reg, sizeof(cont.reg));
+		io->write(&cont.area, sizeof(cont.area));
+		io->write(cont.verts, sizeof(int)*4*cont.nverts);	// 4 ints per vertex
+		io->write(cont.rverts, sizeof(int)*4*cont.nrverts);
+	}
+
+	return true;
+}
+
+bool duReadContourSet(struct rcContourSet& cset, duFileIO* io)
+{
+	if (!io)
+	{
+		printf("duReadContourSet: input IO is null.\n"); 
+		return false;
+	}
+	if (!io->isReading())
+	{
+		printf("duReadContourSet: input IO not reading.\n"); 
+		return false;
+	}
+	// Reads back what duDumpContourSet wrote: header check first, then the fields in the order they were written.
+	int magic = 0;
+	int version = 0;
+	
+	io->read(&magic, sizeof(magic));
+	io->read(&version, sizeof(version));
+	
+	if (magic != CSET_MAGIC)
+	{
+		printf("duReadContourSet: Bad voodoo.\n");
+		return false;
+	}
+	if (version != CSET_VERSION)
+	{
+		printf("duReadContourSet: Bad version.\n");
+		return false;
+	}
+	
+	io->read(&cset.nconts, sizeof(cset.nconts));
+	// NOTE(review): nconts comes straight from the input and is not range-checked before it sizes the allocation below.
+	cset.conts = (rcContour*)rcAlloc(sizeof(rcContour)*cset.nconts, RC_ALLOC_PERM);
+	if (!cset.conts)
+	{
+		printf("duReadContourSet: Could not alloc contours (%d)\n", cset.nconts);
+		return false;
+	}
+	memset(cset.conts, 0, sizeof(rcContour)*cset.nconts);	// zeroed so contours not yet read have null verts/rverts
+	
+	io->read(cset.bmin, sizeof(cset.bmin));
+	io->read(cset.bmax, sizeof(cset.bmax));
+	
+	io->read(&cset.cs, sizeof(cset.cs));
+	io->read(&cset.ch, sizeof(cset.ch));
+	
+	io->read(&cset.width, sizeof(cset.width));
+	io->read(&cset.height, sizeof(cset.height));
+	io->read(&cset.borderSize, sizeof(cset.borderSize));
+	
+	for (int i = 0; i < cset.nconts; ++i)
+	{
+		rcContour& cont = cset.conts[i];
+		io->read(&cont.nverts, sizeof(cont.nverts));
+		io->read(&cont.nrverts, sizeof(cont.nrverts));
+		io->read(&cont.reg, sizeof(cont.reg));
+		io->read(&cont.area, sizeof(cont.area));
+
+		cont.verts = (int*)rcAlloc(sizeof(int)*4*cont.nverts, RC_ALLOC_PERM);
+		if (!cont.verts)
+		{
+			printf("duReadContourSet: Could not alloc contour verts (%d)\n", cont.nverts);
+			return false;	// contours read so far stay attached to cset
+		}
+		cont.rverts = (int*)rcAlloc(sizeof(int)*4*cont.nrverts, RC_ALLOC_PERM);
+		if (!cont.rverts)
+		{
+			printf("duReadContourSet: Could not alloc contour rverts (%d)\n", cont.nrverts);
+			return false;
+		}
+		
+		io->read(cont.verts, sizeof(int)*4*cont.nverts);
+		io->read(cont.rverts, sizeof(int)*4*cont.nrverts);
+	}
+	
+	return true;
+}
+	
+
+static const int CHF_MAGIC = ('r' << 24) | ('c' << 16) | ('h' << 8) | 'f';	// the characters "rchf" packed into one int; first value in a compact-heightfield dump
+static const int CHF_VERSION = 3;	// duReadCompactHeightfield rejects any other version
+
+bool duDumpCompactHeightfield(struct rcCompactHeightfield& chf, duFileIO* io)
+{
+	if (!io)
+	{
+		printf("duDumpCompactHeightfield: input IO is null.\n"); 
+		return false;
+	}
+	if (!io->isWriting())
+	{
+		printf("duDumpCompactHeightfield: input IO not writing.\n"); 
+		return false;
+	}
+	// Layout: magic, version, the scalar fields below in order, a presence mask, then each array that is present. Values are raw in-memory bytes.
+	io->write(&CHF_MAGIC, sizeof(CHF_MAGIC));
+	io->write(&CHF_VERSION, sizeof(CHF_VERSION));
+	
+	io->write(&chf.width, sizeof(chf.width));
+	io->write(&chf.height, sizeof(chf.height));
+	io->write(&chf.spanCount, sizeof(chf.spanCount));
+
+	io->write(&chf.walkableHeight, sizeof(chf.walkableHeight));
+	io->write(&chf.walkableClimb, sizeof(chf.walkableClimb));
+	io->write(&chf.borderSize, sizeof(chf.borderSize));
+
+	io->write(&chf.maxDistance, sizeof(chf.maxDistance));
+	io->write(&chf.maxRegions, sizeof(chf.maxRegions));
+
+	io->write(chf.bmin, sizeof(chf.bmin));
+	io->write(chf.bmax, sizeof(chf.bmax));
+
+	io->write(&chf.cs, sizeof(chf.cs));
+	io->write(&chf.ch, sizeof(chf.ch));
+
+	int tmp = 0;	// presence mask for the optional arrays: 1=cells, 2=spans, 4=dist, 8=areas
+	if (chf.cells) tmp |= 1;
+	if (chf.spans) tmp |= 2;
+	if (chf.dist) tmp |= 4;
+	if (chf.areas) tmp |= 8;
+
+	io->write(&tmp, sizeof(tmp));
+
+	if (chf.cells)
+		io->write(chf.cells, sizeof(rcCompactCell)*chf.width*chf.height);	// one cell per grid column
+	if (chf.spans)
+		io->write(chf.spans, sizeof(rcCompactSpan)*chf.spanCount);
+	if (chf.dist)
+		io->write(chf.dist, sizeof(unsigned short)*chf.spanCount);
+	if (chf.areas)
+		io->write(chf.areas, sizeof(unsigned char)*chf.spanCount);
+
+	return true;
+}
+
+bool duReadCompactHeightfield(struct rcCompactHeightfield& chf, duFileIO* io)
+{
+	if (!io)
+	{
+		printf("duReadCompactHeightfield: input IO is null.\n"); 
+		return false;
+	}
+	if (!io->isReading())
+	{
+		printf("duReadCompactHeightfield: input IO not reading.\n"); 
+		return false;
+	}
+	// Reads back what duDumpCompactHeightfield wrote; every field must be read in exactly the order it was written.
+	int magic = 0;
+	int version = 0;
+	
+	io->read(&magic, sizeof(magic));
+	io->read(&version, sizeof(version));
+	
+	if (magic != CHF_MAGIC)
+	{
+		printf("duReadCompactHeightfield: Bad voodoo.\n");
+		return false;
+	}
+	if (version != CHF_VERSION)
+	{
+		printf("duReadCompactHeightfield: Bad version.\n");
+		return false;
+	}
+	
+	io->read(&chf.width, sizeof(chf.width));
+	io->read(&chf.height, sizeof(chf.height));
+	io->read(&chf.spanCount, sizeof(chf.spanCount));
+	
+	io->read(&chf.walkableHeight, sizeof(chf.walkableHeight));
+	io->read(&chf.walkableClimb, sizeof(chf.walkableClimb));
+	io->read(&chf.borderSize, sizeof(chf.borderSize));	// was io->write(): borderSize was never loaded and all later fields were read from the wrong offset
+
+	io->read(&chf.maxDistance, sizeof(chf.maxDistance));
+	io->read(&chf.maxRegions, sizeof(chf.maxRegions));
+	
+	io->read(chf.bmin, sizeof(chf.bmin));
+	io->read(chf.bmax, sizeof(chf.bmax));
+	
+	io->read(&chf.cs, sizeof(chf.cs));
+	io->read(&chf.ch, sizeof(chf.ch));
+	
+	int tmp = 0;	// presence mask for the optional arrays: 1=cells, 2=spans, 4=dist, 8=areas
+	io->read(&tmp, sizeof(tmp));
+	
+	if (tmp & 1)
+	{
+		chf.cells = (rcCompactCell*)rcAlloc(sizeof(rcCompactCell)*chf.width*chf.height, RC_ALLOC_PERM);
+		if (!chf.cells)
+		{
+			printf("duReadCompactHeightfield: Could not alloc cells (%d)\n", chf.width*chf.height);
+			return false;
+		}
+		io->read(chf.cells, sizeof(rcCompactCell)*chf.width*chf.height);
+	}
+	if (tmp & 2)
+	{
+		chf.spans = (rcCompactSpan*)rcAlloc(sizeof(rcCompactSpan)*chf.spanCount, RC_ALLOC_PERM);
+		if (!chf.spans)
+		{
+			printf("duReadCompactHeightfield: Could not alloc spans (%d)\n", chf.spanCount);
+			return false;
+		}
+		io->read(chf.spans, sizeof(rcCompactSpan)*chf.spanCount);
+	}
+	if (tmp & 4)
+	{
+		chf.dist = (unsigned short*)rcAlloc(sizeof(unsigned short)*chf.spanCount, RC_ALLOC_PERM);
+		if (!chf.dist)
+		{
+			printf("duReadCompactHeightfield: Could not alloc dist (%d)\n", chf.spanCount);
+			return false;
+		}
+		io->read(chf.dist, sizeof(unsigned short)*chf.spanCount);
+	}
+	if (tmp & 8)
+	{
+		chf.areas = (unsigned char*)rcAlloc(sizeof(unsigned char)*chf.spanCount, RC_ALLOC_PERM);
+		if (!chf.areas)
+		{
+			printf("duReadCompactHeightfield: Could not alloc areas (%d)\n", chf.spanCount);
+			return false;
+		}
+		io->read(chf.areas, sizeof(unsigned char)*chf.spanCount);
+	}
+	
+	return true;
+}
+
+
+static void logLine(rcContext& ctx, rcTimerLabel label, const char* name, const float pc)	// logs one timer as "<name>: <ms> (<percent>)"
+{
+	const int t = ctx.getAccumulatedTime(label);
+	if (t < 0) return;	// negative time: nothing is logged - presumably the timer was never run, confirm in rcContext
+	ctx.log(RC_LOG_PROGRESS, "%s:\t%.2fms\t(%.1f%%)", name, t/1000.0f, t*pc);	// t/1000 fills the "ms" column; t*pc is the percentage, pc being supplied by the caller
+}
+
+void duLogBuildTimes(rcContext& ctx, const int totalTimeUsec)	// logs every build timer with its share of totalTimeUsec, then the total
+{
+	const float pc = totalTimeUsec > 0 ? 100.0f / totalTimeUsec : 0.0f;
+	// pc turns a timer value into a percentage of the total; it is 0 when there is no positive total, so no inf/NaN is printed.
+	ctx.log(RC_LOG_PROGRESS, "Build Times");
+	logLine(ctx, RC_TIMER_RASTERIZE_TRIANGLES,		"- Rasterize", pc);
+	logLine(ctx, RC_TIMER_BUILD_COMPACTHEIGHTFIELD,	"- Build Compact", pc);
+	logLine(ctx, RC_TIMER_FILTER_BORDER,				"- Filter Border", pc);
+	logLine(ctx, RC_TIMER_FILTER_WALKABLE,			"- Filter Walkable", pc);
+	logLine(ctx, RC_TIMER_ERODE_AREA,				"- Erode Area", pc);
+	logLine(ctx, RC_TIMER_MEDIAN_AREA,				"- Median Area", pc);
+	logLine(ctx, RC_TIMER_MARK_BOX_AREA,				"- Mark Box Area", pc);
+	logLine(ctx, RC_TIMER_MARK_CONVEXPOLY_AREA,		"- Mark Convex Area", pc);
+	logLine(ctx, RC_TIMER_MARK_CYLINDER_AREA,		"- Mark Cylinder Area", pc);
+	logLine(ctx, RC_TIMER_BUILD_DISTANCEFIELD,		"- Build Distance Field", pc);
+	logLine(ctx, RC_TIMER_BUILD_DISTANCEFIELD_DIST,	"    - Distance", pc);
+	logLine(ctx, RC_TIMER_BUILD_DISTANCEFIELD_BLUR,	"    - Blur", pc);
+	logLine(ctx, RC_TIMER_BUILD_REGIONS,				"- Build Regions", pc);
+	logLine(ctx, RC_TIMER_BUILD_REGIONS_WATERSHED,	"    - Watershed", pc);
+	logLine(ctx, RC_TIMER_BUILD_REGIONS_EXPAND,		"      - Expand", pc);
+	logLine(ctx, RC_TIMER_BUILD_REGIONS_FLOOD,		"      - Find Basins", pc);
+	logLine(ctx, RC_TIMER_BUILD_REGIONS_FILTER,		"    - Filter", pc);
+	logLine(ctx, RC_TIMER_BUILD_LAYERS,				"- Build Layers", pc);
+	logLine(ctx, RC_TIMER_BUILD_CONTOURS,			"- Build Contours", pc);
+	logLine(ctx, RC_TIMER_BUILD_CONTOURS_TRACE,		"    - Trace", pc);
+	logLine(ctx, RC_TIMER_BUILD_CONTOURS_SIMPLIFY,	"    - Simplify", pc);
+	logLine(ctx, RC_TIMER_BUILD_POLYMESH,			"- Build Polymesh", pc);
+	logLine(ctx, RC_TIMER_BUILD_POLYMESHDETAIL,		"- Build Polymesh Detail", pc);
+	logLine(ctx, RC_TIMER_MERGE_POLYMESH,			"- Merge Polymeshes", pc);
+	logLine(ctx, RC_TIMER_MERGE_POLYMESHDETAIL,		"- Merge Polymesh Details", pc);
+	ctx.log(RC_LOG_PROGRESS, "=== TOTAL:\t%.2fms", totalTimeUsec/1000.0f);
+}
+
diff --git a/Engine/lib/recast/Detour/CMakeLists.txt b/Engine/lib/recast/Detour/CMakeLists.txt
new file mode 100644
index 000000000..e05ed11fa
--- /dev/null
+++ b/Engine/lib/recast/Detour/CMakeLists.txt
@@ -0,0 +1,24 @@
+cmake_minimum_required(VERSION 2.6)
+# Detour library: sources and headers are listed explicitly, then built as a single target.
+set(detour_SRCS
+  Source/DetourAlloc.cpp
+  Source/DetourCommon.cpp
+  Source/DetourNavMesh.cpp
+  Source/DetourNavMeshBuilder.cpp
+  Source/DetourNavMeshQuery.cpp
+  Source/DetourNode.cpp
+)
+# Headers are part of the target's file list - presumably so IDE generators show them.
+set(detour_HDRS
+  Include/DetourAlloc.h
+  Include/DetourAssert.h
+  Include/DetourCommon.h
+  Include/DetourNavMesh.h
+  Include/DetourNavMeshBuilder.h
+  Include/DetourNavMeshQuery.h
+  Include/DetourNode.h
+)
+# Directory-scoped include path; target_include_directories() does not exist in CMake 2.6.
+include_directories(Include)
+# No STATIC/SHARED keyword: the library type follows BUILD_SHARED_LIBS.
+add_library(Detour ${detour_SRCS} ${detour_HDRS})
diff --git a/Engine/lib/recast/Detour/Include/DetourAlloc.h b/Engine/lib/recast/Detour/Include/DetourAlloc.h
new file mode 100644
index 000000000..e814b62a7
--- /dev/null
+++ b/Engine/lib/recast/Detour/Include/DetourAlloc.h
@@ -0,0 +1,59 @@
+//
+// Copyright (c) 2009-2010 Mikko Mononen memon@inside.org
+//
+// This software is provided 'as-is', without any express or implied
+// warranty.  In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source distribution.
+//
+
+#ifndef DETOURALLOCATOR_H
+#define DETOURALLOCATOR_H
+
+/// Provides hint values to the memory allocator on how long the
+/// memory is expected to be used.
+enum dtAllocHint
+{
+	DT_ALLOC_PERM,		///< Memory persists after a function call.
+	DT_ALLOC_TEMP		///< Memory used temporarily within a function.
+};
+
+/// A memory allocation function.
+///  @param[in]		size	The size, in bytes of memory, to allocate.
+///  @param[in]		hint	A hint to the allocator on how long the memory is expected to be in use.
+///  @return A pointer to the beginning of the allocated memory block, or null if the allocation failed.
+///  @see dtAllocSetCustom
+typedef void* (dtAllocFunc)(int size, dtAllocHint hint);
+
+/// A memory deallocation function.
+///  @param[in]		ptr		A pointer to a memory block previously allocated using #dtAllocFunc.
+/// @see dtAllocSetCustom
+typedef void (dtFreeFunc)(void* ptr);
+
+/// Sets the base custom allocation functions to be used by Detour.
+///  @param[in]		allocFunc	The memory allocation function to be used by #dtAlloc
+///  @param[in]		freeFunc	The memory de-allocation function to be used by #dtFree
+void dtAllocSetCustom(dtAllocFunc *allocFunc, dtFreeFunc *freeFunc);
+
+/// Allocates a memory block.
+///  @param[in]		size	The size, in bytes of memory, to allocate.
+///  @param[in]		hint	A hint to the allocator on how long the memory is expected to be in use.
+///  @return A pointer to the beginning of the allocated memory block, or null if the allocation failed.
+/// @see dtFree
+void* dtAlloc(int size, dtAllocHint hint);
+
+/// Deallocates a memory block.
+///  @param[in]		ptr		A pointer to a memory block previously allocated using #dtAlloc.
+/// @see dtAlloc
+void dtFree(void* ptr);
+
+#endif
diff --git a/Engine/lib/recast/Detour/Include/DetourAssert.h b/Engine/lib/recast/Detour/Include/DetourAssert.h
new file mode 100644
index 000000000..3cf652288
--- /dev/null
+++ b/Engine/lib/recast/Detour/Include/DetourAssert.h
@@ -0,0 +1,33 @@
+//
+// Copyright (c) 2009-2010 Mikko Mononen memon@inside.org
+//
+// This software is provided 'as-is', without any express or implied
+// warranty.  In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source distribution.
+//
+
+#ifndef DETOURASSERT_H
+#define DETOURASSERT_H
+
+// Note: This header file's only purpose is to define the dtAssert macro.
+// Feel free to change the file and include your own implementation instead.
+
+#ifdef NDEBUG
+// From http://cnicholson.net/2009/02/stupid-c-tricks-adventures-in-assert/
+#	define dtAssert(x) do { (void)sizeof(x); } while((void)(__LINE__==-1),false)  
+#else
+#	include <assert.h> 
+#	define dtAssert assert
+#endif
+
+#endif // DETOURASSERT_H
diff --git a/Engine/lib/recast/Detour/Include/DetourCommon.h b/Engine/lib/recast/Detour/Include/DetourCommon.h
new file mode 100644
index 000000000..34f46d8d3
--- /dev/null
+++ b/Engine/lib/recast/Detour/Include/DetourCommon.h
@@ -0,0 +1,526 @@
+//
+// Copyright (c) 2009-2010 Mikko Mononen memon@inside.org
+//
+// This software is provided 'as-is', without any express or implied
+// warranty.  In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source distribution.
+//
+
+#ifndef DETOURCOMMON_H
+#define DETOURCOMMON_H
+
+/**
+@defgroup detour Detour
+
+Members in this module are used to create, manipulate, and query navigation 
+meshes.
+
+@note This is a summary list of members.  Use the index or search 
+feature to find minor members.
+*/
+
+/// @name General helper functions
+/// @{
+
+/// Swaps the values of the two parameters.
+///  @param[in,out]	a	Value A
+///  @param[in,out]	b	Value B
+template<class T> inline void dtSwap(T& a, T& b) { T t = a; a = b; b = t; }
+
+/// Returns the minimum of two values.
+///  @param[in]		a	Value A
+///  @param[in]		b	Value B
+///  @return The minimum of the two values.
+template<class T> inline T dtMin(T a, T b) { return a < b ? a : b; }
+
+/// Returns the maximum of two values.
+///  @param[in]		a	Value A
+///  @param[in]		b	Value B
+///  @return The maximum of the two values.
+template<class T> inline T dtMax(T a, T b) { return a > b ? a : b; }
+
+/// Returns the absolute value.
+///  @param[in]		a	The value.
+///  @return The absolute value of the specified value.
+template<class T> inline T dtAbs(T a) { return a < 0 ? -a : a; }
+
+/// Returns the square of the value.
+///  @param[in]		a	The value.
+///  @return The square of the value.
+template<class T> inline T dtSqr(T a) { return a*a; }
+
+/// Clamps the value to the specified range.
+///  @param[in]		v	The value to clamp.
+///  @param[in]		mn	The minimum permitted return value.
+///  @param[in]		mx	The maximum permitted return value.
+///  @return The value, clamped to the specified range.
+template<class T> inline T dtClamp(T v, T mn, T mx) { return v < mn ? mn : (v > mx ? mx : v); }
+
+/// Returns the square root of the value.
+///  @param[in]		x	The value.
+///  @return The square root of the value.
+float dtSqrt(float x);
+
+/// @}
+/// @name Vector helper functions.
+/// @{
+
+/// Derives the cross product of two vectors. (@p v1 x @p v2)
+///  @param[out]	dest	The cross product. [(x, y, z)]
+///  @param[in]		v1		A Vector [(x, y, z)]
+///  @param[in]		v2		A vector [(x, y, z)]
+inline void dtVcross(float* dest, const float* v1, const float* v2)
+{
+	dest[0] = v1[1]*v2[2] - v1[2]*v2[1];
+	dest[1] = v1[2]*v2[0] - v1[0]*v2[2];
+	dest[2] = v1[0]*v2[1] - v1[1]*v2[0]; 
+}
+
+/// Derives the dot product of two vectors. (@p v1 . @p v2)
+///  @param[in]		v1	A Vector [(x, y, z)]
+///  @param[in]		v2	A vector [(x, y, z)]
+/// @return The dot product.
+inline float dtVdot(const float* v1, const float* v2)
+{
+	return v1[0]*v2[0] + v1[1]*v2[1] + v1[2]*v2[2];
+}
+
+/// Performs a scaled vector addition. (@p v1 + (@p v2 * @p s))
+///  @param[out]	dest	The result vector. [(x, y, z)]
+///  @param[in]		v1		The base vector. [(x, y, z)]
+///  @param[in]		v2		The vector to scale and add to @p v1. [(x, y, z)]
+///  @param[in]		s		The amount to scale @p v2 by before adding to @p v1.
+inline void dtVmad(float* dest, const float* v1, const float* v2, const float s)
+{
+	dest[0] = v1[0]+v2[0]*s;
+	dest[1] = v1[1]+v2[1]*s;
+	dest[2] = v1[2]+v2[2]*s;
+}
+
+/// Performs a linear interpolation between two vectors. (@p v1 toward @p v2)
+///  @param[out]	dest	The result vector. [(x, y, z)]
+///  @param[in]		v1		The starting vector.
+///  @param[in]		v2		The destination vector.
+///	 @param[in]		t		The interpolation factor. [Limits: 0 <= value <= 1.0]
+inline void dtVlerp(float* dest, const float* v1, const float* v2, const float t)
+{
+	dest[0] = v1[0]+(v2[0]-v1[0])*t;
+	dest[1] = v1[1]+(v2[1]-v1[1])*t;
+	dest[2] = v1[2]+(v2[2]-v1[2])*t;
+}
+
+/// Performs a vector addition. (@p v1 + @p v2)
+///  @param[out]	dest	The result vector. [(x, y, z)]
+///  @param[in]		v1		The base vector. [(x, y, z)]
+///  @param[in]		v2		The vector to add to @p v1. [(x, y, z)]
+inline void dtVadd(float* dest, const float* v1, const float* v2)
+{
+	dest[0] = v1[0]+v2[0];
+	dest[1] = v1[1]+v2[1];
+	dest[2] = v1[2]+v2[2];
+}
+
+/// Performs a vector subtraction. (@p v1 - @p v2)
+///  @param[out]	dest	The result vector. [(x, y, z)]
+///  @param[in]		v1		The base vector. [(x, y, z)]
+///  @param[in]		v2		The vector to subtract from @p v1. [(x, y, z)]
+inline void dtVsub(float* dest, const float* v1, const float* v2)
+{
+	dest[0] = v1[0]-v2[0];
+	dest[1] = v1[1]-v2[1];
+	dest[2] = v1[2]-v2[2];
+}
+
+/// Scales the vector by the specified value. (@p v * @p t)
+///  @param[out]	dest	The result vector. [(x, y, z)]
+///  @param[in]		v		The vector to scale. [(x, y, z)]
+///  @param[in]		t		The scaling factor.
+inline void dtVscale(float* dest, const float* v, const float t)
+{
+	dest[0] = v[0]*t;
+	dest[1] = v[1]*t;
+	dest[2] = v[2]*t;
+}
+
+/// Selects the minimum value of each element from the specified vectors.
+///  @param[in,out]	mn	A vector.  (Will be updated with the result.) [(x, y, z)]
+///  @param[in]	v	A vector. [(x, y, z)]
+inline void dtVmin(float* mn, const float* v)
+{
+	mn[0] = dtMin(mn[0], v[0]);
+	mn[1] = dtMin(mn[1], v[1]);
+	mn[2] = dtMin(mn[2], v[2]);
+}
+
+/// Selects the maximum value of each element from the specified vectors.
+///  @param[in,out]	mx	A vector.  (Will be updated with the result.) [(x, y, z)]
+///  @param[in]		v	A vector. [(x, y, z)]
+inline void dtVmax(float* mx, const float* v)
+{
+	mx[0] = dtMax(mx[0], v[0]);
+	mx[1] = dtMax(mx[1], v[1]);
+	mx[2] = dtMax(mx[2], v[2]);
+}
+
+/// Sets the vector elements to the specified values.
+///  @param[out]	dest	The result vector. [(x, y, z)]
+///  @param[in]		x		The x-value of the vector.
+///  @param[in]		y		The y-value of the vector.
+///  @param[in]		z		The z-value of the vector.
+inline void dtVset(float* dest, const float x, const float y, const float z)
+{
+	dest[0] = x; dest[1] = y; dest[2] = z;
+}
+
+/// Performs a vector copy.
+///  @param[out]	dest	The result. [(x, y, z)]
+///  @param[in]		a		The vector to copy. [(x, y, z)]
+inline void dtVcopy(float* dest, const float* a)
+{
+	dest[0] = a[0];
+	dest[1] = a[1];
+	dest[2] = a[2];
+}
+
+/// Derives the scalar length of the vector.
+///  @param[in]		v The vector. [(x, y, z)]
+/// @return The scalar length of the vector.
+inline float dtVlen(const float* v)
+{
+	return dtSqrt(v[0]*v[0] + v[1]*v[1] + v[2]*v[2]);
+}
+
+/// Derives the square of the scalar length of the vector. (len * len)
+///  @param[in]		v The vector. [(x, y, z)]
+/// @return The square of the scalar length of the vector.
+inline float dtVlenSqr(const float* v)
+{
+	return v[0]*v[0] + v[1]*v[1] + v[2]*v[2];
+}
+
+/// Returns the distance between two points.
+///  @param[in]		v1	A point. [(x, y, z)]
+///  @param[in]		v2	A point. [(x, y, z)]
+/// @return The distance between the two points.
+inline float dtVdist(const float* v1, const float* v2)
+{
+	const float dx = v2[0] - v1[0];
+	const float dy = v2[1] - v1[1];
+	const float dz = v2[2] - v1[2];
+	return dtSqrt(dx*dx + dy*dy + dz*dz);
+}
+
+/// Returns the square of the distance between two points. (No square root is taken.)
+///  @param[in]		v1	A point. [(x, y, z)]
+///  @param[in]		v2	A point. [(x, y, z)]
+/// @return The square of the distance between the two points.
+inline float dtVdistSqr(const float* v1, const float* v2)
+{
+	const float dx = v2[0] - v1[0];
+	const float dy = v2[1] - v1[1];
+	const float dz = v2[2] - v1[2];
+	return dx*dx + dy*dy + dz*dz;
+}
+
+/// Derives the distance between the specified points on the xz-plane.
+///  @param[in]		v1	A point. [(x, y, z)]
+///  @param[in]		v2	A point. [(x, y, z)]
+/// @return The distance between the points on the xz-plane.
+///
+/// The points are projected onto the xz-plane, so the y-values are ignored.
+inline float dtVdist2D(const float* v1, const float* v2)
+{
+	const float dx = v2[0] - v1[0];
+	const float dz = v2[2] - v1[2];
+	return dtSqrt(dx*dx + dz*dz);
+}
+
+/// Derives the square of the distance between the specified points on the xz-plane.
+///  @param[in]		v1	A point. [(x, y, z)]
+///  @param[in]		v2	A point. [(x, y, z)]
+/// @return The square of the distance between the points on the xz-plane. (The y-values are ignored.)
+inline float dtVdist2DSqr(const float* v1, const float* v2)
+{
+	const float dx = v2[0] - v1[0];
+	const float dz = v2[2] - v1[2];
+	return dx*dx + dz*dz;
+}
+
+/// Normalizes the vector in place by scaling each element by the same factor.
+///  @param[in,out]	v	The vector to normalize; there is no zero-length guard, so pass a non-zero vector. [(x, y, z)]
+inline void dtVnormalize(float* v)
+{
+	float d = 1.0f / dtSqrt(dtSqr(v[0]) + dtSqr(v[1]) + dtSqr(v[2]));	// reciprocal length (dtSqr/dtSqrt are defined elsewhere); divides by zero for a zero vector
+	v[0] *= d;
+	v[1] *= d;
+	v[2] *= d;
+}
+
+/// Performs a 'sloppy' colocation check of the specified points.
+///  @param[in]		p0	A point. [(x, y, z)]
+///  @param[in]		p1	A point. [(x, y, z)]
+/// @return True if the points are considered to be at the same location.
+///
+/// Basically, this function will return true if the specified points are
+/// close enough to each other to be considered colocated.
+inline bool dtVequal(const float* p0, const float* p1)
+{
+	static const float thr = dtSqr(1.0f/16384.0f);	// tolerance passed through dtSqr so it can be compared against a squared distance
+	const float d = dtVdistSqr(p0, p1);	// squared distance; no square root needed for the comparison
+	return d < thr;
+}
+
+/// Derives the dot product of two vectors on the xz-plane. (@p u . @p v)
+///  @param[in]		u		A vector [(x, y, z)]
+///  @param[in]		v		A vector [(x, y, z)]
+/// @return The dot product on the xz-plane. (ux*vx + uz*vz)
+///
+/// The vectors are projected onto the xz-plane, so the y-values are ignored.
+inline float dtVdot2D(const float* u, const float* v)
+{
+	return u[0]*v[0] + u[2]*v[2];
+}
+
+/// Derives the xz-plane 2D perp product of the two vectors. (uz*vx - ux*vz)
+///  @param[in]		u		The LHV vector [(x, y, z)]
+///  @param[in]		v		The RHV vector [(x, y, z)]
+/// @return The perp product on the xz-plane. (Swapping @p u and @p v negates the result.)
+///
+/// The vectors are projected onto the xz-plane, so the y-values are ignored.
+inline float dtVperp2D(const float* u, const float* v)
+{
+	return u[2]*v[0] - u[0]*v[2];
+}
+
+/// @}
+/// @name Computational geometry helper functions.
+/// @{
+
+/// Derives twice the signed xz-plane area of the triangle ABC, or the relationship of line AB to point C.
+///  @param[in]		a		Vertex A. [(x, y, z)]
+///  @param[in]		b		Vertex B. [(x, y, z)]
+///  @param[in]		c		Vertex C. [(x, y, z)]
+/// @return Twice the signed xz-plane area of the triangle. (Zero if the vertices are collinear on the xz-plane.)
+inline float dtTriArea2D(const float* a, const float* b, const float* c)
+{
+	const float abx = b[0] - a[0];
+	const float abz = b[2] - a[2];
+	const float acx = c[0] - a[0];
+	const float acz = c[2] - a[2];
+	return acx*abz - abx*acz;	// 2D cross product of AC and AB on the xz-plane; the y-values are never read
+}
+
+/// Determines if two axis-aligned bounding boxes with unsigned short coordinates overlap.
+///  @param[in]		amin	Minimum bounds of box A. [(x, y, z)]
+///  @param[in]		amax	Maximum bounds of box A. [(x, y, z)]
+///  @param[in]		bmin	Minimum bounds of box B. [(x, y, z)]
+///  @param[in]		bmax	Maximum bounds of box B. [(x, y, z)]
+/// @return True if the two AABB's overlap. (Boxes that merely touch on a face count as overlapping.)
+/// @see dtOverlapBounds
+inline bool dtOverlapQuantBounds(const unsigned short amin[3], const unsigned short amax[3],
+								 const unsigned short bmin[3], const unsigned short bmax[3])
+{
+	bool overlap = true;	// cleared as soon as any one axis shows a gap between the boxes
+	overlap = (amin[0] > bmax[0] || amax[0] < bmin[0]) ? false : overlap;
+	overlap = (amin[1] > bmax[1] || amax[1] < bmin[1]) ? false : overlap;
+	overlap = (amin[2] > bmax[2] || amax[2] < bmin[2]) ? false : overlap;
+	return overlap;
+}
+
+/// Determines if two axis-aligned bounding boxes with float coordinates overlap.
+///  @param[in]		amin	Minimum bounds of box A. [(x, y, z)]
+///  @param[in]		amax	Maximum bounds of box A. [(x, y, z)]
+///  @param[in]		bmin	Minimum bounds of box B. [(x, y, z)]
+///  @param[in]		bmax	Maximum bounds of box B. [(x, y, z)]
+/// @return True if the two AABB's overlap. (Boxes that merely touch on a face count as overlapping.)
+/// @see dtOverlapQuantBounds
+inline bool dtOverlapBounds(const float* amin, const float* amax,
+							const float* bmin, const float* bmax)
+{
+	bool overlap = true;	// cleared as soon as any one axis shows a gap between the boxes
+	overlap = (amin[0] > bmax[0] || amax[0] < bmin[0]) ? false : overlap;
+	overlap = (amin[1] > bmax[1] || amax[1] < bmin[1]) ? false : overlap;
+	overlap = (amin[2] > bmax[2] || amax[2] < bmin[2]) ? false : overlap;
+	return overlap;
+}
+
+/// Derives the closest point on a triangle from the specified reference point.
+///  @param[out]	closest	The closest point on the triangle. [(x, y, z)]
+///  @param[in]		p		The reference point from which to test. [(x, y, z)]
+///  @param[in]		a		Vertex A of triangle ABC. [(x, y, z)]
+///  @param[in]		b		Vertex B of triangle ABC. [(x, y, z)]
+///  @param[in]		c		Vertex C of triangle ABC. [(x, y, z)]
+void dtClosestPtPointTriangle(float* closest, const float* p,
+							  const float* a, const float* b, const float* c);
+
+/// Derives the y-axis height of the closest point on the triangle from the specified reference point.
+///  @param[in]		p		The reference point from which to test. [(x, y, z)]
+///  @param[in]		a		Vertex A of triangle ABC. [(x, y, z)]
+///  @param[in]		b		Vertex B of triangle ABC. [(x, y, z)]
+///  @param[in]		c		Vertex C of triangle ABC. [(x, y, z)]
+///  @param[out]	h		The resulting height. (NOTE(review): the bool return is undocumented; presumably it reports whether @p h was set. Confirm in DetourCommon.cpp.)
+bool dtClosestHeightPointTriangle(const float* p, const float* a, const float* b, const float* c, float& h);
+
+bool dtIntersectSegmentPoly2D(const float* p0, const float* p1,	// the segment end points
+							  const float* verts, int nverts,	// the polygon vertices and how many there are
+							  float& tmin, float& tmax,	// outputs; presumably the entry/exit parameters along the segment - TODO confirm in DetourCommon.cpp
+							  int& segMin, int& segMax);	// outputs; presumably the polygon edges hit at tmin/tmax - TODO confirm
+
+/// Determines if the specified point is inside the convex polygon on the xz-plane.
+///  @param[in]		pt		The point to check. [(x, y, z)]
+///  @param[in]		verts	The polygon vertices. [(x, y, z) * @p nverts]
+///  @param[in]		nverts	The number of vertices in @p verts. [Limit: >= 3]
+/// @return True if the point is inside the polygon.
+bool dtPointInPolygon(const float* pt, const float* verts, const int nverts);
+
+bool dtDistancePtPolyEdgesSqr(const float* pt, const float* verts, const int nverts,	// a point, the polygon vertices and their count
+							float* ed, float* et);	// output arrays; presumably one squared distance and one edge parameter per polygon edge - TODO confirm
+
+float dtDistancePtSegSqr2D(const float* pt, const float* p, const float* q, float& t);	// presumably the squared xz-plane distance from pt to segment pq, with t the position along the segment - TODO confirm
+
+/// Derives the centroid of a convex polygon.
+///  @param[out]	tc		The centroid of the polygon. [(x, y, z)]
+///  @param[in]		idx		The polygon indices. [(vertIndex) * @p nidx]
+///  @param[in]		nidx	The number of indices in the polygon. [Limit: >= 3]
+///  @param[in]		verts	The polygon vertices, looked up through @p idx. [(x, y, z) * vertCount]
+void dtCalcPolyCenter(float* tc, const unsigned short* idx, int nidx, const float* verts);
+
+/// Determines if the two convex polygons overlap on the xz-plane.
+///  @param[in]		polya		Polygon A vertices.	[(x, y, z) * @p npolya]
+///  @param[in]		npolya		The number of vertices in polygon A.
+///  @param[in]		polyb		Polygon B vertices.	[(x, y, z) * @p npolyb]
+///  @param[in]		npolyb		The number of vertices in polygon B.
+/// @return True if the two polygons overlap on the xz-plane.
+bool dtOverlapPolyPoly2D(const float* polya, const int npolya,
+						 const float* polyb, const int npolyb);
+
+/// @}
+/// @name Miscellaneous functions.
+/// @{
+
+inline unsigned int dtNextPow2(unsigned int v)	// rounds v up to a power of two; v == 0 yields 0
+{
+	v--;	// step back one so an exact power of two maps to itself
+	v |= v >> 1;	// the shift cascade copies the highest set bit into every lower bit...
+	v |= v >> 2;
+	v |= v >> 4;
+	v |= v >> 8;
+	v |= v >> 16;	// ...covering 32 bits in total (assumes unsigned int is no wider than 32 bits)
+	v++;	// all ones below the top bit, plus one, is the next power of two
+	return v;
+}
+
+inline unsigned int dtIlog2(unsigned int v)	// index of the highest set bit, i.e. floor(log2(v)); returns 0 for v == 0
+{
+	unsigned int r;
+	unsigned int shift;
+	r = (v > 0xffff) << 4; v >>= r;	// highest bit in the upper 16 bits? record 16 and shift it down
+	shift = (v > 0xff) << 3; v >>= shift; r |= shift;	// same test for a step of 8,
+	shift = (v > 0xf) << 2; v >>= shift; r |= shift;	// then 4,
+	shift = (v > 0x3) << 1; v >>= shift; r |= shift;	// then 2
+	r |= (v >> 1);	// v is now in 0..3; its bit 1 supplies the last result bit
+	return r;
+}
+
+inline int dtAlign4(int x) { return (x+3) & ~3; }	// rounds x up to a multiple of 4 by clearing the two low bits of x+3
+
+inline int dtOppositeTile(int side) { return (side+4) & 0x7; }	// (side + 4) modulo 8; presumably sides are numbered 0..7 so the opposite is four steps away
+
+inline void dtSwapByte(unsigned char* a, unsigned char* b)	// exchanges the two bytes pointed to by a and b
+{
+	unsigned char tmp = *a;
+	*a = *b;
+	*b = tmp;
+}
+
+inline void dtSwapEndian(unsigned short* v)	// reverses the byte order of *v in place
+{
+	unsigned char* x = (unsigned char*)v;	// view the value as raw bytes
+	dtSwapByte(x+0, x+1);	// swaps exactly 2 bytes; assumes sizeof(unsigned short) == 2
+}
+
+inline void dtSwapEndian(short* v)	// reverses the byte order of *v in place
+{
+	unsigned char* x = (unsigned char*)v;	// view the value as raw bytes
+	dtSwapByte(x+0, x+1);	// swaps exactly 2 bytes; assumes sizeof(short) == 2
+}
+
+inline void dtSwapEndian(unsigned int* v)	// reverses the byte order of *v in place
+{
+	unsigned char* x = (unsigned char*)v;	// view the value as raw bytes
+	dtSwapByte(x+0, x+3); dtSwapByte(x+1, x+2);	// outer pair, then inner pair; assumes sizeof(unsigned int) == 4
+}
+
+inline void dtSwapEndian(int* v)	// reverses the byte order of *v in place
+{
+	unsigned char* x = (unsigned char*)v;	// view the value as raw bytes
+	dtSwapByte(x+0, x+3); dtSwapByte(x+1, x+2);	// outer pair, then inner pair; assumes sizeof(int) == 4
+}
+
+inline void dtSwapEndian(float* v)	// reverses the byte order of *v in place
+{
+	unsigned char* x = (unsigned char*)v;	// view the value as raw bytes
+	dtSwapByte(x+0, x+3); dtSwapByte(x+1, x+2);	// outer pair, then inner pair; assumes sizeof(float) == 4
+}
+
+void dtRandomPointInConvexPoly(const float* pts, const int npts, float* areas,	// polygon points and their count; areas is writable, presumably caller-provided scratch space - TODO confirm
+							   const float s, const float t, float* out);	// s and t are presumably random values in [0, 1]; out receives the point - TODO confirm
+
+/// @}
+
+#endif // DETOURCOMMON_H
+
+///////////////////////////////////////////////////////////////////////////
+
+// This section contains detailed documentation for members that don't have
+// a source file. It reduces clutter in the main section of the header.
+
+/**
+
+@fn float dtTriArea2D(const float* a, const float* b, const float* c)
+@par
+
+The vertices are projected onto the xz-plane, so the y-values are ignored.
+
+This is a low cost function that can be used for various purposes.  Its main purpose
+is for point/line relationship testing.
+
+In all cases: A value of zero indicates that all vertices are collinear or represent the same point.
+(On the xz-plane.)
+
+When used for point/line relationship tests, AB usually represents a line against which
+the C point is to be tested.  In this case:
+
+A positive value indicates that point C is to the left of line AB, looking from A toward B.<br/>
+A negative value indicates that point C is to the right of line AB, looking from A toward B.
+
+When used for evaluating a triangle:
+
+The absolute value of the return value is two times the area of the triangle when it is
+projected onto the xz-plane.
+
+A positive return value indicates:
+
+<ul>
+<li>The vertices are wrapped in the normal Detour wrap direction.</li>
+<li>The triangle's 3D face normal is in the general up direction.</li>
+</ul>
+
+A negative return value indicates:
+
+<ul>
+<li>The vertices are reverse wrapped. (Wrapped opposite the normal Detour wrap direction.)</li>
+<li>The triangle's 3D face normal is in the general down direction.</li>
+</ul>
+
+*/
diff --git a/Engine/lib/recast/Detour/Include/DetourNavMesh.h b/Engine/lib/recast/Detour/Include/DetourNavMesh.h
new file mode 100644
index 000000000..d4fbe9674
--- /dev/null
+++ b/Engine/lib/recast/Detour/Include/DetourNavMesh.h
@@ -0,0 +1,682 @@
+//
+// Copyright (c) 2009-2010 Mikko Mononen memon@inside.org
+//
+// This software is provided 'as-is', without any express or implied
+// warranty.  In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source distribution.
+//
+
+#ifndef DETOURNAVMESH_H
+#define DETOURNAVMESH_H
+
+#include "DetourAlloc.h"
+#include "DetourStatus.h"
+
+// Note: If you want to use 64-bit refs, change the types of both dtPolyRef & dtTileRef.
+// It is also recommended that you change dtHashRef() to a proper 64-bit hash.
+
+/// A handle to a polygon within a navigation mesh tile. (Packs salt, tile index and polygon index; see dtNavMesh::encodePolyId.)
+/// @ingroup detour
+typedef unsigned int dtPolyRef;
+
+/// A handle to a tile within a navigation mesh. (Zero is returned by dtNavMesh::getTileRefAt when there is no tile.)
+/// @ingroup detour
+typedef unsigned int dtTileRef;
+
+/// The maximum number of vertices per navigation polygon. (Sizes dtPoly::verts and dtPoly::neis.)
+/// @ingroup detour
+static const int DT_VERTS_PER_POLYGON = 6;
+
+/// @{
+/// @name Tile Serialization Constants
+/// These constants are used to detect whether a navigation tile's data
+/// and state format is compatible with the current build.
+///
+
+/// A magic number used to detect compatibility of navigation tile data.
+static const int DT_NAVMESH_MAGIC = 'D'<<24 | 'N'<<16 | 'A'<<8 | 'V';	// the characters D, N, A, V packed from the most significant byte down
+
+/// A version number used to detect compatibility of navigation tile data.
+static const int DT_NAVMESH_VERSION = 7;
+
+/// A magic number used to detect the compatibility of navigation tile states.
+static const int DT_NAVMESH_STATE_MAGIC = 'D'<<24 | 'N'<<16 | 'M'<<8 | 'S';	// the characters D, N, M, S packed from the most significant byte down
+
+/// A version number used to detect compatibility of navigation tile states.
+static const int DT_NAVMESH_STATE_VERSION = 1;
+
+/// @}
+
+/// A flag that indicates that an entity links to an external entity.
+/// (E.g. A polygon edge is a portal that links to another polygon.)
+static const unsigned short DT_EXT_LINK = 0x8000;
+
+/// A value that indicates the entity does not link to anything. (See dtPoly::firstLink.)
+static const unsigned int DT_NULL_LINK = 0xffffffff;
+
+/// A flag that indicates that an off-mesh connection can be traversed in both directions. (Is bidirectional.)
+static const unsigned int DT_OFFMESH_CON_BIDIR = 1;
+
+/// The maximum number of user defined area ids. (Matches the 6-bit area field masked with 0x3f in dtPoly.)
+/// @ingroup detour
+static const int DT_MAX_AREAS = 64;
+
+/// Tile flags used for various functions and fields. (Also stored in dtMeshTile::flags.)
+/// For an example, see dtNavMesh::addTile().
+enum dtTileFlags
+{
+	/// The navigation mesh owns the tile memory and is responsible for freeing it.
+	DT_TILE_FREE_DATA = 0x01,
+};
+
+/// Vertex flags returned by dtNavMeshQuery::findStraightPath. (Each value is a distinct bit.)
+enum dtStraightPathFlags
+{
+	DT_STRAIGHTPATH_START = 0x01,				///< The vertex is the start position in the path.
+	DT_STRAIGHTPATH_END = 0x02,					///< The vertex is the end position in the path.
+	DT_STRAIGHTPATH_OFFMESH_CONNECTION = 0x04,	///< The vertex is the start of an off-mesh connection.
+};
+
+/// Values representing the type of a navigation mesh polygon. (Stored in the high 2 bits of dtPoly::areaAndtype.)
+enum dtPolyTypes
+{
+	/// The polygon is a standard convex polygon that is part of the surface of the mesh.
+	DT_POLYTYPE_GROUND = 0,
+	/// The polygon is an off-mesh connection consisting of two vertices.
+	DT_POLYTYPE_OFFMESH_CONNECTION = 1,
+};
+
+
+/// Defines a polygon within a dtMeshTile object.
+/// @ingroup detour
+struct dtPoly
+{
+	/// Index to first link in linked list. (Or #DT_NULL_LINK if there is no link.)
+	unsigned int firstLink;
+
+	/// The indices of the polygon's vertices.
+	/// The actual vertices are located in dtMeshTile::verts.
+	unsigned short verts[DT_VERTS_PER_POLYGON];
+
+	/// Packed data representing neighbor polygon references and flags for each edge.
+	unsigned short neis[DT_VERTS_PER_POLYGON];
+
+	/// The user defined polygon flags.
+	unsigned short flags;
+
+	/// The number of vertices in the polygon.
+	unsigned char vertCount;
+
+	/// The bit packed area id (low 6 bits) and polygon type (high 2 bits).
+	/// @note Use the structure's set and get methods to access this value.
+	unsigned char areaAndtype;
+
+	/// Sets the user defined area id; only the low 6 bits of @p a are kept. [Limit: < #DT_MAX_AREAS]
+	inline void setArea(unsigned char a) { areaAndtype = (areaAndtype & 0xc0) | (a & 0x3f); }
+
+	/// Sets the polygon type in the high 2 bits, leaving the area id untouched. (See: #dtPolyTypes.)
+	inline void setType(unsigned char t) { areaAndtype = (areaAndtype & 0x3f) | (t << 6); }
+
+	/// Gets the user defined area id. (The low 6 bits of areaAndtype.)
+	inline unsigned char getArea() const { return areaAndtype & 0x3f; }
+
+	/// Gets the polygon type. (The high 2 bits of areaAndtype; see: #dtPolyTypes)
+	inline unsigned char getType() const { return areaAndtype >> 6; }
+};
+
+/// Defines the location of one polygon's detail sub-mesh data within a dtMeshTile.
+struct dtPolyDetail
+{
+	unsigned int vertBase;			///< The offset of the sub-mesh's first vertex in the dtMeshTile::detailVerts array.
+	unsigned int triBase;			///< The offset of the sub-mesh's first triangle in the dtMeshTile::detailTris array.
+	unsigned char vertCount;		///< The number of vertices in the sub-mesh.
+	unsigned char triCount;			///< The number of triangles in the sub-mesh.
+};
+
+/// Defines a link between polygons. (Links are chained through the next index, starting at dtPoly::firstLink.)
+/// @note This structure is rarely if ever used by the end user.
+/// @see dtMeshTile
+struct dtLink
+{
+	dtPolyRef ref;					///< Neighbour reference. (The neighbor that is linked to.)
+	unsigned int next;				///< Index of the next link. (Presumably #DT_NULL_LINK ends the list, as for dtPoly::firstLink; confirm.)
+	unsigned char edge;				///< Index of the polygon edge that owns this link.
+	unsigned char side;				///< If a boundary link, defines on which side the link is.
+	unsigned char bmin;				///< If a boundary link, defines the minimum sub-edge area.
+	unsigned char bmax;				///< If a boundary link, defines the maximum sub-edge area.
+};
+
+/// Bounding volume node. (Element type of the dtMeshTile::bvTree array.)
+/// @note This structure is rarely if ever used by the end user.
+/// @see dtMeshTile
+struct dtBVNode
+{
+	unsigned short bmin[3];			///< Minimum bounds of the node's AABB, stored as unsigned shorts. [(x, y, z)]
+	unsigned short bmax[3];			///< Maximum bounds of the node's AABB, stored as unsigned shorts. [(x, y, z)]
+	int i;							///< The node's index. (Negative for escape sequence.)
+};
+
+/// Defines a navigation mesh off-mesh connection within a dtMeshTile object.
+/// An off-mesh connection is a user defined traversable connection made up of two vertices.
+struct dtOffMeshConnection
+{
+	/// The endpoints of the connection. [(ax, ay, az, bx, by, bz)]
+	float pos[6];
+
+	/// The radius of the endpoints. [Limit: >= 0]
+	float rad;		
+
+	/// The polygon reference of the connection within the tile.
+	unsigned short poly;
+
+	/// Link flags.
+	/// @note These are not the connection's user defined flags. Those are assigned via the
+	/// connection's dtPoly definition. These are link flags used for internal purposes.
+	unsigned char flags;
+
+	/// End point side.
+	unsigned char side;
+
+	/// The id of the off-mesh connection. (User assigned when the navigation mesh is built.)
+	unsigned int userId;
+};
+
+/// Provides high level information related to a dtMeshTile object.
+/// @ingroup detour
+struct dtMeshHeader
+{
+	int magic;				///< Tile magic number. (Used to identify the data format; see #DT_NAVMESH_MAGIC.)
+	int version;			///< Tile data format version number. (See #DT_NAVMESH_VERSION.)
+	int x;					///< The x-position of the tile within the dtNavMesh tile grid. (x, y, layer)
+	int y;					///< The y-position of the tile within the dtNavMesh tile grid. (x, y, layer)
+	int layer;				///< The layer of the tile within the dtNavMesh tile grid. (x, y, layer)
+	unsigned int userId;	///< The user defined id of the tile.
+	int polyCount;			///< The number of polygons in the tile.
+	int vertCount;			///< The number of vertices in the tile.
+	int maxLinkCount;		///< The number of allocated links.
+	int detailMeshCount;	///< The number of sub-meshes in the detail mesh.
+	
+	/// The number of unique vertices in the detail mesh. (In addition to the polygon vertices.)
+	int detailVertCount;
+	
+	int detailTriCount;			///< The number of triangles in the detail mesh.
+	int bvNodeCount;			///< The number of bounding volume nodes. (Zero if bounding volumes are disabled.)
+	int offMeshConCount;		///< The number of off-mesh connections.
+	int offMeshBase;			///< The index of the first polygon which is an off-mesh connection.
+	float walkableHeight;		///< The height of the agents using the tile.
+	float walkableRadius;		///< The radius of the agents using the tile.
+	float walkableClimb;		///< The maximum climb height of the agents using the tile.
+	float bmin[3];				///< The minimum bounds of the tile's AABB. [(x, y, z)]
+	float bmax[3];				///< The maximum bounds of the tile's AABB. [(x, y, z)]
+	
+	/// The bounding volume quantization factor. (Presumably relates the float bounds to the unsigned short bounds in dtBVNode; confirm.)
+	float bvQuantFactor;
+};
+
+/// Defines a navigation mesh tile. (The array sizes are given by the counts in the tile's dtMeshHeader.)
+/// @ingroup detour
+struct dtMeshTile
+{
+	unsigned int salt;					///< Counter describing modifications to the tile.
+
+	unsigned int linksFreeList;			///< Index to the next free link.
+	dtMeshHeader* header;				///< The tile header.
+	dtPoly* polys;						///< The tile polygons. [Size: dtMeshHeader::polyCount]
+	float* verts;						///< The tile vertices. [Size: dtMeshHeader::vertCount; presumably 3 floats (x, y, z) per vertex]
+	dtLink* links;						///< The tile links. [Size: dtMeshHeader::maxLinkCount]
+	dtPolyDetail* detailMeshes;			///< The tile's detail sub-meshes. [Size: dtMeshHeader::detailMeshCount]
+	
+	/// The detail mesh's unique vertices. [(x, y, z) * dtMeshHeader::detailVertCount]
+	float* detailVerts;	
+
+	/// The detail mesh's triangles. [(vertA, vertB, vertC) * dtMeshHeader::detailTriCount]
+	unsigned char* detailTris;	
+
+	/// The tile bounding volume nodes. [Size: dtMeshHeader::bvNodeCount]
+	/// (Will be null if bounding volumes are disabled.)
+	dtBVNode* bvTree;
+
+	dtOffMeshConnection* offMeshCons;		///< The tile off-mesh connections. [Size: dtMeshHeader::offMeshConCount]
+		
+	unsigned char* data;					///< The tile data. (Not directly accessed under normal situations.)
+	int dataSize;							///< Size of the tile data.
+	int flags;								///< Tile flags. (See: #dtTileFlags)
+	dtMeshTile* next;						///< The next free tile, or the next tile in the spatial grid.
+};
+
+/// Configuration parameters used to define multi-tile navigation meshes.
+/// The values are used to allocate space during the initialization of a navigation mesh.
+/// @see dtNavMesh::init()
+/// @ingroup detour
+struct dtNavMeshParams
+{
+	float orig[3];					///< The world space origin of the navigation mesh's tile space. [(x, y, z)]
+	float tileWidth;				///< The width of each tile. (Along the x-axis.)
+	float tileHeight;				///< The height of each tile. (Along the z-axis, not the vertical y-axis.)
+	int maxTiles;					///< The maximum number of tiles the navigation mesh can contain.
+	int maxPolys;					///< The maximum number of polygons each tile can contain.
+};
+
+/// A navigation mesh based on tiles of convex polygons.
+/// @ingroup detour
+class dtNavMesh
+{
+public:
+	dtNavMesh();
+	~dtNavMesh();
+
+	/// @{
+	/// @name Initialization and Tile Management
+
+	/// Initializes the navigation mesh for tiled use.
+	///  @param[in]	params		Initialization parameters.
+	/// @return The status flags for the operation.
+	dtStatus init(const dtNavMeshParams* params);
+
+	/// Initializes the navigation mesh for single tile use.
+	///  @param[in]	data		Data of the new tile. (See: #dtCreateNavMeshData)
+	///  @param[in]	dataSize	The data size of the new tile.
+	///  @param[in]	flags		The tile flags. (See: #dtTileFlags)
+	/// @return The status flags for the operation.
+	///  @see dtCreateNavMeshData
+	dtStatus init(unsigned char* data, const int dataSize, const int flags);
+	
+	/// The navigation mesh initialization params.
+	const dtNavMeshParams* getParams() const;
+
+	/// Adds a tile to the navigation mesh.
+	///  @param[in]		data		Data for the new tile mesh. (See: #dtCreateNavMeshData)
+	///  @param[in]		dataSize	Data size of the new tile mesh.
+	///  @param[in]		flags		Tile flags. (See: #dtTileFlags)
+	///  @param[in]		lastRef		The desired reference for the tile. (When reloading a tile.) [opt] [Default: 0]
+	///  @param[out]	result		The tile reference. (If the tile was successfully added.) [opt]
+	/// @return The status flags for the operation.
+	dtStatus addTile(unsigned char* data, int dataSize, int flags, dtTileRef lastRef, dtTileRef* result);
+	
+	/// Removes the specified tile from the navigation mesh.
+	///  @param[in]		ref			The reference of the tile to remove.
+	///  @param[out]	data		Data associated with deleted tile.
+	///  @param[out]	dataSize	Size of the data associated with deleted tile.
+	/// @return The status flags for the operation.
+	dtStatus removeTile(dtTileRef ref, unsigned char** data, int* dataSize);
+
+	/// @}
+
+	/// @{
+	/// @name Query Functions
+
+	/// Calculates the tile grid location for the specified world position.
+	///  @param[in]	pos  The world position for the query. [(x, y, z)]
+	///  @param[out]	tx		The tile's x-location. (x, y)
+	///  @param[out]	ty		The tile's y-location. (x, y)
+	void calcTileLoc(const float* pos, int* tx, int* ty) const;
+
+	/// Gets the tile at the specified grid location.
+	///  @param[in]	x		The tile's x-location. (x, y, layer)
+	///  @param[in]	y		The tile's y-location. (x, y, layer)
+	///  @param[in]	layer	The tile's layer. (x, y, layer)
+	/// @return The tile, or null if the tile does not exist.
+	const dtMeshTile* getTileAt(const int x, const int y, const int layer) const;
+
+	/// Gets all tiles at the specified grid location. (All layers.)
+	///  @param[in]		x			The tile's x-location. (x, y)
+	///  @param[in]		y			The tile's y-location. (x, y)
+	///  @param[out]	tiles		A pointer to an array of tiles that will hold the result.
+	///  @param[in]		maxTiles	The maximum tiles the tiles parameter can hold.
+	/// @return The number of tiles returned in the tiles array.
+	int getTilesAt(const int x, const int y,
+				   dtMeshTile const** tiles, const int maxTiles) const;
+	
+	/// Gets the tile reference for the tile at specified grid location.
+	///  @param[in]	x		The tile's x-location. (x, y, layer)
+	///  @param[in]	y		The tile's y-location. (x, y, layer)
+	///  @param[in]	layer	The tile's layer. (x, y, layer)
+	/// @return The tile reference of the tile, or 0 if there is none.
+	dtTileRef getTileRefAt(int x, int y, int layer) const;
+
+	/// Gets the tile reference for the specified tile.
+	///  @param[in]	tile	The tile.
+	/// @return The tile reference of the tile.
+	dtTileRef getTileRef(const dtMeshTile* tile) const;
+
+	/// Gets the tile for the specified tile reference.
+	///  @param[in]	ref		The tile reference of the tile to retrieve.
+	/// @return The tile for the specified reference, or null if the 
+	///		reference is invalid.
+	const dtMeshTile* getTileByRef(dtTileRef ref) const;
+	
+	/// The maximum number of tiles supported by the navigation mesh.
+	/// @return The maximum number of tiles supported by the navigation mesh.
+	int getMaxTiles() const;
+	
+	/// Gets the tile at the specified index.
+	///  @param[in]	i		The tile index. [Limit: 0 <= index < #getMaxTiles()]
+	/// @return The tile at the specified index.
+	const dtMeshTile* getTile(int i) const;
+
+	/// Gets the tile and polygon for the specified polygon reference.
+	///  @param[in]		ref		The reference for the a polygon.
+	///  @param[out]	tile	The tile containing the polygon.
+	///  @param[out]	poly	The polygon.
+	/// @return The status flags for the operation.
+	dtStatus getTileAndPolyByRef(const dtPolyRef ref, const dtMeshTile** tile, const dtPoly** poly) const;
+	
+	/// Returns the tile and polygon for the specified polygon reference.
+	///  @param[in]		ref		A known valid reference for a polygon.
+	///  @param[out]	tile	The tile containing the polygon.
+	///  @param[out]	poly	The polygon.
+	void getTileAndPolyByRefUnsafe(const dtPolyRef ref, const dtMeshTile** tile, const dtPoly** poly) const;
+
+	/// Checks the validity of a polygon reference.
+	///  @param[in]	ref		The polygon reference to check.
+	/// @return True if polygon reference is valid for the navigation mesh.
+	bool isValidPolyRef(dtPolyRef ref) const;
+	
+	/// Gets the polygon reference for the tile's base polygon.
+	///  @param[in]	tile		The tile.
+	/// @return The polygon reference for the base polygon in the specified tile.
+	dtPolyRef getPolyRefBase(const dtMeshTile* tile) const;
+	
+	/// Gets the endpoints for an off-mesh connection, ordered by "direction of travel".
+	///  @param[in]		prevRef		The reference of the polygon before the connection.
+	///  @param[in]		polyRef		The reference of the off-mesh connection polygon.
+	///  @param[out]	startPos	The start position of the off-mesh connection. [(x, y, z)]
+	///  @param[out]	endPos		The end position of the off-mesh connection. [(x, y, z)]
+	/// @return The status flags for the operation.
+	dtStatus getOffMeshConnectionPolyEndPoints(dtPolyRef prevRef, dtPolyRef polyRef, float* startPos, float* endPos) const;
+
+	/// Gets the specified off-mesh connection.
+	///  @param[in]	ref		The polygon reference of the off-mesh connection.
+	/// @return The specified off-mesh connection, or null if the polygon reference is not valid.
+	const dtOffMeshConnection* getOffMeshConnectionByRef(dtPolyRef ref) const;
+	
+	/// @}
+
+	/// @{
+	/// @name State Management
+	/// These functions do not affect #dtTileRef or #dtPolyRef's.
+
+	/// Sets the user defined flags for the specified polygon.
+	///  @param[in]	ref		The polygon reference.
+	///  @param[in]	flags	The new flags for the polygon.
+	/// @return The status flags for the operation.
+	dtStatus setPolyFlags(dtPolyRef ref, unsigned short flags);
+
+	/// Gets the user defined flags for the specified polygon.
+	///  @param[in]		ref				The polygon reference.
+	///  @param[out]	resultFlags		The polygon flags.
+	/// @return The status flags for the operation.
+	dtStatus getPolyFlags(dtPolyRef ref, unsigned short* resultFlags) const;
+
+	/// Sets the user defined area for the specified polygon.
+	///  @param[in]	ref		The polygon reference.
+	///  @param[in]	area	The new area id for the polygon. [Limit: < #DT_MAX_AREAS]
+	/// @return The status flags for the operation.
+	dtStatus setPolyArea(dtPolyRef ref, unsigned char area);
+
+	/// Gets the user defined area for the specified polygon.
+	///  @param[in]		ref			The polygon reference.
+	///  @param[out]	resultArea	The area id for the polygon.
+	/// @return The status flags for the operation.
+	dtStatus getPolyArea(dtPolyRef ref, unsigned char* resultArea) const;
+
+	/// Gets the size of the buffer required by #storeTileState to store the specified tile's state.
+	///  @param[in]	tile	The tile.
+	/// @return The size of the buffer required to store the state.
+	int getTileStateSize(const dtMeshTile* tile) const;
+	
+	/// Stores the non-structural state of the tile in the specified buffer. (Flags, area ids, etc.)
+	///  @param[in]		tile			The tile.
+	///  @param[out]	data			The buffer to store the tile's state in.
+	///  @param[in]		maxDataSize		The size of the data buffer. [Limit: >= #getTileStateSize]
+	/// @return The status flags for the operation.
+	dtStatus storeTileState(const dtMeshTile* tile, unsigned char* data, const int maxDataSize) const;
+	
+	/// Restores the state of the tile.
+	///  @param[in]	tile			The tile.
+	///  @param[in]	data			The new state. (Obtained from #storeTileState.)
+	///  @param[in]	maxDataSize		The size of the state within the data buffer.
+	/// @return The status flags for the operation.
+	dtStatus restoreTileState(dtMeshTile* tile, const unsigned char* data, const int maxDataSize);
+	
+	/// @}
+
+	/// @{
+	/// @name Encoding and Decoding
+	/// These functions are generally meant for internal use only.
+
+	/// Derives a standard polygon reference.
+	///  @note This function is generally meant for internal use only.
+	///  @param[in]	salt	The tile's salt value.
+	///  @param[in]	it		The index of the tile.
+	///  @param[in]	ip		The index of the polygon within the tile.
+	inline dtPolyRef encodePolyId(unsigned int salt, unsigned int it, unsigned int ip) const
+	{
+		return ((dtPolyRef)salt << (m_polyBits+m_tileBits)) | ((dtPolyRef)it << m_polyBits) | (dtPolyRef)ip;
+	}
+	
+	/// Decodes a standard polygon reference.
+	///  @note This function is generally meant for internal use only.
+	///  @param[in]	ref   The polygon reference to decode.
+	///  @param[out]	salt	The tile's salt value.
+	///  @param[out]	it		The index of the tile.
+	///  @param[out]	ip		The index of the polygon within the tile.
+	///  @see #encodePolyId
+	inline void decodePolyId(dtPolyRef ref, unsigned int& salt, unsigned int& it, unsigned int& ip) const
+	{
+		const dtPolyRef saltMask = ((dtPolyRef)1<<m_saltBits)-1;
+		const dtPolyRef tileMask = ((dtPolyRef)1<<m_tileBits)-1;
+		const dtPolyRef polyMask = ((dtPolyRef)1<<m_polyBits)-1;
+		salt = (unsigned int)((ref >> (m_polyBits+m_tileBits)) & saltMask);
+		it = (unsigned int)((ref >> m_polyBits) & tileMask);
+		ip = (unsigned int)(ref & polyMask);
+	}
+
+	/// Extracts a tile's salt value from the specified polygon reference.
+	///  @note This function is generally meant for internal use only.
+	///  @param[in]	ref		The polygon reference.
+	///  @see #encodePolyId
+	inline unsigned int decodePolyIdSalt(dtPolyRef ref) const
+	{
+		const dtPolyRef saltMask = ((dtPolyRef)1<<m_saltBits)-1;
+		return (unsigned int)((ref >> (m_polyBits+m_tileBits)) & saltMask);
+	}
+	
+	/// Extracts the tile's index from the specified polygon reference.
+	///  @note This function is generally meant for internal use only.
+	///  @param[in]	ref		The polygon reference.
+	///  @see #encodePolyId
+	inline unsigned int decodePolyIdTile(dtPolyRef ref) const
+	{
+		const dtPolyRef tileMask = ((dtPolyRef)1<<m_tileBits)-1;
+		return (unsigned int)((ref >> m_polyBits) & tileMask);
+	}
+	
+	/// Extracts the polygon's index (within its tile) from the specified polygon reference.
+	///  @note This function is generally meant for internal use only.
+	///  @param[in]	ref		The polygon reference.
+	///  @see #encodePolyId
+	inline unsigned int decodePolyIdPoly(dtPolyRef ref) const
+	{
+		const dtPolyRef polyMask = ((dtPolyRef)1<<m_polyBits)-1;
+		return (unsigned int)(ref & polyMask);
+	}
+
+	/// @}
+	
+private:
+
+	/// Returns pointer to tile in the tile array.
+	dtMeshTile* getTile(int i);
+
+	/// Returns the tiles at the specified grid location (x, y).
+	int getTilesAt(const int x, const int y,
+				   dtMeshTile** tiles, const int maxTiles) const;
+
+	/// Returns neighbour tile based on side.
+	int getNeighbourTilesAt(const int x, const int y, const int side,
+							dtMeshTile** tiles, const int maxTiles) const;
+	
+	/// Returns all polygons in neighbour tile based on portal defined by the segment.
+	int findConnectingPolys(const float* va, const float* vb,
+							const dtMeshTile* tile, int side,
+							dtPolyRef* con, float* conarea, int maxcon) const;
+	
+	/// Builds internal polygons links for a tile.
+	void connectIntLinks(dtMeshTile* tile);
+	/// Builds internal polygons links for a tile.
+	void baseOffMeshLinks(dtMeshTile* tile);
+
+	/// Builds external polygon links for a tile.
+	void connectExtLinks(dtMeshTile* tile, dtMeshTile* target, int side);
+	/// Builds external polygon links for a tile.
+	void connectExtOffMeshLinks(dtMeshTile* tile, dtMeshTile* target, int side);
+	
+	/// Removes external links at specified side.
+	void unconnectExtLinks(dtMeshTile* tile, dtMeshTile* target);
+	
+
+	// TODO: These methods are duplicates from dtNavMeshQuery, but are needed for off-mesh connection finding.
+	
+	/// Queries polygons within a tile.
+	int queryPolygonsInTile(const dtMeshTile* tile, const float* qmin, const float* qmax,
+							dtPolyRef* polys, const int maxPolys) const;
+	/// Find nearest polygon within a tile.
+	dtPolyRef findNearestPolyInTile(const dtMeshTile* tile, const float* center,
+									const float* extents, float* nearestPt) const;
+	/// Returns closest point on polygon.
+	void closestPointOnPolyInTile(const dtMeshTile* tile, unsigned int ip,
+								  const float* pos, float* closest) const;
+	
+	dtNavMeshParams m_params;			///< Current initialization params. TODO: do not store this info twice.
+	float m_orig[3];					///< Origin of the tile (0,0)
+	float m_tileWidth, m_tileHeight;	///< Dimensions of each tile.
+	int m_maxTiles;						///< Max number of tiles.
+	int m_tileLutSize;					///< Tile hash lookup size (must be pot).
+	int m_tileLutMask;					///< Tile hash lookup mask.
+
+	dtMeshTile** m_posLookup;			///< Tile hash lookup.
+	dtMeshTile* m_nextFree;				///< Freelist of tiles.
+	dtMeshTile* m_tiles;				///< List of tiles.
+		
+	unsigned int m_saltBits;			///< Number of salt bits in the tile ID.
+	unsigned int m_tileBits;			///< Number of tile bits in the tile ID.
+	unsigned int m_polyBits;			///< Number of poly bits in the tile ID.
+};
+
+/// Allocates a navigation mesh object using the Detour allocator.
+/// @return A navigation mesh that is ready for initialization, or null on failure.
+///  @ingroup detour
+dtNavMesh* dtAllocNavMesh();
+
+/// Frees the specified navigation mesh object using the Detour allocator.
+///  @param[in]	navmesh		A navigation mesh allocated using #dtAllocNavMesh
+///  @ingroup detour
+void dtFreeNavMesh(dtNavMesh* navmesh);
+
+#endif // DETOURNAVMESH_H
+
+///////////////////////////////////////////////////////////////////////////
+
+// This section contains detailed documentation for members that don't have
+// a source file. It reduces clutter in the main section of the header.
+
+/**
+
+@typedef dtPolyRef
+@par
+
+Polygon references are subject to the same invalidate/preserve/restore 
+rules that apply to #dtTileRef's.  If the #dtTileRef for the polygon's
+tile changes, the polygon reference becomes invalid.
+
+Changing a polygon's flags, area id, etc. does not impact its polygon
+reference.
+
+@typedef dtTileRef
+@par
+
+The following changes will invalidate a tile reference:
+
+- The referenced tile has been removed from the navigation mesh.
+- The navigation mesh has been initialized using a different set
+  of #dtNavMeshParams.
+
+A tile reference is preserved/restored if the tile is added to a navigation 
+mesh initialized with the original #dtNavMeshParams and is added at the
+original reference location. (E.g. The lastRef parameter is used with
+dtNavMesh::addTile.)
+
+Basically, if the storage structure of a tile changes, its associated
+tile reference changes.
+
+
+@var unsigned short dtPoly::neis[DT_VERTS_PER_POLYGON]
+@par
+
+Each entry represents data for the edge starting at the vertex of the same index. 
+E.g. The entry at index n represents the edge data for vertex[n] to vertex[n+1].
+
+A value of zero indicates the edge has no polygon connection. (It makes up the 
+border of the navigation mesh.)
+
+The information can be extracted as follows: 
+@code 
+neighborRef = neis[n] & 0xff; // Get the neighbor polygon reference.
+
+if (neis[n] & #DT_EX_LINK)
+{
+    // The edge is an external (portal) edge.
+}
+@endcode
+
+@var float dtMeshHeader::bvQuantFactor
+@par
+
+This value is used for converting between world and bounding volume coordinates.
+For example:
+@code
+const float cs = 1.0f / tile->header->bvQuantFactor;
+const dtBVNode* n = &tile->bvTree[i];
+if (n->i >= 0)
+{
+    // This is a leaf node.
+    float worldMinX = tile->header->bmin[0] + n->bmin[0]*cs;
+    float worldMinY = tile->header->bmin[1] + n->bmin[1]*cs;
+    // Etc...
+}
+@endcode
+
+@struct dtMeshTile
+@par
+
+Tiles generally only exist within the context of a dtNavMesh object.
+
+Some tile content is optional.  For example, a tile may not contain any
+off-mesh connections.  In this case the associated pointer will be null.
+
+If a detail mesh exists it will share vertices with the base polygon mesh.  
+Only the vertices unique to the detail mesh will be stored in #detailVerts.
+
+@warning Tiles returned by a dtNavMesh object are not guaranteed to be populated.
+For example: The tile at a location might not have been loaded yet, or may have been removed.
+In this case, pointers will be null.  So if in doubt, check the polygon count in the 
+tile's header to determine if a tile has polygons defined.
+
+@var float dtOffMeshConnection::pos[6]
+@par
+
+For a properly built navigation mesh, vertex A will always be within the bounds of the mesh. 
+Vertex B is not required to be within the bounds of the mesh.
+
+*/
diff --git a/Engine/lib/recast/Detour/Include/DetourNavMeshBuilder.h b/Engine/lib/recast/Detour/Include/DetourNavMeshBuilder.h
new file mode 100644
index 000000000..c80d17176
--- /dev/null
+++ b/Engine/lib/recast/Detour/Include/DetourNavMeshBuilder.h
@@ -0,0 +1,148 @@
+//
+// Copyright (c) 2009-2010 Mikko Mononen memon@inside.org
+//
+// This software is provided 'as-is', without any express or implied
+// warranty.  In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source distribution.
+//
+
+#ifndef DETOURNAVMESHBUILDER_H
+#define DETOURNAVMESHBUILDER_H
+
+#include "DetourAlloc.h"
+
+/// Represents the source data used to build a navigation mesh tile.
+/// @ingroup detour
+struct dtNavMeshCreateParams
+{
+
+	/// @name Polygon Mesh Attributes
+	/// Used to create the base navigation graph.
+	/// See #rcPolyMesh for details related to these attributes.
+	/// @{
+
+	const unsigned short* verts;			///< The polygon mesh vertices. [(x, y, z) * #vertCount] [Unit: vx]
+	int vertCount;							///< The number of vertices in the polygon mesh. [Limit: >= 3]
+	const unsigned short* polys;			///< The polygon data. [Size: #polyCount * 2 * #nvp]
+	const unsigned short* polyFlags;		///< The user defined flags assigned to each polygon. [Size: #polyCount]
+	const unsigned char* polyAreas;			///< The user defined area ids assigned to each polygon. [Size: #polyCount]
+	int polyCount;							///< Number of polygons in the mesh. [Limit: >= 1]
+	int nvp;								///< The maximum number of vertices per polygon. [Limit: >= 3]
+
+	/// @}
+	/// @name Height Detail Attributes (Optional)
+	/// See #rcPolyMeshDetail for details related to these attributes.
+	/// @{
+
+	const unsigned int* detailMeshes;		///< The height detail sub-mesh data. [Size: 4 * #polyCount]
+	const float* detailVerts;				///< The detail mesh vertices. [Size: 3 * #detailVertsCount] [Unit: wu]
+	int detailVertsCount;					///< The number of vertices in the detail mesh.
+	const unsigned char* detailTris;		///< The detail mesh triangles. [Size: 4 * #detailTriCount]
+	int detailTriCount;						///< The number of triangles in the detail mesh.
+
+	/// @}
+	/// @name Off-Mesh Connections Attributes (Optional)
+	/// Used to define a custom point-to-point edge within the navigation graph, an 
+	/// off-mesh connection is a user defined traversable connection made up of two vertices, 
+	/// at least one of which resides within a navigation mesh polygon.
+	/// @{
+
+	/// Off-mesh connection vertices. [(ax, ay, az, bx, by, bz) * #offMeshConCount] [Unit: wu]
+	const float* offMeshConVerts;
+	/// Off-mesh connection radii. [Size: #offMeshConCount] [Unit: wu]
+	const float* offMeshConRad;
+	/// User defined flags assigned to the off-mesh connections. [Size: #offMeshConCount]
+	const unsigned short* offMeshConFlags;
+	/// User defined area ids assigned to the off-mesh connections. [Size: #offMeshConCount]
+	const unsigned char* offMeshConAreas;
+	/// The permitted travel direction of the off-mesh connections. [Size: #offMeshConCount]
+	///
+	/// 0 = Travel only from endpoint A to endpoint B.<br/>
+	/// #DT_OFFMESH_CON_BIDIR = Bidirectional travel.
+	const unsigned char* offMeshConDir;	
+	/// The user defined ids of the off-mesh connection. [Size: #offMeshConCount]
+	const unsigned int* offMeshConUserID;
+	/// The number of off-mesh connections. [Limit: >= 0]
+	int offMeshConCount;
+
+	/// @}
+	/// @name Tile Attributes
+	/// @note The tile grid/layer data can be left at zero if the destination is a single tile mesh.
+	/// @{
+
+	unsigned int userId;	///< The user defined id of the tile.
+	int tileX;				///< The tile's x-grid location within the multi-tile destination mesh. (Along the x-axis.)
+	int tileY;				///< The tile's y-grid location within the multi-tile destination mesh. (Along the z-axis.)
+	int tileLayer;			///< The tile's layer within the layered destination mesh. [Limit: >= 0] (Along the y-axis.)
+	float bmin[3];			///< The minimum bounds of the tile. [(x, y, z)] [Unit: wu]
+	float bmax[3];			///< The maximum bounds of the tile. [(x, y, z)] [Unit: wu]
+
+	/// @}
+	/// @name General Configuration Attributes
+	/// @{
+
+	float walkableHeight;	///< The agent height. [Unit: wu]
+	float walkableRadius;	///< The agent radius. [Unit: wu]
+	float walkableClimb;	///< The agent maximum traversable ledge. (Up/Down) [Unit: wu]
+	float cs;				///< The xz-plane cell size of the polygon mesh. [Limit: > 0] [Unit: wu]
+	float ch;				///< The y-axis cell height of the polygon mesh. [Limit: > 0] [Unit: wu]
+
+	/// True if a bounding volume tree should be built for the tile.
+	/// @note The BVTree is not normally needed for layered navigation meshes.
+	bool buildBvTree;
+
+	/// @}
+};
+
+/// Builds navigation mesh tile data from the provided tile creation data.
+/// @ingroup detour
+///  @param[in]		params		Tile creation data.
+///  @param[out]	outData		The resulting tile data.
+///  @param[out]	outDataSize	The size of the tile data array.
+/// @return True if the tile data was successfully created.
+bool dtCreateNavMeshData(dtNavMeshCreateParams* params, unsigned char** outData, int* outDataSize);
+
+/// Swaps the endianness of the tile data's header (#dtMeshHeader).
+///  @param[in,out]	data		The tile data array.
+///  @param[in]		dataSize	The size of the data array.
+bool dtNavMeshHeaderSwapEndian(unsigned char* data, const int dataSize);
+
+/// Swaps the endianness of the tile data.
+///  @param[in,out]	data		The tile data array.
+///  @param[in]		dataSize	The size of the data array.
+bool dtNavMeshDataSwapEndian(unsigned char* data, const int dataSize);
+
+#endif // DETOURNAVMESHBUILDER_H
+
+// This section contains detailed documentation for members that don't have
+// a source file. It reduces clutter in the main section of the header.
+
+/**
+
+@struct dtNavMeshCreateParams
+@par
+
+This structure is used to marshal data between the Recast mesh generation pipeline and Detour navigation components.
+
+See the rcPolyMesh and rcPolyMeshDetail documentation for detailed information related to mesh structure.
+
+Units are usually in voxels (vx) or world units (wu). The units for voxels, grid size, and cell size 
+are all based on the values of #cs and #ch.
+
+The standard navigation mesh build process is to create tile data using dtCreateNavMeshData, then add the tile 
+to a navigation mesh using either the dtNavMesh single tile <tt>init()</tt> function or the dtNavMesh::addTile()
+function.
+
+@see dtCreateNavMeshData
+
+*/
\ No newline at end of file
diff --git a/Engine/lib/recast/Detour/Include/DetourNavMeshQuery.h b/Engine/lib/recast/Detour/Include/DetourNavMeshQuery.h
new file mode 100644
index 000000000..b856b7027
--- /dev/null
+++ b/Engine/lib/recast/Detour/Include/DetourNavMeshQuery.h
@@ -0,0 +1,477 @@
+//
+// Copyright (c) 2009-2010 Mikko Mononen memon@inside.org
+//
+// This software is provided 'as-is', without any express or implied
+// warranty.  In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source distribution.
+//
+
+#ifndef DETOURNAVMESHQUERY_H
+#define DETOURNAVMESHQUERY_H
+
+#include "DetourNavMesh.h"
+#include "DetourStatus.h"
+
+
+// Define DT_VIRTUAL_QUERYFILTER if you wish to derive a custom filter from dtQueryFilter.
+// On certain platforms indirect or virtual function call is expensive. The default
+// setting is to use non-virtual functions, the actual implementations of the functions
+// are declared as inline for maximum speed. 
+
+//#define DT_VIRTUAL_QUERYFILTER 1
+
+/// Defines polygon filtering and traversal costs for navigation mesh query operations.
+/// @ingroup detour
+class dtQueryFilter
+{
+	float m_areaCost[DT_MAX_AREAS];		///< Cost per area type. (Used by default implementation.)
+	unsigned short m_includeFlags;		///< Flags for polygons that can be visited. (Used by default implementation.)
+	unsigned short m_excludeFlags;		///< Flags for polygons that should not be visited. (Used by default implementation.)
+	
+public:
+	dtQueryFilter();
+	
+	/// Returns true if the polygon can be visited.  (I.e. Is traversable.)
+	///  @param[in]		ref		The reference id of the polygon to test.
+	///  @param[in]		tile	The tile containing the polygon.
+	///  @param[in]		poly  The polygon to test.
+#ifdef DT_VIRTUAL_QUERYFILTER
+	virtual bool passFilter(const dtPolyRef ref,
+							const dtMeshTile* tile,
+							const dtPoly* poly) const;
+#else
+	bool passFilter(const dtPolyRef ref,
+					const dtMeshTile* tile,
+					const dtPoly* poly) const;
+#endif
+
+	/// Returns cost to move from the beginning to the end of a line segment
+	/// that is fully contained within a polygon.
+	///  @param[in]		pa			The start position on the edge of the previous and current polygon. [(x, y, z)]
+	///  @param[in]		pb			The end position on the edge of the current and next polygon. [(x, y, z)]
+	///  @param[in]		prevRef		The reference id of the previous polygon. [opt]
+	///  @param[in]		prevTile	The tile containing the previous polygon. [opt]
+	///  @param[in]		prevPoly	The previous polygon. [opt]
+	///  @param[in]		curRef		The reference id of the current polygon.
+	///  @param[in]		curTile		The tile containing the current polygon.
+	///  @param[in]		curPoly		The current polygon.
+	///  @param[in]		nextRef		The reference id of the next polygon. [opt]
+	///  @param[in]		nextTile	The tile containing the next polygon. [opt]
+	///  @param[in]		nextPoly	The next polygon. [opt]
+#ifdef DT_VIRTUAL_QUERYFILTER
+	virtual float getCost(const float* pa, const float* pb,
+						  const dtPolyRef prevRef, const dtMeshTile* prevTile, const dtPoly* prevPoly,
+						  const dtPolyRef curRef, const dtMeshTile* curTile, const dtPoly* curPoly,
+						  const dtPolyRef nextRef, const dtMeshTile* nextTile, const dtPoly* nextPoly) const;
+#else
+	float getCost(const float* pa, const float* pb,
+				  const dtPolyRef prevRef, const dtMeshTile* prevTile, const dtPoly* prevPoly,
+				  const dtPolyRef curRef, const dtMeshTile* curTile, const dtPoly* curPoly,
+				  const dtPolyRef nextRef, const dtMeshTile* nextTile, const dtPoly* nextPoly) const;
+#endif
+
+	/// @name Getters and setters for the default implementation data.
+	///@{
+
+	/// Returns the traversal cost of the area.
+	///  @param[in]		i		The id of the area.
+	/// @returns The traversal cost of the area.
+	inline float getAreaCost(const int i) const { return m_areaCost[i]; }
+
+	/// Sets the traversal cost of the area.
+	///  @param[in]		i		The id of the area.
+	///  @param[in]		cost	The new cost of traversing the area.
+	inline void setAreaCost(const int i, const float cost) { m_areaCost[i] = cost; } 
+
+	/// Returns the include flags for the filter.
+	/// Any polygons that include one or more of these flags will be
+	/// included in the operation.
+	inline unsigned short getIncludeFlags() const { return m_includeFlags; }
+
+	/// Sets the include flags for the filter.
+	/// @param[in]		flags	The new flags.
+	inline void setIncludeFlags(const unsigned short flags) { m_includeFlags = flags; }
+
+	/// Returns the exclude flags for the filter.
+	/// Any polygons that include one or more of these flags will be
+	/// excluded from the operation.
+	inline unsigned short getExcludeFlags() const { return m_excludeFlags; }
+
+	/// Sets the exclude flags for the filter.
+	/// @param[in]		flags		The new flags.
+	inline void setExcludeFlags(const unsigned short flags) { m_excludeFlags = flags; }	
+
+	///@}
+
+};
+
+/// Provides the ability to perform pathfinding related queries against
+/// a navigation mesh.
+/// @ingroup detour
+class dtNavMeshQuery
+{
+public:
+	dtNavMeshQuery();
+	~dtNavMeshQuery();
+	
+	/// Initializes the query object.
+	///  @param[in]		nav			Pointer to the dtNavMesh object to use for all queries.
+	///  @param[in]		maxNodes	Maximum number of search nodes. [Limits: 0 < value <= 65536]
+	/// @returns The status flags for the query.
+	dtStatus init(const dtNavMesh* nav, const int maxNodes);
+	
+	/// @name Standard Pathfinding Functions
+	///@{
+
+	/// Finds a path from the start polygon to the end polygon.
+	///  @param[in]		startRef	The reference id of the start polygon.
+	///  @param[in]		endRef		The reference id of the end polygon.
+	///  @param[in]		startPos	A position within the start polygon. [(x, y, z)]
+	///  @param[in]		endPos		A position within the end polygon. [(x, y, z)]
+	///  @param[in]		filter		The polygon filter to apply to the query.
+	///  @param[out]	path		An ordered list of polygon references representing the path. (Start to end.) 
+	///  							[(polyRef) * @p pathCount]
+	///  @param[out]	pathCount	The number of polygons returned in the @p path array.
+	///  @param[in]		maxPath		The maximum number of polygons the @p path array can hold. [Limit: >= 1]
+	dtStatus findPath(dtPolyRef startRef, dtPolyRef endRef,
+					  const float* startPos, const float* endPos,
+					  const dtQueryFilter* filter,
+					  dtPolyRef* path, int* pathCount, const int maxPath) const;
+	
+	/// Finds the straight path from the start to the end position within the polygon corridor.
+	///  @param[in]		startPos			Path start position. [(x, y, z)]
+	///  @param[in]		endPos				Path end position. [(x, y, z)]
+	///  @param[in]		path				An array of polygon references that represent the path corridor.
+	///  @param[in]		pathSize			The number of polygons in the @p path array.
+	///  @param[out]	straightPath		Points describing the straight path. [(x, y, z) * @p straightPathCount].
+	///  @param[out]	straightPathFlags	Flags describing each point. (See: #dtStraightPathFlags) [opt]
+	///  @param[out]	straightPathRefs	The reference id of the polygon that is being entered at each point. [opt]
+	///  @param[out]	straightPathCount	The number of points in the straight path.
+	///  @param[in]		maxStraightPath		The maximum number of points the straight path arrays can hold.  [Limit: > 0]
+	/// @returns The status flags for the query.
+	dtStatus findStraightPath(const float* startPos, const float* endPos,
+							  const dtPolyRef* path, const int pathSize,
+							  float* straightPath, unsigned char* straightPathFlags, dtPolyRef* straightPathRefs,
+							  int* straightPathCount, const int maxStraightPath) const;
+
+	///@}
+	/// @name Sliced Pathfinding Functions
+	/// Common use case:
+	///	-# Call initSlicedFindPath() to initialize the sliced path query.
+	///	-# Call updateSlicedFindPath() until it returns complete.
+	///	-# Call finalizeSlicedFindPath() to get the path.
+	///@{ 
+
+	/// Initializes a sliced path query.
+	///  @param[in]		startRef	The reference id of the start polygon.
+	///  @param[in]		endRef		The reference id of the end polygon.
+	///  @param[in]		startPos	A position within the start polygon. [(x, y, z)]
+	///  @param[in]		endPos		A position within the end polygon. [(x, y, z)]
+	///  @param[in]		filter		The polygon filter to apply to the query.
+	/// @returns The status flags for the query.
+	dtStatus initSlicedFindPath(dtPolyRef startRef, dtPolyRef endRef,
+								const float* startPos, const float* endPos,
+								const dtQueryFilter* filter);
+
+	/// Updates an in-progress sliced path query.
+	///  @param[in]		maxIter		The maximum number of iterations to perform.
+	///  @param[out]	doneIters	The actual number of iterations completed. [opt]
+	/// @returns The status flags for the query.
+	dtStatus updateSlicedFindPath(const int maxIter, int* doneIters);
+
+	/// Finalizes and returns the results of a sliced path query.
+	///  @param[out]	path		An ordered list of polygon references representing the path. (Start to end.) 
+	///  							[(polyRef) * @p pathCount]
+	///  @param[out]	pathCount	The number of polygons returned in the @p path array.
+	///  @param[in]		maxPath		The max number of polygons the path array can hold. [Limit: >= 1]
+	/// @returns The status flags for the query.
+	dtStatus finalizeSlicedFindPath(dtPolyRef* path, int* pathCount, const int maxPath);
+	
+	/// Finalizes and returns the results of an incomplete sliced path query, returning the path to the furthest
+	/// polygon on the existing path that was visited during the search.
+	///  @param[in]		existing		An array of polygon references for the existing path.
+	///  @param[in]		existingSize	The number of polygons in the @p existing array.
+	///  @param[out]	path			An ordered list of polygon references representing the path. (Start to end.) 
+	///  								[(polyRef) * @p pathCount]
+	///  @param[out]	pathCount		The number of polygons returned in the @p path array.
+	///  @param[in]		maxPath			The max number of polygons the @p path array can hold. [Limit: >= 1]
+	/// @returns The status flags for the query.
+	dtStatus finalizeSlicedFindPathPartial(const dtPolyRef* existing, const int existingSize,
+										   dtPolyRef* path, int* pathCount, const int maxPath);
+
+	///@}
+	/// @name Dijkstra Search Functions
+	/// @{ 
+
+	/// Finds the polygons along the navigation graph that touch the specified circle.
+	///  @param[in]		startRef		The reference id of the polygon where the search starts.
+	///  @param[in]		centerPos		The center of the search circle. [(x, y, z)]
+	///  @param[in]		radius			The radius of the search circle.
+	///  @param[in]		filter			The polygon filter to apply to the query.
+	///  @param[out]	resultRef		The reference ids of the polygons touched by the circle. [opt]
+	///  @param[out]	resultParent	The reference ids of the parent polygons for each result. 
+	///  								Zero if a result polygon has no parent. [opt]
+	///  @param[out]	resultCost		The search cost from @p centerPos to the polygon. [opt]
+	///  @param[out]	resultCount		The number of polygons found. [opt]
+	///  @param[in]		maxResult		The maximum number of polygons the result arrays can hold.
+	/// @returns The status flags for the query.
+	dtStatus findPolysAroundCircle(dtPolyRef startRef, const float* centerPos, const float radius,
+								   const dtQueryFilter* filter,
+								   dtPolyRef* resultRef, dtPolyRef* resultParent, float* resultCost,
+								   int* resultCount, const int maxResult) const;
+	
+	/// Finds the polygons along the navigation graph that touch the specified convex polygon.
+	///  @param[in]		startRef		The reference id of the polygon where the search starts.
+	///  @param[in]		verts			The vertices describing the convex polygon. (CCW) 
+	///  								[(x, y, z) * @p nverts]
+	///  @param[in]		nverts			The number of vertices in the polygon.
+	///  @param[in]		filter			The polygon filter to apply to the query.
+	///  @param[out]	resultRef		The reference ids of the polygons touched by the search polygon. [opt]
+	///  @param[out]	resultParent	The reference ids of the parent polygons for each result. Zero if a 
+	///  								result polygon has no parent. [opt]
+	///  @param[out]	resultCost		The search cost from the centroid point to the polygon. [opt]
+	///  @param[out]	resultCount		The number of polygons found.
+	///  @param[in]		maxResult		The maximum number of polygons the result arrays can hold.
+	/// @returns The status flags for the query.
+	dtStatus findPolysAroundShape(dtPolyRef startRef, const float* verts, const int nverts,
+								  const dtQueryFilter* filter,
+								  dtPolyRef* resultRef, dtPolyRef* resultParent, float* resultCost,
+								  int* resultCount, const int maxResult) const;
+	
+	/// @}
+	/// @name Local Query Functions
+	///@{
+
+	/// Finds the polygon nearest to the specified center point.
+	///  @param[in]		center		The center of the search box. [(x, y, z)]
+	///  @param[in]		extents		The search distance along each axis. [(x, y, z)]
+	///  @param[in]		filter		The polygon filter to apply to the query.
+	///  @param[out]	nearestRef	The reference id of the nearest polygon.
+	///  @param[out]	nearestPt	The nearest point on the polygon. [opt] [(x, y, z)]
+	/// @returns The status flags for the query.
+	dtStatus findNearestPoly(const float* center, const float* extents,
+							 const dtQueryFilter* filter,
+							 dtPolyRef* nearestRef, float* nearestPt) const;
+	
+	/// Finds polygons that overlap the search box.
+	///  @param[in]		center		The center of the search box. [(x, y, z)]
+	///  @param[in]		extents		The search distance along each axis. [(x, y, z)]
+	///  @param[in]		filter		The polygon filter to apply to the query.
+	///  @param[out]	polys		The reference ids of the polygons that overlap the query box.
+	///  @param[out]	polyCount	The number of polygons in the search result.
+	///  @param[in]		maxPolys	The maximum number of polygons the search result can hold.
+	/// @returns The status flags for the query.
+	dtStatus queryPolygons(const float* center, const float* extents,
+						   const dtQueryFilter* filter,
+						   dtPolyRef* polys, int* polyCount, const int maxPolys) const;
+
+	/// Finds the non-overlapping navigation polygons in the local neighbourhood around the center position.
+	///  @param[in]		startRef		The reference id of the polygon where the search starts.
+	///  @param[in]		centerPos		The center of the query circle. [(x, y, z)]
+	///  @param[in]		radius			The radius of the query circle.
+	///  @param[in]		filter			The polygon filter to apply to the query.
+	///  @param[out]	resultRef		The reference ids of the polygons touched by the circle.
+	///  @param[out]	resultParent	The reference ids of the parent polygons for each result. 
+	///  								Zero if a result polygon has no parent. [opt]
+	///  @param[out]	resultCount		The number of polygons found.
+	///  @param[in]		maxResult		The maximum number of polygons the result arrays can hold.
+	/// @returns The status flags for the query.
+	dtStatus findLocalNeighbourhood(dtPolyRef startRef, const float* centerPos, const float radius,
+									const dtQueryFilter* filter,
+									dtPolyRef* resultRef, dtPolyRef* resultParent,
+									int* resultCount, const int maxResult) const;
+
+	/// Moves from the start to the end position constrained to the navigation mesh.
+	///  @param[in]		startRef		The reference id of the start polygon.
+	///  @param[in]		startPos		A position of the mover within the start polygon. [(x, y, z)]
+	///  @param[in]		endPos			The desired end position of the mover. [(x, y, z)]
+	///  @param[in]		filter			The polygon filter to apply to the query.
+	///  @param[out]	resultPos		The result position of the mover. [(x, y, z)]
+	///  @param[out]	visited			The reference ids of the polygons visited during the move.
+	///  @param[out]	visitedCount	The number of polygons visited during the move.
+	///  @param[in]		maxVisitedSize	The maximum number of polygons the @p visited array can hold.
+	/// @returns The status flags for the query.
+	dtStatus moveAlongSurface(dtPolyRef startRef, const float* startPos, const float* endPos,
+							  const dtQueryFilter* filter,
+							  float* resultPos, dtPolyRef* visited, int* visitedCount, const int maxVisitedSize) const;
+	
+	/// Casts a 'walkability' ray along the surface of the navigation mesh from 
+	/// the start position toward the end position.
+	///  @param[in]		startRef	The reference id of the start polygon.
+	///  @param[in]		startPos	A position within the start polygon representing 
+	///  							the start of the ray. [(x, y, z)]
+	///  @param[in]		endPos		The position to cast the ray toward. [(x, y, z)]
+	///  @param[out]	t			The hit parameter. (FLT_MAX if no wall hit.)
+	///  @param[out]	hitNormal	The normal of the nearest wall hit. [(x, y, z)]
+	///  @param[in]		filter		The polygon filter to apply to the query.
+	///  @param[out]	path		The reference ids of the visited polygons. [opt]
+	///  @param[out]	pathCount	The number of visited polygons. [opt]
+	///  @param[in]		maxPath		The maximum number of polygons the @p path array can hold.
+	/// @returns The status flags for the query.
+	dtStatus raycast(dtPolyRef startRef, const float* startPos, const float* endPos,
+					 const dtQueryFilter* filter,
+					 float* t, float* hitNormal, dtPolyRef* path, int* pathCount, const int maxPath) const;
+	
+	/// Finds the distance from the specified position to the nearest polygon wall.
+	///  @param[in]		startRef		The reference id of the polygon containing @p centerPos.
+	///  @param[in]		centerPos		The center of the search circle. [(x, y, z)]
+	///  @param[in]		maxRadius		The radius of the search circle.
+	///  @param[in]		filter			The polygon filter to apply to the query.
+	///  @param[out]	hitDist			The distance to the nearest wall from @p centerPos.
+	///  @param[out]	hitPos			The nearest position on the wall that was hit. [(x, y, z)]
+	///  @param[out]	hitNormal		The normalized ray formed from the wall point to the 
+	///  								source point. [(x, y, z)]
+	/// @returns The status flags for the query.
+	dtStatus findDistanceToWall(dtPolyRef startRef, const float* centerPos, const float maxRadius,
+								const dtQueryFilter* filter,
+								float* hitDist, float* hitPos, float* hitNormal) const;
+	
+	/// Returns the segments for the specified polygon, optionally including portals.
+	///  @param[in]		ref				The reference id of the polygon.
+	///  @param[in]		filter			The polygon filter to apply to the query.
+	///  @param[out]	segmentVerts	The segments. [(ax, ay, az, bx, by, bz) * segmentCount]
+	///  @param[out]	segmentRefs		The reference ids of each segment's neighbor polygon. 
+	///  								Or zero if the segment is a wall. [opt] [(parentRef) * @p segmentCount] 
+	///  @param[out]	segmentCount	The number of segments returned.
+	///  @param[in]		maxSegments		The maximum number of segments the result arrays can hold.
+	/// @returns The status flags for the query.
+	dtStatus getPolyWallSegments(dtPolyRef ref, const dtQueryFilter* filter,
+								 float* segmentVerts, dtPolyRef* segmentRefs, int* segmentCount,
+								 const int maxSegments) const;
+
+	/// Returns random location on navmesh.
+	/// Polygons are chosen weighted by area. The search runs in time linear in the number of polygons.
+	///  @param[in]		filter			The polygon filter to apply to the query.
+	///  @param[in]		frand			Function returning a random number [0..1).
+	///  @param[out]	randomRef		The reference id of the random location.
+	///  @param[out]	randomPt		The random location.
+	/// @returns The status flags for the query.
+	dtStatus findRandomPoint(const dtQueryFilter* filter, float (*frand)(),
+							 dtPolyRef* randomRef, float* randomPt) const;
+
+	/// Returns random location on navmesh within the reach of specified location. Polygons are chosen weighted by area.
+	/// The search runs in time linear in the number of polygons. The location is not exactly constrained by the circle, but it limits the visited polygons.
+	///  @param[in]		startRef		The reference id of the polygon where the search starts.
+	///  @param[in]		centerPos		The center of the search circle. [(x, y, z)]
+	///  @param[in]		maxRadius		The radius of the search circle.
+	///  @param[in]		filter			The polygon filter to apply to the query.
+	///  @param[in]		frand			Function returning a random number [0..1).
+	///  @param[out]	randomRef		The reference id of the random location.
+	///  @param[out]	randomPt		The random location. [(x, y, z)]
+	/// @returns The status flags for the query.
+	dtStatus findRandomPointAroundCircle(dtPolyRef startRef, const float* centerPos, const float maxRadius,
+										 const dtQueryFilter* filter, float (*frand)(),
+										 dtPolyRef* randomRef, float* randomPt) const;
+	
+	/// Finds the closest point on the specified polygon.
+	///  @param[in]		ref			The reference id of the polygon.
+	///  @param[in]		pos			The position to check. [(x, y, z)]
+	///  @param[out]	closest		The closest point on the polygon. [(x, y, z)]
+	/// @returns The status flags for the query.
+	dtStatus closestPointOnPoly(dtPolyRef ref, const float* pos, float* closest) const;
+	
+	/// Returns a point on the boundary closest to the source point if the source point is outside the 
+	/// polygon's xz-bounds.
+	///  @param[in]		ref			The reference id to the polygon.
+	///  @param[in]		pos			The position to check. [(x, y, z)]
+	///  @param[out]	closest		The closest point. [(x, y, z)]
+	/// @returns The status flags for the query.
+	dtStatus closestPointOnPolyBoundary(dtPolyRef ref, const float* pos, float* closest) const;
+	
+	/// Gets the height of the polygon at the provided position using the height detail. (Most accurate.)
+	///  @param[in]		ref			The reference id of the polygon.
+	///  @param[in]		pos			A position within the xz-bounds of the polygon. [(x, y, z)]
+	///  @param[out]	height		The height at the surface of the polygon.
+	/// @returns The status flags for the query.
+	dtStatus getPolyHeight(dtPolyRef ref, const float* pos, float* height) const;
+
+	/// @}
+	/// @name Miscellaneous Functions
+	/// @{
+
+	/// Returns true if the polygon reference is valid and passes the filter restrictions.
+	///  @param[in]		ref			The polygon reference to check.
+	///  @param[in]		filter		The filter to apply.
+	bool isValidPolyRef(dtPolyRef ref, const dtQueryFilter* filter) const;
+
+	/// Returns true if the polygon reference is in the closed list. 
+	///  @param[in]		ref		The reference id of the polygon to check.
+	/// @returns True if the polygon is in closed list.
+	bool isInClosedList(dtPolyRef ref) const;
+	
+	/// Gets the node pool.
+	/// @returns The node pool.
+	class dtNodePool* getNodePool() const { return m_nodePool; }
+	
+	/// Gets the navigation mesh the query object is using.
+	/// @return The navigation mesh the query object is using.
+	const dtNavMesh* getAttachedNavMesh() const { return m_nav; }
+
+	/// @}
+	
+private:
+	
+	/// Returns neighbour tile based on side.
+	dtMeshTile* getNeighbourTileAt(int x, int y, int side) const;
+
+	/// Queries polygons within a tile.
+	int queryPolygonsInTile(const dtMeshTile* tile, const float* qmin, const float* qmax, const dtQueryFilter* filter,
+							dtPolyRef* polys, const int maxPolys) const;
+	/// Find nearest polygon within a tile.
+	dtPolyRef findNearestPolyInTile(const dtMeshTile* tile, const float* center, const float* extents,
+									const dtQueryFilter* filter, float* nearestPt) const;
+	/// Returns closest point on polygon.
+	void closestPointOnPolyInTile(const dtMeshTile* tile, const dtPoly* poly, const float* pos, float* closest) const;
+	
+	/// Returns portal points between two polygons.
+	dtStatus getPortalPoints(dtPolyRef from, dtPolyRef to, float* left, float* right,
+							 unsigned char& fromType, unsigned char& toType) const;
+	dtStatus getPortalPoints(dtPolyRef from, const dtPoly* fromPoly, const dtMeshTile* fromTile,
+							 dtPolyRef to, const dtPoly* toPoly, const dtMeshTile* toTile,
+							 float* left, float* right) const;
+	
+	/// Returns edge mid point between two polygons.
+	dtStatus getEdgeMidPoint(dtPolyRef from, dtPolyRef to, float* mid) const;
+	dtStatus getEdgeMidPoint(dtPolyRef from, const dtPoly* fromPoly, const dtMeshTile* fromTile,
+							 dtPolyRef to, const dtPoly* toPoly, const dtMeshTile* toTile,
+							 float* mid) const;
+	
+	const dtNavMesh* m_nav;				///< Pointer to navmesh data.
+
+	struct dtQueryData
+	{
+		dtStatus status;
+		struct dtNode* lastBestNode;
+		float lastBestNodeCost;
+		dtPolyRef startRef, endRef;
+		float startPos[3], endPos[3];
+		const dtQueryFilter* filter;
+	};
+	dtQueryData m_query;				///< Sliced query state.
+
+	class dtNodePool* m_tinyNodePool;	///< Pointer to small node pool.
+	class dtNodePool* m_nodePool;		///< Pointer to node pool.
+	class dtNodeQueue* m_openList;		///< Pointer to open list queue.
+};
+
+/// Allocates a query object using the Detour allocator.
+/// @return An allocated query object, or null on failure.
+/// @ingroup detour
+dtNavMeshQuery* dtAllocNavMeshQuery();
+
+/// Frees the specified query object using the Detour allocator.
+///  @param[in]		query		A query object allocated using #dtAllocNavMeshQuery
+/// @ingroup detour
+void dtFreeNavMeshQuery(dtNavMeshQuery* query);
+
+#endif // DETOURNAVMESHQUERY_H
diff --git a/Engine/lib/recast/Detour/Include/DetourNode.h b/Engine/lib/recast/Detour/Include/DetourNode.h
new file mode 100644
index 000000000..b68c922d0
--- /dev/null
+++ b/Engine/lib/recast/Detour/Include/DetourNode.h
@@ -0,0 +1,159 @@
+//
+// Copyright (c) 2009-2010 Mikko Mononen memon@inside.org
+//
+// This software is provided 'as-is', without any express or implied
+// warranty.  In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source distribution.
+//
+
+#ifndef DETOURNODE_H
+#define DETOURNODE_H
+
+#include "DetourNavMesh.h"
+
+enum dtNodeFlags
+{
+	DT_NODE_OPEN = 0x01,
+	DT_NODE_CLOSED = 0x02,
+};
+
+typedef unsigned short dtNodeIndex;
+static const dtNodeIndex DT_NULL_IDX = (dtNodeIndex)~0;
+
+struct dtNode
+{
+	float pos[3];				///< Position of the node.
+	float cost;					///< Cost from previous node to current node.
+	float total;				///< Cost up to the node.
+	unsigned int pidx : 30;		///< Index to parent node.
+	unsigned int flags : 2;		///< Node flags 0/open/closed.
+	dtPolyRef id;				///< Polygon ref the node corresponds to.
+};
+
+
+class dtNodePool
+{
+public:
+	dtNodePool(int maxNodes, int hashSize);
+	~dtNodePool();
+	/// Assignment is deliberately a no-op: nothing is copied.
+	inline void operator=(const dtNodePool&) {}
+	void clear();
+	dtNode* getNode(dtPolyRef id);
+	dtNode* findNode(dtPolyRef id);
+
+	/// Returns the 1-based offset of @p node from the start of m_nodes, or 0 for a null pointer.
+	inline unsigned int getNodeIdx(const dtNode* node) const
+	{
+		return node ? (unsigned int)(node - m_nodes)+1 : 0;
+	}
+
+	/// Returns the node at the 1-based index @p idx, or null when @p idx is 0.
+	inline dtNode* getNodeAtIdx(unsigned int idx)
+	{
+		return idx ? &m_nodes[idx-1] : 0;
+	}
+
+	/// Const overload: same 1-based indexing, with 0 mapping to null.
+	inline const dtNode* getNodeAtIdx(unsigned int idx) const
+	{
+		return idx ? &m_nodes[idx-1] : 0;
+	}
+
+	/// Returns sizeof(*this) plus m_maxNodes nodes and (m_maxNodes + m_hashSize) node indices, in bytes.
+	inline int getMemUsed() const
+	{
+		return sizeof(*this)
+			+ sizeof(dtNode)*m_maxNodes
+			+ sizeof(dtNodeIndex)*m_maxNodes
+			+ sizeof(dtNodeIndex)*m_hashSize;
+	}
+
+	inline int getMaxNodes() const { return m_maxNodes; }
+	inline int getHashSize() const { return m_hashSize; }
+	inline dtNodeIndex getFirst(int bucket) const { return m_first[bucket]; }
+	inline dtNodeIndex getNext(int i) const { return m_next[i]; }
+
+private:
+	dtNode* m_nodes;
+	dtNodeIndex* m_first;
+	dtNodeIndex* m_next;
+	const int m_maxNodes;
+	const int m_hashSize;
+	int m_nodeCount;
+};
+
+class dtNodeQueue
+{
+public:
+	dtNodeQueue(int n);
+	~dtNodeQueue();
+	/// Assignment is deliberately a no-op: nothing is copied.
+	inline void operator=(dtNodeQueue&) {}
+
+	/// Resets the element count to zero; the heap storage itself is not touched.
+	inline void clear() { m_size = 0; }
+
+	/// Returns the entry in heap slot 0 without removing it. No emptiness check is made.
+	inline dtNode* top() { return m_heap[0]; }
+
+	/// Removes and returns the entry in heap slot 0, then hands the former last entry to
+	/// trickleDown() at slot 0. No emptiness check is made, so test empty() first.
+	inline dtNode* pop()
+	{
+		dtNode* const head = m_heap[0];
+		--m_size;
+		trickleDown(0, m_heap[m_size]);
+		return head;
+	}
+
+	/// Adds @p node by growing the count and calling bubbleUp() on the new last slot.
+	/// No capacity check is made in this method.
+	inline void push(dtNode* node)
+	{
+		const int slot = m_size++;
+		bubbleUp(slot, node);
+	}
+
+	/// Finds @p node with a linear scan of the heap and calls bubbleUp() on its slot.
+	/// Does nothing when the node is not in the queue.
+	inline void modify(dtNode* node)
+	{
+		int i = 0;
+		while (i < m_size && m_heap[i] != node)
+			++i;
+		if (i < m_size)
+			bubbleUp(i, node);
+	}
+
+	inline bool empty() const { return !m_size; }
+
+	/// Returns sizeof(*this) plus the size of m_capacity+1 node pointers, in bytes.
+	inline int getMemUsed() const
+	{
+		return sizeof(*this) + sizeof(dtNode*)*(m_capacity+1);
+	}
+
+	inline int getCapacity() const { return m_capacity; }
+
+private:
+	void bubbleUp(int i, dtNode* node);
+	void trickleDown(int i, dtNode* node);
+
+	dtNode** m_heap;
+	const int m_capacity;
+	int m_size;
+};
+
+
+#endif // DETOURNODE_H
diff --git a/Engine/lib/recast/Detour/Include/DetourStatus.h b/Engine/lib/recast/Detour/Include/DetourStatus.h
new file mode 100644
index 000000000..af822c4a9
--- /dev/null
+++ b/Engine/lib/recast/Detour/Include/DetourStatus.h
@@ -0,0 +1,64 @@
+//
+// Copyright (c) 2009-2010 Mikko Mononen memon@inside.org
+//
+// This software is provided 'as-is', without any express or implied
+// warranty.  In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source distribution.
+//
+
+#ifndef DETOURSTATUS_H
+#define DETOURSTATUS_H
+
+typedef unsigned int dtStatus;
+
+// High level status.
+static const unsigned int DT_FAILURE = 1u << 31;			// Operation failed.
+static const unsigned int DT_SUCCESS = 1u << 30;			// Operation succeeded.
+static const unsigned int DT_IN_PROGRESS = 1u << 29;		// Operation still in progress.
+
+// Detail information for status.
+static const unsigned int DT_STATUS_DETAIL_MASK = 0x0ffffff;	// Mask for the detail bits (the low 24 bits).
+static const unsigned int DT_WRONG_MAGIC = 1 << 0;		// Input data is not recognized.
+static const unsigned int DT_WRONG_VERSION = 1 << 1;	// Input data is in wrong version.
+static const unsigned int DT_OUT_OF_MEMORY = 1 << 2;	// Operation ran out of memory.
+static const unsigned int DT_INVALID_PARAM = 1 << 3;	// An input parameter was invalid.
+static const unsigned int DT_BUFFER_TOO_SMALL = 1 << 4;	// Result buffer for the query was too small to store all results.
+static const unsigned int DT_OUT_OF_NODES = 1 << 5;		// Query ran out of nodes during search.
+static const unsigned int DT_PARTIAL_RESULT = 1 << 6;	// Query did not reach the end location, returning best guess.
+
+
+// Returns true if the status has the DT_SUCCESS bit set.
+inline bool dtStatusSucceed(dtStatus status)
+{
+	return 0 != (status & DT_SUCCESS);
+}
+
+// Returns true if the status has the DT_FAILURE bit set.
+inline bool dtStatusFailed(dtStatus status)
+{
+	return 0 != (status & DT_FAILURE);
+}
+
+// Returns true if the status has the DT_IN_PROGRESS bit set.
+inline bool dtStatusInProgress(dtStatus status)
+{
+	return 0 != (status & DT_IN_PROGRESS);
+}
+
+// Returns true if at least one bit of 'detail' is set in the status.
+inline bool dtStatusDetail(dtStatus status, unsigned int detail)
+{
+	return 0 != (status & detail);
+}
+
+#endif // DETOURSTATUS_H
diff --git a/Engine/lib/recast/Detour/Source/DetourAlloc.cpp b/Engine/lib/recast/Detour/Source/DetourAlloc.cpp
new file mode 100644
index 000000000..5f671df5b
--- /dev/null
+++ b/Engine/lib/recast/Detour/Source/DetourAlloc.cpp
@@ -0,0 +1,50 @@
+//
+// Copyright (c) 2009-2010 Mikko Mononen memon@inside.org
+//
+// This software is provided 'as-is', without any express or implied
+// warranty.  In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source distribution.
+//
+
+#include <stdlib.h>
+#include "DetourAlloc.h"
+
+static void *dtAllocDefault(int size, dtAllocHint)
+{
+	return malloc(size);
+}
+
+static void dtFreeDefault(void *ptr)
+{
+	free(ptr);
+}
+
+static dtAllocFunc* sAllocFunc = dtAllocDefault;
+static dtFreeFunc* sFreeFunc = dtFreeDefault;
+
+void dtAllocSetCustom(dtAllocFunc *allocFunc, dtFreeFunc *freeFunc)
+{
+	sAllocFunc = allocFunc ? allocFunc : dtAllocDefault;
+	sFreeFunc = freeFunc ? freeFunc : dtFreeDefault;
+}
+
+void* dtAlloc(int size, dtAllocHint hint)
+{
+	return sAllocFunc(size, hint);
+}
+
+void dtFree(void* ptr)
+{
+	if (ptr)
+		sFreeFunc(ptr);
+}
diff --git a/Engine/lib/recast/Detour/Source/DetourCommon.cpp b/Engine/lib/recast/Detour/Source/DetourCommon.cpp
new file mode 100644
index 000000000..e003bf60c
--- /dev/null
+++ b/Engine/lib/recast/Detour/Source/DetourCommon.cpp
@@ -0,0 +1,376 @@
+//
+// Copyright (c) 2009-2010 Mikko Mononen memon@inside.org
+//
+// This software is provided 'as-is', without any express or implied
+// warranty.  In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source distribution.
+//
+
+#include <math.h>
+#include "DetourCommon.h"
+
+//////////////////////////////////////////////////////////////////////////////////////////
+
+float dtSqrt(float x)
+{
+	return sqrtf(x);
+}
+
+void dtClosestPtPointTriangle(float* closest, const float* p,
+							  const float* a, const float* b, const float* c)
+{
+	// Check if P in vertex region outside A
+	float ab[3], ac[3], ap[3];
+	dtVsub(ab, b, a);
+	dtVsub(ac, c, a);
+	dtVsub(ap, p, a);
+	float d1 = dtVdot(ab, ap);
+	float d2 = dtVdot(ac, ap);
+	if (d1 <= 0.0f && d2 <= 0.0f)
+	{
+		// barycentric coordinates (1,0,0)
+		dtVcopy(closest, a);
+		return;
+	}
+	
+	// Check if P in vertex region outside B
+	float bp[3];
+	dtVsub(bp, p, b);
+	float d3 = dtVdot(ab, bp);
+	float d4 = dtVdot(ac, bp);
+	if (d3 >= 0.0f && d4 <= d3)
+	{
+		// barycentric coordinates (0,1,0)
+		dtVcopy(closest, b);
+		return;
+	}
+	
+	// Check if P in edge region of AB, if so return projection of P onto AB
+	float vc = d1*d4 - d3*d2;
+	if (vc <= 0.0f && d1 >= 0.0f && d3 <= 0.0f)
+	{
+		// barycentric coordinates (1-v,v,0)
+		float v = d1 / (d1 - d3);
+		closest[0] = a[0] + v * ab[0];
+		closest[1] = a[1] + v * ab[1];
+		closest[2] = a[2] + v * ab[2];
+		return;
+	}
+	
+	// Check if P in vertex region outside C
+	float cp[3];
+	dtVsub(cp, p, c);
+	float d5 = dtVdot(ab, cp);
+	float d6 = dtVdot(ac, cp);
+	if (d6 >= 0.0f && d5 <= d6)
+	{
+		// barycentric coordinates (0,0,1)
+		dtVcopy(closest, c);
+		return;
+	}
+	
+	// Check if P in edge region of AC, if so return projection of P onto AC
+	float vb = d5*d2 - d1*d6;
+	if (vb <= 0.0f && d2 >= 0.0f && d6 <= 0.0f)
+	{
+		// barycentric coordinates (1-w,0,w)
+		float w = d2 / (d2 - d6);
+		closest[0] = a[0] + w * ac[0];
+		closest[1] = a[1] + w * ac[1];
+		closest[2] = a[2] + w * ac[2];
+		return;
+	}
+	
+	// Check if P in edge region of BC, if so return projection of P onto BC
+	float va = d3*d6 - d5*d4;
+	if (va <= 0.0f && (d4 - d3) >= 0.0f && (d5 - d6) >= 0.0f)
+	{
+		// barycentric coordinates (0,1-w,w)
+		float w = (d4 - d3) / ((d4 - d3) + (d5 - d6));
+		closest[0] = b[0] + w * (c[0] - b[0]);
+		closest[1] = b[1] + w * (c[1] - b[1]);
+		closest[2] = b[2] + w * (c[2] - b[2]);
+		return;
+	}
+	
+	// P inside face region. Compute Q through its barycentric coordinates (u,v,w)
+	float denom = 1.0f / (va + vb + vc);
+	float v = vb * denom;
+	float w = vc * denom;
+	closest[0] = a[0] + ab[0] * v + ac[0] * w;
+	closest[1] = a[1] + ab[1] * v + ac[1] * w;
+	closest[2] = a[2] + ab[2] * v + ac[2] * w;
+}
+
+bool dtIntersectSegmentPoly2D(const float* p0, const float* p1,
+							  const float* verts, int nverts,
+							  float& tmin, float& tmax,
+							  int& segMin, int& segMax)
+{
+	static const float EPS = 0.00000001f;
+	
+	tmin = 0;
+	tmax = 1;
+	segMin = -1;
+	segMax = -1;
+	
+	float dir[3];
+	dtVsub(dir, p1, p0);
+	
+	for (int i = 0, j = nverts-1; i < nverts; j=i++)
+	{
+		float edge[3], diff[3];
+		dtVsub(edge, &verts[i*3], &verts[j*3]);
+		dtVsub(diff, p0, &verts[j*3]);
+		const float n = dtVperp2D(edge, diff);
+		const float d = dtVperp2D(dir, edge);
+		if (fabsf(d) < EPS)
+		{
+			// S is nearly parallel to this edge
+			if (n < 0)
+				return false;
+			else
+				continue;
+		}
+		const float t = n / d;
+		if (d < 0)
+		{
+			// segment S is entering across this edge
+			if (t > tmin)
+			{
+				tmin = t;
+				segMin = j;
+				// S enters after leaving polygon
+				if (tmin > tmax)
+					return false;
+			}
+		}
+		else
+		{
+			// segment S is leaving across this edge
+			if (t < tmax)
+			{
+				tmax = t;
+				segMax = j;
+				// S leaves before entering polygon
+				if (tmax < tmin)
+					return false;
+			}
+		}
+	}
+	
+	return true;
+}
+
+// Squared xz-plane distance from pt to segment pq; t receives the closest point's parameter, clamped to [0,1].
+float dtDistancePtSegSqr2D(const float* pt, const float* p, const float* q, float& t)
+{
+	const float segX = q[0] - p[0];
+	const float segZ = q[2] - p[2];
+	const float lenSqr = segX*segX + segZ*segZ;
+	t = segX*(pt[0] - p[0]) + segZ*(pt[2] - p[2]);
+	if (lenSqr > 0) t /= lenSqr;	// Zero-length segments skip the division.
+	if (t < 0) t = 0;
+	else if (t > 1) t = 1;
+	// Offset from pt to the closest point on the segment.
+	const float offX = p[0] + t*segX - pt[0];
+	const float offZ = p[2] + t*segZ - pt[2];
+	return offX*offX + offZ*offZ;
+}
+
+// Computes the arithmetic mean of the nidx vertices that idx selects from verts
+// (three floats per vertex) and stores it in tc. nidx is expected to be non-zero.
+void dtCalcPolyCenter(float* tc, const unsigned short* idx, int nidx, const float* verts)
+{
+	// Accumulate the selected vertices component-wise.
+	tc[0] = tc[1] = tc[2] = 0.0f;
+	for (int j = 0; j < nidx; ++j)
+	{
+		const float* v = &verts[idx[j]*3];
+		for (int k = 0; k < 3; ++k)
+			tc[k] += v[k];
+	}
+	// Multiply by the reciprocal of the vertex count to turn the sums into means.
+	const float s = 1.0f / nidx;
+	for (int k = 0; k < 3; ++k)
+		tc[k] *= s;
+}
+
+bool dtClosestHeightPointTriangle(const float* p, const float* a, const float* b, const float* c, float& h)
+{
+	float v0[3], v1[3], v2[3];
+	dtVsub(v0, c,a);
+	dtVsub(v1, b,a);
+	dtVsub(v2, p,a);
+	
+	const float dot00 = dtVdot2D(v0, v0);
+	const float dot01 = dtVdot2D(v0, v1);
+	const float dot02 = dtVdot2D(v0, v2);
+	const float dot11 = dtVdot2D(v1, v1);
+	const float dot12 = dtVdot2D(v1, v2);
+	
+	// Compute barycentric coordinates
+	const float invDenom = 1.0f / (dot00 * dot11 - dot01 * dot01);
+	const float u = (dot11 * dot02 - dot01 * dot12) * invDenom;
+	const float v = (dot00 * dot12 - dot01 * dot02) * invDenom;
+
+	// The (sloppy) epsilon is needed to allow to get height of points which
+	// are interpolated along the edges of the triangles.
+	static const float EPS = 1e-4f;
+	
+	// If point lies inside the triangle, return interpolated ycoord.
+	if (u >= -EPS && v >= -EPS && (u+v) <= 1+EPS)
+	{
+		h = a[1] + v0[1]*u + v1[1]*v;
+		return true;
+	}
+	
+	return false;
+}
+
+/// @par
+///
+/// All points are projected onto the xz-plane, so the y-values are ignored.
+/// Crossing-number test: every polygon edge that passes the per-edge check toggles the result.
+bool dtPointInPolygon(const float* pt, const float* verts, const int nverts)
+{
+	// TODO: Replace pnpoly with triArea2D tests?
+	bool inside = false;
+	for (int cur = 0, prev = nverts-1; cur < nverts; prev = cur++)
+	{
+		const float* vc = &verts[cur*3];
+		const float* vp = &verts[prev*3];
+		const bool straddles = (vc[2] > pt[2]) != (vp[2] > pt[2]);
+		if (straddles && pt[0] < (vp[0]-vc[0]) * (pt[2]-vc[2]) / (vp[2]-vc[2]) + vc[0])
+			inside = !inside;
+	}
+	return inside;
+}
+
+bool dtDistancePtPolyEdgesSqr(const float* pt, const float* verts, const int nverts,
+							  float* ed, float* et)
+{
+	// TODO: Replace pnpoly with triArea2D tests?
+	int i, j;
+	bool c = false;
+	for (i = 0, j = nverts-1; i < nverts; j = i++)
+	{
+		const float* vi = &verts[i*3];
+		const float* vj = &verts[j*3];
+		if (((vi[2] > pt[2]) != (vj[2] > pt[2])) &&
+			(pt[0] < (vj[0]-vi[0]) * (pt[2]-vi[2]) / (vj[2]-vi[2]) + vi[0]) )
+			c = !c;
+		ed[j] = dtDistancePtSegSqr2D(pt, vj, vi, et[j]);
+	}
+	return c;
+}
+
+static void projectPoly(const float* axis, const float* poly, const int npoly,
+						float& rmin, float& rmax)
+{
+	rmin = rmax = dtVdot2D(axis, &poly[0]);
+	for (int i = 1; i < npoly; ++i)
+	{
+		const float d = dtVdot2D(axis, &poly[i*3]);
+		rmin = dtMin(rmin, d);
+		rmax = dtMax(rmax, d);
+	}
+}
+
+inline bool overlapRange(const float amin, const float amax,
+						 const float bmin, const float bmax,
+						 const float eps)
+{
+	return ((amin+eps) > bmax || (amax-eps) < bmin) ? false : true;
+}
+
+// Returns true if some edge normal of 'poly' (in the xz-plane) is a separating axis
+// between polya and polyb, i.e. their projections onto it do not overlap within eps.
+static bool hasSeparatingEdgeAxis(const float* poly, const int npoly,
+								  const float* polya, const int npolya,
+								  const float* polyb, const int npolyb, const float eps)
+{
+	for (int i = 0, j = npoly-1; i < npoly; j=i++)
+	{
+		const float* va = &poly[j*3];
+		const float* vb = &poly[i*3];
+		// Perpendicular of the edge in the xz-plane (not normalized).
+		const float n[3] = { vb[2]-va[2], 0, -(vb[0]-va[0]) };
+		float amin,amax,bmin,bmax;
+		projectPoly(n, polya, npolya, amin,amax);
+		projectPoly(n, polyb, npolyb, bmin,bmax);
+		if (!overlapRange(amin,amax, bmin,bmax, eps))
+			return true;
+	}
+	return false;
+}
+
+/// @par
+///
+/// All vertices are projected onto the xz-plane, so the y-values are ignored.
+/// Separating-axis test: the polygons overlap unless an edge normal of either one separates them.
+/// Presumably both polygons are convex (the test relies on it) -- TODO confirm with callers.
+bool dtOverlapPolyPoly2D(const float* polya, const int npolya,
+						 const float* polyb, const int npolyb)
+{
+	// Tolerance: projected ranges that overlap by less than this count as separated.
+	const float eps = 1e-4f;
+
+	if (hasSeparatingEdgeAxis(polya, npolya, polya, npolya, polyb, npolyb, eps))
+		return false;
+	if (hasSeparatingEdgeAxis(polyb, npolyb, polya, npolya, polyb, npolyb, eps))
+		return false;
+	return true;
+}
+
+// Returns a random point in a convex polygon (npts >= 3 vertices, three floats each).
+// Adapted from Graphics Gems article.
+// areas is scratch storage for npts floats; s and t are random values expected in
+// [0..1] that drive the triangle choice and the position; the point is written to out.
+void dtRandomPointInConvexPoly(const float* pts, const int npts, float* areas,
+							   const float s, const float t, float* out)
+{
+	// Calc triangle areas
+	float areasum = 0.0f;
+	for (int i = 2; i < npts; i++) {
+		areas[i] = dtTriArea2D(&pts[0], &pts[(i-1)*3], &pts[i*3]);
+		areasum += dtMax(0.001f, areas[i]);
+	}
+	// Find sub triangle weighted by area. Default to the end of the last triangle: the
+	// search can fall through (s == 1, rounding, or clamped degenerate areas inflating
+	// areasum), and a default of tri = 0 would make pb below read pts[-3].
+	const float thr = s*areasum;
+	float acc = 0.0f;
+	float u = 1.0f;
+	int tri = npts - 1;
+	for (int i = 2; i < npts; i++) {
+		const float dacc = areas[i];
+		if (thr >= acc && thr < (acc+dacc)) {
+			u = (thr - acc) / dacc;
+			tri = i;
+			break;
+		}
+		acc += dacc;
+	}
+	float v = dtSqrt(t);
+	const float a = 1 - v;
+	const float b = (1 - u) * v;
+	const float c = u * v;
+	const float* pa = &pts[0];
+	const float* pb = &pts[(tri-1)*3];
+	const float* pc = &pts[tri*3];
+	out[0] = a*pa[0] + b*pb[0] + c*pc[0];
+	out[1] = a*pa[1] + b*pb[1] + c*pc[1];
+	out[2] = a*pa[2] + b*pb[2] + c*pc[2];
+}
+
diff --git a/Engine/lib/recast/Detour/Source/DetourNavMesh.cpp b/Engine/lib/recast/Detour/Source/DetourNavMesh.cpp
new file mode 100644
index 000000000..a4bd38c9a
--- /dev/null
+++ b/Engine/lib/recast/Detour/Source/DetourNavMesh.cpp
@@ -0,0 +1,1443 @@
+//
+// Copyright (c) 2009-2010 Mikko Mononen memon@inside.org
+//
+// This software is provided 'as-is', without any express or implied
+// warranty.  In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source distribution.
+//
+
+#include <math.h>
+#include <float.h>
+#include <string.h>
+#include <stdio.h>
+#include "DetourNavMesh.h"
+#include "DetourNode.h"
+#include "DetourCommon.h"
+#include "DetourAlloc.h"
+#include "DetourAssert.h"
+#include <new>
+
+
+// Tests whether two 2D slab segments overlap.
+// Each segment is given by its two end points (amin/amax and bmin/bmax), where
+// element [0] is the coordinate along the border and element [1] is the height.
+//  px  Amount by which each segment is shortened at both ends along [0].
+//  py  Height tolerance; end points closer than 2*py count as overlapping.
+inline bool overlapSlabs(const float* amin, const float* amax,
+						 const float* bmin, const float* bmax,
+						 const float px, const float py)
+{
+	// Horizontal test on the shrunken segments, so that slabs which merely
+	// touch at their end points are not reported as connected.
+	const float lo = dtMax(amin[0]+px,bmin[0]+px);
+	const float hi = dtMin(amax[0]-px,bmax[0]-px);
+	if (lo > hi)
+		return false;
+
+	// Express both segments as lines y = slope*x + offset.
+	const float slopeA = (amax[1]-amin[1]) / (amax[0]-amin[0]);
+	const float offsetA = amin[1] - slopeA*amin[0];
+	const float slopeB = (bmax[1]-bmin[1]) / (bmax[0]-bmin[0]);
+	const float offsetB = bmin[1] - slopeB*bmin[0];
+
+	// Heights of both lines at the two ends of the shared range.
+	const float aLoY = slopeA*lo + offsetA;
+	const float aHiY = slopeA*hi + offsetA;
+	const float bLoY = slopeB*lo + offsetB;
+	const float bHiY = slopeB*hi + offsetB;
+	const float gapLo = bLoY - aLoY;
+	const float gapHi = bHiY - aHiY;
+
+	// Gaps of opposite sign mean the segments cross, which always overlaps.
+	if (gapLo*gapHi < 0)
+		return true;
+
+	// Otherwise they overlap only if one end is within the height tolerance.
+	const float limit = dtSqr(py*2);
+	return gapLo*gapLo <= limit || gapHi*gapHi <= limit;
+}
+
+// Returns the coordinate of vertex 'va' on the axis the given tile side is
+// perpendicular to: x for sides 0 and 4, z for sides 2 and 6, and 0 for any
+// other side value.
+static float getSlabCoord(const float* va, const int side)
+{
+	switch (side)
+	{
+		case 0:
+		case 4:
+			return va[0];
+		case 2:
+		case 6:
+			return va[2];
+		default:
+			return 0;
+	}
+}
+
+// Projects the edge va->vb onto the 2D slab of a tile side and stores its end
+// points ordered along the border axis.
+// For sides 0 and 4 the border axis is z, for sides 2 and 6 it is x; element [0]
+// of each output receives that coordinate and element [1] receives the height (y).
+// For any other side value the outputs are left untouched.
+static void calcSlabEndPoints(const float* va, const float* vb, float* bmin, float* bmax, const int side)
+{
+	int axis;
+	if (side == 0 || side == 4)
+		axis = 2;
+	else if (side == 2 || side == 6)
+		axis = 0;
+	else
+		return;
+
+	// The vertex with the smaller border coordinate becomes the minimum end.
+	const bool aIsLower = va[axis] < vb[axis];
+	const float* lower = aIsLower ? va : vb;
+	const float* upper = aIsLower ? vb : va;
+
+	bmin[0] = lower[axis];
+	bmin[1] = lower[1];
+	bmax[0] = upper[axis];
+	bmax[1] = upper[1];
+}
+
+// Hashes a tile grid location (x,y) into a lookup-table bucket index.
+// Each coordinate is multiplied by a large constant in unsigned arithmetic,
+// the products are summed, and the sum is reduced with 'mask'.
+inline int computeTileHash(int x, int y, const int mask)
+{
+	// Large multiplicative constants; described as arbitrarily chosen primes.
+	const unsigned int mulX = 0x8da6b343;
+	const unsigned int mulY = 0xd8163841;
+	const unsigned int mixed = mulX * x + mulY * y;
+	return (int)(mixed & mask);
+}
+
+// Pops the first entry off the tile's link free list and returns its index.
+// Returns DT_NULL_LINK, leaving the list untouched, when no link is free.
+inline unsigned int allocLink(dtMeshTile* tile)
+{
+	const unsigned int head = tile->linksFreeList;
+	if (head != DT_NULL_LINK)
+		tile->linksFreeList = tile->links[head].next;
+	return head;
+}
+
+// Pushes the link with the given index onto the front of the tile's link
+// free list.
+inline void freeLink(dtMeshTile* tile, unsigned int link)
+{
+	dtLink& released = tile->links[link];
+	released.next = tile->linksFreeList;
+	tile->linksFreeList = link;
+}
+
+
+/// Allocates storage for a navigation mesh through dtAlloc (DT_ALLOC_PERM) and
+/// constructs a dtNavMesh in it.
+///  @return The new navigation mesh, or null if the allocation failed.
+dtNavMesh* dtAllocNavMesh()
+{
+	void* storage = dtAlloc(sizeof(dtNavMesh), DT_ALLOC_PERM);
+	return storage ? new(storage) dtNavMesh : 0;
+}
+
+/// @par
+///
+/// Destroys the navigation mesh and releases its storage with dtFree. A null
+/// pointer is ignored.
+///
+/// This function will only free the memory for tiles with the #DT_TILE_FREE_DATA
+/// flag set.
+void dtFreeNavMesh(dtNavMesh* navmesh)
+{
+	if (navmesh)
+	{
+		// The object was constructed in place, so destroy it explicitly.
+		navmesh->~dtNavMesh();
+		dtFree(navmesh);
+	}
+}
+
+//////////////////////////////////////////////////////////////////////////////////////////
+
+/**
+@class dtNavMesh
+
+The navigation mesh consists of one or more tiles defining three primary types of structural data:
+
+A polygon mesh which defines most of the navigation graph. (See rcPolyMesh for its structure.)
+A detail mesh used for determining surface height on the polygon mesh. (See rcPolyMeshDetail for its structure.)
+Off-mesh connections, which define custom point-to-point edges within the navigation graph.
+
+The general build process is as follows:
+
+-# Create rcPolyMesh and rcPolyMeshDetail data using the Recast build pipeline.
+-# Optionally, create off-mesh connection data.
+-# Combine the source data into a dtNavMeshCreateParams structure.
+-# Create a tile data array using dtCreateNavMeshData().
+-# Allocate a dtNavMesh object and initialize it. (For single tile navigation meshes,
+   the tile data is loaded during this step.)
+-# For multi-tile navigation meshes, load the tile data using dtNavMesh::addTile().
+
+Notes:
+
+- This class is usually used in conjunction with the dtNavMeshQuery class for pathfinding.
+- Technically, all navigation meshes are tiled. A 'solo' mesh is simply a navigation mesh initialized 
+  to have only a single tile.
+- This class does not implement any asynchronous methods. So the ::dtStatus result of all methods will 
+  always contain either a success or failure flag.
+
+@see dtNavMeshQuery, dtCreateNavMeshData, dtNavMeshCreateParams, #dtAllocNavMesh, #dtFreeNavMesh
+*/
+
+/// Constructs an empty navigation mesh: every counter, pointer and bit width is
+/// zero, and the stored parameters and origin are cleared.
+dtNavMesh::dtNavMesh() :
+	m_tileWidth(0), m_tileHeight(0),
+	m_maxTiles(0),
+	m_tileLutSize(0), m_tileLutMask(0),
+	m_posLookup(0),
+	m_nextFree(0),
+	m_tiles(0),
+	m_saltBits(0), m_tileBits(0), m_polyBits(0)
+{
+	for (int axis = 0; axis < 3; ++axis)
+		m_orig[axis] = 0;
+	memset(&m_params, 0, sizeof(dtNavMeshParams));
+}
+
+/// Releases the data of every tile flagged with DT_TILE_FREE_DATA, then the
+/// tile array and the position lookup table.
+dtNavMesh::~dtNavMesh()
+{
+	// init() records m_maxTiles before it allocates m_tiles, so after a failed
+	// allocation the tile array can be null while the count is non-zero.
+	if (m_tiles)
+	{
+		for (int i = 0; i < m_maxTiles; ++i)
+		{
+			if (m_tiles[i].flags & DT_TILE_FREE_DATA)
+			{
+				dtFree(m_tiles[i].data);
+				m_tiles[i].data = 0;
+				m_tiles[i].dataSize = 0;
+			}
+		}
+	}
+	dtFree(m_posLookup);
+	dtFree(m_tiles);
+}
+		
+/// Initializes the navigation mesh for tiled use.
+///
+/// Copies the parameters, allocates and clears the tile array and the position
+/// lookup table, threads all tiles onto the free list and derives the bit
+/// layout (salt/tile/poly) used by polygon references.
+///  @param[in]  params  Initialization parameters.
+///  @return DT_SUCCESS, DT_FAILURE | DT_OUT_OF_MEMORY if an allocation failed, or
+///          DT_FAILURE | DT_INVALID_PARAM if fewer than 10 bits remain for the salt.
+dtStatus dtNavMesh::init(const dtNavMeshParams* params)
+{
+	memcpy(&m_params, params, sizeof(dtNavMeshParams));
+	dtVcopy(m_orig, params->orig);
+	m_tileWidth = params->tileWidth;
+	m_tileHeight = params->tileHeight;
+
+	// Init tiles
+	m_maxTiles = params->maxTiles;
+	m_tileLutSize = dtNextPow2(params->maxTiles/4);
+	if (!m_tileLutSize) m_tileLutSize = 1;
+	m_tileLutMask = m_tileLutSize-1;
+
+	m_tiles = (dtMeshTile*)dtAlloc(sizeof(dtMeshTile)*m_maxTiles, DT_ALLOC_PERM);
+	if (!m_tiles)
+	{
+		// Do not advertise tiles that do not exist; the accessors and the
+		// destructor index m_tiles up to m_maxTiles.
+		m_maxTiles = 0;
+		return DT_FAILURE | DT_OUT_OF_MEMORY;
+	}
+	// Clear the tiles before anything else can fail, so the destructor never
+	// inspects uninitialised tile flags.
+	memset(m_tiles, 0, sizeof(dtMeshTile)*m_maxTiles);
+
+	m_posLookup = (dtMeshTile**)dtAlloc(sizeof(dtMeshTile*)*m_tileLutSize, DT_ALLOC_PERM);
+	if (!m_posLookup)
+		return DT_FAILURE | DT_OUT_OF_MEMORY;
+	memset(m_posLookup, 0, sizeof(dtMeshTile*)*m_tileLutSize);
+
+	// Build the free list back to front so that it starts at tile 0.
+	m_nextFree = 0;
+	for (int i = m_maxTiles-1; i >= 0; --i)
+	{
+		m_tiles[i].salt = 1;
+		m_tiles[i].next = m_nextFree;
+		m_nextFree = &m_tiles[i];
+	}
+
+	// Init ID generator values.
+	m_tileBits = dtIlog2(dtNextPow2((unsigned int)params->maxTiles));
+	m_polyBits = dtIlog2(dtNextPow2((unsigned int)params->maxPolys));
+
+	// At least 10 of the 32 reference bits must remain for the salt. Test the
+	// sum directly: '32 - tileBits - polyBits' is unsigned and would wrap to a
+	// huge value when the sum exceeds 32, letting invalid parameters through.
+	const unsigned int minSaltBits = 10;
+	const unsigned int idBits = m_tileBits + m_polyBits;
+	if (idBits + minSaltBits > 32)
+		return DT_FAILURE | DT_INVALID_PARAM;
+
+	// Only allow 31 salt bits, since the salt mask is calculated using 32bit uint and it will overflow.
+	m_saltBits = dtMin((unsigned int)31, 32 - idBits);
+
+	return DT_SUCCESS;
+}
+
+/// Initializes the navigation mesh for single tile use.
+///
+/// The parameters are derived from the tile header (origin and size from its
+/// bounds, one tile, the header's polygon count), the mesh is initialized with
+/// them and the tile is then added.
+///  @param[in]  data      Tile data; it starts with a dtMeshHeader.
+///  @param[in]  dataSize  Size of the tile data, passed on to addTile().
+///  @param[in]  flags     Tile flags, passed on to addTile().
+///  @return DT_FAILURE | DT_WRONG_MAGIC or DT_FAILURE | DT_WRONG_VERSION for a bad
+///          header, otherwise the status of the initialization or of addTile().
+dtStatus dtNavMesh::init(unsigned char* data, const int dataSize, const int flags)
+{
+	// Make sure the data is in right format.
+	dtMeshHeader* header = (dtMeshHeader*)data;
+	if (header->magic != DT_NAVMESH_MAGIC)
+	{
+		return DT_FAILURE | DT_WRONG_MAGIC;
+	}
+	if (header->version != DT_NAVMESH_VERSION)
+	{
+		return DT_FAILURE | DT_WRONG_VERSION;
+	}
+
+	// A solo mesh is a tiled mesh with exactly one tile covering the bounds.
+	dtNavMeshParams soloParams;
+	soloParams.maxTiles = 1;
+	soloParams.maxPolys = header->polyCount;
+	soloParams.tileWidth = header->bmax[0] - header->bmin[0];
+	soloParams.tileHeight = header->bmax[2] - header->bmin[2];
+	dtVcopy(soloParams.orig, header->bmin);
+
+	const dtStatus initStatus = init(&soloParams);
+	if (dtStatusFailed(initStatus))
+	{
+		return initStatus;
+	}
+	return addTile(data, dataSize, flags, 0, 0);
+}
+
+/// @par
+///
+/// Returns the parameters the navigation mesh was initialized with.
+///
+/// @note The parameters are created automatically when the single tile
+/// initialization is performed.
+const dtNavMeshParams* dtNavMesh::getParams() const
+{
+	const dtNavMeshParams& stored = m_params;
+	return &stored;
+}
+
+//////////////////////////////////////////////////////////////////////////////////////////
+/// Finds the polygons of a tile whose portal edges on the given side overlap
+/// the edge va->vb.
+///  @param[in]   va, vb   End points of the edge to connect.
+///  @param[in]   tile     Tile to search; null yields zero results.
+///  @param[in]   side     Side of @p tile whose portal edges are examined.
+///  @param[out]  con      References of the connecting polygons.
+///  @param[out]  conarea  Overlap range per result: [min, max] pairs along the border axis.
+///  @param[in]   maxcon   Capacity of @p con (and of @p conarea in pairs).
+///  @return Number of results written. At most one result is reported per polygon.
+int dtNavMesh::findConnectingPolys(const float* va, const float* vb,
+								   const dtMeshTile* tile, int side,
+								   dtPolyRef* con, float* conarea, int maxcon) const
+{
+	if (!tile) return 0;
+
+	// Slab of the query edge and its position on the axis crossing the border.
+	float amin[2], amax[2];
+	calcSlabEndPoints(va,vb, amin,amax, side);
+	const float queryPos = getSlabCoord(va, side);
+
+	float bmin[2], bmax[2];
+	// Neighbour value carried by edges that are portals on 'side'.
+	unsigned short portalTag = DT_EXT_LINK | (unsigned short)side;
+	int count = 0;
+
+	dtPolyRef base = getPolyRefBase(tile);
+
+	for (int i = 0; i < tile->header->polyCount; ++i)
+	{
+		dtPoly* poly = &tile->polys[i];
+		const int nv = poly->vertCount;
+		for (int j = 0; j < nv; ++j)
+		{
+			// Only edges which point to the right side are candidates.
+			if (poly->neis[j] != portalTag)
+				continue;
+
+			const float* vc = &tile->verts[poly->verts[j]*3];
+			const float* vd = &tile->verts[poly->verts[(j+1) % nv]*3];
+
+			// The two edges must lie on (nearly) the same border line.
+			const float candidatePos = getSlabCoord(vc, side);
+			if (dtAbs(queryPos-candidatePos) > 0.01f)
+				continue;
+
+			// Check if the segments touch.
+			calcSlabEndPoints(vc,vd, bmin,bmax, side);
+			const bool touching = overlapSlabs(amin,amax, bmin,bmax, 0.01f, tile->header->walkableClimb);
+			if (!touching)
+				continue;
+
+			// Record the polygon and the shared range while there is room.
+			if (count < maxcon)
+			{
+				conarea[count*2+0] = dtMax(amin[0], bmin[0]);
+				conarea[count*2+1] = dtMin(amax[0], bmax[0]);
+				con[count] = base | (dtPolyRef)i;
+				count++;
+			}
+			// The first matching edge ends the search within this polygon.
+			break;
+		}
+	}
+	return count;
+}
+
+/// Removes from @p tile every external link (side != 0xff) that refers to a
+/// polygon of @p target, returning the links to the tile's free list.
+/// Does nothing if either tile is null.
+void dtNavMesh::unconnectExtLinks(dtMeshTile* tile, dtMeshTile* target)
+{
+	if (!tile || !target) return;
+
+	const unsigned int targetNum = decodePolyIdTile(getTileRef(target));
+
+	for (int i = 0; i < tile->header->polyCount; ++i)
+	{
+		dtPoly* poly = &tile->polys[i];
+		unsigned int prev = DT_NULL_LINK;
+		unsigned int cur = poly->firstLink;
+		while (cur != DT_NULL_LINK)
+		{
+			// Read the successor first; freeing the link overwrites its 'next'.
+			const unsigned int following = tile->links[cur].next;
+			const bool pointsAtTarget =
+				tile->links[cur].side != 0xff &&
+				decodePolyIdTile(tile->links[cur].ref) == targetNum;
+
+			if (!pointsAtTarget)
+			{
+				// Keep the link and advance.
+				prev = cur;
+				cur = following;
+				continue;
+			}
+
+			// Remove link: bypass it in the polygon's list, then recycle it.
+			if (prev == DT_NULL_LINK)
+				poly->firstLink = following;
+			else
+				tile->links[prev].next = following;
+			freeLink(tile, cur);
+			cur = following;
+		}
+	}
+}
+
+/// Creates links from the portal edges of @p tile to the matching polygons of
+/// @p target.
+///  @param[in,out]  tile    Tile that receives the new links; null is ignored.
+///  @param[in]      target  Tile to connect to.
+///  @param[in]      side    Only portal edges facing this side are processed;
+///                          -1 processes the portal edges of every side.
+void dtNavMesh::connectExtLinks(dtMeshTile* tile, dtMeshTile* target, int side)
+{
+	if (!tile) return;
+
+	// Connect border links.
+	for (int i = 0; i < tile->header->polyCount; ++i)
+	{
+		dtPoly* poly = &tile->polys[i];
+		const int nv = poly->vertCount;
+
+		for (int j = 0; j < nv; ++j)
+		{
+			// Skip non-portal edges.
+			if ((poly->neis[j] & DT_EXT_LINK) == 0)
+				continue;
+
+			// The low byte of the neighbour value is the side the edge faces.
+			const int dir = (int)(poly->neis[j] & 0xff);
+			if (side != -1 && dir != side)
+				continue;
+
+			// Collect the polygons on the opposite side of the target tile.
+			const float* va = &tile->verts[poly->verts[j]*3];
+			const float* vb = &tile->verts[poly->verts[(j+1) % nv]*3];
+			dtPolyRef nei[4];
+			float neia[4*2];
+			const int nnei = findConnectingPolys(va,vb, target, dtOppositeTile(dir), nei,neia,4);
+
+			// Axis along which the portal limits are measured: z for sides 0
+			// and 4, x for sides 2 and 6. Other sides get no portal limits.
+			int axis = -1;
+			if (dir == 0 || dir == 4)
+				axis = 2;
+			else if (dir == 2 || dir == 6)
+				axis = 0;
+
+			for (int k = 0; k < nnei; ++k)
+			{
+				const unsigned int idx = allocLink(tile);
+				if (idx == DT_NULL_LINK)
+					continue;
+
+				dtLink* link = &tile->links[idx];
+				link->ref = nei[k];
+				link->edge = (unsigned char)j;
+				link->side = (unsigned char)dir;
+
+				// Add to the front of the polygon's link list.
+				link->next = poly->firstLink;
+				poly->firstLink = idx;
+
+				if (axis < 0)
+					continue;
+
+				// Compress portal limits to a byte value.
+				float tmin = (neia[k*2+0]-va[axis]) / (vb[axis]-va[axis]);
+				float tmax = (neia[k*2+1]-va[axis]) / (vb[axis]-va[axis]);
+				if (tmin > tmax)
+					dtSwap(tmin,tmax);
+				link->bmin = (unsigned char)(dtClamp(tmin, 0.0f, 1.0f)*255.0f);
+				link->bmax = (unsigned char)(dtClamp(tmax, 0.0f, 1.0f)*255.0f);
+			}
+		}
+	}
+}
+
+/// Connects the off-mesh connections of @p target that land on @p tile.
+///
+/// For every off-mesh connection of @p target registered on the opposite side,
+/// the end point (second vertex) is snapped to the nearest polygon of @p tile
+/// and linked to it. Bidirectional connections also get a link back from the
+/// landing polygon.
+///  @param[in,out]  tile    Tile the connections land on; null is ignored.
+///  @param[in,out]  target  Tile owning the off-mesh connections; null is ignored.
+///  @param[in]      side    Side of @p tile facing @p target, or -1 for the same
+///                          tile location.
+void dtNavMesh::connectExtOffMeshLinks(dtMeshTile* tile, dtMeshTile* target, int side)
+{
+	// Both tiles are dereferenced below; a missing one means nothing to connect.
+	if (!tile || !target) return;
+
+	// Connect off-mesh links.
+	// We are interested on links which land from target tile to this tile.
+	const unsigned char oppositeSide = (side == -1) ? 0xff : (unsigned char)dtOppositeTile(side);
+
+	for (int i = 0; i < target->header->offMeshConCount; ++i)
+	{
+		dtOffMeshConnection* targetCon = &target->offMeshCons[i];
+		if (targetCon->side != oppositeSide)
+			continue;
+
+		dtPoly* targetPoly = &target->polys[targetCon->poly];
+		// Skip off-mesh connections which start location could not be connected at all.
+		if (targetPoly->firstLink == DT_NULL_LINK)
+			continue;
+
+		const float ext[3] = { targetCon->rad, target->header->walkableClimb, targetCon->rad };
+
+		// Find polygon to connect to.
+		const float* p = &targetCon->pos[3]; // Second vertex
+		float nearestPt[3];
+		dtPolyRef ref = findNearestPolyInTile(tile, p, ext, nearestPt);
+		if (!ref)
+			continue;
+		// findNearestPoly may return too optimistic results, further check to make sure.
+		if (dtSqr(nearestPt[0]-p[0])+dtSqr(nearestPt[2]-p[2]) > dtSqr(targetCon->rad))
+			continue;
+		// Make sure the location is on current mesh.
+		float* v = &target->verts[targetPoly->verts[1]*3];
+		dtVcopy(v, nearestPt);
+
+		// Link off-mesh connection to target poly.
+		unsigned int idx = allocLink(target);
+		if (idx != DT_NULL_LINK)
+		{
+			dtLink* link = &target->links[idx];
+			link->ref = ref;
+			link->edge = (unsigned char)1;
+			link->side = oppositeSide;
+			link->bmin = link->bmax = 0;
+			// Add to linked list.
+			link->next = targetPoly->firstLink;
+			targetPoly->firstLink = idx;
+		}
+
+		// Link target poly to off-mesh connection.
+		if (targetCon->flags & DT_OFFMESH_CON_BIDIR)
+		{
+			unsigned int tidx = allocLink(tile);
+			if (tidx != DT_NULL_LINK)
+			{
+				const unsigned short landPolyIdx = (unsigned short)decodePolyIdPoly(ref);
+				dtPoly* landPoly = &tile->polys[landPolyIdx];
+				dtLink* link = &tile->links[tidx];
+				link->ref = getPolyRefBase(target) | (dtPolyRef)(targetCon->poly);
+				link->edge = 0xff;
+				link->side = (unsigned char)(side == -1 ? 0xff : side);
+				link->bmin = link->bmax = 0;
+				// Add to linked list.
+				link->next = landPoly->firstLink;
+				landPoly->firstLink = tidx;
+			}
+		}
+	}
+
+}
+
+/// Rebuilds the links between polygons inside one tile.
+/// The link list of every polygon is reset; each edge with a neighbour in the
+/// same tile then receives a link. Off-mesh connection polygons only get their
+/// list reset. A null tile is ignored.
+void dtNavMesh::connectIntLinks(dtMeshTile* tile)
+{
+	if (!tile) return;
+
+	const dtPolyRef base = getPolyRefBase(tile);
+	const int polyCount = tile->header->polyCount;
+
+	for (int i = 0; i < polyCount; ++i)
+	{
+		dtPoly* poly = &tile->polys[i];
+		poly->firstLink = DT_NULL_LINK;
+
+		if (poly->getType() == DT_POLYTYPE_OFFMESH_CONNECTION)
+			continue;
+
+		// Walk the edges from last to first; every link is pushed to the
+		// front, so the list ends up ordered from lowest to highest edge.
+		for (int j = poly->vertCount-1; j >= 0; --j)
+		{
+			// Internal edges have a neighbour value that is neither zero
+			// (hard edge) nor tagged as an external link.
+			const bool internalEdge = poly->neis[j] != 0 && !(poly->neis[j] & DT_EXT_LINK);
+			if (!internalEdge)
+				continue;
+
+			const unsigned int idx = allocLink(tile);
+			if (idx == DT_NULL_LINK)
+				continue;
+
+			dtLink* link = &tile->links[idx];
+			// Neighbour values are stored one-based.
+			link->ref = base | (dtPolyRef)(poly->neis[j]-1);
+			link->edge = (unsigned char)j;
+			link->side = 0xff;
+			link->bmin = link->bmax = 0;
+			// Add to linked list.
+			link->next = poly->firstLink;
+			poly->firstLink = idx;
+		}
+	}
+}
+
+/// Anchors the start point of every off-mesh connection of a tile.
+/// The start point (first vertex) is snapped to the nearest polygon of the same
+/// tile within the connection radius. The connection polygon is linked to that
+/// polygon and the polygon is linked back. Connections without a polygon in
+/// range are skipped. A null tile is ignored.
+void dtNavMesh::baseOffMeshLinks(dtMeshTile* tile)
+{
+	if (!tile) return;
+
+	const dtPolyRef base = getPolyRefBase(tile);
+
+	// Base off-mesh connection start points.
+	for (int i = 0; i < tile->header->offMeshConCount; ++i)
+	{
+		dtOffMeshConnection* con = &tile->offMeshCons[i];
+		dtPoly* conPoly = &tile->polys[con->poly];
+
+		// Search box: connection radius horizontally, walkable climb vertically.
+		const float ext[3] = { con->rad, tile->header->walkableClimb, con->rad };
+
+		// Find polygon to connect to.
+		const float* start = &con->pos[0]; // First vertex
+		float snapped[3];
+		const dtPolyRef landRef = findNearestPolyInTile(tile, start, ext, snapped);
+		if (!landRef)
+			continue;
+		// findNearestPoly may return too optimistic results, further check to make sure.
+		if (dtSqr(snapped[0]-start[0])+dtSqr(snapped[2]-start[2]) > dtSqr(con->rad))
+			continue;
+
+		// Make sure the location is on current mesh.
+		dtVcopy(&tile->verts[conPoly->verts[0]*3], snapped);
+
+		// Link off-mesh connection to target poly.
+		const unsigned int forwardIdx = allocLink(tile);
+		if (forwardIdx != DT_NULL_LINK)
+		{
+			dtLink* forward = &tile->links[forwardIdx];
+			forward->ref = landRef;
+			forward->edge = (unsigned char)0;
+			forward->side = 0xff;
+			forward->bmin = forward->bmax = 0;
+			// Add to linked list.
+			forward->next = conPoly->firstLink;
+			conPoly->firstLink = forwardIdx;
+		}
+
+		// The start end-point is always connected back to the off-mesh connection.
+		const unsigned int backIdx = allocLink(tile);
+		if (backIdx != DT_NULL_LINK)
+		{
+			const unsigned short landPolyIdx = (unsigned short)decodePolyIdPoly(landRef);
+			dtPoly* landPoly = &tile->polys[landPolyIdx];
+			dtLink* back = &tile->links[backIdx];
+			back->ref = base | (dtPolyRef)(con->poly);
+			back->edge = 0xff;
+			back->side = 0xff;
+			back->bmin = back->bmax = 0;
+			// Add to linked list.
+			back->next = landPoly->firstLink;
+			landPoly->firstLink = backIdx;
+		}
+	}
+}
+
+/// Finds the point on a polygon's detail mesh that is closest to @p pos.
+///  @param[in]   tile     Tile containing the polygon.
+///  @param[in]   ip       Index of the polygon within the tile.
+///  @param[in]   pos      Position to test. [(x, y, z)]
+///  @param[out]  closest  Closest point found. [(x, y, z)] It is only written
+///                        when the polygon has at least one detail triangle.
+void dtNavMesh::closestPointOnPolyInTile(const dtMeshTile* tile, unsigned int ip,
+										 const float* pos, float* closest) const
+{
+	const dtPoly* poly = &tile->polys[ip];
+	const dtPolyDetail* detail = &tile->detailMeshes[ip];
+
+	float bestDistSqr = FLT_MAX;
+
+	for (int j = 0; j < detail->triCount; ++j)
+	{
+		const unsigned char* tri = &tile->detailTris[(detail->triBase+j)*4];
+
+		// Indices below the polygon's vertex count refer to polygon vertices,
+		// the remaining ones to this polygon's detail vertices.
+		const float* corner[3];
+		for (int k = 0; k < 3; ++k)
+		{
+			const bool isPolyVert = tri[k] < poly->vertCount;
+			corner[k] = isPolyVert
+				? &tile->verts[poly->verts[tri[k]]*3]
+				: &tile->detailVerts[(detail->vertBase+(tri[k]-poly->vertCount))*3];
+		}
+
+		float candidate[3];
+		dtClosestPtPointTriangle(candidate, pos, corner[0], corner[1], corner[2]);
+		const float distSqr = dtVdistSqr(pos, candidate);
+		if (distSqr < bestDistSqr)
+		{
+			dtVcopy(closest, candidate);
+			bestDistSqr = distSqr;
+		}
+	}
+}
+
+/// Finds the polygon of a tile nearest to @p center among those overlapping the
+/// search box.
+///  @param[in]   tile       Tile to search.
+///  @param[in]   center     Center of the search box. [(x, y, z)]
+///  @param[in]   extents    Half size of the search box on each axis. [(x, y, z)]
+///  @param[out]  nearestPt  Closest point on the returned polygon. [opt]
+///  @return Reference of the nearest polygon, or 0 if none was found. At most
+///          128 candidate polygons are examined.
+dtPolyRef dtNavMesh::findNearestPolyInTile(const dtMeshTile* tile,
+										   const float* center, const float* extents,
+										   float* nearestPt) const
+{
+	static const int MAX_CANDIDATES = 128;
+
+	float boxMin[3], boxMax[3];
+	dtVsub(boxMin, center, extents);
+	dtVadd(boxMax, center, extents);
+
+	// Gather the polygons whose bounds overlap the search box.
+	dtPolyRef candidates[MAX_CANDIDATES];
+	const int candidateCount = queryPolygonsInTile(tile, boxMin, boxMax, candidates, MAX_CANDIDATES);
+
+	// Keep the candidate whose surface is closest to the center.
+	dtPolyRef best = 0;
+	float bestDistSqr = FLT_MAX;
+	for (int i = 0; i < candidateCount; ++i)
+	{
+		const dtPolyRef ref = candidates[i];
+		float onPoly[3];
+		closestPointOnPolyInTile(tile, decodePolyIdPoly(ref), center, onPoly);
+		const float distSqr = dtVdistSqr(center, onPoly);
+		if (distSqr >= bestDistSqr)
+			continue;
+
+		if (nearestPt)
+			dtVcopy(nearestPt, onPoly);
+		bestDistSqr = distSqr;
+		best = ref;
+	}
+
+	return best;
+}
+
+/// Collects the polygons of a tile whose bounds overlap the query box.
+/// Uses the tile's bounding volume tree when it has one, otherwise tests the
+/// bounds of every polygon.
+///  @param[in]   tile      Tile to query.
+///  @param[in]   qmin      Minimum corner of the query box. [(x, y, z)]
+///  @param[in]   qmax      Maximum corner of the query box. [(x, y, z)]
+///  @param[out]  polys     References of the overlapping polygons.
+///  @param[in]   maxPolys  Capacity of @p polys.
+///  @return Number of references written; results beyond @p maxPolys are dropped.
+int dtNavMesh::queryPolygonsInTile(const dtMeshTile* tile, const float* qmin, const float* qmax,
+								   dtPolyRef* polys, const int maxPolys) const
+{
+	if (tile->bvTree)
+	{
+		const dtBVNode* node = &tile->bvTree[0];
+		const dtBVNode* end = &tile->bvTree[tile->header->bvNodeCount];
+		const float* tbmin = tile->header->bmin;
+		const float* tbmax = tile->header->bmax;
+		const float qfac = tile->header->bvQuantFactor;
+		
+		// Calculate quantized box
+		unsigned short bmin[3], bmax[3];
+		// Clamp the query box to the tile bounds and make it relative to the tile minimum.
+		float minx = dtClamp(qmin[0], tbmin[0], tbmax[0]) - tbmin[0];
+		float miny = dtClamp(qmin[1], tbmin[1], tbmax[1]) - tbmin[1];
+		float minz = dtClamp(qmin[2], tbmin[2], tbmax[2]) - tbmin[2];
+		float maxx = dtClamp(qmax[0], tbmin[0], tbmax[0]) - tbmin[0];
+		float maxy = dtClamp(qmax[1], tbmin[1], tbmax[1]) - tbmin[1];
+		float maxz = dtClamp(qmax[2], tbmin[2], tbmax[2]) - tbmin[2];
+		// Quantize. The minimum has its lowest bit cleared, the maximum is bumped
+		// by one and has its lowest bit set, so the box never shrinks.
+		bmin[0] = (unsigned short)(qfac * minx) & 0xfffe;
+		bmin[1] = (unsigned short)(qfac * miny) & 0xfffe;
+		bmin[2] = (unsigned short)(qfac * minz) & 0xfffe;
+		bmax[0] = (unsigned short)(qfac * maxx + 1) | 1;
+		bmax[1] = (unsigned short)(qfac * maxy + 1) | 1;
+		bmax[2] = (unsigned short)(qfac * maxz + 1) | 1;
+		
+		// Traverse tree
+		dtPolyRef base = getPolyRefBase(tile);
+		int n = 0;
+		while (node < end)
+		{
+			const bool overlap = dtOverlapQuantBounds(bmin, bmax, node->bmin, node->bmax);
+			// A non-negative index marks a leaf and is the polygon index.
+			const bool isLeafNode = node->i >= 0;
+			
+			if (isLeafNode && overlap)
+			{
+				if (n < maxPolys)
+					polys[n++] = base | (dtPolyRef)node->i;
+			}
+			
+			// Step to the next node in the array for leaves and for overlapping
+			// nodes; otherwise jump ahead by the negated index stored in the node.
+			if (overlap || isLeafNode)
+				node++;
+			else
+			{
+				const int escapeIndex = -node->i;
+				node += escapeIndex;
+			}
+		}
+		
+		return n;
+	}
+	else
+	{
+		// No tree: compute each polygon's bounds and test them one by one.
+		float bmin[3], bmax[3];
+		int n = 0;
+		dtPolyRef base = getPolyRefBase(tile);
+		for (int i = 0; i < tile->header->polyCount; ++i)
+		{
+			dtPoly* p = &tile->polys[i];
+			// Do not return off-mesh connection polygons.
+			if (p->getType() == DT_POLYTYPE_OFFMESH_CONNECTION)
+				continue;
+			// Calc polygon bounds.
+			const float* v = &tile->verts[p->verts[0]*3];
+			dtVcopy(bmin, v);
+			dtVcopy(bmax, v);
+			for (int j = 1; j < p->vertCount; ++j)
+			{
+				v = &tile->verts[p->verts[j]*3];
+				dtVmin(bmin, v);
+				dtVmax(bmax, v);
+			}
+			if (dtOverlapBounds(qmin,qmax, bmin,bmax))
+			{
+				if (n < maxPolys)
+					polys[n++] = base | (dtPolyRef)i;
+			}
+		}
+		return n;
+	}
+}
+
+/// @par
+///
+/// The add operation will fail if the data is in the wrong format, the allocated tile
+/// space is full, or there is a tile already at the specified reference.
+///
+/// The lastRef parameter is used to restore a tile with the same tile
+/// reference it had previously used.  In this case the #dtPolyRef's for the
+/// tile will be restored to the same values they were before the tile was 
+/// removed.
+///
+/// On success the tile's internal links and off-mesh connections are built and
+/// the tile is connected to the other layers at its location and to the tiles
+/// at its eight neighbouring locations.
+///
+///  @param[in]   data      Tile data; it starts with a dtMeshHeader.
+///  @param[in]   dataSize  Size of the tile data; stored with the tile.
+///  @param[in]   flags     Tile flags; stored with the tile.
+///  @param[in]   lastRef   Tile reference to restore, or 0 to take any free tile.
+///  @param[out]  result    Receives the reference of the added tile. [opt]
+///  @return DT_SUCCESS, or DT_FAILURE combined with the reason where one is known.
+///
+/// @see dtCreateNavMeshData, #removeTile
+dtStatus dtNavMesh::addTile(unsigned char* data, int dataSize, int flags,
+							dtTileRef lastRef, dtTileRef* result)
+{
+	// Make sure the data is in right format.
+	dtMeshHeader* header = (dtMeshHeader*)data;
+	if (header->magic != DT_NAVMESH_MAGIC)
+		return DT_FAILURE | DT_WRONG_MAGIC;
+	if (header->version != DT_NAVMESH_VERSION)
+		return DT_FAILURE | DT_WRONG_VERSION;
+
+	// Make sure the location is free.
+	if (getTileAt(header->x, header->y, header->layer))
+		return DT_FAILURE;
+
+	// Allocate a tile.
+	dtMeshTile* tile = 0;
+	if (!lastRef)
+	{
+		if (m_nextFree)
+		{
+			tile = m_nextFree;
+			m_nextFree = tile->next;
+			tile->next = 0;
+		}
+	}
+	else
+	{
+		// Try to relocate the tile to specific index with same salt.
+		int tileIndex = (int)decodePolyIdTile((dtPolyRef)lastRef);
+		if (tileIndex >= m_maxTiles)
+			return DT_FAILURE | DT_OUT_OF_MEMORY;
+		// Try to find the specific tile id from the free list.
+		dtMeshTile* target = &m_tiles[tileIndex];
+		dtMeshTile* prev = 0;
+		tile = m_nextFree;
+		while (tile && tile != target)
+		{
+			prev = tile;
+			tile = tile->next;
+		}
+		// Could not find the correct location.
+		if (tile != target)
+			return DT_FAILURE | DT_OUT_OF_MEMORY;
+		// Remove from freelist
+		if (!prev)
+			m_nextFree = tile->next;
+		else
+			prev->next = tile->next;
+
+		// Restore salt.
+		tile->salt = decodePolyIdSalt((dtPolyRef)lastRef);
+	}
+
+	// Make sure we could allocate a tile.
+	if (!tile)
+		return DT_FAILURE | DT_OUT_OF_MEMORY;
+
+	// Insert tile into the position lut.
+	int h = computeTileHash(header->x, header->y, m_tileLutMask);
+	tile->next = m_posLookup[h];
+	m_posLookup[h] = tile;
+
+	// Patch header pointers. The tile data is one buffer holding the header
+	// followed by the arrays below, each padded by dtAlign4.
+	const int headerSize = dtAlign4(sizeof(dtMeshHeader));
+	const int vertsSize = dtAlign4(sizeof(float)*3*header->vertCount);
+	const int polysSize = dtAlign4(sizeof(dtPoly)*header->polyCount);
+	const int linksSize = dtAlign4(sizeof(dtLink)*(header->maxLinkCount));
+	const int detailMeshesSize = dtAlign4(sizeof(dtPolyDetail)*header->detailMeshCount);
+	const int detailVertsSize = dtAlign4(sizeof(float)*3*header->detailVertCount);
+	const int detailTrisSize = dtAlign4(sizeof(unsigned char)*4*header->detailTriCount);
+	const int bvtreeSize = dtAlign4(sizeof(dtBVNode)*header->bvNodeCount);
+	const int offMeshLinksSize = dtAlign4(sizeof(dtOffMeshConnection)*header->offMeshConCount);
+
+	unsigned char* d = data + headerSize;
+	tile->verts = (float*)d; d += vertsSize;
+	tile->polys = (dtPoly*)d; d += polysSize;
+	tile->links = (dtLink*)d; d += linksSize;
+	tile->detailMeshes = (dtPolyDetail*)d; d += detailMeshesSize;
+	tile->detailVerts = (float*)d; d += detailVertsSize;
+	tile->detailTris = (unsigned char*)d; d += detailTrisSize;
+	tile->bvTree = (dtBVNode*)d; d += bvtreeSize;
+	tile->offMeshCons = (dtOffMeshConnection*)d; d += offMeshLinksSize;
+
+	// If there are no items in the bvtree, reset the tree pointer.
+	if (!bvtreeSize)
+		tile->bvTree = 0;
+
+	// Build links freelist
+	if (header->maxLinkCount > 0)
+	{
+		tile->linksFreeList = 0;
+		tile->links[header->maxLinkCount-1].next = DT_NULL_LINK;
+		for (int i = 0; i < header->maxLinkCount-1; ++i)
+			tile->links[i].next = i+1;
+	}
+	else
+	{
+		// No link storage at all: start with an empty free list instead of
+		// writing links[-1] and handing out index 0.
+		tile->linksFreeList = DT_NULL_LINK;
+	}
+
+	// Init tile.
+	tile->header = header;
+	tile->data = data;
+	tile->dataSize = dataSize;
+	tile->flags = flags;
+
+	connectIntLinks(tile);
+	baseOffMeshLinks(tile);
+
+	// Create connections with neighbour tiles.
+	static const int MAX_NEIS = 32;
+	dtMeshTile* neis[MAX_NEIS];
+	int nneis;
+
+	// Connect with layers in current tile.
+	nneis = getTilesAt(header->x, header->y, neis, MAX_NEIS);
+	for (int j = 0; j < nneis; ++j)
+	{
+		if (neis[j] != tile)
+		{
+			connectExtLinks(tile, neis[j], -1);
+			connectExtLinks(neis[j], tile, -1);
+		}
+		connectExtOffMeshLinks(tile, neis[j], -1);
+		connectExtOffMeshLinks(neis[j], tile, -1);
+	}
+
+	// Connect with neighbour tiles.
+	for (int i = 0; i < 8; ++i)
+	{
+		nneis = getNeighbourTilesAt(header->x, header->y, i, neis, MAX_NEIS);
+		for (int j = 0; j < nneis; ++j)
+		{
+			connectExtLinks(tile, neis[j], i);
+			connectExtLinks(neis[j], tile, dtOppositeTile(i));
+			connectExtOffMeshLinks(tile, neis[j], i);
+			connectExtOffMeshLinks(neis[j], tile, dtOppositeTile(i));
+		}
+	}
+
+	if (result)
+		*result = getTileRef(tile);
+
+	return DT_SUCCESS;
+}
+
+/// Returns the loaded tile at the given grid location and layer, or null if
+/// there is none.
+const dtMeshTile* dtNavMesh::getTileAt(const int x, const int y, const int layer) const
+{
+	// Walk the hash bucket the location maps to.
+	const int bucket = computeTileHash(x,y,m_tileLutMask);
+	for (const dtMeshTile* tile = m_posLookup[bucket]; tile; tile = tile->next)
+	{
+		const dtMeshHeader* hdr = tile->header;
+		if (hdr && hdr->x == x && hdr->y == y && hdr->layer == layer)
+			return tile;
+	}
+	return 0;
+}
+
+/// Collects the tiles at the grid location adjacent to (x,y) in the direction
+/// given by @p side (0..7, starting at +x). Any other side value queries (x,y)
+/// itself.
+///  @return Number of tiles written to @p tiles (at most @p maxTiles).
+int dtNavMesh::getNeighbourTilesAt(const int x, const int y, const int side, dtMeshTile** tiles, const int maxTiles) const
+{
+	// Grid offset for each of the eight sides.
+	static const int stepX[8] = { 1, 1, 0, -1, -1, -1, 0, 1 };
+	static const int stepY[8] = { 0, 1, 1, 1, 0, -1, -1, -1 };
+
+	int nx = x;
+	int ny = y;
+	if (side >= 0 && side < 8)
+	{
+		nx += stepX[side];
+		ny += stepY[side];
+	}
+
+	return getTilesAt(nx, ny, tiles, maxTiles);
+}
+
+/// Collects every loaded tile (all layers) at the given grid location.
+/// Tiles beyond the capacity of @p tiles are dropped.
+///  @return Number of tiles written to @p tiles (at most @p maxTiles).
+int dtNavMesh::getTilesAt(const int x, const int y, dtMeshTile** tiles, const int maxTiles) const
+{
+	int found = 0;
+
+	// Walk the hash bucket the location maps to.
+	const int bucket = computeTileHash(x,y,m_tileLutMask);
+	for (dtMeshTile* tile = m_posLookup[bucket]; tile; tile = tile->next)
+	{
+		const bool atLocation = tile->header &&
+								tile->header->x == x &&
+								tile->header->y == y;
+		if (atLocation && found < maxTiles)
+			tiles[found++] = tile;
+	}
+
+	return found;
+}
+
+/// @par
+///
+/// Collects every loaded tile (all layers) at the given grid location.
+///
+/// This function will not fail if the tiles array is too small to hold the
+/// entire result set.  It will simply fill the array to capacity.
+int dtNavMesh::getTilesAt(const int x, const int y, dtMeshTile const** tiles, const int maxTiles) const
+{
+	int found = 0;
+
+	// Walk the hash bucket the location maps to.
+	const int bucket = computeTileHash(x,y,m_tileLutMask);
+	for (dtMeshTile* tile = m_posLookup[bucket]; tile; tile = tile->next)
+	{
+		const bool atLocation = tile->header &&
+								tile->header->x == x &&
+								tile->header->y == y;
+		if (atLocation && found < maxTiles)
+			tiles[found++] = tile;
+	}
+
+	return found;
+}
+
+
+/// Returns the reference of the loaded tile at the given grid location and
+/// layer, or 0 if there is none.
+dtTileRef dtNavMesh::getTileRefAt(const int x, const int y, const int layer) const
+{
+	// getTileAt() performs the same bucket walk and match; getTileRef() maps a
+	// null tile to 0.
+	return getTileRef(getTileAt(x, y, layer));
+}
+
+/// Returns the tile slot a tile reference points to.
+/// Returns null if the reference is 0, its tile index is out of range, or its
+/// salt does not match the salt currently stored in the slot.
+const dtMeshTile* dtNavMesh::getTileByRef(dtTileRef ref) const
+{
+	if (!ref)
+		return 0;
+
+	const unsigned int slot = decodePolyIdTile((dtPolyRef)ref);
+	const unsigned int expectedSalt = decodePolyIdSalt((dtPolyRef)ref);
+	if ((int)slot >= m_maxTiles)
+		return 0;
+
+	const dtMeshTile* candidate = &m_tiles[slot];
+	return candidate->salt == expectedSalt ? candidate : 0;
+}
+
+/// Returns the number of tile slots of the navigation mesh.
+int dtNavMesh::getMaxTiles() const
+{
+	const int capacity = m_maxTiles;
+	return capacity;
+}
+
+/// Returns the tile slot at index @p i. The index is not range checked.
+dtMeshTile* dtNavMesh::getTile(int i)
+{
+	return m_tiles + i;
+}
+
+/// Returns the tile slot at index @p i. The index is not range checked.
+const dtMeshTile* dtNavMesh::getTile(int i) const
+{
+	return m_tiles + i;
+}
+
+/// Calculates the tile grid location of a position.
+/// The x and z offsets from the mesh origin are divided by the tile width and
+/// height and rounded down.
+///  @param[in]   pos  Position to locate. [(x, y, z)]
+///  @param[out]  tx   Tile x location.
+///  @param[out]  ty   Tile y location (derived from the z axis).
+void dtNavMesh::calcTileLoc(const float* pos, int* tx, int* ty) const
+{
+	const int tileX = (int)floorf((pos[0]-m_orig[0]) / m_tileWidth);
+	const int tileY = (int)floorf((pos[2]-m_orig[2]) / m_tileHeight);
+	*tx = tileX;
+	*ty = tileY;
+}
+
+/// Resolves a polygon reference to its tile and polygon.
+///  @param[in]   ref   Polygon reference.
+///  @param[out]  tile  Tile containing the polygon.
+///  @param[out]  poly  The polygon.
+///  @return DT_SUCCESS; DT_FAILURE for a zero reference; DT_FAILURE | DT_INVALID_PARAM
+///          if the tile index, salt or polygon index does not match a loaded
+///          tile. The outputs are only written on success.
+dtStatus dtNavMesh::getTileAndPolyByRef(const dtPolyRef ref, const dtMeshTile** tile, const dtPoly** poly) const
+{
+	if (!ref)
+		return DT_FAILURE;
+
+	unsigned int salt, it, ip;
+	decodePolyId(ref, salt, it, ip);
+
+	if (it >= (unsigned int)m_maxTiles)
+		return DT_FAILURE | DT_INVALID_PARAM;
+
+	const dtMeshTile* owner = &m_tiles[it];
+	if (owner->salt != salt || owner->header == 0)
+		return DT_FAILURE | DT_INVALID_PARAM;
+	if (ip >= (unsigned int)owner->header->polyCount)
+		return DT_FAILURE | DT_INVALID_PARAM;
+
+	*tile = owner;
+	*poly = &owner->polys[ip];
+	return DT_SUCCESS;
+}
+
+/// @par
+///
+/// Resolves a polygon reference to its tile and polygon without any checks.
+///
+/// @warning Only use this function if it is known that the provided polygon
+/// reference is valid. This function is faster than #getTileAndPolyByRef, but
+/// it does not validate the reference.
+void dtNavMesh::getTileAndPolyByRefUnsafe(const dtPolyRef ref, const dtMeshTile** tile, const dtPoly** poly) const
+{
+	unsigned int salt, it, ip;
+	decodePolyId(ref, salt, it, ip);
+
+	const dtMeshTile* owner = &m_tiles[it];
+	*tile = owner;
+	*poly = &owner->polys[ip];
+}
+
+/// Checks whether a polygon reference points at a polygon of a loaded tile:
+/// the reference is non-zero, its tile index is in range, its salt matches the
+/// tile slot, the slot holds a tile and the polygon index is within that tile.
+bool dtNavMesh::isValidPolyRef(dtPolyRef ref) const
+{
+	if (!ref)
+		return false;
+
+	unsigned int salt, it, ip;
+	decodePolyId(ref, salt, it, ip);
+	if (it >= (unsigned int)m_maxTiles)
+		return false;
+
+	const dtMeshTile& slot = m_tiles[it];
+	return slot.salt == salt &&
+		   slot.header != 0 &&
+		   ip < (unsigned int)slot.header->polyCount;
+}
+
+/// @par
+///
+/// This function returns the data for the tile so that, if desired,
+/// it can be added back to the navigation mesh at a later point.
+/// Tiles flagged with DT_TILE_FREE_DATA have their data freed instead, and
+/// null / zero is returned for the data and its size.
+///
+///  @param[in]   ref       Reference of the tile to remove.
+///  @param[out]  data      Receives the tile data. [opt]
+///  @param[out]  dataSize  Receives the size of the tile data. [opt]
+///  @return DT_SUCCESS, or DT_FAILURE | DT_INVALID_PARAM if the reference does not
+///          name a loaded tile.
+///
+/// @see #addTile
+dtStatus dtNavMesh::removeTile(dtTileRef ref, unsigned char** data, int* dataSize)
+{
+	if (!ref)
+		return DT_FAILURE | DT_INVALID_PARAM;
+	unsigned int tileIndex = decodePolyIdTile((dtPolyRef)ref);
+	unsigned int tileSalt = decodePolyIdSalt((dtPolyRef)ref);
+	if ((int)tileIndex >= m_maxTiles)
+		return DT_FAILURE | DT_INVALID_PARAM;
+	dtMeshTile* tile = &m_tiles[tileIndex];
+	if (tile->salt != tileSalt)
+		return DT_FAILURE | DT_INVALID_PARAM;
+	// A slot that never held a tile still carries its initial salt, so the salt
+	// can match while there is no header to read the location from.
+	if (!tile->header)
+		return DT_FAILURE | DT_INVALID_PARAM;
+
+	// Remove tile from hash lookup.
+	int h = computeTileHash(tile->header->x,tile->header->y,m_tileLutMask);
+	dtMeshTile* prev = 0;
+	dtMeshTile* cur = m_posLookup[h];
+	while (cur)
+	{
+		if (cur == tile)
+		{
+			if (prev)
+				prev->next = cur->next;
+			else
+				m_posLookup[h] = cur->next;
+			break;
+		}
+		prev = cur;
+		cur = cur->next;
+	}
+
+	// Remove connections to neighbour tiles.
+	static const int MAX_NEIS = 32;
+	dtMeshTile* neis[MAX_NEIS];
+	int nneis;
+
+	// Disconnect from the other layers at this tile location.
+	nneis = getTilesAt(tile->header->x, tile->header->y, neis, MAX_NEIS);
+	for (int j = 0; j < nneis; ++j)
+	{
+		if (neis[j] == tile) continue;
+		unconnectExtLinks(neis[j], tile);
+	}
+
+	// Disconnect from the tiles at the eight neighbouring locations.
+	for (int i = 0; i < 8; ++i)
+	{
+		nneis = getNeighbourTilesAt(tile->header->x, tile->header->y, i, neis, MAX_NEIS);
+		for (int j = 0; j < nneis; ++j)
+			unconnectExtLinks(neis[j], tile);
+	}
+
+	// Reset tile.
+	if (tile->flags & DT_TILE_FREE_DATA)
+	{
+		// Owns data
+		dtFree(tile->data);
+		tile->data = 0;
+		tile->dataSize = 0;
+		if (data) *data = 0;
+		if (dataSize) *dataSize = 0;
+	}
+	else
+	{
+		if (data) *data = tile->data;
+		if (dataSize) *dataSize = tile->dataSize;
+	}
+
+	tile->header = 0;
+	tile->flags = 0;
+	tile->linksFreeList = 0;
+	tile->polys = 0;
+	tile->verts = 0;
+	tile->links = 0;
+	tile->detailMeshes = 0;
+	tile->detailVerts = 0;
+	tile->detailTris = 0;
+	tile->bvTree = 0;
+	tile->offMeshCons = 0;
+
+	// Update salt, salt should never be zero.
+	// The mask is built from an unsigned one: with 31 salt bits a signed
+	// '1 << 31' overflows int.
+	tile->salt = (tile->salt+1) & ((1u<<m_saltBits)-1);
+	if (tile->salt == 0)
+		tile->salt++;
+
+	// Add to free list.
+	tile->next = m_nextFree;
+	m_nextFree = tile;
+
+	return DT_SUCCESS;
+}
+
+dtTileRef dtNavMesh::getTileRef(const dtMeshTile* tile) const
+{
+	if (!tile) return 0; // A null tile maps to the zero reference.
+	const unsigned int it = (unsigned int)(tile - m_tiles); // Index of the tile within the m_tiles array.
+	return (dtTileRef)encodePolyId(tile->salt, it, 0); // Tile salt + tile index, with the polygon field left at zero.
+}
+
+/// @par
+///
+/// Example use case:
+/// @code
+///
+/// const dtPolyRef base = navmesh->getPolyRefBase(tile);
+/// for (int i = 0; i < tile->header->polyCount; ++i)
+/// {
+///     const dtPoly* p = &tile->polys[i];
+///     const dtPolyRef ref = base | (dtPolyRef)i;
+///     
+///     // Use the reference to access the polygon data.
+/// }
+/// @endcode
+dtPolyRef dtNavMesh::getPolyRefBase(const dtMeshTile* tile) const
+{
+	if (!tile) return 0; // A null tile maps to the zero reference.
+	const unsigned int it = (unsigned int)(tile - m_tiles); // Index of the tile within the m_tiles array.
+	return encodePolyId(tile->salt, it, 0); // Polygon field is zero, so a polygon index can be OR-ed in as shown above.
+}
+
+struct dtTileState // Header record written at the start of a stored tile-state buffer.
+{
+	int magic;								// Magic number, used to identify the data (DT_NAVMESH_STATE_MAGIC when stored).
+	int version;							// Data version number (DT_NAVMESH_STATE_VERSION when stored).
+	dtTileRef ref;							// Tile ref at the time of storing the data; restore is refused if it no longer matches.
+};
+
+struct dtPolyState // Per-polygon record; an array of these follows the dtTileState header in a stored buffer.
+{
+	unsigned short flags;						// Flags (see dtPolyFlags).
+	unsigned char area;							// Area ID of the polygon.
+};
+
+/// Size in bytes of the buffer #storeTileState needs for @p tile; 0 when @p tile is null.
+int dtNavMesh::getTileStateSize(const dtMeshTile* tile) const
+{
+	if (!tile) return 0;
+	const int headerSize = dtAlign4(sizeof(dtTileState)); // One dtTileState record, passed through dtAlign4.
+	const int polyStateSize = dtAlign4(sizeof(dtPolyState) * tile->header->polyCount); // One dtPolyState per polygon; tile->header is dereferenced unchecked.
+	return headerSize + polyStateSize;
+}
+
+/// @par
+///
+/// Tile state includes non-structural data such as polygon flags, area ids, etc.
+/// @note The state data is only valid until the tile reference changes.
+/// @see #getTileStateSize, #restoreTileState
+dtStatus dtNavMesh::storeTileState(const dtMeshTile* tile, unsigned char* data, const int maxDataSize) const
+{
+	// Make sure there is enough space to store the state.
+	const int sizeReq = getTileStateSize(tile);
+	if (maxDataSize < sizeReq)
+		return DT_FAILURE | DT_BUFFER_TOO_SMALL;
+		
+	dtTileState* tileState = (dtTileState*)data; data += dtAlign4(sizeof(dtTileState)); // Header record sits at the start of the buffer.
+	dtPolyState* polyStates = (dtPolyState*)data; data += dtAlign4(sizeof(dtPolyState) * tile->header->polyCount); // Per-polygon records follow it.
+	
+	// Store tile state.
+	tileState->magic = DT_NAVMESH_STATE_MAGIC;
+	tileState->version = DT_NAVMESH_STATE_VERSION;
+	tileState->ref = getTileRef(tile); // Checked again by restoreTileState before anything is applied.
+	
+	// Store per poly state.
+	for (int i = 0; i < tile->header->polyCount; ++i)
+	{
+		const dtPoly* p = &tile->polys[i];
+		dtPolyState* s = &polyStates[i];
+		s->flags = p->flags;
+		s->area = p->getArea();
+	}
+	
+	return DT_SUCCESS;
+}
+
+/// @par
+///
+/// Tile state includes non-structural data such as polygon flags, area ids, etc.
+/// @note This function does not impact the tile's #dtTileRef and #dtPolyRef's.
+/// @see #storeTileState
+dtStatus dtNavMesh::restoreTileState(dtMeshTile* tile, const unsigned char* data, const int maxDataSize)
+{
+	// Make sure the supplied buffer is at least as large as the state stored for this tile.
+	const int sizeReq = getTileStateSize(tile);
+	if (maxDataSize < sizeReq)
+		return DT_FAILURE | DT_INVALID_PARAM;
+	
+	const dtTileState* tileState = (const dtTileState*)data; data += dtAlign4(sizeof(dtTileState)); // Header record sits at the start of the buffer.
+	const dtPolyState* polyStates = (const dtPolyState*)data; data += dtAlign4(sizeof(dtPolyState) * tile->header->polyCount); // Per-polygon records follow it.
+	
+	// Check that the restore is possible.
+	if (tileState->magic != DT_NAVMESH_STATE_MAGIC)
+		return DT_FAILURE | DT_WRONG_MAGIC;
+	if (tileState->version != DT_NAVMESH_STATE_VERSION)
+		return DT_FAILURE | DT_WRONG_VERSION;
+	if (tileState->ref != getTileRef(tile)) // State was stored for a different tile reference.
+		return DT_FAILURE | DT_INVALID_PARAM;
+	
+	// Restore per poly state.
+	for (int i = 0; i < tile->header->polyCount; ++i)
+	{
+		dtPoly* p = &tile->polys[i];
+		const dtPolyState* s = &polyStates[i];
+		p->flags = s->flags;
+		p->setArea(s->area);
+	}
+	
+	return DT_SUCCESS;
+}
+
+/// @par
+///
+/// Off-mesh connections are stored in the navigation mesh as special 2-vertex 
+/// polygons with a single edge. At least one of the vertices is expected to be 
+/// inside a normal polygon. So an off-mesh connection is "entered" from a 
+/// normal polygon at one of its endpoints. This is the polygon identified by 
+/// the prevRef parameter.
+dtStatus dtNavMesh::getOffMeshConnectionPolyEndPoints(dtPolyRef prevRef, dtPolyRef polyRef, float* startPos, float* endPos) const
+{
+	unsigned int salt, it, ip; // Decoded salt, tile index and polygon index of polyRef.
+
+	if (!polyRef)
+		return DT_FAILURE;
+	
+	// Get current polygon
+	decodePolyId(polyRef, salt, it, ip);
+	if (it >= (unsigned int)m_maxTiles) return DT_FAILURE | DT_INVALID_PARAM;
+	if (m_tiles[it].salt != salt || m_tiles[it].header == 0) return DT_FAILURE | DT_INVALID_PARAM;
+	const dtMeshTile* tile = &m_tiles[it];
+	if (ip >= (unsigned int)tile->header->polyCount) return DT_FAILURE | DT_INVALID_PARAM;
+	const dtPoly* poly = &tile->polys[ip];
+
+	// Make sure that the current poly is indeed off-mesh link.
+	if (poly->getType() != DT_POLYTYPE_OFFMESH_CONNECTION)
+		return DT_FAILURE;
+
+	// Figure out which way to hand out the vertices.
+	int idx0 = 0, idx1 = 1; // Default order: vertex 0 is the start, vertex 1 the end.
+	
+	// Find link that points to first vertex.
+	for (unsigned int i = poly->firstLink; i != DT_NULL_LINK; i = tile->links[i].next)
+	{
+		if (tile->links[i].edge == 0)
+		{
+			if (tile->links[i].ref != prevRef) // The edge-0 link does not lead to prevRef, so hand the vertices out reversed.
+			{
+				idx0 = 1;
+				idx1 = 0;
+			}
+			break; // Only the first link on edge 0 is examined.
+		}
+	}
+	
+	dtVcopy(startPos, &tile->verts[poly->verts[idx0]*3]);
+	dtVcopy(endPos, &tile->verts[poly->verts[idx1]*3]);
+
+	return DT_SUCCESS;
+}
+
+
+const dtOffMeshConnection* dtNavMesh::getOffMeshConnectionByRef(dtPolyRef ref) const
+{
+	unsigned int salt, it, ip; // Decoded salt, tile index and polygon index of ref.
+	
+	if (!ref)
+		return 0;
+	
+	// Get current polygon
+	decodePolyId(ref, salt, it, ip);
+	if (it >= (unsigned int)m_maxTiles) return 0;
+	if (m_tiles[it].salt != salt || m_tiles[it].header == 0) return 0;
+	const dtMeshTile* tile = &m_tiles[it];
+	if (ip >= (unsigned int)tile->header->polyCount) return 0;
+	const dtPoly* poly = &tile->polys[ip];
+	
+	// Make sure that the current poly is indeed off-mesh link.
+	if (poly->getType() != DT_POLYTYPE_OFFMESH_CONNECTION)
+		return 0;
+
+	const unsigned int idx =  ip - tile->header->offMeshBase; // Off-mesh polys start at offMeshBase, so this is the index into offMeshCons.
+	dtAssert(idx < (unsigned int)tile->header->offMeshConCount); // Checked by assertion only, not by a runtime return.
+	return &tile->offMeshCons[idx];
+}
+
+
+dtStatus dtNavMesh::setPolyFlags(dtPolyRef ref, unsigned short flags)
+{
+	if (!ref) return DT_FAILURE; // The zero reference is rejected outright.
+	unsigned int salt, it, ip; // Decoded salt, tile index and polygon index.
+	decodePolyId(ref, salt, it, ip);
+	if (it >= (unsigned int)m_maxTiles) return DT_FAILURE | DT_INVALID_PARAM; // Tile index out of range.
+	if (m_tiles[it].salt != salt || m_tiles[it].header == 0) return DT_FAILURE | DT_INVALID_PARAM; // Salt mismatch, or the tile slot has no header.
+	dtMeshTile* tile = &m_tiles[it];
+	if (ip >= (unsigned int)tile->header->polyCount) return DT_FAILURE | DT_INVALID_PARAM; // Polygon index beyond the tile's polygon count.
+	dtPoly* poly = &tile->polys[ip];
+	
+	// Overwrite the polygon's flags with the supplied value (no merging with the old flags).
+	poly->flags = flags;
+	
+	return DT_SUCCESS;
+}
+
+dtStatus dtNavMesh::getPolyFlags(dtPolyRef ref, unsigned short* resultFlags) const
+{
+	if (!ref) return DT_FAILURE; // The zero reference is rejected outright.
+	unsigned int salt, it, ip; // Decoded salt, tile index and polygon index.
+	decodePolyId(ref, salt, it, ip);
+	if (it >= (unsigned int)m_maxTiles) return DT_FAILURE | DT_INVALID_PARAM; // Tile index out of range.
+	if (m_tiles[it].salt != salt || m_tiles[it].header == 0) return DT_FAILURE | DT_INVALID_PARAM; // Salt mismatch, or the tile slot has no header.
+	const dtMeshTile* tile = &m_tiles[it];
+	if (ip >= (unsigned int)tile->header->polyCount) return DT_FAILURE | DT_INVALID_PARAM; // Polygon index beyond the tile's polygon count.
+	const dtPoly* poly = &tile->polys[ip];
+
+	*resultFlags = poly->flags; // resultFlags is written without a null check; it is untouched on failure.
+	
+	return DT_SUCCESS;
+}
+
+dtStatus dtNavMesh::setPolyArea(dtPolyRef ref, unsigned char area)
+{
+	if (!ref) return DT_FAILURE; // The zero reference is rejected outright.
+	unsigned int salt, it, ip; // Decoded salt, tile index and polygon index.
+	decodePolyId(ref, salt, it, ip);
+	if (it >= (unsigned int)m_maxTiles) return DT_FAILURE | DT_INVALID_PARAM; // Tile index out of range.
+	if (m_tiles[it].salt != salt || m_tiles[it].header == 0) return DT_FAILURE | DT_INVALID_PARAM; // Salt mismatch, or the tile slot has no header.
+	dtMeshTile* tile = &m_tiles[it];
+	if (ip >= (unsigned int)tile->header->polyCount) return DT_FAILURE | DT_INVALID_PARAM; // Polygon index beyond the tile's polygon count.
+	dtPoly* poly = &tile->polys[ip];
+	
+	poly->setArea(area); // The value is handed to dtPoly::setArea as given; no range check is done here.
+	
+	return DT_SUCCESS;
+}
+
+dtStatus dtNavMesh::getPolyArea(dtPolyRef ref, unsigned char* resultArea) const
+{
+	if (!ref) return DT_FAILURE; // The zero reference is rejected outright.
+	unsigned int salt, it, ip; // Decoded salt, tile index and polygon index.
+	decodePolyId(ref, salt, it, ip);
+	if (it >= (unsigned int)m_maxTiles) return DT_FAILURE | DT_INVALID_PARAM; // Tile index out of range.
+	if (m_tiles[it].salt != salt || m_tiles[it].header == 0) return DT_FAILURE | DT_INVALID_PARAM; // Salt mismatch, or the tile slot has no header.
+	const dtMeshTile* tile = &m_tiles[it];
+	if (ip >= (unsigned int)tile->header->polyCount) return DT_FAILURE | DT_INVALID_PARAM; // Polygon index beyond the tile's polygon count.
+	const dtPoly* poly = &tile->polys[ip];
+	
+	*resultArea = poly->getArea(); // resultArea is written without a null check; it is untouched on failure.
+	
+	return DT_SUCCESS;
+}
+
diff --git a/Engine/lib/recast/Detour/Source/DetourNavMeshBuilder.cpp b/Engine/lib/recast/Detour/Source/DetourNavMeshBuilder.cpp
new file mode 100644
index 000000000..9d8471b96
--- /dev/null
+++ b/Engine/lib/recast/Detour/Source/DetourNavMeshBuilder.cpp
@@ -0,0 +1,775 @@
+//
+// Copyright (c) 2009-2010 Mikko Mononen memon@inside.org
+//
+// This software is provided 'as-is', without any express or implied
+// warranty.  In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source distribution.
+//
+
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <float.h>
+#include "DetourNavMesh.h"
+#include "DetourCommon.h"
+#include "DetourNavMeshBuilder.h"
+#include "DetourAlloc.h"
+#include "DetourAssert.h"
+
+static const unsigned short MESH_NULL_IDX = 0xffff; // Marks the end of a polygon's used vertex slots; only ever compared against, hence const.
+
+
+struct BVItem // Temporary per-polygon record used while building the BV tree.
+{
+	unsigned short bmin[3]; // Minimum corner of the polygon's bounding box.
+	unsigned short bmax[3]; // Maximum corner of the polygon's bounding box.
+	int i; // Index of the polygon this item was built from.
+};
+
+static int compareItemX(const void* va, const void* vb)
+{
+	// qsort() comparator: ascending order of the items' minimum x bound.
+	const unsigned short lhs = ((const BVItem*)va)->bmin[0];
+	const unsigned short rhs = ((const BVItem*)vb)->bmin[0];
+	int order = 0;
+	if (lhs < rhs) order = -1;
+	else if (lhs > rhs) order = 1;
+	return order;
+}
+
+static int compareItemY(const void* va, const void* vb)
+{
+	// qsort() comparator: ascending order of the items' minimum y bound.
+	const unsigned short lhs = ((const BVItem*)va)->bmin[1];
+	const unsigned short rhs = ((const BVItem*)vb)->bmin[1];
+	int order = 0;
+	if (lhs < rhs) order = -1;
+	else if (lhs > rhs) order = 1;
+	return order;
+}
+
+static int compareItemZ(const void* va, const void* vb)
+{
+	// qsort() comparator: ascending order of the items' minimum z bound.
+	const unsigned short lhs = ((const BVItem*)va)->bmin[2];
+	const unsigned short rhs = ((const BVItem*)vb)->bmin[2];
+	int order = 0;
+	if (lhs < rhs) order = -1;
+	else if (lhs > rhs) order = 1;
+	return order;
+}
+
+static void calcExtends(BVItem* items, const int /*nitems*/, const int imin, const int imax,
+						unsigned short* bmin, unsigned short* bmax)
+{
+	// Computes the union of the bounds of items[imin..imax) into bmin/bmax.
+	// The first item seeds the result, so items[imin] is always read.
+	const BVItem& first = items[imin];
+	for (int axis = 0; axis < 3; ++axis)
+	{
+		bmin[axis] = first.bmin[axis];
+		bmax[axis] = first.bmax[axis];
+	}
+	
+	// Grow the box to enclose every remaining item in the range.
+	for (int idx = imin+1; idx < imax; ++idx)
+	{
+		const BVItem& cur = items[idx];
+		for (int axis = 0; axis < 3; ++axis)
+		{
+			if (cur.bmin[axis] < bmin[axis]) bmin[axis] = cur.bmin[axis];
+			if (cur.bmax[axis] > bmax[axis]) bmax[axis] = cur.bmax[axis];
+		}
+	}
+}
+
+inline int longestAxis(unsigned short x, unsigned short y, unsigned short z)
+{
+	// Returns the index (0 = x, 1 = y, 2 = z) of the largest of the three extents.
+	// Comparisons are strict, so on a tie the lower axis index wins.
+	int best = 0;
+	unsigned short longest = x;
+	if (y > longest)
+	{
+		best = 1;
+		longest = y;
+	}
+	// z only has to beat the larger of x and y.
+	if (z > longest)
+		best = 2;
+	return best;
+}
+
+static void subdivide(BVItem* items, int nitems, int imin, int imax, int& curNode, dtBVNode* nodes)
+{
+	int inum = imax - imin; // Number of items in the half-open range [imin, imax).
+	int icur = curNode; // Index of the node created by this call, kept for the escape offset.
+	
+	dtBVNode& node = nodes[curNode++]; // Claim the next free node; children are written after it.
+	
+	if (inum == 1)
+	{
+		// Leaf
+		node.bmin[0] = items[imin].bmin[0];
+		node.bmin[1] = items[imin].bmin[1];
+		node.bmin[2] = items[imin].bmin[2];
+		
+		node.bmax[0] = items[imin].bmax[0];
+		node.bmax[1] = items[imin].bmax[1];
+		node.bmax[2] = items[imin].bmax[2];
+		
+		node.i = items[imin].i; // Leaf nodes store the (non-negative) polygon index.
+	}
+	else
+	{
+		// Split
+		calcExtends(items, nitems, imin, imax, node.bmin, node.bmax);
+		
+		int	axis = longestAxis(node.bmax[0] - node.bmin[0],
+							   node.bmax[1] - node.bmin[1],
+							   node.bmax[2] - node.bmin[2]);
+		
+		if (axis == 0)
+		{
+			// Sort along x-axis
+			qsort(items+imin, inum, sizeof(BVItem), compareItemX);
+		}
+		else if (axis == 1)
+		{
+			// Sort along y-axis
+			qsort(items+imin, inum, sizeof(BVItem), compareItemY);
+		}
+		else
+		{
+			// Sort along z-axis
+			qsort(items+imin, inum, sizeof(BVItem), compareItemZ);
+		}
+		
+		int isplit = imin+inum/2; // Split the sorted range at its midpoint.
+		
+		// Left
+		subdivide(items, nitems, imin, isplit, curNode, nodes);
+		// Right
+		subdivide(items, nitems, isplit, imax, curNode, nodes);
+		
+		int iescape = curNode - icur; // Number of nodes in this subtree, this node included.
+		// Negative index means escape.
+		node.i = -iescape;
+	}
+}
+
+static int createBVTree(const unsigned short* verts, const int /*nverts*/,
+						const unsigned short* polys, const int npolys, const int nvp,
+						const float cs, const float ch,
+						const int /*nnodes*/, dtBVNode* nodes)
+{
+	// Build tree. Returns the number of nodes written into 'nodes'.
+	BVItem* items = (BVItem*)dtAlloc(sizeof(BVItem)*npolys, DT_ALLOC_TEMP); // NOTE(review): result is used without a null check.
+	for (int i = 0; i < npolys; i++)
+	{
+		BVItem& it = items[i];
+		it.i = i;
+		// Calc polygon bounds.
+		const unsigned short* p = &polys[i*nvp*2]; // Each polygon record is nvp*2 entries long; the first nvp are vertex indices.
+		it.bmin[0] = it.bmax[0] = verts[p[0]*3+0];
+		it.bmin[1] = it.bmax[1] = verts[p[0]*3+1];
+		it.bmin[2] = it.bmax[2] = verts[p[0]*3+2];
+		
+		for (int j = 1; j < nvp; ++j)
+		{
+			if (p[j] == MESH_NULL_IDX) break; // End of this polygon's used vertex slots.
+			unsigned short x = verts[p[j]*3+0];
+			unsigned short y = verts[p[j]*3+1];
+			unsigned short z = verts[p[j]*3+2];
+			
+			if (x < it.bmin[0]) it.bmin[0] = x;
+			if (y < it.bmin[1]) it.bmin[1] = y;
+			if (z < it.bmin[2]) it.bmin[2] = z;
+			
+			if (x > it.bmax[0]) it.bmax[0] = x;
+			if (y > it.bmax[1]) it.bmax[1] = y;
+			if (z > it.bmax[2]) it.bmax[2] = z;
+		}
+		// Remap y: rescale from units of ch to units of cs, rounding outwards.
+		it.bmin[1] = (unsigned short)floorf((float)it.bmin[1]*ch/cs);
+		it.bmax[1] = (unsigned short)ceilf((float)it.bmax[1]*ch/cs);
+	}
+	
+	int curNode = 0; // Advanced by subdivide() as nodes are written.
+	subdivide(items, npolys, 0, npolys, curNode, nodes);
+	
+	dtFree(items);
+	
+	return curNode;
+}
+
+static unsigned char classifyOffMeshPoint(const float* pt, const float* bmin, const float* bmax)
+{
+	// Classifies a point against the xz-rectangle [bmin, bmax): returns 0xff when
+	// the point is inside, otherwise the side code 0..7 of the region it lies in
+	// (0 = x+, 1 = x+z+, 2 = z+, 3 = x-z+, 4 = x-, 5 = x-z-, 6 = z-, 7 = x+z-).
+	// The y component is not examined here.
+	enum { XP = 1<<0, ZP = 1<<1, XM = 1<<2, ZM = 1<<3 };
+
+	// Side code for each 4-bit outcode; combinations that do not name a single
+	// side or corner (including "inside", outcode 0) map to 0xff.
+	static const unsigned char sideForOutcode[16] =
+	{
+		0xff, 0,    2,    1,	// none,  XP,       ZP,       XP|ZP
+		4,    0xff, 3,    0xff,	// XM,    XP|XM,    ZP|XM,    XP|ZP|XM
+		6,    7,    0xff, 0xff,	// ZM,    XP|ZM,    ZP|ZM,    XP|ZP|ZM
+		5,    0xff, 0xff, 0xff	// XM|ZM, XP|XM|ZM, ZP|XM|ZM, all four
+	};
+
+	// Build the outcode: one bit per boundary the point lies beyond.
+	unsigned int outcode = 0;
+	if (pt[0] >= bmax[0]) outcode |= XP;
+	if (pt[2] >= bmax[2]) outcode |= ZP;
+	if (pt[0] < bmin[0])  outcode |= XM;
+	if (pt[2] < bmin[2])  outcode |= ZM;
+
+	return sideForOutcode[outcode];
+}
+
+// TODO: Better error handling.
+
+/// @par
+/// 
+/// The output data array is allocated using the detour allocator (dtAlloc()).  The method
+/// used to free the memory will be determined by how the tile is added to the navigation
+/// mesh.
+/// Returns false, without writing @p outData / @p outDataSize, when the parameters are rejected or an allocation fails.
+/// @see dtNavMesh, dtNavMesh::addTile()
+bool dtCreateNavMeshData(dtNavMeshCreateParams* params, unsigned char** outData, int* outDataSize)
+{
+	if (params->nvp > DT_VERTS_PER_POLYGON)
+		return false;
+	if (params->vertCount >= 0xffff)
+		return false;
+	if (!params->vertCount || !params->verts)
+		return false;
+	if (!params->polyCount || !params->polys)
+		return false;
+
+	const int nvp = params->nvp;
+	
+	// Classify off-mesh connection points. We store only the connections
+	// whose start point is inside the tile.
+	unsigned char* offMeshConClass = 0;
+	int storedOffMeshConCount = 0;
+	int offMeshConLinkCount = 0;
+	
+	if (params->offMeshConCount > 0)
+	{
+		offMeshConClass = (unsigned char*)dtAlloc(sizeof(unsigned char)*params->offMeshConCount*2, DT_ALLOC_TEMP);
+		if (!offMeshConClass)
+			return false;
+
+		// Find tight height bounds, used for culling out off-mesh start locations.
+		float hmin = FLT_MAX;
+		float hmax = -FLT_MAX;
+		
+		if (params->detailVerts && params->detailVertsCount)
+		{
+			for (int i = 0; i < params->detailVertsCount; ++i)
+			{
+				const float h = params->detailVerts[i*3+1];
+				hmin = dtMin(hmin,h);
+				hmax = dtMax(hmax,h);
+			}
+		}
+		else
+		{
+			for (int i = 0; i < params->vertCount; ++i)
+			{
+				const unsigned short* iv = &params->verts[i*3];
+				const float h = params->bmin[1] + iv[1] * params->ch;
+				hmin = dtMin(hmin,h);
+				hmax = dtMax(hmax,h);
+			}
+		}
+		hmin -= params->walkableClimb;
+		hmax += params->walkableClimb;
+		float bmin[3], bmax[3];
+		dtVcopy(bmin, params->bmin);
+		dtVcopy(bmax, params->bmax);
+		bmin[1] = hmin;
+		bmax[1] = hmax;
+
+		for (int i = 0; i < params->offMeshConCount; ++i)
+		{
+			const float* p0 = &params->offMeshConVerts[(i*2+0)*3];
+			const float* p1 = &params->offMeshConVerts[(i*2+1)*3];
+			offMeshConClass[i*2+0] = classifyOffMeshPoint(p0, bmin, bmax);
+			offMeshConClass[i*2+1] = classifyOffMeshPoint(p1, bmin, bmax);
+
+			// Zero out off-mesh start positions which are not even potentially touching the mesh.
+			if (offMeshConClass[i*2+0] == 0xff)
+			{
+				if (p0[1] < bmin[1] || p0[1] > bmax[1])
+					offMeshConClass[i*2+0] = 0;
+			}
+
+			// Count how many links should be allocated for off-mesh connections.
+			if (offMeshConClass[i*2+0] == 0xff)
+				offMeshConLinkCount++;
+			if (offMeshConClass[i*2+1] == 0xff)
+				offMeshConLinkCount++;
+
+			if (offMeshConClass[i*2+0] == 0xff)
+				storedOffMeshConCount++;
+		}
+	}
+	
+	// Off-mesh connections are stored as polygons, adjust values.
+	const int totPolyCount = params->polyCount + storedOffMeshConCount;
+	const int totVertCount = params->vertCount + storedOffMeshConCount*2;
+	
+	// Find portal edges which are at tile borders.
+	int edgeCount = 0;
+	int portalCount = 0;
+	for (int i = 0; i < params->polyCount; ++i)
+	{
+		const unsigned short* p = &params->polys[i*2*nvp];
+		for (int j = 0; j < nvp; ++j)
+		{
+			if (p[j] == MESH_NULL_IDX) break;
+			edgeCount++;
+			
+			if (p[nvp+j] & 0x8000)
+			{
+				unsigned short dir = p[nvp+j] & 0xf;
+				if (dir != 0xf)
+					portalCount++;
+			}
+		}
+	}
+
+	const int maxLinkCount = edgeCount + portalCount*2 + offMeshConLinkCount*2;
+	
+	// Find unique detail vertices.
+	int uniqueDetailVertCount = 0;
+	int detailTriCount = 0;
+	if (params->detailMeshes)
+	{
+		// Has detail mesh, count unique detail vertex count and use input detail tri count.
+		detailTriCount = params->detailTriCount;
+		for (int i = 0; i < params->polyCount; ++i)
+		{
+			const unsigned short* p = &params->polys[i*nvp*2];
+			int ndv = params->detailMeshes[i*4+1];
+			int nv = 0;
+			for (int j = 0; j < nvp; ++j)
+			{
+				if (p[j] == MESH_NULL_IDX) break;
+				nv++;
+			}
+			ndv -= nv;
+			uniqueDetailVertCount += ndv;
+		}
+	}
+	else
+	{
+		// No input detail mesh, build detail mesh from nav polys.
+		uniqueDetailVertCount = 0; // No extra detail verts.
+		detailTriCount = 0;
+		for (int i = 0; i < params->polyCount; ++i)
+		{
+			const unsigned short* p = &params->polys[i*nvp*2];
+			int nv = 0;
+			for (int j = 0; j < nvp; ++j)
+			{
+				if (p[j] == MESH_NULL_IDX) break;
+				nv++;
+			}
+			detailTriCount += nv-2;
+		}
+	}
+	
+	// Calculate data size
+	const int headerSize = dtAlign4(sizeof(dtMeshHeader));
+	const int vertsSize = dtAlign4(sizeof(float)*3*totVertCount);
+	const int polysSize = dtAlign4(sizeof(dtPoly)*totPolyCount);
+	const int linksSize = dtAlign4(sizeof(dtLink)*maxLinkCount);
+	const int detailMeshesSize = dtAlign4(sizeof(dtPolyDetail)*params->polyCount);
+	const int detailVertsSize = dtAlign4(sizeof(float)*3*uniqueDetailVertCount);
+	const int detailTrisSize = dtAlign4(sizeof(unsigned char)*4*detailTriCount);
+	const int bvTreeSize = params->buildBvTree ? dtAlign4(sizeof(dtBVNode)*params->polyCount*2) : 0;
+	const int offMeshConsSize = dtAlign4(sizeof(dtOffMeshConnection)*storedOffMeshConCount);
+	
+	const int dataSize = headerSize + vertsSize + polysSize + linksSize +
+						 detailMeshesSize + detailVertsSize + detailTrisSize +
+						 bvTreeSize + offMeshConsSize;
+						 
+	unsigned char* data = (unsigned char*)dtAlloc(sizeof(unsigned char)*dataSize, DT_ALLOC_PERM);
+	if (!data)
+	{
+		dtFree(offMeshConClass);
+		return false;
+	}
+	memset(data, 0, dataSize);
+	
+	unsigned char* d = data;
+	dtMeshHeader* header = (dtMeshHeader*)d; d += headerSize;
+	float* navVerts = (float*)d; d += vertsSize;
+	dtPoly* navPolys = (dtPoly*)d; d += polysSize;
+	d += linksSize; // Link storage is reserved but left zeroed here.
+	dtPolyDetail* navDMeshes = (dtPolyDetail*)d; d += detailMeshesSize;
+	float* navDVerts = (float*)d; d += detailVertsSize;
+	unsigned char* navDTris = (unsigned char*)d; d += detailTrisSize;
+	dtBVNode* navBvtree = (dtBVNode*)d; d += bvTreeSize;
+	dtOffMeshConnection* offMeshCons = (dtOffMeshConnection*)d; d += offMeshConsSize;
+	
+	
+	// Store header
+	header->magic = DT_NAVMESH_MAGIC;
+	header->version = DT_NAVMESH_VERSION;
+	header->x = params->tileX;
+	header->y = params->tileY;
+	header->layer = params->tileLayer;
+	header->userId = params->userId;
+	header->polyCount = totPolyCount;
+	header->vertCount = totVertCount;
+	header->maxLinkCount = maxLinkCount;
+	dtVcopy(header->bmin, params->bmin);
+	dtVcopy(header->bmax, params->bmax);
+	header->detailMeshCount = params->polyCount;
+	header->detailVertCount = uniqueDetailVertCount;
+	header->detailTriCount = detailTriCount;
+	header->bvQuantFactor = 1.0f / params->cs;
+	header->offMeshBase = params->polyCount;
+	header->walkableHeight = params->walkableHeight;
+	header->walkableRadius = params->walkableRadius;
+	header->walkableClimb = params->walkableClimb;
+	header->offMeshConCount = storedOffMeshConCount;
+	header->bvNodeCount = params->buildBvTree ? params->polyCount*2 : 0;
+	
+	const int offMeshVertsBase = params->vertCount; // Off-mesh vertices are appended after the mesh vertices.
+	const int offMeshPolyBase = params->polyCount; // Off-mesh polys are appended after the mesh polys.
+	
+	// Store vertices
+	// Mesh vertices
+	for (int i = 0; i < params->vertCount; ++i)
+	{
+		const unsigned short* iv = &params->verts[i*3];
+		float* v = &navVerts[i*3];
+		v[0] = params->bmin[0] + iv[0] * params->cs;
+		v[1] = params->bmin[1] + iv[1] * params->ch;
+		v[2] = params->bmin[2] + iv[2] * params->cs;
+	}
+	// Off-mesh link vertices.
+	int n = 0;
+	for (int i = 0; i < params->offMeshConCount; ++i)
+	{
+		// Only store connections which start from this tile.
+		if (offMeshConClass[i*2+0] == 0xff)
+		{
+			const float* linkv = &params->offMeshConVerts[i*2*3];
+			float* v = &navVerts[(offMeshVertsBase + n*2)*3];
+			dtVcopy(&v[0], &linkv[0]);
+			dtVcopy(&v[3], &linkv[3]);
+			n++;
+		}
+	}
+	
+	// Store polygons
+	// Mesh polys
+	const unsigned short* src = params->polys;
+	for (int i = 0; i < params->polyCount; ++i)
+	{
+		dtPoly* p = &navPolys[i];
+		p->vertCount = 0;
+		p->flags = params->polyFlags[i];
+		p->setArea(params->polyAreas[i]);
+		p->setType(DT_POLYTYPE_GROUND);
+		for (int j = 0; j < nvp; ++j)
+		{
+			if (src[j] == MESH_NULL_IDX) break;
+			p->verts[j] = src[j];
+			if (src[nvp+j] & 0x8000)
+			{
+				// Border or portal edge.
+				unsigned short dir = src[nvp+j] & 0xf;
+				if (dir == 0xf) // Border
+					p->neis[j] = 0;
+				else if (dir == 0) // Portal x-
+					p->neis[j] = DT_EXT_LINK | 4;
+				else if (dir == 1) // Portal z+
+					p->neis[j] = DT_EXT_LINK | 2;
+				else if (dir == 2) // Portal x+
+					p->neis[j] = DT_EXT_LINK | 0;
+				else if (dir == 3) // Portal z-
+					p->neis[j] = DT_EXT_LINK | 6;
+			}
+			else
+			{
+				// Normal connection
+				p->neis[j] = src[nvp+j]+1;
+			}
+			
+			p->vertCount++;
+		}
+		src += nvp*2;
+	}
+	// Off-mesh connection polys.
+	n = 0;
+	for (int i = 0; i < params->offMeshConCount; ++i)
+	{
+		// Only store connections which start from this tile.
+		if (offMeshConClass[i*2+0] == 0xff)
+		{
+			dtPoly* p = &navPolys[offMeshPolyBase+n];
+			p->vertCount = 2;
+			p->verts[0] = (unsigned short)(offMeshVertsBase + n*2+0);
+			p->verts[1] = (unsigned short)(offMeshVertsBase + n*2+1);
+			p->flags = params->offMeshConFlags[i];
+			p->setArea(params->offMeshConAreas[i]);
+			p->setType(DT_POLYTYPE_OFFMESH_CONNECTION);
+			n++;
+		}
+	}
+
+	// Store detail meshes and vertices.
+	// The nav polygon vertices are stored as the first vertices on each mesh.
+	// We compress the mesh data by skipping them and using the navmesh coordinates.
+	if (params->detailMeshes)
+	{
+		unsigned int vbase = 0; // Same width as dtl.vertBase; a 16-bit counter would wrap past 65535 detail verts.
+		for (int i = 0; i < params->polyCount; ++i)
+		{
+			dtPolyDetail& dtl = navDMeshes[i];
+			const int vb = (int)params->detailMeshes[i*4+0];
+			const int ndv = (int)params->detailMeshes[i*4+1];
+			const int nv = navPolys[i].vertCount;
+			dtl.vertBase = (unsigned int)vbase;
+			dtl.vertCount = (unsigned char)(ndv-nv);
+			dtl.triBase = (unsigned int)params->detailMeshes[i*4+2];
+			dtl.triCount = (unsigned char)params->detailMeshes[i*4+3];
+			// Copy vertices except the first 'nv' verts which are equal to nav poly verts.
+			if (ndv-nv)
+			{
+				memcpy(&navDVerts[vbase*3], &params->detailVerts[(vb+nv)*3], sizeof(float)*3*(ndv-nv));
+				vbase += (unsigned int)(ndv-nv);
+			}
+		}
+		// Store triangles.
+		memcpy(navDTris, params->detailTris, sizeof(unsigned char)*4*params->detailTriCount);
+	}
+	else
+	{
+		// Create dummy detail mesh by triangulating polys.
+		int tbase = 0;
+		for (int i = 0; i < params->polyCount; ++i)
+		{
+			dtPolyDetail& dtl = navDMeshes[i];
+			const int nv = navPolys[i].vertCount;
+			dtl.vertBase = 0;
+			dtl.vertCount = 0;
+			dtl.triBase = (unsigned int)tbase;
+			dtl.triCount = (unsigned char)(nv-2);
+			// Triangulate polygon (local indices).
+			for (int j = 2; j < nv; ++j)
+			{
+				unsigned char* t = &navDTris[tbase*4];
+				t[0] = 0;
+				t[1] = (unsigned char)(j-1);
+				t[2] = (unsigned char)j;
+				// Bit for each edge that belongs to poly boundary.
+				t[3] = (1<<2);
+				if (j == 2) t[3] |= (1<<0);
+				if (j == nv-1) t[3] |= (1<<4);
+				tbase++;
+			}
+		}
+	}
+
+	// Store and create BVtree.
+	// TODO: take detail mesh into account! use byte per bbox extent?
+	if (params->buildBvTree)
+	{
+		createBVTree(params->verts, params->vertCount, params->polys, params->polyCount,
+					 nvp, params->cs, params->ch, params->polyCount*2, navBvtree);
+	}
+	
+	// Store Off-Mesh connections.
+	n = 0;
+	for (int i = 0; i < params->offMeshConCount; ++i)
+	{
+		// Only store connections which start from this tile.
+		if (offMeshConClass[i*2+0] == 0xff)
+		{
+			dtOffMeshConnection* con = &offMeshCons[n];
+			con->poly = (unsigned short)(offMeshPolyBase + n);
+			// Copy connection end-points.
+			const float* endPts = &params->offMeshConVerts[i*2*3];
+			dtVcopy(&con->pos[0], &endPts[0]);
+			dtVcopy(&con->pos[3], &endPts[3]);
+			con->rad = params->offMeshConRad[i];
+			con->flags = params->offMeshConDir[i] ? DT_OFFMESH_CON_BIDIR : 0;
+			con->side = offMeshConClass[i*2+1];
+			if (params->offMeshConUserID) // Optional array; userId stays 0 from the memset when it is absent.
+				con->userId = params->offMeshConUserID[i];
+			n++;
+		}
+	}
+		
+	dtFree(offMeshConClass);
+	
+	*outData = data;
+	*outDataSize = dataSize;
+	
+	return true;
+}
+
+bool dtNavMeshHeaderSwapEndian(unsigned char* data, const int /*dataSize*/) // dataSize is unused: the buffer length is not validated.
+{
+	dtMeshHeader* header = (dtMeshHeader*)data;
+	
+	int swappedMagic = DT_NAVMESH_MAGIC;
+	int swappedVersion = DT_NAVMESH_VERSION;
+	dtSwapEndian(&swappedMagic);
+	dtSwapEndian(&swappedVersion);
+	
+	// Accept the header only if magic and version match in either native or byte-swapped form.
+	if ((header->magic != DT_NAVMESH_MAGIC || header->version != DT_NAVMESH_VERSION) &&
+		(header->magic != swappedMagic || header->version != swappedVersion))
+	{
+		return false;
+	}
+		
+	dtSwapEndian(&header->magic);
+	dtSwapEndian(&header->version);
+	dtSwapEndian(&header->x);
+	dtSwapEndian(&header->y);
+	dtSwapEndian(&header->layer);
+	dtSwapEndian(&header->userId);
+	dtSwapEndian(&header->polyCount);
+	dtSwapEndian(&header->vertCount);
+	dtSwapEndian(&header->maxLinkCount);
+	dtSwapEndian(&header->detailMeshCount);
+	dtSwapEndian(&header->detailVertCount);
+	dtSwapEndian(&header->detailTriCount);
+	dtSwapEndian(&header->bvNodeCount);
+	dtSwapEndian(&header->offMeshConCount);
+	dtSwapEndian(&header->offMeshBase);
+	dtSwapEndian(&header->walkableHeight);
+	dtSwapEndian(&header->walkableRadius);
+	dtSwapEndian(&header->walkableClimb);
+	dtSwapEndian(&header->bmin[0]);
+	dtSwapEndian(&header->bmin[1]);
+	dtSwapEndian(&header->bmin[2]);
+	dtSwapEndian(&header->bmax[0]);
+	dtSwapEndian(&header->bmax[1]);
+	dtSwapEndian(&header->bmax[2]);
+	dtSwapEndian(&header->bvQuantFactor);
+
+	// Freelist index and pointers are updated when tile is added, no need to swap.
+
+	return true;
+}
+
+/// @par
+///
+/// @warning This function assumes that the header is in the correct endianess already. 
+/// Call #dtNavMeshHeaderSwapEndian() first on the data if the data is expected to be in wrong endianess 
+/// to start with. Call #dtNavMeshHeaderSwapEndian() after the data has been swapped if converting from 
+/// native to foreign endianess.
+bool dtNavMeshDataSwapEndian(unsigned char* data, const int /*dataSize*/)
+{
+	// Make sure the data is in right format.
+	dtMeshHeader* header = (dtMeshHeader*)data;
+	if (header->magic != DT_NAVMESH_MAGIC)
+		return false;
+	if (header->version != DT_NAVMESH_VERSION)
+		return false;
+	
+	// Patch header pointers.
+	const int headerSize = dtAlign4(sizeof(dtMeshHeader));
+	const int vertsSize = dtAlign4(sizeof(float)*3*header->vertCount);
+	const int polysSize = dtAlign4(sizeof(dtPoly)*header->polyCount);
+	const int linksSize = dtAlign4(sizeof(dtLink)*(header->maxLinkCount));
+	const int detailMeshesSize = dtAlign4(sizeof(dtPolyDetail)*header->detailMeshCount);
+	const int detailVertsSize = dtAlign4(sizeof(float)*3*header->detailVertCount);
+	const int detailTrisSize = dtAlign4(sizeof(unsigned char)*4*header->detailTriCount);
+	const int bvtreeSize = dtAlign4(sizeof(dtBVNode)*header->bvNodeCount);
+	const int offMeshLinksSize = dtAlign4(sizeof(dtOffMeshConnection)*header->offMeshConCount);
+	
+	unsigned char* d = data + headerSize;
+	float* verts = (float*)d; d += vertsSize;
+	dtPoly* polys = (dtPoly*)d; d += polysSize;
+	/*dtLink* links = (dtLink*)d;*/ d += linksSize;
+	dtPolyDetail* detailMeshes = (dtPolyDetail*)d; d += detailMeshesSize;
+	float* detailVerts = (float*)d; d += detailVertsSize;
+	/*unsigned char* detailTris = (unsigned char*)d;*/ d += detailTrisSize;
+	dtBVNode* bvTree = (dtBVNode*)d; d += bvtreeSize;
+	dtOffMeshConnection* offMeshCons = (dtOffMeshConnection*)d; d += offMeshLinksSize;
+	
+	// Vertices
+	for (int i = 0; i < header->vertCount*3; ++i)
+	{
+		dtSwapEndian(&verts[i]);
+	}
+
+	// Polys
+	for (int i = 0; i < header->polyCount; ++i)
+	{
+		dtPoly* p = &polys[i];
+		// poly->firstLink is update when tile is added, no need to swap.
+		for (int j = 0; j < DT_VERTS_PER_POLYGON; ++j)
+		{
+			dtSwapEndian(&p->verts[j]);
+			dtSwapEndian(&p->neis[j]);
+		}
+		dtSwapEndian(&p->flags);
+	}
+
+	// Links are rebuild when tile is added, no need to swap.
+
+	// Detail meshes
+	for (int i = 0; i < header->detailMeshCount; ++i)
+	{
+		dtPolyDetail* pd = &detailMeshes[i];
+		dtSwapEndian(&pd->vertBase);
+		dtSwapEndian(&pd->triBase);
+	}
+	
+	// Detail verts
+	for (int i = 0; i < header->detailVertCount*3; ++i)
+	{
+		dtSwapEndian(&detailVerts[i]);
+	}
+
+	// BV-tree
+	for (int i = 0; i < header->bvNodeCount; ++i)
+	{
+		dtBVNode* node = &bvTree[i];
+		for (int j = 0; j < 3; ++j)
+		{
+			dtSwapEndian(&node->bmin[j]);
+			dtSwapEndian(&node->bmax[j]);
+		}
+		dtSwapEndian(&node->i);
+	}
+
+	// Off-mesh Connections.
+	for (int i = 0; i < header->offMeshConCount; ++i)
+	{
+		dtOffMeshConnection* con = &offMeshCons[i];
+		for (int j = 0; j < 6; ++j) dtSwapEndian(&con->pos[j]);
+		dtSwapEndian(&con->rad);
+		dtSwapEndian(&con->poly);
+		dtSwapEndian(&con->userId); // Previously left unswapped; assumes userId is an unsigned int like dtMeshHeader::userId - confirm.
+	}
+	
+	return true;
+}
diff --git a/Engine/lib/recast/Detour/Source/DetourNavMeshQuery.cpp b/Engine/lib/recast/Detour/Source/DetourNavMeshQuery.cpp
new file mode 100644
index 000000000..0eb001146
--- /dev/null
+++ b/Engine/lib/recast/Detour/Source/DetourNavMeshQuery.cpp
@@ -0,0 +1,3318 @@
+//
+// Copyright (c) 2009-2010 Mikko Mononen memon@inside.org
+//
+// This software is provided 'as-is', without any express or implied
+// warranty.  In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source distribution.
+//
+
+#include <math.h>
+#include <float.h>
+#include <string.h>
+#include "DetourNavMeshQuery.h"
+#include "DetourNavMesh.h"
+#include "DetourNode.h"
+#include "DetourCommon.h"
+#include "DetourAlloc.h"
+#include "DetourAssert.h"
+#include <new>
+
+/// @class dtQueryFilter
+///
+/// <b>The Default Implementation</b>
+/// 
+/// At construction: All area costs default to 1.0.  All flags are included
+/// and none are excluded.
+/// 
+/// If a polygon has both an include and an exclude flag, it will be excluded.
+/// 
+/// The way filtering works, a navigation mesh polygon must have at least one flag 
+/// set to ever be considered by a query. So a polygon with no flags will never
+/// be considered.
+///
+/// Setting the include flags to 0 will result in all polygons being excluded.
+///
+/// <b>Custom Implementations</b>
+/// 
+/// DT_VIRTUAL_QUERYFILTER must be defined in order to extend this class.
+/// 
+/// Implement a custom query filter by overriding the virtual passFilter() 
+/// and getCost() functions. If this is done, both functions should be as 
+/// fast as possible. Use cached local copies of data rather than accessing 
+/// your own objects where possible.
+/// 
+/// Custom implementations do not need to adhere to the flags or cost logic 
+/// used by the default implementation.  
+/// 
+/// In order for A* searches to work properly, the cost should be proportional to
+/// the travel distance. Implementing a cost modifier less than 1.0 is likely 
+/// to lead to problems during pathfinding.
+///
+/// @see dtNavMeshQuery
+
+dtQueryFilter::dtQueryFilter()
+	: m_includeFlags(0xffff)	// include mask: all flags included
+	, m_excludeFlags(0)			// exclude mask: nothing excluded
+{
+	// Every area starts with a cost multiplier of 1.0.
+	for (int area = 0; area < DT_MAX_AREAS; ++area) m_areaCost[area] = 1.0f;
+}
+
+#ifdef DT_VIRTUAL_QUERYFILTER
+// Default filter test (out-of-line definition used when the filter is virtual).
+bool dtQueryFilter::passFilter(const dtPolyRef /*ref*/, const dtMeshTile* /*tile*/, const dtPoly* poly) const
+{
+	if ((poly->flags & m_includeFlags) == 0) return false;	// needs at least one included flag
+	return (poly->flags & m_excludeFlags) == 0;				// and must carry no excluded flag
+}
+
+float dtQueryFilter::getCost(const float* pa, const float* pb,
+	const dtPolyRef /*prevRef*/, const dtMeshTile* /*prevTile*/, const dtPoly* /*prevPoly*/,
+	const dtPolyRef /*curRef*/, const dtMeshTile* /*curTile*/, const dtPoly* curPoly,
+	const dtPolyRef /*nextRef*/, const dtMeshTile* /*nextTile*/, const dtPoly* /*nextPoly*/) const
+{
+	return m_areaCost[curPoly->getArea()] * dtVdist(pa, pb);	// cost of curPoly's area times the pa-pb distance
+}
+#else
+// Default filter test (inline definition used when the filter is not virtual).
+inline bool dtQueryFilter::passFilter(const dtPolyRef /*ref*/, const dtMeshTile* /*tile*/, const dtPoly* poly) const
+{
+	if ((poly->flags & m_includeFlags) == 0) return false;	// needs at least one included flag
+	return (poly->flags & m_excludeFlags) == 0;				// and must carry no excluded flag
+}
+
+inline float dtQueryFilter::getCost(const float* pa, const float* pb,
+	const dtPolyRef /*prevRef*/, const dtMeshTile* /*prevTile*/, const dtPoly* /*prevPoly*/,
+	const dtPolyRef /*curRef*/, const dtMeshTile* /*curTile*/, const dtPoly* curPoly,
+	const dtPolyRef /*nextRef*/, const dtMeshTile* /*nextTile*/, const dtPoly* /*nextPoly*/) const
+{
+	return m_areaCost[curPoly->getArea()] * dtVdist(pa, pb);	// cost of curPoly's area times the pa-pb distance
+}
+#endif
+	
+static const float H_SCALE = 0.999f; // Scale applied to the straight-line distance-to-goal heuristic (see findPath()).
+
+
+dtNavMeshQuery* dtAllocNavMeshQuery()
+{
+	// Take raw storage from dtAlloc(); construct in place only when that succeeded, else return null.
+	void* storage = dtAlloc(sizeof(dtNavMeshQuery), DT_ALLOC_PERM);
+	return storage ? new(storage) dtNavMeshQuery : 0;
+}
+
+void dtFreeNavMeshQuery(dtNavMeshQuery* navmesh)
+{
+	if (navmesh == 0) return;		// freeing null is a no-op
+	navmesh->~dtNavMeshQuery();		// destroy the object in place ...
+	dtFree(navmesh);				// ... then release its storage
+}
+
+//////////////////////////////////////////////////////////////////////////////////////////
+
+/// @class dtNavMeshQuery
+///
+/// For methods that support undersized buffers, if the buffer is too small 
+/// to hold the entire result set the return status of the method will include 
+/// the #DT_BUFFER_TOO_SMALL flag.
+///
+/// Constant member functions can be used by multiple clients without side
+/// effects. (E.g. No change to the closed list. No impact on an in-progress
+/// sliced path query. Etc.)
+/// 
+/// Walls and portals: A @e wall is a polygon segment that is 
+/// considered impassable. A @e portal is a passable segment between polygons.
+/// A portal may be treated as a wall based on the dtQueryFilter used for a query.
+///
+/// @see dtNavMesh, dtQueryFilter, #dtAllocNavMeshQuery(), #dtFreeNavMeshQuery()
+
+dtNavMeshQuery::dtNavMeshQuery()
+	: m_nav(0)
+	, m_tinyNodePool(0)
+	, m_nodePool(0)
+	, m_openList(0)
+{
+	memset(&m_query, 0, sizeof(dtQueryData));	// zero m_query; the pools and open list are created later by init()
+}
+
+dtNavMeshQuery::~dtNavMeshQuery()
+{
+	// init() builds these helpers with placement new, so each live object is
+	// destroyed explicitly first; the raw blocks are then handed to dtFree().
+	if (m_tinyNodePool) m_tinyNodePool->~dtNodePool();
+	if (m_nodePool) m_nodePool->~dtNodePool();
+	if (m_openList) m_openList->~dtNodeQueue();
+	// dtFree() is called for every member, including ones that are still null.
+	dtFree(m_tinyNodePool);
+	dtFree(m_nodePool);
+	dtFree(m_openList);
+}
+
+/// @par
+///
+/// Must be the first function called after construction, before other functions are used.
+/// May be called again: the node pool and open list are reused when big enough, otherwise rebuilt.
+/// Returns #DT_SUCCESS, or #DT_FAILURE | #DT_OUT_OF_MEMORY when storage for a helper cannot be allocated.
+/// Storage is null-checked before placement new, so a failed dtAlloc() is never constructed into.
+dtStatus dtNavMeshQuery::init(const dtNavMesh* nav, const int maxNodes)
+{
+	m_nav = nav;
+	// Main node pool: rebuild when missing or smaller than maxNodes, otherwise just clear it.
+	if (!m_nodePool || m_nodePool->getMaxNodes() < maxNodes)
+	{
+		if (m_nodePool)
+		{
+			m_nodePool->~dtNodePool();
+			dtFree(m_nodePool);
+			m_nodePool = 0;
+		}
+		void* mem = dtAlloc(sizeof(dtNodePool), DT_ALLOC_PERM);
+		if (!mem) return DT_FAILURE | DT_OUT_OF_MEMORY;	// m_nodePool stays null
+		m_nodePool = new (mem) dtNodePool(maxNodes, dtNextPow2(maxNodes/4));
+	}
+	else
+	{
+		m_nodePool->clear();
+	}
+	// Tiny node pool: fixed size, created on the first call and cleared on later ones.
+	if (!m_tinyNodePool)
+	{
+		void* mem = dtAlloc(sizeof(dtNodePool), DT_ALLOC_PERM);
+		if (!mem) return DT_FAILURE | DT_OUT_OF_MEMORY;	// m_tinyNodePool stays null
+		m_tinyNodePool = new (mem) dtNodePool(64, 32);
+	}
+	else
+	{
+		m_tinyNodePool->clear();
+	}
+	
+	// Open list: same policy as the main node pool, keyed on its capacity.
+	if (!m_openList || m_openList->getCapacity() < maxNodes)
+	{
+		if (m_openList)
+		{
+			m_openList->~dtNodeQueue();
+			dtFree(m_openList);
+			m_openList = 0;
+		}
+		void* mem = dtAlloc(sizeof(dtNodeQueue), DT_ALLOC_PERM);
+		if (!mem) return DT_FAILURE | DT_OUT_OF_MEMORY;	// m_openList stays null
+		m_openList = new (mem) dtNodeQueue(maxNodes);
+	}
+	else
+	{
+		m_openList->clear();
+	}
+	
+	return DT_SUCCESS;
+}
+
+dtStatus dtNavMeshQuery::findRandomPoint(const dtQueryFilter* filter, float (*frand)(),
+										 dtPolyRef* randomRef, float* randomPt) const
+{
+	dtAssert(m_nav);
+
+	// Stage 1: pick a tile. Every tile that has a header gets the same weight, and the
+	// choice is made in one pass by reservoir sampling (one frand() draw per such tile).
+	const dtMeshTile* tile = 0;
+	float weightSum = 0.0f;
+	for (int slot = 0; slot < m_nav->getMaxTiles(); slot++)
+	{
+		const dtMeshTile* candidate = m_nav->getTile(slot);
+		if (candidate && candidate->header)
+		{
+			const float weight = 1.0f; // Could be tile area too.
+			weightSum += weight;
+			const float roll = frand();
+			if (roll*weightSum <= weight)
+				tile = candidate;
+		}
+	}
+	if (tile == 0)
+		return DT_FAILURE;
+
+	// Stage 2: pick a ground polygon of that tile that passes the filter, weighted by
+	// its 2D area, again by reservoir sampling (one frand() draw per eligible polygon).
+	const dtPolyRef refBase = m_nav->getPolyRefBase(tile);
+	const dtPoly* poly = 0;
+	dtPolyRef polyRef = 0;
+	float areaSum = 0.0f;
+	for (int pi = 0; pi < tile->header->polyCount; ++pi)
+	{
+		const dtPoly* candidate = &tile->polys[pi];
+		const dtPolyRef candidateRef = refBase | (dtPolyRef)pi;
+		// Off-mesh connection polygons and filtered-out polygons are never returned.
+		if (candidate->getType() != DT_POLYTYPE_GROUND ||
+			!filter->passFilter(candidateRef, tile, candidate))
+			continue;
+
+		// Area as a triangle fan anchored at vertex 0.
+		const float* fanOrigin = &tile->verts[candidate->verts[0]*3];
+		float polyArea = 0.0f;
+		for (int k = 2; k < candidate->vertCount; ++k)
+		{
+			const float* prev = &tile->verts[candidate->verts[k-1]*3];
+			const float* cur = &tile->verts[candidate->verts[k]*3];
+			polyArea += dtTriArea2D(fanOrigin, prev, cur);
+		}
+
+		areaSum += polyArea;
+		const float roll = frand();
+		if (roll*areaSum <= polyArea)
+		{
+			poly = candidate;
+			polyRef = candidateRef;
+		}
+	}
+
+	if (poly == 0)
+		return DT_FAILURE;
+
+	// Stage 3: pick a point inside the chosen polygon.
+	// Its vertices are gathered into a local array first (vertex 0 is always copied).
+	float verts[3*DT_VERTS_PER_POLYGON];
+	float areas[DT_VERTS_PER_POLYGON]; // passed to dtRandomPointInConvexPoly() as its areas argument
+	dtVcopy(&verts[0], &tile->verts[poly->verts[0]*3]);
+	for (int k = 1; k < poly->vertCount; ++k)
+	{
+		dtVcopy(&verts[k*3], &tile->verts[poly->verts[k]*3]);
+	}
+
+	// Two more frand() draws, s first and then t.
+	const float s = frand();
+	const float t = frand();
+
+	float pt[3];
+	dtRandomPointInConvexPoly(verts, poly->vertCount, areas, s, t, pt);
+
+	// Replace the y coordinate with the height reported by getPolyHeight().
+	float height = 0.0f;
+	const dtStatus heightStatus = getPolyHeight(polyRef, pt, &height);
+	if (dtStatusFailed(heightStatus))
+		return heightStatus;
+	pt[1] = height;
+
+	// Outputs are written only on success.
+	dtVcopy(randomPt, pt);
+	*randomRef = polyRef;
+
+	return DT_SUCCESS;
+}
+
+dtStatus dtNavMeshQuery::findRandomPointAroundCircle(dtPolyRef startRef, const float* centerPos, const float radius,
+													 const dtQueryFilter* filter, float (*frand)(),
+													 dtPolyRef* randomRef, float* randomPt) const
+{
+	dtAssert(m_nav);
+	dtAssert(m_nodePool);
+	dtAssert(m_openList);
+	// Picks a random point on a ground polygon reached from startRef by crossing only portals within radius of centerPos.
+	// Validate input: startRef must be a valid reference whose polygon passes the filter.
+	if (!startRef || !m_nav->isValidPolyRef(startRef))
+		return DT_FAILURE | DT_INVALID_PARAM;
+	
+	const dtMeshTile* startTile = 0;
+	const dtPoly* startPoly = 0;
+	m_nav->getTileAndPolyByRefUnsafe(startRef, &startTile, &startPoly);
+	if (!filter->passFilter(startRef, startTile, startPoly))
+		return DT_FAILURE | DT_INVALID_PARAM;
+	// Reset the node pool and open list, then seed the search with the start polygon at centerPos.
+	m_nodePool->clear();
+	m_openList->clear();
+	
+	dtNode* startNode = m_nodePool->getNode(startRef);
+	dtVcopy(startNode->pos, centerPos);
+	startNode->pidx = 0;
+	startNode->cost = 0;
+	startNode->total = 0;
+	startNode->id = startRef;
+	startNode->flags = DT_NODE_OPEN;
+	m_openList->push(startNode);
+	
+	dtStatus status = DT_SUCCESS; // only ever gains DT_OUT_OF_NODES in the loop below
+	
+	const float radiusSqr = dtSqr(radius);
+	float areaSum = 0.0f;
+
+	const dtMeshTile* randomTile = 0;
+	const dtPoly* randomPoly = 0;
+	dtPolyRef randomPolyRef = 0;
+
+	while (!m_openList->empty())
+	{
+		dtNode* bestNode = m_openList->pop();
+		bestNode->flags &= ~DT_NODE_OPEN;
+		bestNode->flags |= DT_NODE_CLOSED;
+		
+		// Get poly and tile.
+		// The API input has been checked already, skip checking internal data.
+		const dtPolyRef bestRef = bestNode->id;
+		const dtMeshTile* bestTile = 0;
+		const dtPoly* bestPoly = 0;
+		m_nav->getTileAndPolyByRefUnsafe(bestRef, &bestTile, &bestPoly);
+
+		// Place random locations on ground polygons only.
+		if (bestPoly->getType() == DT_POLYTYPE_GROUND)
+		{
+			// Calc area of the polygon.
+			float polyArea = 0.0f;
+			for (int j = 2; j < bestPoly->vertCount; ++j)
+			{
+				const float* va = &bestTile->verts[bestPoly->verts[0]*3];
+				const float* vb = &bestTile->verts[bestPoly->verts[j-1]*3];
+				const float* vc = &bestTile->verts[bestPoly->verts[j]*3];
+				polyArea += dtTriArea2D(va,vb,vc);
+			}
+			// Choose random polygon weighted by area, using reservoir sampling.
+			areaSum += polyArea;
+			const float u = frand();
+			if (u*areaSum <= polyArea)
+			{
+				randomTile = bestTile;
+				randomPoly = bestPoly;
+				randomPolyRef = bestRef;
+			}
+		}
+		
+		
+		// Get parent poly and tile.
+		dtPolyRef parentRef = 0;
+		const dtMeshTile* parentTile = 0;
+		const dtPoly* parentPoly = 0;
+		if (bestNode->pidx)
+			parentRef = m_nodePool->getNodeAtIdx(bestNode->pidx)->id;
+		if (parentRef)
+			m_nav->getTileAndPolyByRefUnsafe(parentRef, &parentTile, &parentPoly);
+		
+		for (unsigned int i = bestPoly->firstLink; i != DT_NULL_LINK; i = bestTile->links[i].next)
+		{
+			const dtLink* link = &bestTile->links[i];
+			dtPolyRef neighbourRef = link->ref;
+			// Skip invalid neighbours and do not follow back to parent.
+			if (!neighbourRef || neighbourRef == parentRef)
+				continue;
+			
+			// Expand to neighbour
+			const dtMeshTile* neighbourTile = 0;
+			const dtPoly* neighbourPoly = 0;
+			m_nav->getTileAndPolyByRefUnsafe(neighbourRef, &neighbourTile, &neighbourPoly);
+			
+			// Do not advance if the polygon is excluded by the filter.
+			if (!filter->passFilter(neighbourRef, neighbourTile, neighbourPoly))
+				continue;
+			
+			// Find edge and calc distance to the edge.
+			float va[3], vb[3];
+			if (!getPortalPoints(bestRef, bestPoly, bestTile, neighbourRef, neighbourPoly, neighbourTile, va, vb))
+				continue;
+			
+			// If the circle is not touching the next polygon, skip it.
+			float tseg;
+			float distSqr = dtDistancePtSegSqr2D(centerPos, va, vb, tseg);
+			if (distSqr > radiusSqr)
+				continue;
+			
+			dtNode* neighbourNode = m_nodePool->getNode(neighbourRef);
+			if (!neighbourNode)
+			{
+				status |= DT_OUT_OF_NODES;
+				continue;
+			}
+			
+			if (neighbourNode->flags & DT_NODE_CLOSED)
+				continue;
+			
+			// Cost: a node seen for the first time is positioned at the portal midpoint.
+			if (neighbourNode->flags == 0)
+				dtVlerp(neighbourNode->pos, va, vb, 0.5f);
+			
+			const float total = bestNode->total + dtVdist(bestNode->pos, neighbourNode->pos);
+			
+			// The node is already in open list and the new result is worse, skip.
+			if ((neighbourNode->flags & DT_NODE_OPEN) && total >= neighbourNode->total)
+				continue;
+			
+			neighbourNode->id = neighbourRef;
+			neighbourNode->flags = (neighbourNode->flags & ~DT_NODE_CLOSED);
+			neighbourNode->pidx = m_nodePool->getNodeIdx(bestNode);
+			neighbourNode->total = total;
+			
+			if (neighbourNode->flags & DT_NODE_OPEN)
+			{
+				m_openList->modify(neighbourNode);
+			}
+			else
+			{
+				neighbourNode->flags = DT_NODE_OPEN;
+				m_openList->push(neighbourNode);
+			}
+		}
+	}
+	
+	if (!randomPoly)
+		return DT_FAILURE;
+	
+	// Randomly pick point on polygon.
+	const float* v = &randomTile->verts[randomPoly->verts[0]*3];
+	float verts[3*DT_VERTS_PER_POLYGON];
+	float areas[DT_VERTS_PER_POLYGON];
+	dtVcopy(&verts[0*3],v);
+	for (int j = 1; j < randomPoly->vertCount; ++j)
+	{
+		v = &randomTile->verts[randomPoly->verts[j]*3];
+		dtVcopy(&verts[j*3],v);
+	}
+	
+	const float s = frand();
+	const float t = frand();
+	
+	float pt[3];
+	dtRandomPointInConvexPoly(verts, randomPoly->vertCount, areas, s, t, pt);
+	// Test the height query's own result (stat); the loop status never carries the failure bit.
+	float h = 0.0f;
+	dtStatus stat = getPolyHeight(randomPolyRef, pt, &h);
+	if (dtStatusFailed(stat))
+		return stat;
+	pt[1] = h;
+	
+	dtVcopy(randomPt, pt);
+	*randomRef = randomPolyRef;
+	// NOTE(review): a DT_OUT_OF_NODES flag collected in status is not reported to the caller - confirm this is intended.
+	return DT_SUCCESS;
+}
+
+
+//////////////////////////////////////////////////////////////////////////////////////////
+
+/// @par
+///
+/// Uses the detail polygons to find the surface height. (Most accurate.)
+///
+/// @p pos does not have to be within the bounds of the polygon or navigation mesh.
+///
+/// See closestPointOnPolyBoundary() for a limited but faster option.
+///
+dtStatus dtNavMeshQuery::closestPointOnPoly(dtPolyRef ref, const float* pos, float* closest) const
+{
+	dtAssert(m_nav);
+
+	// Resolve the reference; a failed lookup or a missing tile is reported as an invalid parameter.
+	const dtMeshTile* owner = 0;
+	const dtPoly* target = 0;
+	const dtStatus lookup = m_nav->getTileAndPolyByRef(ref, &owner, &target);
+	if (dtStatusFailed(lookup) || !owner)
+		return DT_FAILURE | DT_INVALID_PARAM;
+
+	closestPointOnPolyInTile(owner, target, pos, closest);
+	return DT_SUCCESS;
+}
+
+void dtNavMeshQuery::closestPointOnPolyInTile(const dtMeshTile* tile, const dtPoly* poly,
+											  const float* pos, float* closest) const
+{
+	// Writes to closest the point of poly nearest to pos. Off-mesh connections don't have detail polygons, so they are handled first.
+	if (poly->getType() == DT_POLYTYPE_OFFMESH_CONNECTION)
+	{
+		const float* v0 = &tile->verts[poly->verts[0]*3];
+		const float* v1 = &tile->verts[poly->verts[1]*3];
+		const float d0 = dtVdist(pos, v0);
+		const float d1 = dtVdist(pos, v1);
+		const float u = d0 / (d0+d1); // NOTE(review): 0/0 if pos, v0 and v1 coincide - confirm this cannot occur
+		dtVlerp(closest, v0, v1, u); // interpolate between the two endpoints by u
+		return;
+	}
+	// Regular polygon: look up its detail mesh by the polygon's index within the tile.
+	const unsigned int ip = (unsigned int)(poly - tile->polys);
+	const dtPolyDetail* pd = &tile->detailMeshes[ip];
+
+	// TODO: The commented out version finds 'cylinder distance' instead of 'sphere distance' to the navmesh.
+	// Test and enable.
+/*
+	// Clamp point to be inside the polygon.
+	float verts[DT_VERTS_PER_POLYGON*3];	
+	float edged[DT_VERTS_PER_POLYGON];
+	float edget[DT_VERTS_PER_POLYGON];
+	const int nv = poly->vertCount;
+	for (int i = 0; i < nv; ++i)
+		dtVcopy(&verts[i*3], &tile->verts[poly->verts[i]*3]);
+	
+	dtVcopy(closest, pos);
+	if (!dtDistancePtPolyEdgesSqr(pos, verts, nv, edged, edget))
+	{
+		// Point is outside the polygon, dtClamp to nearest edge.
+		float dmin = FLT_MAX;
+		int imin = -1;
+		for (int i = 0; i < nv; ++i)
+		{
+			if (edged[i] < dmin)
+			{
+				dmin = edged[i];
+				imin = i;
+			}
+		}
+		const float* va = &verts[imin*3];
+		const float* vb = &verts[((imin+1)%nv)*3];
+		dtVlerp(closest, va, vb, edget[imin]);
+	}
+
+	// Find height at the location.
+	for (int j = 0; j < pd->triCount; ++j)
+	{
+		const unsigned char* t = &tile->detailTris[(pd->triBase+j)*4];
+		const float* v[3];
+		for (int k = 0; k < 3; ++k)
+		{
+			if (t[k] < poly->vertCount)
+				v[k] = &tile->verts[poly->verts[t[k]]*3];
+			else
+				v[k] = &tile->detailVerts[(pd->vertBase+(t[k]-poly->vertCount))*3];
+		}
+		float h;
+		if (dtClosestHeightPointTriangle(pos, v[0], v[1], v[2], h))
+		{
+			closest[1] = h;
+			break;
+		}
+	}
+*/
+	float closestDistSqr = FLT_MAX; // NOTE(review): closest is left unwritten if pd->triCount is 0 - confirm callers tolerate that
+	for (int j = 0; j < pd->triCount; ++j)
+	{
+		const unsigned char* t = &tile->detailTris[(pd->triBase+j)*4]; // stride of 4 entries per triangle; t[0..2] are read below
+		const float* v[3];
+		for (int k = 0; k < 3; ++k)
+		{
+			if (t[k] < poly->vertCount) // indices below vertCount select polygon vertices ...
+				v[k] = &tile->verts[poly->verts[t[k]]*3];
+			else // ... the rest select detail vertices, offset by the detail mesh's vertBase
+				v[k] = &tile->detailVerts[(pd->vertBase+(t[k]-poly->vertCount))*3];
+		}
+
+		float pt[3];
+		dtClosestPtPointTriangle(pt, pos, v[0], v[1], v[2]);
+		float d = dtVdistSqr(pos, pt);
+		
+		if (d < closestDistSqr) // strict '<': the first triangle wins on equal distance
+		{
+			dtVcopy(closest, pt);
+			closestDistSqr = d;
+		}
+	}
+}
+
+/// @par
+///
+/// Much faster than closestPointOnPoly().
+///
+/// If the provided position lies within the polygon's xz-bounds (above or below), 
+/// then @p pos and @p closest will be equal.
+///
+/// The height of @p closest will be the polygon boundary.  The height detail is not used.
+/// 
+/// @p pos does not have to be within the bounds of the polygon or the navigation mesh.
+/// 
+dtStatus dtNavMeshQuery::closestPointOnPolyBoundary(dtPolyRef ref, const float* pos, float* closest) const
+{
+	dtAssert(m_nav);
+
+	const dtMeshTile* tile = 0;
+	const dtPoly* poly = 0;
+	if (dtStatusFailed(m_nav->getTileAndPolyByRef(ref, &tile, &poly)))
+		return DT_FAILURE | DT_INVALID_PARAM;
+
+	// Copy the polygon's vertices into a contiguous local array.
+	float verts[DT_VERTS_PER_POLYGON*3];
+	float edgeDist[DT_VERTS_PER_POLYGON];
+	float edgeParam[DT_VERTS_PER_POLYGON];
+	const int nv = (int)poly->vertCount;
+	for (int i = 0; i < nv; ++i)
+	{
+		dtVcopy(&verts[i*3], &tile->verts[poly->verts[i]*3]);
+	}
+
+	// The helper's return value says whether pos is inside the polygon; the two
+	// per-edge arrays it is given are consumed below when pos is outside.
+	if (dtDistancePtPolyEdgesSqr(pos, verts, nv, edgeDist, edgeParam))
+	{
+		// Inside: the query position is returned unchanged.
+		dtVcopy(closest, pos);
+		return DT_SUCCESS;
+	}
+
+	// Outside: find the edge with the smallest entry (first one wins on ties) ...
+	float bestDist = FLT_MAX;
+	int bestEdge = -1;
+	for (int i = 0; i < nv; ++i)
+	{
+		if (edgeDist[i] < bestDist)
+		{
+			bestDist = edgeDist[i];
+			bestEdge = i;
+		}
+	}
+	// ... and interpolate along it using the parameter stored for that edge.
+	const float* edgeStart = &verts[bestEdge*3];
+	const float* edgeEnd = &verts[((bestEdge+1)%nv)*3];
+	dtVlerp(closest, edgeStart, edgeEnd, edgeParam[bestEdge]);
+
+	return DT_SUCCESS;
+}
+
+/// @par
+///
+/// Will return #DT_FAILURE if the provided position is outside the xz-bounds 
+/// of the polygon.
+/// 
+dtStatus dtNavMeshQuery::getPolyHeight(dtPolyRef ref, const float* pos, float* height) const
+{
+	dtAssert(m_nav);
+
+	const dtMeshTile* tile = 0;
+	const dtPoly* poly = 0;
+	if (dtStatusFailed(m_nav->getTileAndPolyByRef(ref, &tile, &poly)))
+		return DT_FAILURE | DT_INVALID_PARAM;
+
+	if (poly->getType() == DT_POLYTYPE_OFFMESH_CONNECTION)
+	{
+		// Off-mesh connection: blend the two endpoint heights by the relative distance of pos to each.
+		const float* endA = &tile->verts[poly->verts[0]*3];
+		const float* endB = &tile->verts[poly->verts[1]*3];
+		const float distA = dtVdist(pos, endA);
+		const float distB = dtVdist(pos, endB);
+		const float blend = distA / (distA+distB);
+		if (height)
+			*height = endA[1] + (endB[1] - endA[1]) * blend;
+		return DT_SUCCESS;
+	}
+
+	// Regular polygon: walk its detail triangles and report the height of the
+	// first one for which dtClosestHeightPointTriangle() succeeds.
+	const unsigned int polyIndex = (unsigned int)(poly - tile->polys);
+	const dtPolyDetail* detail = &tile->detailMeshes[polyIndex];
+	for (int tri = 0; tri < detail->triCount; ++tri)
+	{
+		const unsigned char* idx = &tile->detailTris[(detail->triBase+tri)*4];
+		const float* corner[3];
+		for (int k = 0; k < 3; ++k)
+		{
+			// Indices below vertCount refer to polygon vertices, the rest to detail vertices.
+			corner[k] = (idx[k] < poly->vertCount)
+				? &tile->verts[poly->verts[idx[k]]*3]
+				: &tile->detailVerts[(detail->vertBase+(idx[k]-poly->vertCount))*3];
+		}
+		float h;
+		if (!dtClosestHeightPointTriangle(pos, corner[0], corner[1], corner[2], h))
+			continue;
+		if (height)
+			*height = h;
+		return DT_SUCCESS;
+	}
+	// No detail triangle accepted pos.
+	return DT_FAILURE | DT_INVALID_PARAM;
+}
+
+/// @par 
+///
+/// @note If the search box does not intersect any polygons the search will 
+/// return #DT_SUCCESS, but @p nearestRef will be zero. So if in doubt, check 
+/// @p nearestRef before using @p nearestPt.
+///
+/// @warning This function is not suitable for large area searches.  If the search
+/// extents overlap more than 128 polygons it may return an invalid result.
+///
+dtStatus dtNavMeshQuery::findNearestPoly(const float* center, const float* extents,
+										 const dtQueryFilter* filter,
+										 dtPolyRef* nearestRef, float* nearestPt) const
+{
+	dtAssert(m_nav);
+
+	// nearestRef is optional (it is null-checked again before the final store), so guard this reset too.
+	if (nearestRef) *nearestRef = 0;
+	// Collect up to 128 candidate polygons overlapping the box center -/+ extents.
+	dtPolyRef polys[128];
+	int polyCount = 0;
+	if (dtStatusFailed(queryPolygons(center, extents, filter, polys, &polyCount, 128)))
+		return DT_FAILURE | DT_INVALID_PARAM;
+	
+	// Find nearest polygon amongst the nearby polygons (first one wins on equal distance).
+	dtPolyRef nearest = 0;
+	float nearestDistanceSqr = FLT_MAX;
+	for (int i = 0; i < polyCount; ++i)
+	{
+		dtPolyRef ref = polys[i];
+		float closestPtPoly[3];
+		if (dtStatusFailed(closestPointOnPoly(ref, center, closestPtPoly))) continue; // closestPtPoly was not written; skip
+		float d = dtVdistSqr(center, closestPtPoly);
+		if (d < nearestDistanceSqr)
+		{
+			if (nearestPt)
+				dtVcopy(nearestPt, closestPtPoly);
+			nearestDistanceSqr = d;
+			nearest = ref;
+		}
+	}
+	
+	if (nearestRef)
+		*nearestRef = nearest;
+	// An empty candidate set is still a success; the reported reference is then 0 and nearestPt is untouched.
+	return DT_SUCCESS;
+}
+
+dtPolyRef dtNavMeshQuery::findNearestPolyInTile(const dtMeshTile* tile, const float* center, const float* extents,
+												const dtQueryFilter* filter, float* nearestPt) const
+{
+	dtAssert(m_nav);
+
+	// Axis-aligned search box: center -/+ extents.
+	float boxMin[3], boxMax[3];
+	dtVsub(boxMin, center, extents);
+	dtVadd(boxMax, center, extents);
+
+	// Candidate polygons of this tile reported for the box (at most 128 are collected).
+	dtPolyRef candidates[128];
+	const int candidateCount = queryPolygonsInTile(tile, boxMin, boxMax, filter, candidates, 128);
+
+	// Keep the candidate whose closest point is nearest to center (first one wins on ties).
+	dtPolyRef bestRef = 0;
+	float bestDistSqr = FLT_MAX;
+	for (int c = 0; c < candidateCount; ++c)
+	{
+		const dtPolyRef ref = candidates[c];
+		float onPoly[3];
+		closestPointOnPolyInTile(tile, &tile->polys[m_nav->decodePolyIdPoly(ref)], center, onPoly);
+
+		const float distSqr = dtVdistSqr(center, onPoly);
+		if (!(distSqr < bestDistSqr))
+			continue;
+
+		bestDistSqr = distSqr;
+		bestRef = ref;
+		if (nearestPt)
+			dtVcopy(nearestPt, onPoly);
+	}
+
+	return bestRef;
+}
+
+int dtNavMeshQuery::queryPolygonsInTile(const dtMeshTile* tile, const float* qmin, const float* qmax,
+										const dtQueryFilter* filter,
+										dtPolyRef* polys, const int maxPolys) const
+{
+	dtAssert(m_nav);
+	// Stores refs of filter-passing polygons of tile whose bounds overlap [qmin,qmax]; at most maxPolys are stored and the stored count is returned.
+	if (tile->bvTree)
+	{
+		const dtBVNode* node = &tile->bvTree[0];
+		const dtBVNode* end = &tile->bvTree[tile->header->bvNodeCount];
+		const float* tbmin = tile->header->bmin;
+		const float* tbmax = tile->header->bmax;
+		const float qfac = tile->header->bvQuantFactor;
+		
+		// Calculate quantized box
+		unsigned short bmin[3], bmax[3];
+		// Clamp the query box to the tile bounds (header bmin/bmax) and make it relative to the tile's bmin.
+		float minx = dtClamp(qmin[0], tbmin[0], tbmax[0]) - tbmin[0];
+		float miny = dtClamp(qmin[1], tbmin[1], tbmax[1]) - tbmin[1];
+		float minz = dtClamp(qmin[2], tbmin[2], tbmax[2]) - tbmin[2];
+		float maxx = dtClamp(qmax[0], tbmin[0], tbmax[0]) - tbmin[0];
+		float maxy = dtClamp(qmax[1], tbmin[1], tbmax[1]) - tbmin[1];
+		float maxz = dtClamp(qmax[2], tbmin[2], tbmax[2]) - tbmin[2];
+		// Quantize: scale by qfac; the minimum has its lowest bit cleared, the maximum gets +1 and its lowest bit set.
+		bmin[0] = (unsigned short)(qfac * minx) & 0xfffe;
+		bmin[1] = (unsigned short)(qfac * miny) & 0xfffe;
+		bmin[2] = (unsigned short)(qfac * minz) & 0xfffe;
+		bmax[0] = (unsigned short)(qfac * maxx + 1) | 1;
+		bmax[1] = (unsigned short)(qfac * maxy + 1) | 1;
+		bmax[2] = (unsigned short)(qfac * maxz + 1) | 1;
+		
+		// Traverse tree: the nodes are walked as a flat array from first to end.
+		const dtPolyRef base = m_nav->getPolyRefBase(tile);
+		int n = 0;
+		while (node < end)
+		{
+			const bool overlap = dtOverlapQuantBounds(bmin, bmax, node->bmin, node->bmax);
+			const bool isLeafNode = node->i >= 0; // a non-negative i is used as a polygon index below
+			
+			if (isLeafNode && overlap)
+			{
+				dtPolyRef ref = base | (dtPolyRef)node->i;
+				// NOTE(review): unlike the linear path below, no off-mesh type check here - presumably the tree holds only ground polygons; confirm.
+				if (filter->passFilter(ref, tile, &tile->polys[node->i]))
+				{
+					if (n < maxPolys) // matches beyond maxPolys are silently dropped
+						polys[n++] = ref;
+				}
+			}
+			
+			if (overlap || isLeafNode)
+				node++; // step to the next node in the array
+			else
+			{
+				// Non-overlapping internal node: its negated i is the number of nodes to jump ahead.
+				const int escapeIndex = -node->i;
+				node += escapeIndex;
+			}
+		}
+		
+		return n;
+	}
+	else
+	{
+		// No BV-tree: test every polygon of the tile against the unquantized query box.
+		float bmin[3], bmax[3];
+		int n = 0;
+		const dtPolyRef base = m_nav->getPolyRefBase(tile);
+		for (int i = 0; i < tile->header->polyCount; ++i)
+		{
+			const dtPoly* p = &tile->polys[i];
+			// Do not return off-mesh connection polygons.
+			if (p->getType() == DT_POLYTYPE_OFFMESH_CONNECTION)
+				continue;
+			// Must pass filter
+			const dtPolyRef ref = base | (dtPolyRef)i;
+			if (!filter->passFilter(ref, tile, p))
+				continue;
+			// Calc polygon bounds: start from vertex 0 and grow the box with the remaining vertices.
+			const float* v = &tile->verts[p->verts[0]*3];
+			dtVcopy(bmin, v);
+			dtVcopy(bmax, v);
+			for (int j = 1; j < p->vertCount; ++j)
+			{
+				v = &tile->verts[p->verts[j]*3];
+				dtVmin(bmin, v);
+				dtVmax(bmax, v);
+			}
+			if (dtOverlapBounds(qmin,qmax, bmin,bmax))
+			{
+				if (n < maxPolys) // matches beyond maxPolys are silently dropped
+					polys[n++] = ref;
+			}
+		}
+		return n;
+	}
+}
+
+/// @par 
+///
+/// If no polygons are found, the function will return #DT_SUCCESS with a
+/// @p polyCount of zero.
+///
+/// If @p polys is too small to hold the entire result set, then the array will 
+/// be filled to capacity. The method of choosing which polygons from the 
+/// full set are included in the partial result set is undefined.
+///
+dtStatus dtNavMeshQuery::queryPolygons(const float* center, const float* extents,
+									   const dtQueryFilter* filter,
+									   dtPolyRef* polys, int* polyCount, const int maxPolys) const
+{
+	dtAssert(m_nav);
+
+	// Query box: center -/+ extents.
+	float boxMin[3], boxMax[3];
+	dtVsub(boxMin, center, extents);
+	dtVadd(boxMax, center, extents);
+
+	// Tile locations of the two box corners; every location in between is visited.
+	int txMin, tyMin, txMax, tyMax;
+	m_nav->calcTileLoc(boxMin, &txMin, &tyMin);
+	m_nav->calcTileLoc(boxMax, &txMax, &tyMax);
+
+	static const int MAX_NEIS = 32;
+	const dtMeshTile* cellTiles[MAX_NEIS];
+
+	int found = 0;
+	for (int ty = tyMin; ty <= tyMax; ++ty)
+	{
+		for (int tx = txMin; tx <= txMax; ++tx)
+		{
+			const int tileCount = m_nav->getTilesAt(tx,ty,cellTiles,MAX_NEIS);
+			for (int k = 0; k < tileCount; ++k)
+			{
+				found += queryPolygonsInTile(cellTiles[k], boxMin, boxMax, filter, polys+found, maxPolys-found);
+				if (found < maxPolys)
+					continue;
+				// Output array is full: stop early and flag the result as possibly truncated.
+				*polyCount = found;
+				return DT_SUCCESS | DT_BUFFER_TOO_SMALL;
+			}
+		}
+	}
+	*polyCount = found;
+	return DT_SUCCESS;
+}
+
+/// @par
+///
+/// If the end polygon cannot be reached through the navigation graph,
+/// the last polygon in the path will be the nearest the end polygon.
+///
+/// If the path array is too small to hold the full result, it will be filled as 
+/// far as possible from the start polygon toward the end polygon.
+///
+/// The start and end positions are used to calculate traversal costs. 
+/// (The y-values impact the result.)
+///
+dtStatus dtNavMeshQuery::findPath(dtPolyRef startRef, dtPolyRef endRef,
+								  const float* startPos, const float* endPos,
+								  const dtQueryFilter* filter,
+								  dtPolyRef* path, int* pathCount, const int maxPath) const
+{
+	dtAssert(m_nav);
+	dtAssert(m_nodePool);
+	dtAssert(m_openList);
+	
+	*pathCount = 0;
+	
+	if (!startRef || !endRef)
+		return DT_FAILURE | DT_INVALID_PARAM;
+	
+	if (!maxPath)
+		return DT_FAILURE | DT_INVALID_PARAM;
+	
+	// Validate input
+	if (!m_nav->isValidPolyRef(startRef) || !m_nav->isValidPolyRef(endRef))
+		return DT_FAILURE | DT_INVALID_PARAM;
+	
+	if (startRef == endRef)
+	{
+		path[0] = startRef;
+		*pathCount = 1;
+		return DT_SUCCESS;
+	}
+	
+	m_nodePool->clear();
+	m_openList->clear();
+	
+	dtNode* startNode = m_nodePool->getNode(startRef);
+	dtVcopy(startNode->pos, startPos);
+	startNode->pidx = 0;
+	startNode->cost = 0;
+	startNode->total = dtVdist(startPos, endPos) * H_SCALE;
+	startNode->id = startRef;
+	startNode->flags = DT_NODE_OPEN;
+	m_openList->push(startNode);
+	
+	dtNode* lastBestNode = startNode;
+	float lastBestNodeCost = startNode->total;
+	
+	dtStatus status = DT_SUCCESS;
+	
+	while (!m_openList->empty())
+	{
+		// Remove node from open list and put it in closed list.
+		dtNode* bestNode = m_openList->pop();
+		bestNode->flags &= ~DT_NODE_OPEN;
+		bestNode->flags |= DT_NODE_CLOSED;
+		
+		// Reached the goal, stop searching.
+		if (bestNode->id == endRef)
+		{
+			lastBestNode = bestNode;
+			break;
+		}
+		
+		// Get current poly and tile.
+		// The API input has been cheked already, skip checking internal data.
+		const dtPolyRef bestRef = bestNode->id;
+		const dtMeshTile* bestTile = 0;
+		const dtPoly* bestPoly = 0;
+		m_nav->getTileAndPolyByRefUnsafe(bestRef, &bestTile, &bestPoly);
+		
+		// Get parent poly and tile.
+		dtPolyRef parentRef = 0;
+		const dtMeshTile* parentTile = 0;
+		const dtPoly* parentPoly = 0;
+		if (bestNode->pidx)
+			parentRef = m_nodePool->getNodeAtIdx(bestNode->pidx)->id;
+		if (parentRef)
+			m_nav->getTileAndPolyByRefUnsafe(parentRef, &parentTile, &parentPoly);
+		
+		for (unsigned int i = bestPoly->firstLink; i != DT_NULL_LINK; i = bestTile->links[i].next)
+		{
+			dtPolyRef neighbourRef = bestTile->links[i].ref;
+			
+			// Skip invalid ids and do not expand back to where we came from.
+			if (!neighbourRef || neighbourRef == parentRef)
+				continue;
+			
+			// Get neighbour poly and tile.
+			// The API input has been cheked already, skip checking internal data.
+			const dtMeshTile* neighbourTile = 0;
+			const dtPoly* neighbourPoly = 0;
+			m_nav->getTileAndPolyByRefUnsafe(neighbourRef, &neighbourTile, &neighbourPoly);			
+			
+			if (!filter->passFilter(neighbourRef, neighbourTile, neighbourPoly))
+				continue;
+
+			dtNode* neighbourNode = m_nodePool->getNode(neighbourRef);
+			if (!neighbourNode)
+			{
+				status |= DT_OUT_OF_NODES;
+				continue;
+			}
+			
+			// If the node is visited the first time, calculate node position.
+			if (neighbourNode->flags == 0)
+			{
+				getEdgeMidPoint(bestRef, bestPoly, bestTile,
+								neighbourRef, neighbourPoly, neighbourTile,
+								neighbourNode->pos);
+			}
+
+			// Calculate cost and heuristic.
+			float cost = 0;
+			float heuristic = 0;
+			
+			// Special case for last node.
+			if (neighbourRef == endRef)
+			{
+				// Cost
+				const float curCost = filter->getCost(bestNode->pos, neighbourNode->pos,
+													  parentRef, parentTile, parentPoly,
+													  bestRef, bestTile, bestPoly,
+													  neighbourRef, neighbourTile, neighbourPoly);
+				const float endCost = filter->getCost(neighbourNode->pos, endPos,
+													  bestRef, bestTile, bestPoly,
+													  neighbourRef, neighbourTile, neighbourPoly,
+													  0, 0, 0);
+				
+				cost = bestNode->cost + curCost + endCost;
+				heuristic = 0;
+			}
+			else
+			{
+				// Cost
+				const float curCost = filter->getCost(bestNode->pos, neighbourNode->pos,
+													  parentRef, parentTile, parentPoly,
+													  bestRef, bestTile, bestPoly,
+													  neighbourRef, neighbourTile, neighbourPoly);
+				cost = bestNode->cost + curCost;
+				heuristic = dtVdist(neighbourNode->pos, endPos)*H_SCALE;
+			}
+
+			const float total = cost + heuristic;
+			
+			// The node is already in open list and the new result is worse, skip.
+			if ((neighbourNode->flags & DT_NODE_OPEN) && total >= neighbourNode->total)
+				continue;
+			// The node is already visited and process, and the new result is worse, skip.
+			if ((neighbourNode->flags & DT_NODE_CLOSED) && total >= neighbourNode->total)
+				continue;
+			
+			// Add or update the node.
+			neighbourNode->pidx = m_nodePool->getNodeIdx(bestNode);
+			neighbourNode->id = neighbourRef;
+			neighbourNode->flags = (neighbourNode->flags & ~DT_NODE_CLOSED);
+			neighbourNode->cost = cost;
+			neighbourNode->total = total;
+			
+			if (neighbourNode->flags & DT_NODE_OPEN)
+			{
+				// Already in open, update node location.
+				m_openList->modify(neighbourNode);
+			}
+			else
+			{
+				// Put the node in open list.
+				neighbourNode->flags |= DT_NODE_OPEN;
+				m_openList->push(neighbourNode);
+			}
+			
+			// Update nearest node to target so far.
+			if (heuristic < lastBestNodeCost)
+			{
+				lastBestNodeCost = heuristic;
+				lastBestNode = neighbourNode;
+			}
+		}
+	}
+	
+	if (lastBestNode->id != endRef)
+		status |= DT_PARTIAL_RESULT;
+	
+	// Reverse the path.
+	dtNode* prev = 0;
+	dtNode* node = lastBestNode;
+	do
+	{
+		dtNode* next = m_nodePool->getNodeAtIdx(node->pidx);
+		node->pidx = m_nodePool->getNodeIdx(prev);
+		prev = node;
+		node = next;
+	}
+	while (node);
+	
+	// Store path
+	node = prev;
+	int n = 0;
+	do
+	{
+		path[n++] = node->id;
+		if (n >= maxPath)
+		{
+			status |= DT_BUFFER_TOO_SMALL;
+			break;
+		}
+		node = m_nodePool->getNodeAtIdx(node->pidx);
+	}
+	while (node);
+	
+	*pathCount = n;
+	
+	return status;
+}
+
+/// @par
+///
+/// Prepares a sliced (incremental) path query from @p startRef to @p endRef.
+/// The search is advanced with updateSlicedFindPath() and the result is
+/// collected with finalizeSlicedFindPath() or finalizeSlicedFindPathPartial().
+///
+/// @warning Calling any non-slice methods before calling finalizeSlicedFindPath() 
+/// or finalizeSlicedFindPathPartial() may result in corrupted data!
+///
+/// The @p filter pointer is stored and used for the duration of the sliced
+/// path query.
+///
+/// @return DT_IN_PROGRESS when a search was set up, DT_SUCCESS when start and
+/// end are the same polygon, or DT_FAILURE | DT_INVALID_PARAM when either
+/// reference is null or not valid in the navigation mesh.
+///
+dtStatus dtNavMeshQuery::initSlicedFindPath(dtPolyRef startRef, dtPolyRef endRef,
+											const float* startPos, const float* endPos,
+											const dtQueryFilter* filter)
+{
+	dtAssert(m_nav);
+	dtAssert(m_nodePool);
+	dtAssert(m_openList);
+
+	// Begin with a zeroed query record; it stays marked as failed until the
+	// input has been accepted.
+	memset(&m_query, 0, sizeof(dtQueryData));
+	m_query.filter = filter;
+	m_query.startRef = startRef;
+	m_query.endRef = endRef;
+	dtVcopy(m_query.startPos, startPos);
+	dtVcopy(m_query.endPos, endPos);
+	m_query.status = DT_FAILURE;
+
+	// Reject null references and references the navmesh does not recognise.
+	const bool refsUsable = startRef && endRef &&
+							m_nav->isValidPolyRef(startRef) &&
+							m_nav->isValidPolyRef(endRef);
+	if (!refsUsable)
+		return DT_FAILURE | DT_INVALID_PARAM;
+
+	// Start and end on the same polygon: nothing to search.
+	if (startRef == endRef)
+	{
+		m_query.status = DT_SUCCESS;
+		return m_query.status;
+	}
+
+	m_nodePool->clear();
+	m_openList->clear();
+
+	// Seed the open list with the start polygon. Its total is the scaled
+	// straight-line distance to the goal, as no cost has accumulated yet.
+	dtNode* seed = m_nodePool->getNode(startRef);
+	dtVcopy(seed->pos, startPos);
+	seed->id = startRef;
+	seed->pidx = 0;
+	seed->cost = 0;
+	seed->total = dtVdist(startPos, endPos) * H_SCALE;
+	seed->flags = DT_NODE_OPEN;
+	m_openList->push(seed);
+
+	// The start node is the best candidate known so far.
+	m_query.lastBestNode = seed;
+	m_query.lastBestNodeCost = seed->total;
+	m_query.status = DT_IN_PROGRESS;
+
+	return m_query.status;
+}
+	
+/// Advances the sliced path query started by initSlicedFindPath().
+///
+/// At most @p maxIter nodes are taken off the open list per call. If
+/// @p doneIters is non-null it always receives the number of nodes processed
+/// by this call; that is zero when the query is not in progress or when the
+/// start/end references are no longer valid.
+///
+/// @return The updated query status: in progress, success (goal reached or
+/// open list exhausted), or failure when a polygon needed by the search can
+/// no longer be resolved.
+dtStatus dtNavMeshQuery::updateSlicedFindPath(const int maxIter, int* doneIters)
+{
+	// Give the out-parameter a defined value for every early exit below.
+	if (doneIters)
+		*doneIters = 0;
+
+	if (!dtStatusInProgress(m_query.status))
+		return m_query.status;
+
+	// Make sure the request is still valid.
+	if (!m_nav->isValidPolyRef(m_query.startRef) || !m_nav->isValidPolyRef(m_query.endRef))
+	{
+		m_query.status = DT_FAILURE;
+		return DT_FAILURE;
+	}
+
+	int iter = 0;
+	while (iter < maxIter && !m_openList->empty())
+	{
+		iter++;
+
+		// Remove node from open list and put it in closed list.
+		dtNode* bestNode = m_openList->pop();
+		bestNode->flags &= ~DT_NODE_OPEN;
+		bestNode->flags |= DT_NODE_CLOSED;
+
+		// Reached the goal, stop searching.
+		if (bestNode->id == m_query.endRef)
+		{
+			m_query.lastBestNode = bestNode;
+			// Keep the detail bits (e.g. DT_OUT_OF_NODES) gathered so far.
+			const dtStatus details = m_query.status & DT_STATUS_DETAIL_MASK;
+			m_query.status = DT_SUCCESS | details;
+			if (doneIters)
+				*doneIters = iter;
+			return m_query.status;
+		}
+
+		// Get current poly and tile.
+		// The reference is re-validated here, using the checked lookup.
+		const dtPolyRef bestRef = bestNode->id;
+		const dtMeshTile* bestTile = 0;
+		const dtPoly* bestPoly = 0;
+		if (dtStatusFailed(m_nav->getTileAndPolyByRef(bestRef, &bestTile, &bestPoly)))
+		{
+			// The polygon has disappeared during the sliced query, fail.
+			m_query.status = DT_FAILURE;
+			if (doneIters)
+				*doneIters = iter;
+			return m_query.status;
+		}
+
+		// Get parent poly and tile.
+		dtPolyRef parentRef = 0;
+		const dtMeshTile* parentTile = 0;
+		const dtPoly* parentPoly = 0;
+		if (bestNode->pidx)
+			parentRef = m_nodePool->getNodeAtIdx(bestNode->pidx)->id;
+		if (parentRef)
+		{
+			if (dtStatusFailed(m_nav->getTileAndPolyByRef(parentRef, &parentTile, &parentPoly)))
+			{
+				// The polygon has disappeared during the sliced query, fail.
+				m_query.status = DT_FAILURE;
+				if (doneIters)
+					*doneIters = iter;
+				return m_query.status;
+			}
+		}
+
+		for (unsigned int i = bestPoly->firstLink; i != DT_NULL_LINK; i = bestTile->links[i].next)
+		{
+			dtPolyRef neighbourRef = bestTile->links[i].ref;
+
+			// Skip invalid ids and do not expand back to where we came from.
+			if (!neighbourRef || neighbourRef == parentRef)
+				continue;
+
+			// Get neighbour poly and tile.
+			// Link targets are resolved with the unchecked lookup.
+			const dtMeshTile* neighbourTile = 0;
+			const dtPoly* neighbourPoly = 0;
+			m_nav->getTileAndPolyByRefUnsafe(neighbourRef, &neighbourTile, &neighbourPoly);
+
+			if (!m_query.filter->passFilter(neighbourRef, neighbourTile, neighbourPoly))
+				continue;
+
+			dtNode* neighbourNode = m_nodePool->getNode(neighbourRef);
+			if (!neighbourNode)
+			{
+				// No node available: record the condition and keep searching.
+				m_query.status |= DT_OUT_OF_NODES;
+				continue;
+			}
+
+			// If the node is visited the first time, calculate node position.
+			if (neighbourNode->flags == 0)
+			{
+				getEdgeMidPoint(bestRef, bestPoly, bestTile,
+								neighbourRef, neighbourPoly, neighbourTile,
+								neighbourNode->pos);
+			}
+
+			// Cost of stepping from the current node to the neighbour.
+			const float curCost = m_query.filter->getCost(bestNode->pos, neighbourNode->pos,
+														  parentRef, parentTile, parentPoly,
+														  bestRef, bestTile, bestPoly,
+														  neighbourRef, neighbourTile, neighbourPoly);
+			float cost = 0;
+			float heuristic = 0;
+
+			if (neighbourRef == m_query.endRef)
+			{
+				// Special case for last node: add the cost of the final leg
+				// to the end position; no heuristic remains.
+				const float endCost = m_query.filter->getCost(neighbourNode->pos, m_query.endPos,
+															  bestRef, bestTile, bestPoly,
+															  neighbourRef, neighbourTile, neighbourPoly,
+															  0, 0, 0);
+
+				cost = bestNode->cost + curCost + endCost;
+				heuristic = 0;
+			}
+			else
+			{
+				cost = bestNode->cost + curCost;
+				heuristic = dtVdist(neighbourNode->pos, m_query.endPos)*H_SCALE;
+			}
+
+			const float total = cost + heuristic;
+
+			// The node is already in open list and the new result is worse, skip.
+			if ((neighbourNode->flags & DT_NODE_OPEN) && total >= neighbourNode->total)
+				continue;
+			// The node is already visited and processed, and the new result is worse, skip.
+			if ((neighbourNode->flags & DT_NODE_CLOSED) && total >= neighbourNode->total)
+				continue;
+
+			// Add or update the node.
+			neighbourNode->pidx = m_nodePool->getNodeIdx(bestNode);
+			neighbourNode->id = neighbourRef;
+			neighbourNode->flags = (neighbourNode->flags & ~DT_NODE_CLOSED);
+			neighbourNode->cost = cost;
+			neighbourNode->total = total;
+
+			if (neighbourNode->flags & DT_NODE_OPEN)
+			{
+				// Already in open, update node location.
+				m_openList->modify(neighbourNode);
+			}
+			else
+			{
+				// Put the node in open list.
+				neighbourNode->flags |= DT_NODE_OPEN;
+				m_openList->push(neighbourNode);
+			}
+
+			// Update nearest node to target so far.
+			if (heuristic < m_query.lastBestNodeCost)
+			{
+				m_query.lastBestNodeCost = heuristic;
+				m_query.lastBestNode = neighbourNode;
+			}
+		}
+	}
+
+	// Exhausted all nodes, but could not find path.
+	// The query still ends as a success; finalizeSlicedFindPath() flags the
+	// result as partial when the best node is not the end polygon.
+	if (m_openList->empty())
+	{
+		const dtStatus details = m_query.status & DT_STATUS_DETAIL_MASK;
+		m_query.status = DT_SUCCESS | details;
+	}
+
+	if (doneIters)
+		*doneIters = iter;
+
+	return m_query.status;
+}
+
+/// Finishes a sliced path query and writes the resulting polygon path.
+///
+/// @param[out] path       Receives the polygon references from start to the
+///                        best node reached. Must hold at least one entry.
+/// @param[out] pathCount  Receives the number of references written.
+/// @param[in]  maxPath    Capacity of @p path, in references.
+///
+/// @return DT_SUCCESS plus any detail bits (DT_PARTIAL_RESULT when the end
+/// polygon was not reached, DT_BUFFER_TOO_SMALL when the write loop filled
+/// @p path). DT_FAILURE when the query had failed. DT_FAILURE | DT_INVALID_PARAM
+/// when @p path is null or @p maxPath is not positive; in that case the
+/// query state is left untouched.
+dtStatus dtNavMeshQuery::finalizeSlicedFindPath(dtPolyRef* path, int* pathCount, const int maxPath)
+{
+	*pathCount = 0;
+
+	// Both branches below write path[0] before looking at maxPath, so an
+	// empty or missing buffer has to be rejected up front.
+	if (!path || maxPath <= 0)
+		return DT_FAILURE | DT_INVALID_PARAM;
+
+	if (dtStatusFailed(m_query.status))
+	{
+		// Reset query.
+		memset(&m_query, 0, sizeof(dtQueryData));
+		return DT_FAILURE;
+	}
+
+	int n = 0;
+
+	if (m_query.startRef == m_query.endRef)
+	{
+		// Special case: the search starts and ends at same poly.
+		path[n++] = m_query.startRef;
+	}
+	else
+	{
+		// Reverse the path.
+		dtAssert(m_query.lastBestNode);
+
+		if (m_query.lastBestNode->id != m_query.endRef)
+			m_query.status |= DT_PARTIAL_RESULT;
+
+		// Parent indices lead from the best node back to the start; flip them
+		// in place so the chain can be walked from start to end.
+		dtNode* prev = 0;
+		dtNode* node = m_query.lastBestNode;
+		do
+		{
+			dtNode* next = m_nodePool->getNodeAtIdx(node->pidx);
+			node->pidx = m_nodePool->getNodeIdx(prev);
+			prev = node;
+			node = next;
+		}
+		while (node);
+
+		// Store path
+		node = prev;
+		do
+		{
+			path[n++] = node->id;
+			if (n >= maxPath)
+			{
+				m_query.status |= DT_BUFFER_TOO_SMALL;
+				break;
+			}
+			node = m_nodePool->getNodeAtIdx(node->pidx);
+		}
+		while (node);
+	}
+
+	// Capture the detail bits before the query record is wiped.
+	const dtStatus details = m_query.status & DT_STATUS_DETAIL_MASK;
+
+	// Reset query.
+	memset(&m_query, 0, sizeof(dtQueryData));
+
+	*pathCount = n;
+
+	return DT_SUCCESS | details;
+}
+
+/// Finishes a sliced path query, preferring a path that ends at the furthest
+/// polygon of an existing path that the search has visited.
+///
+/// @param[in]  existing      Polygon references of an existing path.
+/// @param[in]  existingSize  Number of references in @p existing.
+/// @param[out] path          Receives the resulting polygon references. Must
+///                           hold at least one entry.
+/// @param[out] pathCount     Receives the number of references written.
+/// @param[in]  maxPath       Capacity of @p path, in references.
+///
+/// @return DT_SUCCESS plus detail bits (DT_PARTIAL_RESULT when no polygon of
+/// @p existing was visited and the best node is used instead,
+/// DT_BUFFER_TOO_SMALL when the write loop filled @p path). DT_FAILURE when
+/// the query had failed. DT_FAILURE | DT_INVALID_PARAM when @p existing is
+/// null or empty, @p path is null, or @p maxPath is not positive; in that
+/// case the query state is left untouched.
+dtStatus dtNavMeshQuery::finalizeSlicedFindPathPartial(const dtPolyRef* existing, const int existingSize,
+													   dtPolyRef* path, int* pathCount, const int maxPath)
+{
+	*pathCount = 0;
+
+	// Nothing to match against, or nowhere to write the result. The output
+	// buffer check is needed because path[0] is written before maxPath is
+	// consulted further down.
+	if (!existing || existingSize <= 0 || !path || maxPath <= 0)
+	{
+		return DT_FAILURE | DT_INVALID_PARAM;
+	}
+
+	if (dtStatusFailed(m_query.status))
+	{
+		// Reset query.
+		memset(&m_query, 0, sizeof(dtQueryData));
+		return DT_FAILURE;
+	}
+
+	int n = 0;
+
+	if (m_query.startRef == m_query.endRef)
+	{
+		// Special case: the search starts and ends at same poly.
+		path[n++] = m_query.startRef;
+	}
+	else
+	{
+		// Find furthest existing node that was visited.
+		dtNode* prev = 0;
+		dtNode* node = 0;
+		for (int i = existingSize-1; i >= 0; --i)
+		{
+			node = m_nodePool->findNode(existing[i]);
+			if (node)
+				break;
+		}
+
+		if (!node)
+		{
+			// None of the existing polygons was visited: fall back to the
+			// best node found by the search and flag the result as partial.
+			m_query.status |= DT_PARTIAL_RESULT;
+			dtAssert(m_query.lastBestNode);
+			node = m_query.lastBestNode;
+		}
+
+		// Reverse the path.
+		do
+		{
+			dtNode* next = m_nodePool->getNodeAtIdx(node->pidx);
+			node->pidx = m_nodePool->getNodeIdx(prev);
+			prev = node;
+			node = next;
+		}
+		while (node);
+
+		// Store path
+		node = prev;
+		do
+		{
+			path[n++] = node->id;
+			if (n >= maxPath)
+			{
+				m_query.status |= DT_BUFFER_TOO_SMALL;
+				break;
+			}
+			node = m_nodePool->getNodeAtIdx(node->pidx);
+		}
+		while (node);
+	}
+
+	// Capture the detail bits before the query record is wiped.
+	const dtStatus details = m_query.status & DT_STATUS_DETAIL_MASK;
+
+	// Reset query.
+	memset(&m_query, 0, sizeof(dtQueryData));
+
+	*pathCount = n;
+
+	return DT_SUCCESS | details;
+}
+
+/// @par
+/// 
+/// This method performs what is often called 'string pulling'.
+///
+/// The start position is clamped to the first polygon in the path, and the 
+/// end position is clamped to the last. So the start and end positions should 
+/// normally be within or very near the first and last polygons respectively.
+///
+/// The returned polygon references represent the reference id of the polygon 
+/// that is entered at the associated path position. The reference id associated 
+/// with the end point will always be zero.  This allows, for example, matching 
+/// off-mesh link points to their representative polygons.
+///
+/// If the provided result buffers are too small for the entire result set, 
+/// they will be filled as far as possible from the start toward the end 
+/// position.
+///
+/// DT_FAILURE | DT_INVALID_PARAM is returned when @p path is null or empty,
+/// its first reference is null, @p straightPath is null, or
+/// @p maxStraightPath is not positive. @p straightPathFlags and
+/// @p straightPathRefs are optional and may be null.
+///
+dtStatus dtNavMeshQuery::findStraightPath(const float* startPos, const float* endPos,
+										  const dtPolyRef* path, const int pathSize,
+										  float* straightPath, unsigned char* straightPathFlags, dtPolyRef* straightPathRefs,
+										  int* straightPathCount, const int maxStraightPath) const
+{
+	dtAssert(m_nav);
+
+	*straightPathCount = 0;
+
+	// The start point is written unconditionally below, so the output buffer
+	// must exist and hold at least one vertex.
+	if (!straightPath || maxStraightPath <= 0)
+		return DT_FAILURE | DT_INVALID_PARAM;
+
+	// path[0] and path[pathSize-1] are both read below.
+	if (!path || pathSize <= 0 || !path[0])
+		return DT_FAILURE | DT_INVALID_PARAM;
+
+	int n = 0;
+
+	// TODO: Should this be callers responsibility?
+	float closestStartPos[3];
+	if (dtStatusFailed(closestPointOnPolyBoundary(path[0], startPos, closestStartPos)))
+		return DT_FAILURE | DT_INVALID_PARAM;
+
+	// Add start point.
+	dtVcopy(&straightPath[n*3], closestStartPos);
+	if (straightPathFlags)
+		straightPathFlags[n] = DT_STRAIGHTPATH_START;
+	if (straightPathRefs)
+		straightPathRefs[n] = path[0];
+	n++;
+	if (n >= maxStraightPath)
+	{
+		*straightPathCount = n;
+		return DT_SUCCESS | DT_BUFFER_TOO_SMALL;
+	}
+
+	float closestEndPos[3];
+	if (dtStatusFailed(closestPointOnPolyBoundary(path[pathSize-1], endPos, closestEndPos)))
+		return DT_FAILURE | DT_INVALID_PARAM;
+
+	if (pathSize > 1)
+	{
+		// Funnel state: the apex plus the current left and right boundary
+		// points, with the path index each of them was taken from.
+		float portalApex[3], portalLeft[3], portalRight[3];
+		dtVcopy(portalApex, closestStartPos);
+		dtVcopy(portalLeft, portalApex);
+		dtVcopy(portalRight, portalApex);
+		int apexIndex = 0;
+		int leftIndex = 0;
+		int rightIndex = 0;
+
+		unsigned char leftPolyType = 0;
+		unsigned char rightPolyType = 0;
+
+		dtPolyRef leftPolyRef = path[0];
+		dtPolyRef rightPolyRef = path[0];
+
+		for (int i = 0; i < pathSize; ++i)
+		{
+			float left[3], right[3];
+			unsigned char fromType, toType;
+
+			if (i+1 < pathSize)
+			{
+				// Next portal.
+				if (dtStatusFailed(getPortalPoints(path[i], path[i+1], left, right, fromType, toType)))
+				{
+					// Failed to get portal points, in practice this means that path[i+1] is invalid polygon.
+					// Clamp the end point to path[i], and return the path so far.
+
+					if (dtStatusFailed(closestPointOnPolyBoundary(path[i], endPos, closestEndPos)))
+					{
+						// This should only happen when the first polygon is invalid.
+						return DT_FAILURE | DT_INVALID_PARAM;
+					}
+
+					// n < maxStraightPath holds here: every append above and
+					// below returns as soon as the buffer is full.
+					dtVcopy(&straightPath[n*3], closestEndPos);
+					if (straightPathFlags)
+						straightPathFlags[n] = 0;
+					if (straightPathRefs)
+						straightPathRefs[n] = path[i];
+					n++;
+
+					*straightPathCount = n;
+
+					return DT_SUCCESS | DT_PARTIAL_RESULT | ((n >= maxStraightPath) ? DT_BUFFER_TOO_SMALL : 0);
+				}
+
+				// If starting really close the portal, advance.
+				if (i == 0)
+				{
+					float t;
+					if (dtDistancePtSegSqr2D(portalApex, left, right, t) < dtSqr(0.001f))
+						continue;
+				}
+			}
+			else
+			{
+				// End of the path.
+				dtVcopy(left, closestEndPos);
+				dtVcopy(right, closestEndPos);
+
+				fromType = toType = DT_POLYTYPE_GROUND;
+			}
+
+			// Right vertex.
+			if (dtTriArea2D(portalApex, portalRight, right) <= 0.0f)
+			{
+				if (dtVequal(portalApex, portalRight) || dtTriArea2D(portalApex, portalLeft, right) > 0.0f)
+				{
+					// Tighten the right side of the funnel.
+					dtVcopy(portalRight, right);
+					rightPolyRef = (i+1 < pathSize) ? path[i+1] : 0;
+					rightPolyType = toType;
+					rightIndex = i;
+				}
+				else
+				{
+					// Right crossed over left: the left point becomes a
+					// corner of the straight path and the new apex.
+					dtVcopy(portalApex, portalLeft);
+					apexIndex = leftIndex;
+
+					unsigned char flags = 0;
+					if (!leftPolyRef)
+						flags = DT_STRAIGHTPATH_END;
+					else if (leftPolyType == DT_POLYTYPE_OFFMESH_CONNECTION)
+						flags = DT_STRAIGHTPATH_OFFMESH_CONNECTION;
+					dtPolyRef ref = leftPolyRef;
+
+					if (!dtVequal(&straightPath[(n-1)*3], portalApex))
+					{
+						// Append new vertex.
+						dtVcopy(&straightPath[n*3], portalApex);
+						if (straightPathFlags)
+							straightPathFlags[n] = flags;
+						if (straightPathRefs)
+							straightPathRefs[n] = ref;
+						n++;
+						// If reached end of path or there is no space to append more vertices, return.
+						if (flags == DT_STRAIGHTPATH_END || n >= maxStraightPath)
+						{
+							*straightPathCount = n;
+							return DT_SUCCESS | ((n >= maxStraightPath) ? DT_BUFFER_TOO_SMALL : 0);
+						}
+					}
+					else
+					{
+						// The vertices are equal, update flags and poly.
+						if (straightPathFlags)
+							straightPathFlags[n-1] = flags;
+						if (straightPathRefs)
+							straightPathRefs[n-1] = ref;
+					}
+
+					dtVcopy(portalLeft, portalApex);
+					dtVcopy(portalRight, portalApex);
+					leftIndex = apexIndex;
+					rightIndex = apexIndex;
+
+					// Restart
+					i = apexIndex;
+
+					continue;
+				}
+			}
+
+			// Left vertex.
+			if (dtTriArea2D(portalApex, portalLeft, left) >= 0.0f)
+			{
+				if (dtVequal(portalApex, portalLeft) || dtTriArea2D(portalApex, portalRight, left) < 0.0f)
+				{
+					// Tighten the left side of the funnel.
+					dtVcopy(portalLeft, left);
+					leftPolyRef = (i+1 < pathSize) ? path[i+1] : 0;
+					leftPolyType = toType;
+					leftIndex = i;
+				}
+				else
+				{
+					// Left crossed over right: the right point becomes a
+					// corner of the straight path and the new apex.
+					dtVcopy(portalApex, portalRight);
+					apexIndex = rightIndex;
+
+					unsigned char flags = 0;
+					if (!rightPolyRef)
+						flags = DT_STRAIGHTPATH_END;
+					else if (rightPolyType == DT_POLYTYPE_OFFMESH_CONNECTION)
+						flags = DT_STRAIGHTPATH_OFFMESH_CONNECTION;
+					dtPolyRef ref = rightPolyRef;
+
+					if (!dtVequal(&straightPath[(n-1)*3], portalApex))
+					{
+						// Append new vertex.
+						dtVcopy(&straightPath[n*3], portalApex);
+						if (straightPathFlags)
+							straightPathFlags[n] = flags;
+						if (straightPathRefs)
+							straightPathRefs[n] = ref;
+						n++;
+						// If reached end of path or there is no space to append more vertices, return.
+						if (flags == DT_STRAIGHTPATH_END || n >= maxStraightPath)
+						{
+							*straightPathCount = n;
+							return DT_SUCCESS | ((n >= maxStraightPath) ? DT_BUFFER_TOO_SMALL : 0);
+						}
+					}
+					else
+					{
+						// The vertices are equal, update flags and poly.
+						if (straightPathFlags)
+							straightPathFlags[n-1] = flags;
+						if (straightPathRefs)
+							straightPathRefs[n-1] = ref;
+					}
+
+					dtVcopy(portalLeft, portalApex);
+					dtVcopy(portalRight, portalApex);
+					leftIndex = apexIndex;
+					rightIndex = apexIndex;
+
+					// Restart
+					i = apexIndex;
+
+					continue;
+				}
+			}
+		}
+	}
+
+	// If the point already exists, remove it and re-append the actual end location.
+	if (n > 0 && dtVequal(&straightPath[(n-1)*3], closestEndPos))
+		n--;
+
+	// Add end point.
+	if (n < maxStraightPath)
+	{
+		dtVcopy(&straightPath[n*3], closestEndPos);
+		if (straightPathFlags)
+			straightPathFlags[n] = DT_STRAIGHTPATH_END;
+		if (straightPathRefs)
+			straightPathRefs[n] = 0;
+		n++;
+	}
+
+	*straightPathCount = n;
+
+	return DT_SUCCESS | ((n >= maxStraightPath) ? DT_BUFFER_TOO_SMALL : 0);
+}
+
+/// @par
+///
+/// This method is optimized for small delta movement and a small number of 
+/// polygons. If used for too great a distance, the result set will form an 
+/// incomplete path.
+///
+/// @p resultPos will equal the @p endPos if the end is reached. 
+/// Otherwise the closest reachable position will be returned.
+/// 
+/// @p resultPos is not projected onto the surface of the navigation 
+/// mesh. Use #getPolyHeight if this is needed.
+///
+/// This method treats the end position in the same manner as 
+/// the #raycast method. (As a 2D point.) See that method's documentation 
+/// for details.
+/// 
+/// If the @p visited array is too small to hold the entire result set, it will 
+/// be filled as far as possible from the start position toward the end 
+/// position.
+///
+/// DT_FAILURE | DT_INVALID_PARAM is returned, and @p resultPos is left
+/// untouched, when @p startRef is null or invalid, @p visited is null, or
+/// @p maxVisitedSize is not positive.
+///
+dtStatus dtNavMeshQuery::moveAlongSurface(dtPolyRef startRef, const float* startPos, const float* endPos,
+										  const dtQueryFilter* filter,
+										  float* resultPos, dtPolyRef* visited, int* visitedCount, const int maxVisitedSize) const
+{
+	dtAssert(m_nav);
+	dtAssert(m_tinyNodePool);
+
+	*visitedCount = 0;
+
+	// Validate input
+	if (!startRef)
+		return DT_FAILURE | DT_INVALID_PARAM;
+	if (!m_nav->isValidPolyRef(startRef))
+		return DT_FAILURE | DT_INVALID_PARAM;
+	// The result loop writes visited[0] before checking the capacity, so an
+	// empty or missing buffer has to be rejected here.
+	if (!visited || maxVisitedSize <= 0)
+		return DT_FAILURE | DT_INVALID_PARAM;
+
+	dtStatus status = DT_SUCCESS;
+
+	// Fixed-size FIFO of nodes waiting to be examined.
+	static const int MAX_STACK = 48;
+	dtNode* stack[MAX_STACK];
+	int nstack = 0;
+
+	m_tinyNodePool->clear();
+
+	dtNode* startNode = m_tinyNodePool->getNode(startRef);
+	startNode->pidx = 0;
+	startNode->cost = 0;
+	startNode->total = 0;
+	startNode->id = startRef;
+	startNode->flags = DT_NODE_CLOSED;
+	stack[nstack++] = startNode;
+
+	float bestPos[3];
+	float bestDist = FLT_MAX;
+	dtNode* bestNode = 0;
+	dtVcopy(bestPos, startPos);
+
+	// Search constraints: a circle centred on the midpoint of the move whose
+	// radius is half the move length plus a small margin.
+	float searchPos[3], searchRadSqr;
+	dtVlerp(searchPos, startPos, endPos, 0.5f);
+	searchRadSqr = dtSqr(dtVdist(startPos, endPos)/2.0f + 0.001f);
+
+	float verts[DT_VERTS_PER_POLYGON*3];
+
+	while (nstack)
+	{
+		// Pop front.
+		dtNode* curNode = stack[0];
+		for (int i = 0; i < nstack-1; ++i)
+			stack[i] = stack[i+1];
+		nstack--;
+
+		// Get poly and tile.
+		// The API input has been checked already, skip checking internal data.
+		const dtPolyRef curRef = curNode->id;
+		const dtMeshTile* curTile = 0;
+		const dtPoly* curPoly = 0;
+		m_nav->getTileAndPolyByRefUnsafe(curRef, &curTile, &curPoly);
+
+		// Collect vertices.
+		const int nverts = curPoly->vertCount;
+		for (int i = 0; i < nverts; ++i)
+			dtVcopy(&verts[i*3], &curTile->verts[curPoly->verts[i]*3]);
+
+		// If target is inside the poly, stop search.
+		if (dtPointInPolygon(endPos, verts, nverts))
+		{
+			bestNode = curNode;
+			dtVcopy(bestPos, endPos);
+			break;
+		}
+
+		// Find wall edges and find nearest point inside the walls.
+		for (int i = 0, j = (int)curPoly->vertCount-1; i < (int)curPoly->vertCount; j = i++)
+		{
+			// Find links to neighbours.
+			static const int MAX_NEIS = 8;
+			int nneis = 0;
+			dtPolyRef neis[MAX_NEIS];
+
+			if (curPoly->neis[j] & DT_EXT_LINK)
+			{
+				// Tile border.
+				for (unsigned int k = curPoly->firstLink; k != DT_NULL_LINK; k = curTile->links[k].next)
+				{
+					const dtLink* link = &curTile->links[k];
+					if (link->edge == j)
+					{
+						if (link->ref != 0)
+						{
+							const dtMeshTile* neiTile = 0;
+							const dtPoly* neiPoly = 0;
+							m_nav->getTileAndPolyByRefUnsafe(link->ref, &neiTile, &neiPoly);
+							if (filter->passFilter(link->ref, neiTile, neiPoly))
+							{
+								// Neighbours beyond MAX_NEIS are dropped.
+								if (nneis < MAX_NEIS)
+									neis[nneis++] = link->ref;
+							}
+						}
+					}
+				}
+			}
+			else if (curPoly->neis[j])
+			{
+				const unsigned int idx = (unsigned int)(curPoly->neis[j]-1);
+				const dtPolyRef ref = m_nav->getPolyRefBase(curTile) | idx;
+				if (filter->passFilter(ref, curTile, &curTile->polys[idx]))
+				{
+					// Internal edge, encode id.
+					neis[nneis++] = ref;
+				}
+			}
+
+			if (!nneis)
+			{
+				// Wall edge, calc distance.
+				const float* vj = &verts[j*3];
+				const float* vi = &verts[i*3];
+				float tseg;
+				const float distSqr = dtDistancePtSegSqr2D(endPos, vj, vi, tseg);
+				if (distSqr < bestDist)
+				{
+					// Update nearest distance.
+					dtVlerp(bestPos, vj,vi, tseg);
+					bestDist = distSqr;
+					bestNode = curNode;
+				}
+			}
+			else
+			{
+				for (int k = 0; k < nneis; ++k)
+				{
+					// Skip if no node can be allocated.
+					dtNode* neighbourNode = m_tinyNodePool->getNode(neis[k]);
+					if (!neighbourNode)
+						continue;
+					// Skip if already visited.
+					if (neighbourNode->flags & DT_NODE_CLOSED)
+						continue;
+
+					// Skip the link if it is too far from search constraint.
+					// TODO: Maybe should use getPortalPoints(), but this one is way faster.
+					const float* vj = &verts[j*3];
+					const float* vi = &verts[i*3];
+					float tseg;
+					float distSqr = dtDistancePtSegSqr2D(searchPos, vj, vi, tseg);
+					if (distSqr > searchRadSqr)
+						continue;
+
+					// Mark the node as visited and push to queue.
+					// When the queue is full the neighbour is not enqueued.
+					if (nstack < MAX_STACK)
+					{
+						neighbourNode->pidx = m_tinyNodePool->getNodeIdx(curNode);
+						neighbourNode->flags |= DT_NODE_CLOSED;
+						stack[nstack++] = neighbourNode;
+					}
+				}
+			}
+		}
+	}
+
+	int n = 0;
+	if (bestNode)
+	{
+		// Reverse the path.
+		dtNode* prev = 0;
+		dtNode* node = bestNode;
+		do
+		{
+			dtNode* next = m_tinyNodePool->getNodeAtIdx(node->pidx);
+			node->pidx = m_tinyNodePool->getNodeIdx(prev);
+			prev = node;
+			node = next;
+		}
+		while (node);
+
+		// Store result
+		node = prev;
+		do
+		{
+			visited[n++] = node->id;
+			if (n >= maxVisitedSize)
+			{
+				status |= DT_BUFFER_TOO_SMALL;
+				break;
+			}
+			node = m_tinyNodePool->getNodeAtIdx(node->pidx);
+		}
+		while (node);
+	}
+
+	dtVcopy(resultPos, bestPos);
+
+	*visitedCount = n;
+
+	return status;
+}
+
+
+/// Resolves both polygon references and returns the portal between them.
+///
+/// On success @p fromType and @p toType hold the polygon types of @p from and
+/// @p to, and @p left / @p right hold the portal end points. If @p from cannot
+/// be resolved neither type is written; if only @p to cannot be resolved,
+/// @p fromType has already been written.
+///
+/// @return DT_FAILURE | DT_INVALID_PARAM when either reference cannot be
+/// resolved, otherwise the result of the poly/tile overload.
+dtStatus dtNavMeshQuery::getPortalPoints(dtPolyRef from, dtPolyRef to, float* left, float* right,
+										 unsigned char& fromType, unsigned char& toType) const
+{
+	dtAssert(m_nav);
+
+	// Source polygon.
+	const dtMeshTile* srcTile = 0;
+	const dtPoly* srcPoly = 0;
+	const dtStatus srcStatus = m_nav->getTileAndPolyByRef(from, &srcTile, &srcPoly);
+	if (dtStatusFailed(srcStatus))
+		return DT_FAILURE | DT_INVALID_PARAM;
+	fromType = srcPoly->getType();
+
+	// Destination polygon.
+	const dtMeshTile* dstTile = 0;
+	const dtPoly* dstPoly = 0;
+	const dtStatus dstStatus = m_nav->getTileAndPolyByRef(to, &dstTile, &dstPoly);
+	if (dtStatusFailed(dstStatus))
+		return DT_FAILURE | DT_INVALID_PARAM;
+	toType = dstPoly->getType();
+
+	return getPortalPoints(from, srcPoly, srcTile, to, dstPoly, dstTile, left, right);
+}
+
+// Returns portal points between two polygons.
+//
+// The portal is the part of the edge of 'from' whose link leads to 'to'.
+// When either polygon is an off-mesh connection, left and right are both set
+// to the single connection vertex selected by the link's edge index.
+// Returns DT_FAILURE | DT_INVALID_PARAM when 'from' has no link to 'to', or
+// when 'to' is an off-mesh connection that has no link back to 'from'.
+dtStatus dtNavMeshQuery::getPortalPoints(dtPolyRef from, const dtPoly* fromPoly, const dtMeshTile* fromTile,
+										 dtPolyRef to, const dtPoly* toPoly, const dtMeshTile* toTile,
+										 float* left, float* right) const
+{
+	// Find the link that points to the 'to' polygon.
+	const dtLink* link = 0;
+	for (unsigned int i = fromPoly->firstLink; i != DT_NULL_LINK; i = fromTile->links[i].next)
+	{
+		if (fromTile->links[i].ref == to)
+		{
+			link = &fromTile->links[i];
+			break;
+		}
+	}
+	if (!link)
+		return DT_FAILURE | DT_INVALID_PARAM;
+
+	// Handle off-mesh connections.
+	if (fromPoly->getType() == DT_POLYTYPE_OFFMESH_CONNECTION)
+	{
+		// The link found above is the first link of 'from' that leads to
+		// 'to', so there is no need to scan the list a second time.
+		const int v = link->edge;
+		dtVcopy(left, &fromTile->verts[fromPoly->verts[v]*3]);
+		dtVcopy(right, &fromTile->verts[fromPoly->verts[v]*3]);
+		return DT_SUCCESS;
+	}
+
+	if (toPoly->getType() == DT_POLYTYPE_OFFMESH_CONNECTION)
+	{
+		// Use the vertex of the connection whose link leads back to 'from'.
+		for (unsigned int i = toPoly->firstLink; i != DT_NULL_LINK; i = toTile->links[i].next)
+		{
+			if (toTile->links[i].ref == from)
+			{
+				const int v = toTile->links[i].edge;
+				dtVcopy(left, &toTile->verts[toPoly->verts[v]*3]);
+				dtVcopy(right, &toTile->verts[toPoly->verts[v]*3]);
+				return DT_SUCCESS;
+			}
+		}
+		return DT_FAILURE | DT_INVALID_PARAM;
+	}
+
+	// Find portal vertices.
+	const int v0 = fromPoly->verts[link->edge];
+	const int v1 = fromPoly->verts[(link->edge+1) % (int)fromPoly->vertCount];
+	dtVcopy(left, &fromTile->verts[v0*3]);
+	dtVcopy(right, &fromTile->verts[v1*3]);
+
+	// If the link is at tile boundary, clamp the vertices to
+	// the link width.
+	if (link->side != 0xff)
+	{
+		// Unpack portal limits.
+		// bmin/bmax are scaled by 1/255 into interpolation factors along the
+		// edge; 0 and 255 together mean the whole edge, so nothing changes.
+		if (link->bmin != 0 || link->bmax != 255)
+		{
+			const float s = 1.0f/255.0f;
+			const float tmin = link->bmin*s;
+			const float tmax = link->bmax*s;
+			dtVlerp(left, &fromTile->verts[v0*3], &fromTile->verts[v1*3], tmin);
+			dtVlerp(right, &fromTile->verts[v0*3], &fromTile->verts[v1*3], tmax);
+		}
+	}
+
+	return DT_SUCCESS;
+}
+
+// Returns edge mid point between two polygons.
+// The portal between 'from' and 'to' is looked up by reference; 'mid' is
+// written only when that lookup succeeds.
+dtStatus dtNavMeshQuery::getEdgeMidPoint(dtPolyRef from, dtPolyRef to, float* mid) const
+{
+	float portalA[3], portalB[3];
+	unsigned char typeFrom, typeTo;
+	const dtStatus portalStatus = getPortalPoints(from, to, portalA, portalB, typeFrom, typeTo);
+	if (dtStatusFailed(portalStatus))
+		return DT_FAILURE | DT_INVALID_PARAM;
+
+	// Average the two portal end points component by component.
+	for (int axis = 0; axis < 3; ++axis)
+		mid[axis] = (portalA[axis]+portalB[axis])*0.5f;
+
+	return DT_SUCCESS;
+}
+
+// Returns edge mid point between two polygons whose poly and tile pointers
+// are already known. 'mid' is written only when the portal lookup succeeds.
+dtStatus dtNavMeshQuery::getEdgeMidPoint(dtPolyRef from, const dtPoly* fromPoly, const dtMeshTile* fromTile,
+										 dtPolyRef to, const dtPoly* toPoly, const dtMeshTile* toTile,
+										 float* mid) const
+{
+	float portalA[3], portalB[3];
+	const dtStatus portalStatus = getPortalPoints(from, fromPoly, fromTile, to, toPoly, toTile, portalA, portalB);
+	if (dtStatusFailed(portalStatus))
+		return DT_FAILURE | DT_INVALID_PARAM;
+
+	// Average the two portal end points component by component.
+	for (int axis = 0; axis < 3; ++axis)
+		mid[axis] = (portalA[axis]+portalB[axis])*0.5f;
+
+	return DT_SUCCESS;
+}
+
+/// @par
+///
+/// This method is meant to be used for quick, short distance checks.
+///
+/// If the path array is too small to hold the result, it will be filled as 
+/// far as possible from the start position toward the end position.
+///
+/// <b>Using the Hit Parameter (t)</b>
+/// 
+/// If the hit parameter is a very high value (FLT_MAX), then the ray has hit 
+/// the end position. In this case the path represents a valid corridor to the 
+/// end position and the value of @p hitNormal is undefined.
+///
+/// If the hit parameter is zero, then the start position is on the wall that 
+/// was hit and the value of @p hitNormal is undefined.
+///
+/// If 0 < t < 1.0 then the following applies:
+///
+/// @code
+/// distanceToHitBorder = distanceToEndPosition * t
+/// hitPoint = startPos + (endPos - startPos) * t
+/// @endcode
+///
+/// <b>Use Case Restriction</b>
+///
+/// The raycast ignores the y-value of the end position. (2D check.) This 
+/// places significant limits on how it can be used. For example:
+///
+/// Consider a scene where there is a main floor with a second floor balcony 
+/// that hangs over the main floor. So the first floor mesh extends below the 
+/// balcony mesh. The start position is somewhere on the first floor. The end 
+/// position is on the balcony.
+///
+/// The raycast will search toward the end position along the first floor mesh. 
+/// If it reaches the end position's xz-coordinates it will indicate FLT_MAX
+/// (no wall hit), meaning it reached the end position. This is one example of why
+/// this method is meant for short distance checks.
+///
+dtStatus dtNavMeshQuery::raycast(dtPolyRef startRef, const float* startPos, const float* endPos,
+								 const dtQueryFilter* filter,
+								 float* t, float* hitNormal, dtPolyRef* path, int* pathCount, const int maxPath) const
+{
+	dtAssert(m_nav);
+	
+	*t = 0;	// t (and pathCount below) are reset before validation, so they are set on the failure return too.
+	if (pathCount)
+		*pathCount = 0;
+	
+	// Validate input
+	if (!startRef || !m_nav->isValidPolyRef(startRef))
+		return DT_FAILURE | DT_INVALID_PARAM;
+	
+	dtPolyRef curRef = startRef;
+	float verts[DT_VERTS_PER_POLYGON*3];	
+	int n = 0;	// Number of polygon refs written to path so far.
+	
+	hitNormal[0] = 0;
+	hitNormal[1] = 0;
+	hitNormal[2] = 0;
+	
+	dtStatus status = DT_SUCCESS;
+	
+	while (curRef)
+	{
+		// Cast ray against current polygon.
+		
+		// The API input has been checked already, skip checking internal data.
+		const dtMeshTile* tile = 0;
+		const dtPoly* poly = 0;
+		m_nav->getTileAndPolyByRefUnsafe(curRef, &tile, &poly);
+		
+		// Collect vertices.
+		int nv = 0;
+		for (int i = 0; i < (int)poly->vertCount; ++i)
+		{
+			dtVcopy(&verts[nv*3], &tile->verts[poly->verts[i]*3]);
+			nv++;
+		}		
+		
+		float tmin, tmax;
+		int segMin, segMax;
+		if (!dtIntersectSegmentPoly2D(startPos, endPos, verts, nv, tmin, tmax, segMin, segMax))
+		{
+			// Could not hit the polygon, keep the old t and report hit.
+			if (pathCount)
+				*pathCount = n;
+			return status;
+		}
+		// Keep track of furthest t so far.
+		if (tmax > *t)
+			*t = tmax;
+		
+		// Store visited polygons.
+		if (n < maxPath)
+			path[n++] = curRef;
+		else
+			status |= DT_BUFFER_TOO_SMALL;	// The walk continues; only the recorded path is truncated.
+		
+		// Ray end is completely inside the polygon.
+		if (segMax == -1)
+		{
+			*t = FLT_MAX;
+			if (pathCount)
+				*pathCount = n;
+			return status;
+		}
+		
+		// Follow neighbours.
+		dtPolyRef nextRef = 0;
+		
+		for (unsigned int i = poly->firstLink; i != DT_NULL_LINK; i = tile->links[i].next)
+		{
+			const dtLink* link = &tile->links[i];
+			
+			// Find link which contains this edge.
+			if ((int)link->edge != segMax)
+				continue;
+			
+			// Get pointer to the next polygon.
+			const dtMeshTile* nextTile = 0;
+			const dtPoly* nextPoly = 0;
+			m_nav->getTileAndPolyByRefUnsafe(link->ref, &nextTile, &nextPoly);
+			
+			// Skip off-mesh connections.
+			if (nextPoly->getType() == DT_POLYTYPE_OFFMESH_CONNECTION)
+				continue;
+			
+			// Skip links based on filter.
+			if (!filter->passFilter(link->ref, nextTile, nextPoly))
+				continue;
+			
+			// If the link is internal, just return the ref.
+			if (link->side == 0xff)
+			{
+				nextRef = link->ref;
+				break;
+			}
+			
+			// If the link is at tile boundary,
+			
+			// Check if the link spans the whole edge, and accept.
+			if (link->bmin == 0 && link->bmax == 255)
+			{
+				nextRef = link->ref;
+				break;
+			}
+			
+			// Check for partial edge links.
+			const int v0 = poly->verts[link->edge];
+			const int v1 = poly->verts[(link->edge+1) % poly->vertCount];
+			const float* left = &tile->verts[v0*3];
+			const float* right = &tile->verts[v1*3];
+			
+			// Check that the intersection lies inside the link portal.
+			if (link->side == 0 || link->side == 4)
+			{
+				// Calculate link size (bmin/bmax are fractions of the edge in 1/255 units, compared along z).
+				const float s = 1.0f/255.0f;
+				float lmin = left[2] + (right[2] - left[2])*(link->bmin*s);
+				float lmax = left[2] + (right[2] - left[2])*(link->bmax*s);
+				if (lmin > lmax) dtSwap(lmin, lmax);
+				
+				// Find Z intersection.
+				float z = startPos[2] + (endPos[2]-startPos[2])*tmax;
+				if (z >= lmin && z <= lmax)
+				{
+					nextRef = link->ref;
+					break;
+				}
+			}
+			else if (link->side == 2 || link->side == 6)
+			{
+				// Calculate link size (same as above, compared along x).
+				const float s = 1.0f/255.0f;
+				float lmin = left[0] + (right[0] - left[0])*(link->bmin*s);
+				float lmax = left[0] + (right[0] - left[0])*(link->bmax*s);
+				if (lmin > lmax) dtSwap(lmin, lmax);
+				
+				// Find X intersection.
+				float x = startPos[0] + (endPos[0]-startPos[0])*tmax;
+				if (x >= lmin && x <= lmax)
+				{
+					nextRef = link->ref;
+					break;
+				}
+			}
+		}
+		
+		if (!nextRef)
+		{
+			// No neighbour, we hit a wall.
+			
+			// Calculate hit normal: the xz-plane perpendicular (dz, 0, -dx) of the exit edge va -> vb.
+			const int a = segMax;
+			const int b = segMax+1 < nv ? segMax+1 : 0;	// Wraps to vertex 0 after the last vertex.
+			const float* va = &verts[a*3];
+			const float* vb = &verts[b*3];
+			const float dx = vb[0] - va[0];
+			const float dz = vb[2] - va[2];
+			hitNormal[0] = dz;
+			hitNormal[1] = 0;
+			hitNormal[2] = -dx;
+			dtVnormalize(hitNormal);
+			
+			if (pathCount)
+				*pathCount = n;
+			return status;
+		}
+		
+		// No hit, advance to neighbour polygon.
+		curRef = nextRef;
+	}
+	
+	if (pathCount)
+		*pathCount = n;
+	
+	return status;
+}
+
+/// @par
+///
+/// At least one result array must be provided.
+///
+/// The order of the result set is from least to highest cost to reach the polygon.
+///
+/// A common use case for this method is to perform Dijkstra searches. 
+/// Candidate polygons are found by searching the graph beginning at the start polygon.
+///
+/// If a polygon is not found via the graph search, even if it intersects the 
+/// search circle, it will not be included in the result set. For example:
+///
+/// polyA is the start polygon.
+/// polyB shares an edge with polyA. (Is adjacent.)
+/// polyC shares an edge with polyB, but not with polyA
+/// Even if the search circle overlaps polyC, it will not be included in the 
+/// result set unless polyB is also in the set.
+/// 
+/// The value of the center point is used as the start position for cost 
+/// calculations. It is not projected onto the surface of the mesh, so its 
+/// y-value will affect the costs.
+///
+/// Intersection tests occur in 2D. All polygons and the search circle are 
+/// projected onto the xz-plane. So the y-value of the center point does not 
+/// affect intersection tests.
+///
+/// If the result arrays are too small to hold the entire result set, they will be 
+/// filled to capacity.
+/// 
+dtStatus dtNavMeshQuery::findPolysAroundCircle(dtPolyRef startRef, const float* centerPos, const float radius,
+											   const dtQueryFilter* filter,
+											   dtPolyRef* resultRef, dtPolyRef* resultParent, float* resultCost,
+											   int* resultCount, const int maxResult) const
+{
+	dtAssert(m_nav);
+	dtAssert(m_nodePool);
+	dtAssert(m_openList);
+
+	*resultCount = 0;
+	
+	// Validate input
+	if (!startRef || !m_nav->isValidPolyRef(startRef))
+		return DT_FAILURE | DT_INVALID_PARAM;
+	
+	m_nodePool->clear();
+	m_openList->clear();
+	
+	dtNode* startNode = m_nodePool->getNode(startRef);
+	dtVcopy(startNode->pos, centerPos);
+	startNode->pidx = 0;
+	startNode->cost = 0;
+	startNode->total = 0;
+	startNode->id = startRef;
+	startNode->flags = DT_NODE_OPEN;
+	m_openList->push(startNode);
+	
+	dtStatus status = DT_SUCCESS;
+	// The start polygon is always the first entry of the result set (parent 0, cost 0).
+	int n = 0;
+	if (n < maxResult)
+	{
+		if (resultRef)
+			resultRef[n] = startNode->id;
+		if (resultParent)
+			resultParent[n] = 0;
+		if (resultCost)
+			resultCost[n] = 0;
+		++n;
+	}
+	else
+	{
+		status |= DT_BUFFER_TOO_SMALL;
+	}
+	
+	const float radiusSqr = dtSqr(radius);
+	
+	while (!m_openList->empty())
+	{
+		dtNode* bestNode = m_openList->pop();
+		bestNode->flags &= ~DT_NODE_OPEN;
+		bestNode->flags |= DT_NODE_CLOSED;
+		
+		// Get poly and tile.
+		// The API input has been checked already, skip checking internal data.
+		const dtPolyRef bestRef = bestNode->id;
+		const dtMeshTile* bestTile = 0;
+		const dtPoly* bestPoly = 0;
+		m_nav->getTileAndPolyByRefUnsafe(bestRef, &bestTile, &bestPoly);
+		
+		// Get parent poly and tile. (Only parentRef is used below; parentTile/parentPoly are fetched but not read.)
+		dtPolyRef parentRef = 0;
+		const dtMeshTile* parentTile = 0;
+		const dtPoly* parentPoly = 0;
+		if (bestNode->pidx)
+			parentRef = m_nodePool->getNodeAtIdx(bestNode->pidx)->id;
+		if (parentRef)
+			m_nav->getTileAndPolyByRefUnsafe(parentRef, &parentTile, &parentPoly);
+		
+		for (unsigned int i = bestPoly->firstLink; i != DT_NULL_LINK; i = bestTile->links[i].next)
+		{
+			const dtLink* link = &bestTile->links[i];
+			dtPolyRef neighbourRef = link->ref;
+			// Skip invalid neighbours and do not follow back to parent.
+			if (!neighbourRef || neighbourRef == parentRef)
+				continue;
+			
+			// Expand to neighbour
+			const dtMeshTile* neighbourTile = 0;
+			const dtPoly* neighbourPoly = 0;
+			m_nav->getTileAndPolyByRefUnsafe(neighbourRef, &neighbourTile, &neighbourPoly);
+		
+			// Do not advance if the polygon is excluded by the filter.
+			if (!filter->passFilter(neighbourRef, neighbourTile, neighbourPoly))
+				continue;
+			
+			// Find edge and calc distance to the edge.
+			float va[3], vb[3];
+			if (!getPortalPoints(bestRef, bestPoly, bestTile, neighbourRef, neighbourPoly, neighbourTile, va, vb))
+				continue;
+			
+			// If the circle is not touching the next polygon, skip it.
+			float tseg;
+			float distSqr = dtDistancePtSegSqr2D(centerPos, va, vb, tseg);
+			if (distSqr > radiusSqr)
+				continue;
+			
+			dtNode* neighbourNode = m_nodePool->getNode(neighbourRef);
+			if (!neighbourNode)
+			{
+				status |= DT_OUT_OF_NODES;
+				continue;
+			}
+				
+			if (neighbourNode->flags & DT_NODE_CLOSED)
+				continue;
+			
+			// Cost. A node with no flags set yet gets its position from the portal midpoint.
+			if (neighbourNode->flags == 0)
+				dtVlerp(neighbourNode->pos, va, vb, 0.5f);
+			
+			const float total = bestNode->total + dtVdist(bestNode->pos, neighbourNode->pos);
+			
+			// The node is already in open list and the new result is worse, skip.
+			if ((neighbourNode->flags & DT_NODE_OPEN) && total >= neighbourNode->total)
+				continue;
+			
+			neighbourNode->id = neighbourRef;
+			neighbourNode->flags = (neighbourNode->flags & ~DT_NODE_CLOSED);
+			neighbourNode->pidx = m_nodePool->getNodeIdx(bestNode);
+			neighbourNode->total = total;
+			
+			if (neighbourNode->flags & DT_NODE_OPEN)
+			{
+				// NOTE(review): the resultParent/resultCost entry written when this node was first pushed is not refreshed here.
+				m_openList->modify(neighbourNode);
+			}
+			else
+			{
+				if (n < maxResult)
+				{
+					if (resultRef)
+						resultRef[n] = neighbourNode->id;
+					if (resultParent)
+						resultParent[n] = m_nodePool->getNodeAtIdx(neighbourNode->pidx)->id;
+					if (resultCost)
+						resultCost[n] = neighbourNode->total;
+					++n;
+				}
+				else
+				{
+					status |= DT_BUFFER_TOO_SMALL;
+				}
+				neighbourNode->flags = DT_NODE_OPEN;
+				m_openList->push(neighbourNode);
+			}
+		}
+	}
+	
+	*resultCount = n;
+	
+	return status;
+}
+
+/// @par
+///
+/// The order of the result set is from least to highest cost.
+/// 
+/// At least one result array must be provided.
+///
+/// A common use case for this method is to perform Dijkstra searches. 
+/// Candidate polygons are found by searching the graph beginning at the start 
+/// polygon.
+/// 
+/// The same intersection test restrictions that apply to findPolysAroundCircle()
+/// method apply to this method.
+/// 
+/// The 3D centroid of the search polygon is used as the start position for cost 
+/// calculations.
+/// 
+/// Intersection tests occur in 2D. All polygons are projected onto the 
+/// xz-plane. So the y-values of the vertices do not affect intersection tests.
+/// 
+/// If the result arrays are too small to hold the entire result set, they will 
+/// be filled to capacity.
+///
+dtStatus dtNavMeshQuery::findPolysAroundShape(dtPolyRef startRef, const float* verts, const int nverts,
+											  const dtQueryFilter* filter,
+											  dtPolyRef* resultRef, dtPolyRef* resultParent, float* resultCost,
+											  int* resultCount, const int maxResult) const
+{
+	dtAssert(m_nav);
+	dtAssert(m_nodePool);
+	dtAssert(m_openList);
+	
+	*resultCount = 0;
+	
+	// Validate input. The shape must have at least one vertex: the centroid below reads verts and divides by nverts.
+	if (!startRef || !m_nav->isValidPolyRef(startRef) || !verts || nverts <= 0)
+		return DT_FAILURE | DT_INVALID_PARAM;
+	
+	m_nodePool->clear();
+	m_openList->clear();
+	
+	float centerPos[3] = {0,0,0};	// Average of the shape vertices; used as the start node position for costs.
+	for (int i = 0; i < nverts; ++i)
+		dtVadd(centerPos,centerPos,&verts[i*3]);
+	dtVscale(centerPos,centerPos,1.0f/nverts);
+
+	dtNode* startNode = m_nodePool->getNode(startRef);
+	dtVcopy(startNode->pos, centerPos);
+	startNode->pidx = 0;
+	startNode->cost = 0;
+	startNode->total = 0;
+	startNode->id = startRef;
+	startNode->flags = DT_NODE_OPEN;
+	m_openList->push(startNode);
+	
+	dtStatus status = DT_SUCCESS;
+	// The start polygon is always the first entry of the result set (parent 0, cost 0).
+	int n = 0;
+	if (n < maxResult)
+	{
+		if (resultRef)
+			resultRef[n] = startNode->id;
+		if (resultParent)
+			resultParent[n] = 0;
+		if (resultCost)
+			resultCost[n] = 0;
+		++n;
+	}
+	else
+	{
+		status |= DT_BUFFER_TOO_SMALL;
+	}
+	
+	while (!m_openList->empty())
+	{
+		dtNode* bestNode = m_openList->pop();
+		bestNode->flags &= ~DT_NODE_OPEN;
+		bestNode->flags |= DT_NODE_CLOSED;
+		
+		// Get poly and tile.
+		// The API input has been checked already, skip checking internal data.
+		const dtPolyRef bestRef = bestNode->id;
+		const dtMeshTile* bestTile = 0;
+		const dtPoly* bestPoly = 0;
+		m_nav->getTileAndPolyByRefUnsafe(bestRef, &bestTile, &bestPoly);
+		
+		// Get parent poly and tile. (Only parentRef is used below; parentTile/parentPoly are fetched but not read.)
+		dtPolyRef parentRef = 0;
+		const dtMeshTile* parentTile = 0;
+		const dtPoly* parentPoly = 0;
+		if (bestNode->pidx)
+			parentRef = m_nodePool->getNodeAtIdx(bestNode->pidx)->id;
+		if (parentRef)
+			m_nav->getTileAndPolyByRefUnsafe(parentRef, &parentTile, &parentPoly);
+		
+		for (unsigned int i = bestPoly->firstLink; i != DT_NULL_LINK; i = bestTile->links[i].next)
+		{
+			const dtLink* link = &bestTile->links[i];
+			dtPolyRef neighbourRef = link->ref;
+			// Skip invalid neighbours and do not follow back to parent.
+			if (!neighbourRef || neighbourRef == parentRef)
+				continue;
+			
+			// Expand to neighbour
+			const dtMeshTile* neighbourTile = 0;
+			const dtPoly* neighbourPoly = 0;
+			m_nav->getTileAndPolyByRefUnsafe(neighbourRef, &neighbourTile, &neighbourPoly);
+			
+			// Do not advance if the polygon is excluded by the filter.
+			if (!filter->passFilter(neighbourRef, neighbourTile, neighbourPoly))
+				continue;
+			
+			// Find edge and calc distance to the edge.
+			float va[3], vb[3];
+			if (!getPortalPoints(bestRef, bestPoly, bestTile, neighbourRef, neighbourPoly, neighbourTile, va, vb))
+				continue;
+			
+			// If the search shape does not touch the portal edge to the next polygon, skip the connection.
+			float tmin, tmax;
+			int segMin, segMax;
+			if (!dtIntersectSegmentPoly2D(va, vb, verts, nverts, tmin, tmax, segMin, segMax))
+				continue;
+			if (tmin > 1.0f || tmax < 0.0f)
+				continue;
+			
+			dtNode* neighbourNode = m_nodePool->getNode(neighbourRef);
+			if (!neighbourNode)
+			{
+				status |= DT_OUT_OF_NODES;
+				continue;
+			}
+			
+			if (neighbourNode->flags & DT_NODE_CLOSED)
+				continue;
+			
+			// Cost. A node with no flags set yet gets its position from the portal midpoint.
+			if (neighbourNode->flags == 0)
+				dtVlerp(neighbourNode->pos, va, vb, 0.5f);
+			
+			const float total = bestNode->total + dtVdist(bestNode->pos, neighbourNode->pos);
+			
+			// The node is already in open list and the new result is worse, skip.
+			if ((neighbourNode->flags & DT_NODE_OPEN) && total >= neighbourNode->total)
+				continue;
+			
+			neighbourNode->id = neighbourRef;
+			neighbourNode->flags = (neighbourNode->flags & ~DT_NODE_CLOSED);
+			neighbourNode->pidx = m_nodePool->getNodeIdx(bestNode);
+			neighbourNode->total = total;
+			
+			if (neighbourNode->flags & DT_NODE_OPEN)
+			{
+				// NOTE(review): the resultParent/resultCost entry written when this node was first pushed is not refreshed here.
+				m_openList->modify(neighbourNode);
+			}
+			else
+			{
+				if (n < maxResult)
+				{
+					if (resultRef)
+						resultRef[n] = neighbourNode->id;
+					if (resultParent)
+						resultParent[n] = m_nodePool->getNodeAtIdx(neighbourNode->pidx)->id;
+					if (resultCost)
+						resultCost[n] = neighbourNode->total;
+					++n;
+				}
+				else
+				{
+					status |= DT_BUFFER_TOO_SMALL;
+				}
+				neighbourNode->flags = DT_NODE_OPEN;
+				m_openList->push(neighbourNode);
+			}
+		}
+	}
+	
+	*resultCount = n;
+	
+	return status;
+}
+
+/// @par
+///
+/// This method is optimized for a small search radius and small number of result 
+/// polygons.
+///
+/// Candidate polygons are found by searching the navigation graph beginning at 
+/// the start polygon.
+///
+/// The same intersection test restrictions that apply to the findPolysAroundCircle() 
+/// method apply to this method.
+///
+/// The value of the center point is used as the start point for cost calculations. 
+/// It is not projected onto the surface of the mesh, so its y-value will affect 
+/// the costs.
+/// 
+/// Intersection tests occur in 2D. All polygons and the search circle are 
+/// projected onto the xz-plane. So the y-value of the center point does not 
+/// affect intersection tests.
+/// 
+/// If the result arrays are too small to hold the entire result set, they will 
+/// be filled to capacity.
+/// 
+dtStatus dtNavMeshQuery::findLocalNeighbourhood(dtPolyRef startRef, const float* centerPos, const float radius,
+												const dtQueryFilter* filter,
+												dtPolyRef* resultRef, dtPolyRef* resultParent,
+												int* resultCount, const int maxResult) const
+{
+	dtAssert(m_nav);
+	dtAssert(m_tinyNodePool);
+	
+	*resultCount = 0;
+
+	// Validate input
+	if (!startRef || !m_nav->isValidPolyRef(startRef))
+		return DT_FAILURE | DT_INVALID_PARAM;
+	
+	static const int MAX_STACK = 48;	// Capacity of the pending-node queue below.
+	dtNode* stack[MAX_STACK];
+	int nstack = 0;
+	
+	m_tinyNodePool->clear();
+	
+	dtNode* startNode = m_tinyNodePool->getNode(startRef);
+	startNode->pidx = 0;
+	startNode->id = startRef;
+	startNode->flags = DT_NODE_CLOSED;
+	stack[nstack++] = startNode;
+	
+	const float radiusSqr = dtSqr(radius);
+	
+	float pa[DT_VERTS_PER_POLYGON*3];	// Vertices of the candidate neighbour polygon.
+	float pb[DT_VERTS_PER_POLYGON*3];	// Vertices of an already accepted polygon.
+	
+	dtStatus status = DT_SUCCESS;
+	// The start polygon is always the first result. resultRef is written unconditionally, so it must not be null.
+	int n = 0;
+	if (n < maxResult)
+	{
+		resultRef[n] = startNode->id;
+		if (resultParent)
+			resultParent[n] = 0;
+		++n;
+	}
+	else
+	{
+		status |= DT_BUFFER_TOO_SMALL;
+	}
+	
+	while (nstack)
+	{
+		// Pop front. New nodes are appended at the back, so the array is processed first-in first-out.
+		dtNode* curNode = stack[0];
+		for (int i = 0; i < nstack-1; ++i)
+			stack[i] = stack[i+1];
+		nstack--;
+		
+		// Get poly and tile.
+		// The API input has been checked already, skip checking internal data.
+		const dtPolyRef curRef = curNode->id;
+		const dtMeshTile* curTile = 0;
+		const dtPoly* curPoly = 0;
+		m_nav->getTileAndPolyByRefUnsafe(curRef, &curTile, &curPoly);
+		
+		for (unsigned int i = curPoly->firstLink; i != DT_NULL_LINK; i = curTile->links[i].next)
+		{
+			const dtLink* link = &curTile->links[i];
+			dtPolyRef neighbourRef = link->ref;
+			// Skip invalid neighbours.
+			if (!neighbourRef)
+				continue;
+			
+			// Skip if cannot allocate more nodes.
+			dtNode* neighbourNode = m_tinyNodePool->getNode(neighbourRef);
+			if (!neighbourNode)
+				continue;
+			// Skip visited.
+			if (neighbourNode->flags & DT_NODE_CLOSED)
+				continue;
+			
+			// Expand to neighbour
+			const dtMeshTile* neighbourTile = 0;
+			const dtPoly* neighbourPoly = 0;
+			m_nav->getTileAndPolyByRefUnsafe(neighbourRef, &neighbourTile, &neighbourPoly);
+			
+			// Skip off-mesh connections.
+			if (neighbourPoly->getType() == DT_POLYTYPE_OFFMESH_CONNECTION)
+				continue;
+			
+			// Do not advance if the polygon is excluded by the filter.
+			if (!filter->passFilter(neighbourRef, neighbourTile, neighbourPoly))
+				continue;
+			
+			// Find edge and calc distance to the edge.
+			float va[3], vb[3];
+			if (!getPortalPoints(curRef, curPoly, curTile, neighbourRef, neighbourPoly, neighbourTile, va, vb))
+				continue;
+			
+			// If the circle is not touching the next polygon, skip it.
+			float tseg;
+			float distSqr = dtDistancePtSegSqr2D(centerPos, va, vb, tseg);
+			if (distSqr > radiusSqr)
+				continue;
+			
+			// Mark node visited, this is done before the overlap test so that
+			// we will not visit the poly again if the test fails.
+			neighbourNode->flags |= DT_NODE_CLOSED;
+			neighbourNode->pidx = m_tinyNodePool->getNodeIdx(curNode);
+			
+			// Check that the polygon does not collide with existing polygons.
+			
+			// Collect vertices of the neighbour poly.
+			const int npa = neighbourPoly->vertCount;
+			for (int k = 0; k < npa; ++k)
+				dtVcopy(&pa[k*3], &neighbourTile->verts[neighbourPoly->verts[k]*3]);
+			
+			bool overlap = false;
+			for (int j = 0; j < n; ++j)
+			{
+				dtPolyRef pastRef = resultRef[j];
+				
+				// Connected polys do not overlap. (Connectivity is tested against the links of the current polygon.)
+				bool connected = false;
+				for (unsigned int k = curPoly->firstLink; k != DT_NULL_LINK; k = curTile->links[k].next)
+				{
+					if (curTile->links[k].ref == pastRef)
+					{
+						connected = true;
+						break;
+					}
+				}
+				if (connected)
+					continue;
+				
+				// Potentially overlapping.
+				const dtMeshTile* pastTile = 0;
+				const dtPoly* pastPoly = 0;
+				m_nav->getTileAndPolyByRefUnsafe(pastRef, &pastTile, &pastPoly);
+				
+				// Get vertices and test overlap
+				const int npb = pastPoly->vertCount;
+				for (int k = 0; k < npb; ++k)
+					dtVcopy(&pb[k*3], &pastTile->verts[pastPoly->verts[k]*3]);
+				
+				if (dtOverlapPolyPoly2D(pa,npa, pb,npb))
+				{
+					overlap = true;
+					break;
+				}
+			}
+			if (overlap)
+				continue;
+			
+			// This poly is fine, store and advance to the poly.
+			if (n < maxResult)
+			{
+				resultRef[n] = neighbourRef;
+				if (resultParent)
+					resultParent[n] = curRef;
+				++n;
+			}
+			else
+			{
+				status |= DT_BUFFER_TOO_SMALL;
+			}
+			
+			if (nstack < MAX_STACK)	// When the queue is full the node is not queued, and no status flag is raised.
+			{
+				stack[nstack++] = neighbourNode;
+			}
+		}
+	}
+	
+	*resultCount = n;
+	
+	return status;
+}
+
+
+struct dtSegInterval	// One interval along a polygon edge, as collected by getPolyWallSegments().
+{
+	dtPolyRef ref;	// Polygon reached through this interval; 0 for the sentinel entries.
+	short tmin, tmax;	// Interval bounds taken from dtLink::bmin/bmax (divided by 255 when used); sentinels use -1 and 256.
+};
+
+static void insertInterval(dtSegInterval* ints, int& nints, const int maxInts,
+						   const short tmin, const short tmax, const dtPolyRef ref)
+{
+	// No room left: the interval is dropped without any notification.
+	if (nints >= maxInts) return;
+	// Locate the first stored interval that starts at or after the end of the new one.
+	int slot = 0;
+	for (; slot < nints; ++slot)
+	{
+		if (ints[slot].tmin >= tmax)
+			break;
+	}
+	// Shift the tail up by one entry to open the slot.
+	const int tail = nints - slot;
+	if (tail != 0)
+		memmove(&ints[slot+1], &ints[slot], sizeof(dtSegInterval)*tail);
+	// Fill the slot and account for the new entry.
+	dtSegInterval& entry = ints[slot];
+	entry.ref = ref; entry.tmin = tmin; entry.tmax = tmax;
+	++nints;
+}
+
+/// @par
+///
+/// If the @p segmentRefs parameter is provided, then all polygon segments will be returned. 
+/// Otherwise only the wall segments are returned.
+/// 
+/// A segment that is normally a portal will be included in the result set as a 
+/// wall if the @p filter results in the neighbor polygon becoming impassable.
+/// 
+/// The @p segmentVerts and @p segmentRefs buffers should normally be sized for the 
+/// maximum segments per polygon of the source navigation mesh.
+/// 
+dtStatus dtNavMeshQuery::getPolyWallSegments(dtPolyRef ref, const dtQueryFilter* filter,
+											 float* segmentVerts, dtPolyRef* segmentRefs, int* segmentCount,
+											 const int maxSegments) const
+{
+	dtAssert(m_nav);
+	
+	*segmentCount = 0;
+	
+	const dtMeshTile* tile = 0;
+	const dtPoly* poly = 0;
+	if (dtStatusFailed(m_nav->getTileAndPolyByRef(ref, &tile, &poly)))
+		return DT_FAILURE | DT_INVALID_PARAM;
+	
+	int n = 0;	// Number of segments written so far; each uses 6 floats of segmentVerts.
+	static const int MAX_INTERVAL = 16;
+	dtSegInterval ints[MAX_INTERVAL];
+	int nints;
+	
+	const bool storePortals = segmentRefs != 0;
+	
+	dtStatus status = DT_SUCCESS;
+	
+	for (int i = 0, j = (int)poly->vertCount-1; i < (int)poly->vertCount; j = i++)
+	{
+		// Process edge j -> i. For tile-border edges, first collect the passable link intervals.
+		nints = 0;
+		if (poly->neis[j] & DT_EXT_LINK)
+		{
+			// Tile border.
+			for (unsigned int k = poly->firstLink; k != DT_NULL_LINK; k = tile->links[k].next)
+			{
+				const dtLink* link = &tile->links[k];
+				if (link->edge == j)
+				{
+					if (link->ref != 0)
+					{
+						const dtMeshTile* neiTile = 0;
+						const dtPoly* neiPoly = 0;
+						m_nav->getTileAndPolyByRefUnsafe(link->ref, &neiTile, &neiPoly);
+						if (filter->passFilter(link->ref, neiTile, neiPoly))
+						{
+							insertInterval(ints, nints, MAX_INTERVAL, link->bmin, link->bmax, link->ref);
+						}
+					}
+				}
+			}
+		}
+		else
+		{
+			// Internal edge
+			dtPolyRef neiRef = 0;
+			if (poly->neis[j])
+			{
+				const unsigned int idx = (unsigned int)(poly->neis[j]-1);
+				neiRef = m_nav->getPolyRefBase(tile) | idx;
+				if (!filter->passFilter(neiRef, tile, &tile->polys[idx]))
+					neiRef = 0;	// A filtered-out neighbour turns the edge into a wall.
+			}
+
+			// If the edge leads to another polygon and portals are not stored, skip.
+			if (neiRef != 0 && !storePortals)
+				continue;
+			
+			if (n < maxSegments)
+			{
+				const float* vj = &tile->verts[poly->verts[j]*3];
+				const float* vi = &tile->verts[poly->verts[i]*3];
+				float* seg = &segmentVerts[n*6];
+				dtVcopy(seg+0, vj);
+				dtVcopy(seg+3, vi);
+				if (segmentRefs)
+					segmentRefs[n] = neiRef;
+				n++;
+			}
+			else
+			{
+				status |= DT_BUFFER_TOO_SMALL;
+			}
+			
+			continue;
+		}
+		
+		// Add sentinels, so that the gaps before the first and after the last interval are emitted as walls below.
+		insertInterval(ints, nints, MAX_INTERVAL, -1, 0, 0);	// NOTE(review): insertInterval drops entries once MAX_INTERVAL is reached.
+		insertInterval(ints, nints, MAX_INTERVAL, 255, 256, 0);
+		
+		// Store segments.
+		const float* vj = &tile->verts[poly->verts[j]*3];
+		const float* vi = &tile->verts[poly->verts[i]*3];
+		for (int k = 1; k < nints; ++k)
+		{
+			// Portal segment.
+			if (storePortals && ints[k].ref)
+			{
+				const float tmin = ints[k].tmin/255.0f; 
+				const float tmax = ints[k].tmax/255.0f; 
+				if (n < maxSegments)
+				{
+					float* seg = &segmentVerts[n*6];
+					dtVlerp(seg+0, vj,vi, tmin);
+					dtVlerp(seg+3, vj,vi, tmax);
+					if (segmentRefs)
+						segmentRefs[n] = ints[k].ref;
+					n++;
+				}
+				else
+				{
+					status |= DT_BUFFER_TOO_SMALL;
+				}
+			}
+
+			// Wall segment: the gap between the end of the previous interval and the start of this one.
+			const int imin = ints[k-1].tmax;
+			const int imax = ints[k].tmin;
+			if (imin != imax)
+			{
+				const float tmin = imin/255.0f; 
+				const float tmax = imax/255.0f; 
+				if (n < maxSegments)
+				{
+					float* seg = &segmentVerts[n*6];
+					dtVlerp(seg+0, vj,vi, tmin);
+					dtVlerp(seg+3, vj,vi, tmax);
+					if (segmentRefs)
+						segmentRefs[n] = 0;
+					n++;
+				}
+				else
+				{
+					status |= DT_BUFFER_TOO_SMALL;
+				}
+			}
+		}
+	}
+	
+	*segmentCount = n;
+	
+	return status;
+}
+
+/// @par
+///
+/// @p hitPos is not adjusted using the height detail data.
+///
+/// @p hitDist will equal the search radius if there is no wall within the 
+/// radius. In this case the values of @p hitPos and @p hitNormal are
+/// undefined.
+///
+/// The normal will become unpredictable if @p hitDist is a very small number.
+///
+dtStatus dtNavMeshQuery::findDistanceToWall(dtPolyRef startRef, const float* centerPos, const float maxRadius,
+											const dtQueryFilter* filter,
+											float* hitDist, float* hitPos, float* hitNormal) const
+{
+	dtAssert(m_nav);
+	dtAssert(m_nodePool);
+	dtAssert(m_openList);
+	
+	// Validate input
+	if (!startRef || !m_nav->isValidPolyRef(startRef))
+		return DT_FAILURE | DT_INVALID_PARAM;
+	
+	m_nodePool->clear();
+	m_openList->clear();
+	
+	dtNode* startNode = m_nodePool->getNode(startRef);
+	dtVcopy(startNode->pos, centerPos);
+	startNode->pidx = 0;
+	startNode->cost = 0;
+	startNode->total = 0;
+	startNode->id = startRef;
+	startNode->flags = DT_NODE_OPEN;
+	m_openList->push(startNode);
+	
+	float radiusSqr = dtSqr(maxRadius);	// Shrinks to the squared distance of the closest wall found so far.
+	
+	dtStatus status = DT_SUCCESS;
+	
+	while (!m_openList->empty())
+	{
+		dtNode* bestNode = m_openList->pop();
+		bestNode->flags &= ~DT_NODE_OPEN;
+		bestNode->flags |= DT_NODE_CLOSED;
+		
+		// Get poly and tile.
+		// The API input has been checked already, skip checking internal data.
+		const dtPolyRef bestRef = bestNode->id;
+		const dtMeshTile* bestTile = 0;
+		const dtPoly* bestPoly = 0;
+		m_nav->getTileAndPolyByRefUnsafe(bestRef, &bestTile, &bestPoly);
+		
+		// Get parent poly and tile. (Only parentRef is used below; parentTile/parentPoly are fetched but not read.)
+		dtPolyRef parentRef = 0;
+		const dtMeshTile* parentTile = 0;
+		const dtPoly* parentPoly = 0;
+		if (bestNode->pidx)
+			parentRef = m_nodePool->getNodeAtIdx(bestNode->pidx)->id;
+		if (parentRef)
+			m_nav->getTileAndPolyByRefUnsafe(parentRef, &parentTile, &parentPoly);
+		
+		// Hit test walls.
+		for (int i = 0, j = (int)bestPoly->vertCount-1; i < (int)bestPoly->vertCount; j = i++)
+		{
+			// Skip non-solid edges.
+			if (bestPoly->neis[j] & DT_EXT_LINK)
+			{
+				// Tile border.
+				bool solid = true;
+				for (unsigned int k = bestPoly->firstLink; k != DT_NULL_LINK; k = bestTile->links[k].next)
+				{
+					const dtLink* link = &bestTile->links[k];
+					if (link->edge == j)
+					{
+						if (link->ref != 0)
+						{
+							const dtMeshTile* neiTile = 0;
+							const dtPoly* neiPoly = 0;
+							m_nav->getTileAndPolyByRefUnsafe(link->ref, &neiTile, &neiPoly);
+							if (filter->passFilter(link->ref, neiTile, neiPoly))
+								solid = false;
+						}
+						break;	// Only the first link found on this edge is examined.
+					}
+				}
+				if (!solid) continue;
+			}
+			else if (bestPoly->neis[j])
+			{
+				// Internal edge
+				const unsigned int idx = (unsigned int)(bestPoly->neis[j]-1);
+				const dtPolyRef ref = m_nav->getPolyRefBase(bestTile) | idx;
+				if (filter->passFilter(ref, bestTile, &bestTile->polys[idx]))
+					continue;
+			}
+			
+			// Calc distance to the edge.
+			const float* vj = &bestTile->verts[bestPoly->verts[j]*3];
+			const float* vi = &bestTile->verts[bestPoly->verts[i]*3];
+			float tseg;
+			float distSqr = dtDistancePtSegSqr2D(centerPos, vj, vi, tseg);
+			
+			// Edge is too far, skip.
+			if (distSqr > radiusSqr)
+				continue;
+			
+			// Hit wall, update radius.
+			radiusSqr = distSqr;
+			// Calculate hit pos.
+			hitPos[0] = vj[0] + (vi[0] - vj[0])*tseg;
+			hitPos[1] = vj[1] + (vi[1] - vj[1])*tseg;
+			hitPos[2] = vj[2] + (vi[2] - vj[2])*tseg;
+		}
+		
+		for (unsigned int i = bestPoly->firstLink; i != DT_NULL_LINK; i = bestTile->links[i].next)
+		{
+			const dtLink* link = &bestTile->links[i];
+			dtPolyRef neighbourRef = link->ref;
+			// Skip invalid neighbours and do not follow back to parent.
+			if (!neighbourRef || neighbourRef == parentRef)
+				continue;
+			
+			// Expand to neighbour.
+			const dtMeshTile* neighbourTile = 0;
+			const dtPoly* neighbourPoly = 0;
+			m_nav->getTileAndPolyByRefUnsafe(neighbourRef, &neighbourTile, &neighbourPoly);
+			
+			// Skip off-mesh connections.
+			if (neighbourPoly->getType() == DT_POLYTYPE_OFFMESH_CONNECTION)
+				continue;
+			
+			// Calc distance to the edge.
+			const float* va = &bestTile->verts[bestPoly->verts[link->edge]*3];
+			const float* vb = &bestTile->verts[bestPoly->verts[(link->edge+1) % bestPoly->vertCount]*3];
+			float tseg;
+			float distSqr = dtDistancePtSegSqr2D(centerPos, va, vb, tseg);
+			
+			// If the circle is not touching the next polygon, skip it.
+			if (distSqr > radiusSqr)
+				continue;
+			
+			if (!filter->passFilter(neighbourRef, neighbourTile, neighbourPoly))
+				continue;
+
+			dtNode* neighbourNode = m_nodePool->getNode(neighbourRef);
+			if (!neighbourNode)
+			{
+				status |= DT_OUT_OF_NODES;
+				continue;
+			}
+			
+			if (neighbourNode->flags & DT_NODE_CLOSED)
+				continue;
+			
+			// Cost. A node with no flags set yet gets its position from the shared edge midpoint.
+			if (neighbourNode->flags == 0)
+			{
+				getEdgeMidPoint(bestRef, bestPoly, bestTile,
+								neighbourRef, neighbourPoly, neighbourTile, neighbourNode->pos);
+			}
+			
+			const float total = bestNode->total + dtVdist(bestNode->pos, neighbourNode->pos);
+			
+			// The node is already in open list and the new result is worse, skip.
+			if ((neighbourNode->flags & DT_NODE_OPEN) && total >= neighbourNode->total)
+				continue;
+			
+			neighbourNode->id = neighbourRef;
+			neighbourNode->flags = (neighbourNode->flags & ~DT_NODE_CLOSED);
+			neighbourNode->pidx = m_nodePool->getNodeIdx(bestNode);
+			neighbourNode->total = total;
+				
+			if (neighbourNode->flags & DT_NODE_OPEN)
+			{
+				m_openList->modify(neighbourNode);
+			}
+			else
+			{
+				neighbourNode->flags |= DT_NODE_OPEN;
+				m_openList->push(neighbourNode);
+			}
+		}
+	}
+	
+	// Calc hit normal. NOTE(review): hitPos is only written when a wall was hit; otherwise the caller's buffer is read as-is here.
+	dtVsub(hitNormal, centerPos, hitPos);
+	dtVnormalize(hitNormal);
+	
+	*hitDist = sqrtf(radiusSqr);
+	
+	return status;
+}
+
+bool dtNavMeshQuery::isValidPolyRef(dtPolyRef ref, const dtQueryFilter* filter) const
+{
+	const dtMeshTile* refTile = 0;
+	const dtPoly* refPoly = 0;
+	// A reference that cannot be resolved to a tile and polygon is assumed
+	// not to exist any more, so the boundary is invalid.
+	if (dtStatusFailed(m_nav->getTileAndPolyByRef(ref, &refTile, &refPoly)))
+		return false;
+	// A resolvable polygon stays valid only while the filter still accepts
+	// it; a rejection is taken to mean its flags have changed.
+	const bool accepted = filter->passFilter(ref, refTile, refPoly);
+	return accepted;
+}
+
+/// @par
+///
+/// The closed list is the list of polygons that were fully evaluated during 
+/// the last navigation graph search. (A* or Dijkstra)
+/// 
+bool dtNavMeshQuery::isInClosedList(dtPolyRef ref) const
+{
+	if (m_nodePool == 0) return false;	// No node pool, nothing to look up.
+	const dtNode* found = m_nodePool->findNode(ref);
+	return (found != 0) && ((found->flags & DT_NODE_CLOSED) != 0);
+}
diff --git a/Engine/lib/recast/Detour/Source/DetourNode.cpp b/Engine/lib/recast/Detour/Source/DetourNode.cpp
new file mode 100644
index 000000000..de7b159bf
--- /dev/null
+++ b/Engine/lib/recast/Detour/Source/DetourNode.cpp
@@ -0,0 +1,164 @@
+//
+// Copyright (c) 2009-2010 Mikko Mononen memon@inside.org
+//
+// This software is provided 'as-is', without any express or implied
+// warranty.  In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source distribution.
+//
+
+#include "DetourNode.h"
+#include "DetourAlloc.h"
+#include "DetourAssert.h"
+#include "DetourCommon.h"
+#include <string.h>
+
+inline unsigned int dtHashRef(dtPolyRef a)	// Mixes the bits of a polygon reference into a hash value.
+{
+	a += ~(a<<15);	// Alternating left/right shift steps fold shifted copies
+	a ^=  (a>>10);	// of the value back into itself with add and xor.
+	a +=  (a<<3);
+	a ^=  (a>>6);
+	a += ~(a<<11);
+	a ^=  (a>>16);
+	return (unsigned int)a;	// Callers in this file mask the result with (m_hashSize-1) to select a bucket.
+}
+
+//////////////////////////////////////////////////////////////////////////////////////////
+dtNodePool::dtNodePool(int maxNodes, int hashSize) :	// Builds an empty pool of maxNodes nodes indexed by hashSize hash buckets.
+	m_nodes(0),
+	m_first(0),
+	m_next(0),
+	m_maxNodes(maxNodes),
+	m_hashSize(hashSize),
+	m_nodeCount(0)
+{
+	dtAssert(dtNextPow2(m_hashSize) == (unsigned int)m_hashSize);	// Buckets are selected with "& (m_hashSize-1)", which needs a power of two.
+	dtAssert(m_maxNodes > 0);
+	// Storage: the node array, one chain link per node, and one chain head per bucket.
+	m_nodes = (dtNode*)dtAlloc(sizeof(dtNode)*m_maxNodes, DT_ALLOC_PERM);
+	m_next = (dtNodeIndex*)dtAlloc(sizeof(dtNodeIndex)*m_maxNodes, DT_ALLOC_PERM);
+	m_first = (dtNodeIndex*)dtAlloc(sizeof(dtNodeIndex)*hashSize, DT_ALLOC_PERM);
+	// Allocation failure is only detected through the asserts below.
+	dtAssert(m_nodes);
+	dtAssert(m_next);
+	dtAssert(m_first);
+	// The 0xff fill sets every index to all-one bits; presumably that equals DT_NULL_IDX, the chain terminator - confirm in DetourNode.h.
+	memset(m_first, 0xff, sizeof(dtNodeIndex)*m_hashSize);
+	memset(m_next, 0xff, sizeof(dtNodeIndex)*m_maxNodes);
+}
+
+dtNodePool::~dtNodePool()	// Releases the three buffers allocated by the constructor.
+{
+	dtFree(m_nodes);
+	dtFree(m_next);
+	dtFree(m_first);
+}
+
+void dtNodePool::clear()	// Empties the pool: afterwards no node can be found and slots are handed out again from index 0.
+{
+	memset(m_first, 0xff, sizeof(dtNodeIndex)*m_hashSize);	// Reset every bucket head to the all-ones index.
+	m_nodeCount = 0;	// m_next is left as is: getNode() rewrites m_next[i] whenever slot i is handed out.
+}
+
+dtNode* dtNodePool::findNode(dtPolyRef id)
+{
+	// Follow the chain of the bucket that 'id' hashes to; 0 means not found.
+	const unsigned int slot = dtHashRef(id) & (m_hashSize-1);
+	for (dtNodeIndex idx = m_first[slot]; idx != DT_NULL_IDX; idx = m_next[idx])
+	{
+		dtNode* candidate = &m_nodes[idx];
+		if (candidate->id == id)
+			return candidate;
+	}
+	return 0;
+}
+
+dtNode* dtNodePool::getNode(dtPolyRef id)
+{
+	// Returns the node registered for 'id', creating it on first use.
+	// Returns 0 when the node is missing and the pool has no free slot left.
+	const unsigned int slot = dtHashRef(id) & (m_hashSize-1);
+	// Reuse an existing node if the bucket chain already holds one.
+	for (dtNodeIndex idx = m_first[slot]; idx != DT_NULL_IDX; idx = m_next[idx])
+	{
+		dtNode* existing = &m_nodes[idx];
+		if (existing->id == id)
+			return existing;
+	}
+	
+	// Pool exhausted: the caller has to cope with a null node.
+	if (m_nodeCount >= m_maxNodes)
+		return 0;
+	
+	// Claim the next unused slot.
+	const dtNodeIndex fresh = (dtNodeIndex)m_nodeCount++;
+	dtNode* created = &m_nodes[fresh];
+	created->id = id;
+	created->flags = 0;
+	created->pidx = 0;
+	created->cost = 0;
+	created->total = 0;
+	
+	// Push the new node onto the front of its bucket chain.
+	m_next[fresh] = m_first[slot];
+	m_first[slot] = fresh;
+	return created;
+}
+
+
+//////////////////////////////////////////////////////////////////////////////////////////
+dtNodeQueue::dtNodeQueue(int n) :	// Creates an empty queue whose heap array is sized from n.
+	m_heap(0),
+	m_capacity(n),
+	m_size(0)
+{
+	dtAssert(m_capacity > 0);
+	// The array holds one more pointer than the stated capacity.
+	m_heap = (dtNode**)dtAlloc(sizeof(dtNode*)*(m_capacity+1), DT_ALLOC_PERM);
+	dtAssert(m_heap);	// Allocation failure is only caught by this assert.
+}
+
+dtNodeQueue::~dtNodeQueue()	// Releases the heap array; the dtNode objects it points to are not freed here.
+{
+	dtFree(m_heap);
+}
+
+void dtNodeQueue::bubbleUp(int i, dtNode* node)
+{
+	// Slide parents whose total exceeds node's total down into the hole at i.
+	while (i > 0)
+	{
+		const int above = (i-1)/2;
+		if (!(m_heap[above]->total > node->total)) break;
+		m_heap[i] = m_heap[above];
+		i = above;
+	}
+	m_heap[i] = node;
+}
+
+void dtNodeQueue::trickleDown(int i, dtNode* node)
+{
+	// Move the hole at i down to a leaf by always promoting the child with
+	// the smaller total, then place node via bubbleUp() from that leaf.
+	for (int kid = (i*2)+1; kid < m_size; kid = (i*2)+1)
+	{
+		// Prefer the right sibling only when it exists and is strictly smaller.
+		const int sibling = kid+1;
+		if ((sibling < m_size) && (m_heap[kid]->total > m_heap[sibling]->total))
+			kid = sibling;
+		m_heap[i] = m_heap[kid];
+		i = kid;
+	}
+	// node may belong above the leaf position, so let it rise again.
+	bubbleUp(i, node);
+}
diff --git a/Engine/lib/recast/DetourCrowd/CMakeLists.txt b/Engine/lib/recast/DetourCrowd/CMakeLists.txt
new file mode 100644
index 000000000..0c34e1bd3
--- /dev/null
+++ b/Engine/lib/recast/DetourCrowd/CMakeLists.txt
@@ -0,0 +1,27 @@
+cmake_minimum_required(VERSION 2.6)
+# Implementation files of the DetourCrowd library.
+set(detourcrowd_SRCS
+	Source/DetourPathCorridor.cpp
+	Source/DetourLocalBoundary.cpp
+	Source/DetourObstacleAvoidance.cpp
+	Source/DetourPathQueue.cpp
+	Source/DetourCrowd.cpp
+	Source/DetourProximityGrid.cpp
+)
+# Headers are listed with the target; they are not compiled on their own.
+set(detourcrowd_HDRS
+	Include/DetourPathCorridor.h
+	Include/DetourCrowd.h
+	Include/DetourObstacleAvoidance.h
+	Include/DetourLocalBoundary.h
+	Include/DetourProximityGrid.h
+	Include/DetourPathQueue.h
+)
+# Header search paths, given relative to this directory.
+include_directories(Include 
+	../Detour/Include
+	../DetourTileCache
+	../Recast/Include
+)
+# No STATIC/SHARED keyword: the library type follows BUILD_SHARED_LIBS.
+add_library(DetourCrowd ${detourcrowd_SRCS} ${detourcrowd_HDRS})
diff --git a/Engine/lib/recast/DetourCrowd/Include/DetourCrowd.h b/Engine/lib/recast/DetourCrowd/Include/DetourCrowd.h
new file mode 100644
index 000000000..e789fd34e
--- /dev/null
+++ b/Engine/lib/recast/DetourCrowd/Include/DetourCrowd.h
@@ -0,0 +1,432 @@
+//
+// Copyright (c) 2009-2010 Mikko Mononen memon@inside.org
+//
+// This software is provided 'as-is', without any express or implied
+// warranty.  In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source distribution.
+//
+
+#ifndef DETOURCROWD_H
+#define DETOURCROWD_H
+
+#include "DetourNavMeshQuery.h"
+#include "DetourObstacleAvoidance.h"
+#include "DetourLocalBoundary.h"
+#include "DetourPathCorridor.h"
+#include "DetourProximityGrid.h"
+#include "DetourPathQueue.h"
+
+/// The maximum number of neighbors that a crowd agent can take into account
+/// for steering decisions. Sizes the dtCrowdAgent::neis array.
+/// @ingroup crowd
+static const int DT_CROWDAGENT_MAX_NEIGHBOURS = 6;
+
+/// The maximum number of corners a crowd agent will look ahead in the path.
+/// This value is used for sizing the crowd agent corner buffers.
+/// Due to the behavior of the crowd manager, the actual number of useful
+/// corners will be one less than this number.
+/// @ingroup crowd
+static const int DT_CROWDAGENT_MAX_CORNERS = 4;
+
+/// The maximum number of crowd avoidance configurations supported by the
+/// crowd manager. Sizes the dtCrowd::m_obstacleQueryParams array.
+/// @ingroup crowd
+/// @see dtObstacleAvoidanceParams, dtCrowd::setObstacleAvoidanceParams(), dtCrowd::getObstacleAvoidanceParams(),
+///		 dtCrowdAgentParams::obstacleAvoidanceType
+static const int DT_CROWD_MAX_OBSTAVOIDANCE_PARAMS = 8;
+
+/// Provides neighbor data for agents managed by the crowd.
+/// @ingroup crowd
+/// @see dtCrowdAgent::neis, dtCrowdAgent::nneis, dtCrowd
+struct dtCrowdNeighbour
+{
+	int idx;		///< The index of the neighbor in the crowd.
+	float dist;		///< The distance between the current agent and the neighbor.
+};
+
+/// The type of navigation mesh polygon the agent is currently traversing. (Stored in dtCrowdAgent::state.)
+/// @ingroup crowd
+enum CrowdAgentState
+{
+	DT_CROWDAGENT_STATE_INVALID,		///< The agent is not in a valid state.
+	DT_CROWDAGENT_STATE_WALKING,		///< The agent is traversing a normal navigation mesh polygon.
+	DT_CROWDAGENT_STATE_OFFMESH,		///< The agent is traversing an off-mesh connection.
+};
+
+/// Configuration parameters for a crowd agent.
+/// @ingroup crowd
+struct dtCrowdAgentParams
+{
+	float radius;						///< Agent radius. [Limit: >= 0]
+	float height;						///< Agent height. [Limit: > 0]
+	float maxAcceleration;				///< Maximum allowed acceleration. [Limit: >= 0]
+	float maxSpeed;						///< Maximum allowed speed. [Limit: >= 0]
+
+	/// Defines how close a collision element must be before it is considered for steering behaviors. [Limits: > 0]
+	float collisionQueryRange;
+
+	float pathOptimizationRange;		///< The path visibility optimization range. [Limit: > 0]
+
+	/// How aggressive the agent manager should be at avoiding collisions with this agent. [Limit: >= 0]
+	float separationWeight;
+
+	/// Flags that impact steering behavior. (See: #UpdateFlags)
+	unsigned char updateFlags;
+
+	/// The index of the avoidance configuration to use for the agent. 
+	/// [Limits: 0 <= value < #DT_CROWD_MAX_OBSTAVOIDANCE_PARAMS]
+	unsigned char obstacleAvoidanceType;	
+
+	/// User defined data attached to the agent.
+	void* userData;
+};
+
+enum MoveRequestState	// State of an agent's movement request; presumably the values kept in dtCrowdAgent::targetState - confirm in DetourCrowd.cpp.
+{
+	DT_CROWDAGENT_TARGET_NONE = 0,
+	DT_CROWDAGENT_TARGET_FAILED,
+	DT_CROWDAGENT_TARGET_VALID,
+	DT_CROWDAGENT_TARGET_REQUESTING,
+	DT_CROWDAGENT_TARGET_WAITING_FOR_QUEUE,
+	DT_CROWDAGENT_TARGET_WAITING_FOR_PATH,
+	DT_CROWDAGENT_TARGET_VELOCITY,	// In this case dtCrowdAgent::targetPos holds a velocity instead of a position.
+};
+
+/// Represents an agent managed by a #dtCrowd object.
+/// @ingroup crowd
+struct dtCrowdAgent
+{
+	/// 1 if the agent is active, or 0 if the agent is in an unused slot in the agent pool.
+	unsigned char active;
+
+	/// The type of mesh polygon the agent is traversing. (See: #CrowdAgentState)
+	unsigned char state;
+
+	/// The path corridor the agent is using.
+	dtPathCorridor corridor;
+
+	/// The local boundary data for the agent.
+	dtLocalBoundary boundary;
+	
+	/// Time since the agent's path corridor was optimized.
+	float topologyOptTime;
+	
+	/// The known neighbors of the agent.
+	dtCrowdNeighbour neis[DT_CROWDAGENT_MAX_NEIGHBOURS];
+
+	/// The number of neighbors.
+	int nneis;
+	
+	/// The desired speed.
+	float desiredSpeed;
+
+	float npos[3];		///< The current agent position. [(x, y, z)]
+	float disp[3];
+	float dvel[3];		///< The desired velocity of the agent. [(x, y, z)]
+	float nvel[3];
+	float vel[3];		///< The actual velocity of the agent. [(x, y, z)]
+
+	/// The agent's configuration parameters.
+	dtCrowdAgentParams params;
+
+	/// The local path corridor corners for the agent. (Straight path.) [(x, y, z) * #ncorners]
+	float cornerVerts[DT_CROWDAGENT_MAX_CORNERS*3];
+
+	/// The local path corridor corner flags. (See: #dtStraightPathFlags) [(flags) * #ncorners]
+	unsigned char cornerFlags[DT_CROWDAGENT_MAX_CORNERS];
+
+	/// The reference id of the polygon being entered at the corner. [(polyRef) * #ncorners]
+	dtPolyRef cornerPolys[DT_CROWDAGENT_MAX_CORNERS];
+
+	/// The number of corners.
+	int ncorners;
+	
+	unsigned char targetState;			///< State of the movement request.
+	dtPolyRef targetRef;				///< Target polyref of the movement request.
+	float targetPos[3];					///< Target position of the movement request (or velocity in case of DT_CROWDAGENT_TARGET_VELOCITY).
+	dtPathQueueRef targetPathqRef;		///< Path finder ref.
+	bool targetReplan;					///< Flag indicating that the current path is being replanned.
+	float targetReplanTime;				///< Time since the agent's target was replanned.
+};
+
+struct dtCrowdAgentAnimation	// Animation record; dtCrowd refers to these through its m_agentAnims pointer.
+{
+	unsigned char active;
+	float initPos[3], startPos[3], endPos[3];
+	dtPolyRef polyRef;
+	float t, tmax;	// NOTE(review): presumably elapsed time and total duration - confirm in DetourCrowd.cpp.
+};
+
+/// Crowd agent update flags. Each value is a distinct bit, so several can be combined in one flags field.
+/// @ingroup crowd
+/// @see dtCrowdAgentParams::updateFlags
+enum UpdateFlags
+{
+	DT_CROWD_ANTICIPATE_TURNS = 1,
+	DT_CROWD_OBSTACLE_AVOIDANCE = 2,
+	DT_CROWD_SEPARATION = 4,
+	DT_CROWD_OPTIMIZE_VIS = 8,			///< Use #dtPathCorridor::optimizePathVisibility() to optimize the agent path.
+	DT_CROWD_OPTIMIZE_TOPO = 16,		///< Use #dtPathCorridor::optimizePathTopology() to optimize the agent path.
+};
+
+struct dtCrowdAgentDebugInfo	// Optional debug record that can be handed to dtCrowd::update().
+{
+	int idx;	// NOTE(review): presumably the index of the agent to collect debug data for - confirm in DetourCrowd.cpp.
+	float optStart[3], optEnd[3];
+	dtObstacleAvoidanceDebugData* vod;
+};
+
+/// Provides local steering behaviors for a group of agents. 
+/// @ingroup crowd
+class dtCrowd
+{
+	int m_maxAgents;
+	dtCrowdAgent* m_agents;
+	dtCrowdAgent** m_activeAgents;
+	dtCrowdAgentAnimation* m_agentAnims;
+	
+	dtPathQueue m_pathq;
+
+	dtObstacleAvoidanceParams m_obstacleQueryParams[DT_CROWD_MAX_OBSTAVOIDANCE_PARAMS];
+	dtObstacleAvoidanceQuery* m_obstacleQuery;
+	
+	dtProximityGrid* m_grid;
+	
+	dtPolyRef* m_pathResult;
+	int m_maxPathResult;
+	
+	float m_ext[3];
+	dtQueryFilter m_filter;
+	
+	float m_maxAgentRadius;
+
+	int m_velocitySampleCount;
+
+	dtNavMeshQuery* m_navquery;
+
+	void updateTopologyOptimization(dtCrowdAgent** agents, const int nagents, const float dt);
+	void updateMoveRequest(const float dt);
+	void checkPathValidity(dtCrowdAgent** agents, const int nagents, const float dt);
+
+	inline int getAgentIndex(const dtCrowdAgent* agent) const  { return (int)(agent - m_agents); }	// Pointer difference is ptrdiff_t; narrow it explicitly.
+
+	bool requestMoveTargetReplan(const int idx, dtPolyRef ref, const float* pos);
+
+	void purge();
+	
+public:
+	dtCrowd();
+	~dtCrowd();
+	
+	/// Initializes the crowd.  
+	///  @param[in]		maxAgents		The maximum number of agents the crowd can manage. [Limit: >= 1]
+	///  @param[in]		maxAgentRadius	The maximum radius of any agent that will be added to the crowd. [Limit: > 0]
+	///  @param[in]		nav				The navigation mesh to use for planning.
+	/// @return True if the initialization succeeded.
+	bool init(const int maxAgents, const float maxAgentRadius, dtNavMesh* nav);
+	
+	/// Sets the shared avoidance configuration for the specified index.
+	///  @param[in]		idx		The index. [Limits: 0 <= value < #DT_CROWD_MAX_OBSTAVOIDANCE_PARAMS]
+	///  @param[in]		params	The new configuration.
+	void setObstacleAvoidanceParams(const int idx, const dtObstacleAvoidanceParams* params);
+
+	/// Gets the shared avoidance configuration for the specified index.
+	///  @param[in]		idx		The index of the configuration to retrieve. 
+	///							[Limits:  0 <= value < #DT_CROWD_MAX_OBSTAVOIDANCE_PARAMS]
+	/// @return The requested configuration.
+	const dtObstacleAvoidanceParams* getObstacleAvoidanceParams(const int idx) const;
+	
+	/// Gets the specified agent from the pool.
+	///	 @param[in]		idx		The agent index. [Limits: 0 <= value < #getAgentCount()]
+	/// @return The requested agent.
+	const dtCrowdAgent* getAgent(const int idx);
+
+	/// The maximum number of agents that can be managed by the object.
+	/// @return The maximum number of agents.
+	const int getAgentCount() const;
+	
+	/// Adds a new agent to the crowd.
+	///  @param[in]		pos		The requested position of the agent. [(x, y, z)]
+	///  @param[in]		params	The configuration of the agent.
+	/// @return The index of the agent in the agent pool. Or -1 if the agent could not be added.
+	int addAgent(const float* pos, const dtCrowdAgentParams* params);
+
+	/// Updates the specified agent's configuration.
+	///  @param[in]		idx		The agent index. [Limits: 0 <= value < #getAgentCount()]
+	///  @param[in]		params	The new agent configuration.
+	void updateAgentParameters(const int idx, const dtCrowdAgentParams* params);
+
+	/// Removes the agent from the crowd.
+	///  @param[in]		idx		The agent index. [Limits: 0 <= value < #getAgentCount()]
+	void removeAgent(const int idx);
+	
+	/// Submits a new move request for the specified agent.
+	///  @param[in]		idx		The agent index. [Limits: 0 <= value < #getAgentCount()]
+	///  @param[in]		ref		The position's polygon reference.
+	///  @param[in]		pos		The position within the polygon. [(x, y, z)]
+	/// @return True if the request was successfully submitted.
+	bool requestMoveTarget(const int idx, dtPolyRef ref, const float* pos);
+
+	/// Submits a new move request for the specified agent.
+	///  @param[in]		idx		The agent index. [Limits: 0 <= value < #getAgentCount()]
+	///  @param[in]		vel		The movement velocity. [(x, y, z)]
+	/// @return True if the request was successfully submitted.
+	bool requestMoveVelocity(const int idx, const float* vel);
+
+	/// Resets any request for the specified agent.
+	///  @param[in]		idx		The agent index. [Limits: 0 <= value < #getAgentCount()]
+	/// @return True if the request was successfully reset.
+	bool resetMoveTarget(const int idx);
+
+	/// Gets the active agents in the agent pool.
+	///  @param[out]	agents		An array of agent pointers. [(#dtCrowdAgent *) * maxAgents]
+	///  @param[in]		maxAgents	The size of the crowd agent array.
+	/// @return The number of agents returned in @p agents.
+	int getActiveAgents(dtCrowdAgent** agents, const int maxAgents);
+
+	/// Updates the steering and positions of all agents.
+	///  @param[in]		dt		The time, in seconds, to update the simulation. [Limit: > 0]
+	///  @param[out]	debug	A debug object to load with debug information. [Opt]
+	void update(const float dt, dtCrowdAgentDebugInfo* debug);
+	
+	/// Gets the filter used by the crowd.
+	/// @return The filter used by the crowd.
+	const dtQueryFilter* getFilter() const { return &m_filter; }
+
+	/// Gets the filter used by the crowd, in editable (non-const) form.
+	/// @return The filter used by the crowd.
+	dtQueryFilter* getEditableFilter() { return &m_filter; }
+
+	/// Gets the search extents [(x, y, z)] used by the crowd for query operations. 
+	/// @return The search extents used by the crowd. [(x, y, z)]
+	const float* getQueryExtents() const { return m_ext; }
+	
+	/// Gets the velocity sample count.
+	/// @return The velocity sample count.
+	inline int getVelocitySampleCount() const { return m_velocitySampleCount; }
+	
+	/// Gets the crowd's proximity grid.
+	/// @return The crowd's proximity grid.
+	const dtProximityGrid* getGrid() const { return m_grid; }
+
+	/// Gets the crowd's path request queue.
+	/// @return The crowd's path request queue.
+	const dtPathQueue* getPathQueue() const { return &m_pathq; }
+
+	/// Gets the query object used by the crowd.
+	const dtNavMeshQuery* getNavMeshQuery() const { return m_navquery; }
+};
+
+/// Allocates a crowd object using the Detour allocator.
+/// @return A crowd object that is ready for initialization, or null on failure.
+/// @ingroup crowd
+dtCrowd* dtAllocCrowd();
+
+/// Frees the specified crowd object using the Detour allocator.
+///  @param[in]		ptr		A crowd object allocated using #dtAllocCrowd
+/// @ingroup crowd
+void dtFreeCrowd(dtCrowd* ptr);
+
+
+#endif // DETOURCROWD_H
+
+///////////////////////////////////////////////////////////////////////////
+
+// This section contains detailed documentation for members that don't have
+// a source file. It reduces clutter in the main section of the header.
+
+/**
+
+@defgroup crowd Crowd
+
+Members in this module implement local steering and dynamic avoidance features.
+
+The crowd is the big beast of the navigation features. It not only handles a 
+lot of the path management for you, but also local steering and dynamic 
+avoidance between members of the crowd. I.e. It can keep your agents from 
+running into each other.
+
+Main class: #dtCrowd
+
+The #dtNavMeshQuery and #dtPathCorridor classes provide perfectly good, easy 
+to use path planning features. But in the end they only give you points that 
+your navigation client should be moving toward. When it comes to deciding things 
+like agent velocity and steering to avoid other agents, that is up to you to 
+implement. Unless, of course, you decide to use #dtCrowd.
+
+Basically, you add an agent to the crowd, providing various configuration 
+settings such as maximum speed and acceleration. You also provide a local 
+target to move toward. The crowd manager then provides, with every update, the 
+new agent position and velocity for the frame. The movement will be 
+constrained to the navigation mesh, and steering will be applied to ensure 
+agents managed by the crowd do not collide with each other.
+
+This is a very powerful feature set. But it comes with limitations.
+
+The biggest limitation is that you must give control of the agent's position 
+completely over to the crowd manager. You can update things like maximum speed 
+and acceleration. But in order for the crowd manager to do its thing, it can't 
+allow you to constantly be giving it overrides to position and velocity. So 
+you give up direct control of the agent's movement. It belongs to the crowd.
+
+The second biggest limitation revolves around the fact that the crowd manager 
+deals with local planning. So the agent's target should never be more than 
+256 polygons away from its current position. If it is, you risk 
+your agent failing to reach its target. So you may still need to do long 
+distance planning and provide the crowd manager with intermediate targets.
+
+Other significant limitations:
+
+- All agents using the crowd manager will use the same #dtQueryFilter.
+- Crowd management is relatively expensive. The maximum agents under crowd 
+  management at any one time is between 20 and 30.  A good place to start
+  is a maximum of 25 agents for 0.5ms per frame.
+
+@note This is a summary list of members.  Use the index or search 
+feature to find minor members.
+
+@struct dtCrowdAgentParams
+@see dtCrowdAgent, dtCrowd::addAgent(), dtCrowd::updateAgentParameters()
+
+@var dtCrowdAgentParams::obstacleAvoidanceType
+@par
+
+#dtCrowd permits agents to use different avoidance configurations.  This value 
+is the index of the #dtObstacleAvoidanceParams within the crowd.
+
+@see dtObstacleAvoidanceParams, dtCrowd::setObstacleAvoidanceParams(), 
+	 dtCrowd::getObstacleAvoidanceParams()
+
+@var dtCrowdAgentParams::collisionQueryRange
+@par
+
+Collision elements include other agents and navigation mesh boundaries.
+
+This value is often based on the agent radius and/or maximum speed. E.g. radius * 8
+
+@var dtCrowdAgentParams::pathOptimizationRange
+@par
+
+Only applicable if #updateFlags includes the #DT_CROWD_OPTIMIZE_VIS flag.
+
+This value is often based on the agent radius. E.g. radius * 30
+
+@see dtPathCorridor::optimizePathVisibility()
+
+@var dtCrowdAgentParams::separationWeight
+@par
+
+A higher value will result in agents trying to stay farther away from each other at 
+the cost of more difficult steering in tight spaces.
+
+*/
\ No newline at end of file
diff --git a/Engine/lib/recast/DetourCrowd/Include/DetourLocalBoundary.h b/Engine/lib/recast/DetourCrowd/Include/DetourLocalBoundary.h
new file mode 100644
index 000000000..d77a13690
--- /dev/null
+++ b/Engine/lib/recast/DetourCrowd/Include/DetourLocalBoundary.h
@@ -0,0 +1,61 @@
+//
+// Copyright (c) 2009-2010 Mikko Mononen memon@inside.org
+//
+// This software is provided 'as-is', without any express or implied
+// warranty.  In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source distribution.
+//
+
+#ifndef DETOURLOCALBOUNDARY_H
+#define DETOURLOCALBOUNDARY_H
+
+#include "DetourNavMeshQuery.h"
+
+
+class dtLocalBoundary	// Holds up to MAX_LOCAL_SEGS segments and MAX_LOCAL_POLYS polygon references, plus a center position.
+{
+	static const int MAX_LOCAL_SEGS = 8;	///< Capacity of m_segs.
+	static const int MAX_LOCAL_POLYS = 16;	///< Capacity of m_polys.
+	
+	struct Segment
+	{
+		float s[6];	///< Segment start/end
+		float d;	///< Distance for pruning.
+	};
+	
+	float m_center[3];	///< Returned by getCenter().
+	Segment m_segs[MAX_LOCAL_SEGS];
+	int m_nsegs;	///< Returned by getSegmentCount().
+	
+	dtPolyRef m_polys[MAX_LOCAL_POLYS];
+	int m_npolys;	///< NOTE(review): presumably the number of used entries in m_polys - confirm in the .cpp.
+
+	void addSegment(const float dist, const float* seg);
+	
+public:
+	dtLocalBoundary();
+	~dtLocalBoundary();
+	
+	void reset();
+	
+	void update(dtPolyRef ref, const float* pos, const float collisionQueryRange,
+				dtNavMeshQuery* navquery, const dtQueryFilter* filter);
+	
+	bool isValid(dtNavMeshQuery* navquery, const dtQueryFilter* filter);
+	
+	inline const float* getCenter() const { return m_center; }
+	inline int getSegmentCount() const { return m_nsegs; }
+	inline const float* getSegment(int i) const { return m_segs[i].s; }	///< Six floats (start/end) of segment i; i is not range-checked here.
+};
+
+#endif // DETOURLOCALBOUNDARY_H
diff --git a/Engine/lib/recast/DetourCrowd/Include/DetourObstacleAvoidance.h b/Engine/lib/recast/DetourCrowd/Include/DetourObstacleAvoidance.h
new file mode 100644
index 000000000..8ff6211e8
--- /dev/null
+++ b/Engine/lib/recast/DetourCrowd/Include/DetourObstacleAvoidance.h
@@ -0,0 +1,154 @@
+//
+// Copyright (c) 2009-2010 Mikko Mononen memon@inside.org
+//
+// This software is provided 'as-is', without any express or implied
+// warranty.  In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source distribution.
+//
+
+#ifndef DETOUROBSTACLEAVOIDANCE_H
+#define DETOUROBSTACLEAVOIDANCE_H
+
+struct dtObstacleCircle
+{
+	float p[3];				///< Position of the obstacle
+	float vel[3];			///< Velocity of the obstacle
+	float dvel[3];			///< Desired velocity of the obstacle (presumably; mirrors the vel/dvel argument pair of addCircle())
+	float rad;				///< Radius of the obstacle
+	float dp[3], np[3];		///< Use for side selection during sampling.
+};
+
+struct dtObstacleSegment	// Segment obstacle as stored by dtObstacleAvoidanceQuery.
+{
+	float p[3], q[3];		///< End points of the obstacle segment
+	bool touch;				// NOTE(review): undocumented; its meaning is not visible in this header.
+};
+
+
+class dtObstacleAvoidanceDebugData	// Stores per-sample velocity, size and penalty values in parallel arrays.
+{
+public:
+	dtObstacleAvoidanceDebugData();
+	~dtObstacleAvoidanceDebugData();
+	
+	bool init(const int maxSamples);
+	void reset();
+	void addSample(const float* vel, const float ssize, const float pen,
+				   const float vpen, const float vcpen, const float spen, const float tpen);
+	
+	void normalizeSamples();
+	// The accessors below index the sample arrays directly; i is not range-checked.
+	inline int getSampleCount() const { return m_nsamples; }
+	inline const float* getSampleVelocity(const int i) const { return &m_vel[i*3]; }	// Three floats per sample.
+	inline float getSampleSize(const int i) const { return m_ssize[i]; }
+	inline float getSamplePenalty(const int i) const { return m_pen[i]; }
+	inline float getSampleDesiredVelocityPenalty(const int i) const { return m_vpen[i]; }
+	inline float getSampleCurrentVelocityPenalty(const int i) const { return m_vcpen[i]; }
+	inline float getSamplePreferredSidePenalty(const int i) const { return m_spen[i]; }
+	inline float getSampleCollisionTimePenalty(const int i) const { return m_tpen[i]; }
+
+private:
+	int m_nsamples;		// Returned by getSampleCount().
+	int m_maxSamples;
+	float* m_vel;		// Read by getSampleVelocity().
+	float* m_ssize;		// Read by getSampleSize().
+	float* m_pen;		// Read by getSamplePenalty().
+	float* m_vpen;		// Read by getSampleDesiredVelocityPenalty().
+	float* m_vcpen;		// Read by getSampleCurrentVelocityPenalty().
+	float* m_spen;		// Read by getSamplePreferredSidePenalty().
+	float* m_tpen;		// Read by getSampleCollisionTimePenalty().
+};
+
+dtObstacleAvoidanceDebugData* dtAllocObstacleAvoidanceDebugData();	// Creates a debug-data object; release it with dtFreeObstacleAvoidanceDebugData().
+void dtFreeObstacleAvoidanceDebugData(dtObstacleAvoidanceDebugData* ptr);	// Counterpart of dtAllocObstacleAvoidanceDebugData().
+
+
+static const int DT_MAX_PATTERN_DIVS = 32;	///< Max number of adaptive divs.
+static const int DT_MAX_PATTERN_RINGS = 4;	///< Max number of adaptive rings.
+
+struct dtObstacleAvoidanceParams	// Parameter set passed to the dtObstacleAvoidanceQuery sampleVelocity*() calls.
+{
+	float velBias;
+	float weightDesVel;
+	float weightCurVel;
+	float weightSide;
+	float weightToi;
+	float horizTime;
+	unsigned char gridSize;	///< Grid sampling parameter (presumably read by sampleVelocityGrid() - confirm).
+	unsigned char adaptiveDivs;	///< Adaptive sampling parameter (presumably limited by DT_MAX_PATTERN_DIVS - confirm).
+	unsigned char adaptiveRings;	///< Adaptive sampling parameter (presumably limited by DT_MAX_PATTERN_RINGS - confirm).
+	unsigned char adaptiveDepth;	///< Adaptive sampling parameter.
+};
+
+class dtObstacleAvoidanceQuery	// Collects circle and segment obstacles and samples a new velocity against them.
+{
+public:
+	dtObstacleAvoidanceQuery();
+	~dtObstacleAvoidanceQuery();
+	
+	bool init(const int maxCircles, const int maxSegments);
+	
+	void reset();
+
+	void addCircle(const float* pos, const float rad,
+				   const float* vel, const float* dvel);
+				   
+	void addSegment(const float* p, const float* q);
+
+	int sampleVelocityGrid(const float* pos, const float rad, const float vmax,
+						   const float* vel, const float* dvel, float* nvel,
+						   const dtObstacleAvoidanceParams* params,
+						   dtObstacleAvoidanceDebugData* debug = 0);	// debug is optional and defaults to null.
+
+	int sampleVelocityAdaptive(const float* pos, const float rad, const float vmax,
+							   const float* vel, const float* dvel, float* nvel,
+							   const dtObstacleAvoidanceParams* params, 
+							   dtObstacleAvoidanceDebugData* debug = 0);	// debug is optional and defaults to null.
+	
+	inline int getObstacleCircleCount() const { return m_ncircles; }
+	const dtObstacleCircle* getObstacleCircle(const int i) { return &m_circles[i]; }	// i is not range-checked; NOTE(review): not const-qualified, unlike the count getter.
+
+	inline int getObstacleSegmentCount() const { return m_nsegments; }
+	const dtObstacleSegment* getObstacleSegment(const int i) { return &m_segments[i]; }	// i is not range-checked; NOTE(review): not const-qualified, unlike the count getter.
+
+private:
+
+	void prepare(const float* pos, const float* dvel);
+
+	float processSample(const float* vcand, const float cs,
+						const float* pos, const float rad,
+						const float* vel, const float* dvel,
+						dtObstacleAvoidanceDebugData* debug);
+
+	dtObstacleCircle* insertCircle(const float dist);
+	dtObstacleSegment* insertSegment(const float dist);
+
+	dtObstacleAvoidanceParams m_params;
+	float m_invHorizTime;
+	float m_vmax;
+	float m_invVmax;
+
+	int m_maxCircles;
+	dtObstacleCircle* m_circles;	// Indexed by getObstacleCircle().
+	int m_ncircles;	// Returned by getObstacleCircleCount().
+
+	int m_maxSegments;
+	dtObstacleSegment* m_segments;	// Indexed by getObstacleSegment().
+	int m_nsegments;	// Returned by getObstacleSegmentCount().
+};
+
+dtObstacleAvoidanceQuery* dtAllocObstacleAvoidanceQuery();	// Creates a query object; release it with dtFreeObstacleAvoidanceQuery().
+void dtFreeObstacleAvoidanceQuery(dtObstacleAvoidanceQuery* ptr);	// Counterpart of dtAllocObstacleAvoidanceQuery().
+
+
+#endif // DETOUROBSTACLEAVOIDANCE_H
diff --git a/Engine/lib/recast/DetourCrowd/Include/DetourPathCorridor.h b/Engine/lib/recast/DetourCrowd/Include/DetourPathCorridor.h
new file mode 100644
index 000000000..9544ea52d
--- /dev/null
+++ b/Engine/lib/recast/DetourCrowd/Include/DetourPathCorridor.h
@@ -0,0 +1,144 @@
+//
+// Copyright (c) 2009-2010 Mikko Mononen memon@inside.org
+//
+// This software is provided 'as-is', without any express or implied
+// warranty.  In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source distribution.
+//
+
+#ifndef DETOUTPATHCORRIDOR_H
+#define DETOUTPATHCORRIDOR_H
+
+#include "DetourNavMeshQuery.h"
+
+/// Represents a dynamic polygon corridor used to plan agent movement.
+/// @ingroup crowd, detour
+class dtPathCorridor
+{
+	float m_pos[3];		///< Current position; returned by getPos().
+	float m_target[3];	///< Current target; returned by getTarget().
+	
+	dtPolyRef* m_path;	///< Polygon references making up the corridor; returned by getPath().
+	int m_npath;		///< Number of polygons in m_path; returned by getPathCount().
+	int m_maxPath;		///< NOTE(review): presumably the capacity of m_path requested in init() - confirm.
+	
+public:
+	dtPathCorridor();
+	~dtPathCorridor();
+	
+	/// Allocates the corridor's path buffer.
+	///  @param[in]		maxPath		The maximum path size the corridor can handle.
+	/// @return True if the initialization succeeded.
+	bool init(const int maxPath);
+	
+	/// Resets the path corridor to the specified position.
+	///  @param[in]		ref		The polygon reference containing the position.
+	///  @param[in]		pos		The new position in the corridor. [(x, y, z)]
+	void reset(dtPolyRef ref, const float* pos);
+	
+	/// Finds the corners in the corridor from the position toward the target. (The straightened path.)
+	///  @param[out]	cornerVerts		The corner vertices. [(x, y, z) * cornerCount] [Size: <= maxCorners]
+	///  @param[out]	cornerFlags		The flag for each corner. [(flag) * cornerCount] [Size: <= maxCorners]
+	///  @param[out]	cornerPolys		The polygon reference for each corner. [(polyRef) * cornerCount] 
+	///  								[Size: <= @p maxCorners]
+	///  @param[in]		maxCorners		The maximum number of corners the buffers can hold.
+	///  @param[in]		navquery		The query object used to build the corridor.
+	///  @param[in]		filter			The filter to apply to the operation.
+	/// @return The number of corners returned in the corner buffers. [0 <= value <= @p maxCorners]
+	int findCorners(float* cornerVerts, unsigned char* cornerFlags,
+					dtPolyRef* cornerPolys, const int maxCorners,
+					dtNavMeshQuery* navquery, const dtQueryFilter* filter);
+	
+	/// Attempts to optimize the path if the specified point is visible from the current position.
+	///  @param[in]		next					The point to search toward. [(x, y, z)]
+	///  @param[in]		pathOptimizationRange	The maximum range to search. [Limit: > 0]
+	///  @param[in]		navquery				The query object used to build the corridor.
+	///  @param[in]		filter					The filter to apply to the operation.
+	void optimizePathVisibility(const float* next, const float pathOptimizationRange,
+								dtNavMeshQuery* navquery, const dtQueryFilter* filter);
+	
+	/// Attempts to optimize the path using a local area search. (Partial replanning.)
+	///  @param[in]		navquery	The query object used to build the corridor.
+	///  @param[in]		filter		The filter to apply to the operation.
+	bool optimizePathTopology(dtNavMeshQuery* navquery, const dtQueryFilter* filter);
+	
+	/// NOTE(review): undocumented in the original header and the implementation is not
+	/// visible here. Presumably advances the corridor across the off-mesh connection
+	/// @p offMeshConRef and reports its polygons and end points through @p refs,
+	/// @p startPos and @p endPos - confirm against the implementation.
+	bool moveOverOffmeshConnection(dtPolyRef offMeshConRef, dtPolyRef* refs,
+								   float* startPos, float* endPos,
+								   dtNavMeshQuery* navquery);
+
+	/// NOTE(review): undocumented in the original header. Presumably re-anchors the
+	/// start of the corridor at @p safeRef / @p safePos - confirm against the implementation.
+	bool fixPathStart(dtPolyRef safeRef, const float* safePos);
+
+	/// NOTE(review): undocumented in the original header. Presumably drops polygons that
+	/// are no longer valid, falling back to @p safeRef / @p safePos - confirm against
+	/// the implementation.
+	bool trimInvalidPath(dtPolyRef safeRef, const float* safePos,
+						 dtNavMeshQuery* navquery, const dtQueryFilter* filter);
+	
+	/// Checks the current corridor path to see if its polygon references remain valid.
+	///  @param[in]		maxLookAhead	The number of polygons from the beginning of the corridor to search.
+	///  @param[in]		navquery		The query object used to build the corridor.
+	///  @param[in]		filter			The filter to apply to the operation.
+	bool isValid(const int maxLookAhead, dtNavMeshQuery* navquery, const dtQueryFilter* filter);
+	
+	/// Moves the position from the current location to the desired location, adjusting the corridor 
+	/// as needed to reflect the change.
+	///  @param[in]		npos		The desired new position. [(x, y, z)]
+	///  @param[in]		navquery	The query object used to build the corridor.
+	///  @param[in]		filter		The filter to apply to the operation.
+	void movePosition(const float* npos, dtNavMeshQuery* navquery, const dtQueryFilter* filter);
+
+	/// Moves the target from the current location to the desired location, adjusting the corridor
+	/// as needed to reflect the change.
+	///  @param[in]		npos		The desired new target position. [(x, y, z)]
+	///  @param[in]		navquery	The query object used to build the corridor.
+	///  @param[in]		filter		The filter to apply to the operation.
+	void moveTargetPosition(const float* npos, dtNavMeshQuery* navquery, const dtQueryFilter* filter);
+	
+	/// Loads a new path and target into the corridor.
+	///  @param[in]		target		The target location within the last polygon of the path. [(x, y, z)]
+	///  @param[in]		polys		The path corridor. [(polyRef) * @p npath]
+	///  @param[in]		npath		The number of polygons in the path.
+	void setCorridor(const float* target, const dtPolyRef* polys, const int npath);
+	
+	/// Gets the current position within the corridor. (In the first polygon.)
+	/// @return The current position within the corridor.
+	inline const float* getPos() const { return m_pos; }
+
+	/// Gets the current target within the corridor. (In the last polygon.)
+	/// @return The current target within the corridor.
+	inline const float* getTarget() const { return m_target; }
+	
+	/// The polygon reference id of the first polygon in the corridor, the polygon containing the position.
+	/// @return The polygon reference id of the first polygon in the corridor. (Or zero if there is no path.)
+	inline dtPolyRef getFirstPoly() const { return m_npath ? m_path[0] : 0; }
+
+	/// The polygon reference id of the last polygon in the corridor, the polygon containing the target.
+	/// @return The polygon reference id of the last polygon in the corridor. (Or zero if there is no path.)
+	inline dtPolyRef getLastPoly() const { return m_npath ? m_path[m_npath-1] : 0; }
+	
+	/// The corridor's path.
+	/// @return The corridor's path. [(polyRef) * #getPathCount()]
+	inline const dtPolyRef* getPath() const { return m_path; }
+
+	/// The number of polygons in the current corridor path.
+	/// @return The number of polygons in the current corridor path.
+	inline int getPathCount() const { return m_npath; } 	
+};
+
+// Corridor merge helpers. Only the declarations are visible in this header.
+// NOTE(review): presumably each one merges the `visited` polygon list (nvisited entries)
+// into `path` (npath entries, capacity maxPath) and returns the resulting path length -
+// confirm against the implementation before relying on this.
+
+/// NOTE(review): presumably used after the corridor's start position has moved - confirm.
+int dtMergeCorridorStartMoved(dtPolyRef* path, const int npath, const int maxPath,
+							  const dtPolyRef* visited, const int nvisited);
+
+/// NOTE(review): presumably used after the corridor's end (target) has moved - confirm.
+int dtMergeCorridorEndMoved(dtPolyRef* path, const int npath, const int maxPath,
+							const dtPolyRef* visited, const int nvisited);
+
+/// NOTE(review): presumably used when a shortcut toward the start has been found - confirm.
+int dtMergeCorridorStartShortcut(dtPolyRef* path, const int npath, const int maxPath,
+								 const dtPolyRef* visited, const int nvisited);
+
+#endif // DETOUTPATHCORRIDOR_H
diff --git a/Engine/lib/recast/DetourCrowd/Include/DetourPathQueue.h b/Engine/lib/recast/DetourCrowd/Include/DetourPathQueue.h
new file mode 100644
index 000000000..fe3920b60
--- /dev/null
+++ b/Engine/lib/recast/DetourCrowd/Include/DetourPathQueue.h
@@ -0,0 +1,75 @@
+//
+// Copyright (c) 2009-2010 Mikko Mononen memon@inside.org
+//
+// This software is provided 'as-is', without any express or implied
+// warranty.  In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source distribution.
+//
+
+#ifndef DETOURPATHQUEUE_H
+#define DETOURPATHQUEUE_H
+
+#include "DetourNavMesh.h"
+#include "DetourNavMeshQuery.h"
+
+static const unsigned int DT_PATHQ_INVALID = 0;
+
+typedef unsigned int dtPathQueueRef;
+
+/// A fixed-size queue of path-find requests, each identified by a #dtPathQueueRef handle.
+/// The queue has MAX_QUEUE request slots.
+class dtPathQueue
+{
+	/// One request slot.
+	struct PathQuery
+	{
+		dtPathQueueRef ref;
+		/// Path find start and end location.
+		float startPos[3], endPos[3];
+		dtPolyRef startRef, endRef;
+		/// Result.
+		dtPolyRef* path;
+		int npath;
+		/// State.
+		dtStatus status;
+		int keepAlive;
+		// Held as a raw pointer, not a copy. NOTE(review): the filter passed to request()
+		// presumably has to outlive the request - confirm.
+		const dtQueryFilter* filter; ///< TODO: This is potentially dangerous!
+	};
+	
+	static const int MAX_QUEUE = 8;		///< Number of request slots.
+	PathQuery m_queue[MAX_QUEUE];		///< The request slots.
+	dtPathQueueRef m_nextHandle;
+	int m_maxPathSize;
+	int m_queueHead;
+	dtNavMeshQuery* m_navquery;			///< Returned by getNavQuery().
+	
+	void purge();
+	
+public:
+	dtPathQueue();
+	~dtPathQueue();
+	
+	/// Initializes the queue.
+	///  @param[in]		maxPathSize			NOTE(review): presumably the maximum path length per request - confirm.
+	///  @param[in]		maxSearchNodeCount	NOTE(review): presumably the node pool size of the internal query - confirm.
+	///  @param[in]		nav					The navigation mesh to search.
+	/// @return dtCrowd::init() treats a false result as a failure.
+	bool init(const int maxPathSize, const int maxSearchNodeCount, dtNavMesh* nav);
+	
+	/// Processes queued requests.
+	///  @param[in]		maxIters	NOTE(review): presumably an iteration budget for this call - confirm.
+	void update(const int maxIters);
+	
+	/// Queues a path-find request.
+	/// @return A handle for the request. dtCrowd compares it against DT_PATHQ_INVALID
+	///         to detect a request that was not accepted.
+	dtPathQueueRef request(dtPolyRef startRef, dtPolyRef endRef,
+						   const float* startPos, const float* endPos, 
+						   const dtQueryFilter* filter);
+	
+	/// Gets the status of the request identified by @p ref.
+	dtStatus getRequestStatus(dtPathQueueRef ref) const;
+	
+	/// Retrieves the result of the request identified by @p ref.
+	///  @param[out]	path		Buffer receiving the polygon references.
+	///  @param[out]	pathSize	Receives the number of polygons written to @p path.
+	///  @param[in]		maxPath		The capacity of @p path.
+	dtStatus getPathResult(dtPathQueueRef ref, dtPolyRef* path, int* pathSize, const int maxPath);
+	
+	/// Gets the navmesh query object held by the queue.
+	inline const dtNavMeshQuery* getNavQuery() const { return m_navquery; }
+
+};
+
+#endif // DETOURPATHQUEUE_H
diff --git a/Engine/lib/recast/DetourCrowd/Include/DetourProximityGrid.h b/Engine/lib/recast/DetourCrowd/Include/DetourProximityGrid.h
new file mode 100644
index 000000000..b098261e2
--- /dev/null
+++ b/Engine/lib/recast/DetourCrowd/Include/DetourProximityGrid.h
@@ -0,0 +1,70 @@
+//
+// Copyright (c) 2009-2010 Mikko Mononen memon@inside.org
+//
+// This software is provided 'as-is', without any express or implied
+// warranty.  In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source distribution.
+//
+
+#ifndef DETOURPROXIMITYGRID_H
+#define DETOURPROXIMITYGRID_H
+
+/// A 2D grid for looking up items by bounding rectangle.
+/// NOTE(review): the member names suggest a bucketed hash grid over a pooled item list -
+/// confirm against the implementation.
+class dtProximityGrid
+{
+	int m_maxItems;
+	float m_cellSize;		///< Returned by getCellSize().
+	float m_invCellSize;
+	
+	/// A pooled grid entry.
+	struct Item
+	{
+		unsigned short id;
+		short x,y;
+		unsigned short next;
+	};
+	Item* m_pool;
+	int m_poolHead;
+	int m_poolSize;
+	
+	unsigned short* m_buckets;
+	int m_bucketsSize;
+	
+	int m_bounds[4];		///< Returned by getBounds().
+	
+public:
+	dtProximityGrid();
+	~dtProximityGrid();
+	
+	/// Initializes the grid.
+	///  @param[in]		maxItems	NOTE(review): presumably the item pool capacity - confirm.
+	///  @param[in]		cellSize	The size of a grid cell.
+	/// @return dtCrowd::init() treats a false result as a failure.
+	bool init(const int maxItems, const float cellSize);
+	
+	void clear();
+	
+	/// Adds an item covering the given rectangle.
+	///  @param[in]		id		The id stored for the item.
+	void addItem(const unsigned short id,
+				 const float minx, const float miny,
+				 const float maxx, const float maxy);
+	
+	/// Collects the ids of items found for the given rectangle.
+	///  @param[out]	ids		Buffer receiving the item ids.
+	///  @param[in]		maxIds	The capacity of @p ids.
+	/// @return The number of ids written to @p ids.
+	int queryItems(const float minx, const float miny,
+				   const float maxx, const float maxy,
+				   unsigned short* ids, const int maxIds) const;
+	
+	int getItemCountAt(const int x, const int y) const;
+	
+	/// Gets the four-element bounds array.
+	inline const int* getBounds() const { return m_bounds; }
+	/// Gets the cell size.
+	inline const float getCellSize() const { return m_cellSize; }
+};
+
+/// Allocates a proximity grid object.
+/// @return The new grid. dtCrowd::init() treats a null result as a failure.
+dtProximityGrid* dtAllocProximityGrid();
+/// Frees a grid; dtCrowd::purge() passes the pointer it got from dtAllocProximityGrid().
+///  @param[in]		ptr		The grid to free.
+void dtFreeProximityGrid(dtProximityGrid* ptr);
+
+
+#endif // DETOURPROXIMITYGRID_H
+
diff --git a/Engine/lib/recast/DetourCrowd/Source/DetourCrowd.cpp b/Engine/lib/recast/DetourCrowd/Source/DetourCrowd.cpp
new file mode 100644
index 000000000..e7312efda
--- /dev/null
+++ b/Engine/lib/recast/DetourCrowd/Source/DetourCrowd.cpp
@@ -0,0 +1,1417 @@
+//
+// Copyright (c) 2009-2010 Mikko Mononen memon@inside.org
+//
+// This software is provided 'as-is', without any express or implied
+// warranty.  In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source distribution.
+//
+
+#define _USE_MATH_DEFINES
+#include <math.h>
+#include <string.h>
+#include <float.h>
+#include <stdlib.h>
+#include <new>
+#include "DetourCrowd.h"
+#include "DetourNavMesh.h"
+#include "DetourNavMeshQuery.h"
+#include "DetourObstacleAvoidance.h"
+#include "DetourCommon.h"
+#include "DetourAssert.h"
+#include "DetourAlloc.h"
+
+
+/// Allocates a crowd object through dtAlloc() and constructs it in place.
+/// @return The new crowd, or null if the allocation failed.
+dtCrowd* dtAllocCrowd()
+{
+	void* const storage = dtAlloc(sizeof(dtCrowd), DT_ALLOC_PERM);
+	// Only construct when the allocator actually handed back memory.
+	return storage ? new(storage) dtCrowd : 0;
+}
+
+/// Destroys a crowd object and returns its memory through dtFree().
+///  @param[in]		ptr		The crowd to free. A null pointer is ignored.
+void dtFreeCrowd(dtCrowd* ptr)
+{
+	if (ptr)
+	{
+		// The object was built with placement new, so destroy it explicitly.
+		ptr->~dtCrowd();
+		dtFree(ptr);
+	}
+}
+
+
+/// Value passed to dtPathQueue::update() from dtCrowd::updateMoveRequest().
+static const int MAX_ITERS_PER_UPDATE = 100;
+
+/// Search node count passed to dtPathQueue::init() from dtCrowd::init().
+static const int MAX_PATHQUEUE_NODES = 4096;
+/// Node count passed to the crowd's own dtNavMeshQuery::init() from dtCrowd::init().
+static const int MAX_COMMON_NODES = 512;
+
+/// Maps @p t from the range [t0, t1] onto [0, 1], clamping values outside the range.
+///  @param[in]		t	The value to map.
+///  @param[in]		t0	The value that maps to 0.
+///  @param[in]		t1	The value that maps to 1.
+/// @return The clamped, normalized position of @p t between @p t0 and @p t1.
+inline float tween(const float t, const float t0, const float t1)
+{
+	const float normalized = (t-t0) / (t1-t0);
+	return dtClamp(normalized, 0.0f, 1.0f);
+}
+
+/// Steers the agent's velocity toward its new velocity and advances its position.
+///  @param[in,out]	ag	The agent; vel and npos are updated.
+///  @param[in]		dt	The time step.
+static void integrate(dtCrowdAgent* ag, const float dt)
+{
+	// Fake dynamic constraint: the velocity may change by at most maxAcceleration * dt.
+	const float accelLimit = ag->params.maxAcceleration * dt;
+	float deltaVel[3];
+	dtVsub(deltaVel, ag->nvel, ag->vel);
+	const float deltaLen = dtVlen(deltaVel);
+	if (deltaLen > accelLimit)
+	{
+		dtVscale(deltaVel, deltaVel, accelLimit/deltaLen);
+	}
+	dtVadd(ag->vel, ag->vel, deltaVel);
+	
+	// Integrate the position, or zero out a velocity that is too small to matter.
+	const bool moving = dtVlen(ag->vel) > 0.0001f;
+	if (moving)
+	{
+		dtVmad(ag->npos, ag->npos, ag->vel, dt);
+	}
+	else
+	{
+		dtVset(ag->vel,0,0,0);
+	}
+}
+
+/// Tells whether the agent is within @p radius (2D distance) of an off-mesh connection
+/// that is the last of its current corners.
+///  @param[in]		ag		The agent to test.
+///  @param[in]		radius	The trigger radius.
+/// @return True if the last corner is an off-mesh connection closer than @p radius.
+static bool overOffmeshConnection(const dtCrowdAgent* ag, const float radius)
+{
+	if (!ag->ncorners)
+		return false;
+	
+	const int last = ag->ncorners-1;
+	if ((ag->cornerFlags[last] & DT_STRAIGHTPATH_OFFMESH_CONNECTION) == 0)
+		return false;
+	
+	const float distSq = dtVdist2DSqr(ag->npos, &ag->cornerVerts[last*3]);
+	return distSq < radius*radius;
+}
+
+/// Gets the 2D distance from the agent to the end of its path, capped at @p range.
+///  @param[in]		ag		The agent to measure from.
+///  @param[in]		range	The maximum value to return.
+/// @return @p range when the agent has no corners or its last corner is not the end
+///         of the path; otherwise the smaller of the distance and @p range.
+static float getDistanceToGoal(const dtCrowdAgent* ag, const float range)
+{
+	if (!ag->ncorners)
+		return range;
+	
+	const int last = ag->ncorners-1;
+	if (!(ag->cornerFlags[last] & DT_STRAIGHTPATH_END))
+		return range;
+	
+	return dtMin(dtVdist2D(ag->npos, &ag->cornerVerts[last*3]), range);
+}
+
+/// Computes a normalized, horizontal steering direction from the agent's first two corners.
+///  @param[in]		ag		The agent to steer.
+///  @param[out]	dir		The steering direction, or (0,0,0) when there are no corners. [(x, y, z)]
+static void calcSmoothSteerDirection(const dtCrowdAgent* ag, float* dir)
+{
+	if (!ag->ncorners)
+	{
+		dtVset(dir, 0,0,0);
+		return;
+	}
+	
+	// With a single corner, both points refer to that same corner.
+	const int secondIdx = dtMin(1, ag->ncorners-1);
+	const float* firstCorner = &ag->cornerVerts[0];
+	const float* secondCorner = &ag->cornerVerts[secondIdx*3];
+	
+	// Horizontal offsets from the agent to each corner.
+	float toFirst[3], toSecond[3];
+	dtVsub(toFirst, firstCorner, ag->npos);
+	dtVsub(toSecond, secondCorner, ag->npos);
+	toFirst[1] = 0;
+	toSecond[1] = 0;
+	
+	const float firstLen = dtVlen(toFirst);
+	const float secondLen = dtVlen(toSecond);
+	if (secondLen > 0.001f)
+		dtVscale(toSecond,toSecond,1.0f/secondLen);
+	
+	// Offset to the first corner, minus the second-corner direction scaled by half
+	// the distance to the first corner.
+	dir[0] = toFirst[0] - toSecond[0]*firstLen*0.5f;
+	dir[1] = 0;
+	dir[2] = toFirst[2] - toSecond[2]*firstLen*0.5f;
+	
+	dtVnormalize(dir);
+}
+
+/// Computes the normalized, horizontal direction from the agent straight to its first corner.
+///  @param[in]		ag		The agent to steer.
+///  @param[out]	dir		The steering direction, or (0,0,0) when there are no corners. [(x, y, z)]
+static void calcStraightSteerDirection(const dtCrowdAgent* ag, float* dir)
+{
+	if (ag->ncorners)
+	{
+		dtVsub(dir, &ag->cornerVerts[0], ag->npos);
+		dir[1] = 0;
+		dtVnormalize(dir);
+	}
+	else
+	{
+		dtVset(dir, 0,0,0);
+	}
+}
+
+/// Inserts an entry into a neighbour list that is kept ordered by ascending dist.
+/// When the list is full, an entry that belongs at the end is dropped; an entry that
+/// belongs earlier pushes the current last entry out.
+///  @param[in]		idx		The value stored in the new entry's idx field.
+///  @param[in]		dist	The sort key stored in the new entry's dist field.
+///  						(getNeighbours() passes a squared distance.)
+///  @param[in,out]	neis	The neighbour list.
+///  @param[in]		nneis	The number of entries currently in the list.
+///  @param[in]		maxNeis	The capacity of the list.
+/// @return The new number of entries in the list. [Limit: <= @p maxNeis]
+static int addNeighbour(const int idx, const float dist,
+						dtCrowdNeighbour* neis, const int nneis, const int maxNeis)
+{
+	// Insert neighbour based on the distance.
+	dtCrowdNeighbour* nei = 0;
+	if (!nneis)
+	{
+		// Empty list: the new entry takes the first slot.
+		nei = &neis[nneis];
+	}
+	else if (dist >= neis[nneis-1].dist)
+	{
+		// Not closer than the current last entry: append, or drop if the list is full.
+		if (nneis >= maxNeis)
+			return nneis;
+		nei = &neis[nneis];
+	}
+	else
+	{
+		// Find the first entry that is at least as far away as the new one.
+		int i;
+		for (i = 0; i < nneis; ++i)
+			if (dist <= neis[i].dist)
+				break;
+		
+		// Shift the tail one slot to the right; n is capped so the shift stays inside
+		// the buffer, which discards the last entry when the list is full.
+		const int tgt = i+1;
+		const int n = dtMin(nneis-i, maxNeis-tgt);
+		
+		dtAssert(tgt+n <= maxNeis);
+		
+		if (n > 0)
+			memmove(&neis[tgt], &neis[i], sizeof(dtCrowdNeighbour)*n);
+		nei = &neis[i];
+	}
+	
+	memset(nei, 0, sizeof(dtCrowdNeighbour));
+	
+	nei->idx = idx;
+	nei->dist = dist;
+	
+	return dtMin(nneis+1, maxNeis);
+}
+
+/// Collects the agents near a position into a distance-ordered neighbour list.
+///  @param[in]		pos			The position to search around. [(x, y, z)]
+///  @param[in]		height		The height used for the vertical overlap test.
+///  @param[in]		range		The horizontal search radius.
+///  @param[in]		skip		An agent to leave out of the result.
+///  @param[out]	result		The neighbour list, filled through addNeighbour().
+///  @param[in]		maxResult	The capacity of @p result.
+///  @param[in]		agents		The agents, indexed by the ids stored in @p grid.
+///  @param[in]		grid		The proximity grid to query.
+/// @return The number of neighbours written to @p result.
+static int getNeighbours(const float* pos, const float height, const float range,
+						 const dtCrowdAgent* skip, dtCrowdNeighbour* result, const int maxResult,
+						 dtCrowdAgent** agents, const int /*nagents*/, dtProximityGrid* grid)
+{
+	// Candidates come from the grid cells overlapping the search square on the xz-plane.
+	static const int MAX_NEIS = 32;
+	unsigned short ids[MAX_NEIS];
+	const int nids = grid->queryItems(pos[0]-range, pos[2]-range,
+									  pos[0]+range, pos[2]+range,
+									  ids, MAX_NEIS);
+	
+	int count = 0;
+	for (int i = 0; i < nids; ++i)
+	{
+		const unsigned short id = ids[i];
+		const dtCrowdAgent* other = agents[id];
+		
+		if (other == skip)
+			continue;
+		
+		float diff[3];
+		dtVsub(diff, pos, other->npos);
+		
+		// Reject candidates that do not overlap vertically.
+		const float maxVerticalGap = (height+other->params.height)/2.0f;
+		if (fabsf(diff[1]) >= maxVerticalGap)
+			continue;
+		
+		// Reject candidates outside the horizontal range.
+		diff[1] = 0;
+		const float distSqr = dtVlenSqr(diff);
+		if (distSqr > dtSqr(range))
+			continue;
+		
+		count = addNeighbour(id, distSqr, result, count, maxResult);
+	}
+	return count;
+}
+
+/// Inserts an agent into a queue kept ordered by descending topologyOptTime.
+/// When the queue is full, an agent that belongs at the end is dropped; an agent that
+/// belongs earlier pushes the current last agent out.
+///  @param[in]		newag		The agent to insert.
+///  @param[in,out]	agents		The queue.
+///  @param[in]		nagents		The number of agents currently in the queue.
+///  @param[in]		maxAgents	The capacity of the queue.
+/// @return The new number of agents in the queue. [Limit: <= @p maxAgents]
+static int addToOptQueue(dtCrowdAgent* newag, dtCrowdAgent** agents, const int nagents, const int maxAgents)
+{
+	// Insert agent based on greatest time.
+	int slot = 0;
+	if (!nagents)
+	{
+		// Empty queue: the agent takes the first slot.
+		slot = nagents;
+	}
+	else if (newag->topologyOptTime <= agents[nagents-1]->topologyOptTime)
+	{
+		// Not greater than the current last entry: append, or drop if the queue is full.
+		if (nagents >= maxAgents)
+			return nagents;
+		slot = nagents;
+	}
+	else
+	{
+		// Find the first entry whose time does not exceed the new agent's time.
+		int i;
+		for (i = 0; i < nagents; ++i)
+			if (newag->topologyOptTime >= agents[i]->topologyOptTime)
+				break;
+		
+		// Shift the tail one slot to the right; n is capped so the shift stays inside
+		// the buffer, which discards the last entry when the queue is full.
+		const int tgt = i+1;
+		const int n = dtMin(nagents-i, maxAgents-tgt);
+		
+		dtAssert(tgt+n <= maxAgents);
+		
+		if (n > 0)
+			memmove(&agents[tgt], &agents[i], sizeof(dtCrowdAgent*)*n);
+		slot = i;
+	}
+	
+	agents[slot] = newag;
+	
+	return dtMin(nagents+1, maxAgents);
+}
+
+/// Inserts an agent into a queue kept ordered by descending targetReplanTime.
+/// When the queue is full, an agent that belongs at the end is dropped; an agent that
+/// belongs earlier pushes the current last agent out.
+///  @param[in]		newag		The agent to insert.
+///  @param[in,out]	agents		The queue.
+///  @param[in]		nagents		The number of agents currently in the queue.
+///  @param[in]		maxAgents	The capacity of the queue.
+/// @return The new number of agents in the queue. [Limit: <= @p maxAgents]
+static int addToPathQueue(dtCrowdAgent* newag, dtCrowdAgent** agents, const int nagents, const int maxAgents)
+{
+	// Insert agent based on greatest time.
+	int slot = 0;
+	if (!nagents)
+	{
+		// Empty queue: the agent takes the first slot.
+		slot = nagents;
+	}
+	else if (newag->targetReplanTime <= agents[nagents-1]->targetReplanTime)
+	{
+		// Not greater than the current last entry: append, or drop if the queue is full.
+		if (nagents >= maxAgents)
+			return nagents;
+		slot = nagents;
+	}
+	else
+	{
+		// Find the first entry whose time does not exceed the new agent's time.
+		int i;
+		for (i = 0; i < nagents; ++i)
+			if (newag->targetReplanTime >= agents[i]->targetReplanTime)
+				break;
+		
+		// Shift the tail one slot to the right; n is capped so the shift stays inside
+		// the buffer, which discards the last entry when the queue is full.
+		const int tgt = i+1;
+		const int n = dtMin(nagents-i, maxAgents-tgt);
+		
+		dtAssert(tgt+n <= maxAgents);
+		
+		if (n > 0)
+			memmove(&agents[tgt], &agents[i], sizeof(dtCrowdAgent*)*n);
+		slot = i;
+	}
+	
+	agents[slot] = newag;
+	
+	return dtMin(nagents+1, maxAgents);
+}
+
+
+/**
+@class dtCrowd
+@par
+
+This is the core class of the @ref crowd module.  See the @ref crowd documentation for a summary
+of the crowd features.
+
+A common method for setting up the crowd is as follows:
+
+-# Allocate the crowd using #dtAllocCrowd.
+-# Initialize the crowd using #init().
+-# Set the avoidance configurations using #setObstacleAvoidanceParams().
+-# Add agents using #addAgent() and make an initial movement request using #requestMoveTarget().
+
+A common process for managing the crowd is as follows:
+
+-# Call #update() to allow the crowd to manage its agents.
+-# Retrieve agent information using #getActiveAgents().
+-# Make movement requests using #requestMoveTarget() when movement goal changes.
+-# Repeat every frame.
+
+Some agent configuration settings can be updated using #updateAgentParameters().  But the crowd owns the
+agent position.  So it is not possible to update an active agent's position.  If agent position
+must be fed back into the crowd, the agent must be removed and re-added.
+
+Notes: 
+
+- Path related information is available for newly added agents only after an #update() has been
+  performed.
+- Agent objects are kept in a pool and re-used.  So it is important when using agent objects to check the value of
+  #dtCrowdAgent::active to determine if the agent is actually in use or not.
+- This class is meant to provide 'local' movement. There is a limit of 256 polygons in the path corridor.  
+  So it is not meant to provide automatic pathfinding services over long distances.
+
+@see dtAllocCrowd(), dtFreeCrowd(), init(), dtCrowdAgent
+
+*/
+
+/// Constructs an empty crowd. Every pointer and count starts at zero; nothing is
+/// allocated until init() is called.
+dtCrowd::dtCrowd() :
+	m_maxAgents(0),
+	m_agents(0),
+	m_activeAgents(0),
+	m_agentAnims(0),
+	m_obstacleQuery(0),
+	m_grid(0),
+	m_pathResult(0),
+	m_maxPathResult(0),
+	m_maxAgentRadius(0),
+	m_velocitySampleCount(0),
+	m_navquery(0)
+{
+}
+
+/// Releases everything the crowd owns by calling purge().
+dtCrowd::~dtCrowd()
+{
+	purge();
+}
+
+/// Destroys the agent pool and frees every buffer and helper object owned by the crowd,
+/// leaving the owning pointers and the agent count at zero.
+void dtCrowd::purge()
+{
+	// init() records m_maxAgents before it allocates m_agents, so after a failed init()
+	// the count can be non-zero while the pool is still null. Only run the destructors
+	// when there is a pool to walk.
+	if (m_agents)
+	{
+		for (int i = 0; i < m_maxAgents; ++i)
+			m_agents[i].~dtCrowdAgent();
+		dtFree(m_agents);
+	}
+	m_agents = 0;
+	m_maxAgents = 0;
+	
+	dtFree(m_activeAgents);
+	m_activeAgents = 0;
+
+	dtFree(m_agentAnims);
+	m_agentAnims = 0;
+	
+	dtFree(m_pathResult);
+	m_pathResult = 0;
+	
+	dtFreeProximityGrid(m_grid);
+	m_grid = 0;
+
+	dtFreeObstacleAvoidanceQuery(m_obstacleQuery);
+	m_obstacleQuery = 0;
+	
+	dtFreeNavMeshQuery(m_navquery);
+	m_navquery = 0;
+}
+
+/// @par
+///
+/// May be called more than once to purge and re-initialize the crowd.
+///
+/// Sets up the proximity grid, the obstacle avoidance query and its default parameters,
+/// the path queue, the agent pool and the crowd's own navmesh query. When a step fails,
+/// whatever was built so far stays in place until purge() runs.
+///  @param[in]		maxAgents		The number of agent slots in the pool.
+///  @param[in]		maxAgentRadius	The radius used to size the search extents and the grid cells.
+///  @param[in]		nav				The navigation mesh given to the path queue and the navmesh query.
+/// @return True if every allocation and sub-initialization succeeded.
+bool dtCrowd::init(const int maxAgents, const float maxAgentRadius, dtNavMesh* nav)
+{
+	purge();
+	
+	m_maxAgents = maxAgents;
+	m_maxAgentRadius = maxAgentRadius;
+
+	dtVset(m_ext, m_maxAgentRadius*2.0f,m_maxAgentRadius*1.5f,m_maxAgentRadius*2.0f);
+	
+	m_grid = dtAllocProximityGrid();
+	if (!m_grid)
+		return false;
+	if (!m_grid->init(m_maxAgents*4, maxAgentRadius*3))
+		return false;
+	
+	m_obstacleQuery = dtAllocObstacleAvoidanceQuery();
+	if (!m_obstacleQuery)
+		return false;
+	if (!m_obstacleQuery->init(6, 8))
+		return false;
+
+	// Init obstacle query params.
+	memset(m_obstacleQueryParams, 0, sizeof(m_obstacleQueryParams));
+	for (int i = 0; i < DT_CROWD_MAX_OBSTAVOIDANCE_PARAMS; ++i)
+	{
+		dtObstacleAvoidanceParams* params = &m_obstacleQueryParams[i];
+		params->velBias = 0.4f;
+		params->weightDesVel = 2.0f;
+		params->weightCurVel = 0.75f;
+		params->weightSide = 0.75f;
+		params->weightToi = 2.5f;
+		params->horizTime = 2.5f;
+		params->gridSize = 33;
+		params->adaptiveDivs = 7;
+		params->adaptiveRings = 2;
+		params->adaptiveDepth = 5;
+	}
+	
+	// Allocate temp buffer for merging paths.
+	m_maxPathResult = 256;
+	m_pathResult = (dtPolyRef*)dtAlloc(sizeof(dtPolyRef)*m_maxPathResult, DT_ALLOC_PERM);
+	if (!m_pathResult)
+		return false;
+	
+	if (!m_pathq.init(m_maxPathResult, MAX_PATHQUEUE_NODES, nav))
+		return false;
+	
+	m_agents = (dtCrowdAgent*)dtAlloc(sizeof(dtCrowdAgent)*m_maxAgents, DT_ALLOC_PERM);
+	if (!m_agents)
+		return false;
+	
+	// Construct every agent before any further step that can fail. purge() runs the
+	// destructor on all m_maxAgents slots, so an early return below must never leave
+	// a slot as raw, unconstructed memory.
+	for (int i = 0; i < m_maxAgents; ++i)
+	{
+		new(&m_agents[i]) dtCrowdAgent();
+		m_agents[i].active = 0;
+	}
+	
+	m_activeAgents = (dtCrowdAgent**)dtAlloc(sizeof(dtCrowdAgent*)*m_maxAgents, DT_ALLOC_PERM);
+	if (!m_activeAgents)
+		return false;
+
+	m_agentAnims = (dtCrowdAgentAnimation*)dtAlloc(sizeof(dtCrowdAgentAnimation)*m_maxAgents, DT_ALLOC_PERM);
+	if (!m_agentAnims)
+		return false;
+	
+	// Give each (already constructed) agent its path corridor buffer.
+	for (int i = 0; i < m_maxAgents; ++i)
+	{
+		if (!m_agents[i].corridor.init(m_maxPathResult))
+			return false;
+	}
+
+	for (int i = 0; i < m_maxAgents; ++i)
+	{
+		m_agentAnims[i].active = 0;
+	}
+
+	// The navquery is mostly used for local searches, no need for large node pool.
+	m_navquery = dtAllocNavMeshQuery();
+	if (!m_navquery)
+		return false;
+	if (dtStatusFailed(m_navquery->init(nav, MAX_COMMON_NODES)))
+		return false;
+	
+	return true;
+}
+
+/// Copies an obstacle avoidance configuration into the given slot.
+/// An out-of-range @p idx is ignored.
+///  @param[in]		idx		The slot index. [Limits: 0 <= value < #DT_CROWD_MAX_OBSTAVOIDANCE_PARAMS]
+///  @param[in]		params	The configuration to copy.
+void dtCrowd::setObstacleAvoidanceParams(const int idx, const dtObstacleAvoidanceParams* params)
+{
+	if (idx < 0 || idx >= DT_CROWD_MAX_OBSTAVOIDANCE_PARAMS)
+		return;
+	
+	dtObstacleAvoidanceParams* slot = &m_obstacleQueryParams[idx];
+	memcpy(slot, params, sizeof(dtObstacleAvoidanceParams));
+}
+
+/// Gets the obstacle avoidance configuration stored in the given slot.
+///  @param[in]		idx		The slot index. [Limits: 0 <= value < #DT_CROWD_MAX_OBSTAVOIDANCE_PARAMS]
+/// @return The configuration, or null if @p idx is out of range.
+const dtObstacleAvoidanceParams* dtCrowd::getObstacleAvoidanceParams(const int idx) const
+{
+	const bool inRange = (idx >= 0) && (idx < DT_CROWD_MAX_OBSTAVOIDANCE_PARAMS);
+	return inRange ? &m_obstacleQueryParams[idx] : 0;
+}
+
+/// Gets the size of the agent pool.
+/// @return The number of agent slots (m_maxAgents). This counts every slot in the pool,
+///         not only the agents that are currently active.
+const int dtCrowd::getAgentCount() const
+{
+	return m_maxAgents;
+}
+
+/// @par
+/// 
+/// Agents in the pool may not be in use.  Check #dtCrowdAgent.active before using the returned object.
+///  @param[in]		idx		The agent index. [Limits: 0 <= value < #getAgentCount()]
+/// @return The agent in slot @p idx, or null if @p idx is out of range.
+const dtCrowdAgent* dtCrowd::getAgent(const int idx)
+{
+	// Refuse indices outside the pool instead of handing back a pointer past its bounds.
+	if (idx < 0 || idx >= m_maxAgents)
+		return 0;
+	return &m_agents[idx];
+}
+
+/// Replaces the configuration of the agent in the given slot.
+/// An out-of-range @p idx is ignored.
+///  @param[in]		idx		The agent index. [Limits: 0 <= value < #getAgentCount()]
+///  @param[in]		params	The new configuration; it is copied into the agent.
+void dtCrowd::updateAgentParameters(const int idx, const dtCrowdAgentParams* params)
+{
+	// The pool holds m_maxAgents slots, so m_maxAgents itself is already out of range.
+	if (idx < 0 || idx >= m_maxAgents)
+		return;
+	memcpy(&m_agents[idx].params, params, sizeof(dtCrowdAgentParams));
+}
+
+/// @par
+///
+/// The agent's position will be constrained to the surface of the navigation mesh.
+/// If no polygon is found near @p pos, the agent is still added, at @p pos itself,
+/// in the #DT_CROWDAGENT_STATE_INVALID state.
+///  @param[in]		pos		The requested position of the agent. [(x, y, z)]
+///  @param[in]		params	The configuration of the agent.
+/// @return The index of the agent in the pool, or -1 if every slot is in use.
+int dtCrowd::addAgent(const float* pos, const dtCrowdAgentParams* params)
+{
+	// Find empty slot.
+	int idx = -1;
+	for (int i = 0; i < m_maxAgents; ++i)
+	{
+		if (!m_agents[i].active)
+		{
+			idx = i;
+			break;
+		}
+	}
+	if (idx == -1)
+		return -1;
+	
+	dtCrowdAgent* ag = &m_agents[idx];
+
+	// Find nearest position on navmesh and place the agent there.
+	// Start from well-defined values: the query may leave its outputs untouched
+	// when it fails or finds nothing.
+	float nearest[3];
+	dtPolyRef ref = 0;
+	dtVcopy(nearest, pos);
+	const dtStatus status = m_navquery->findNearestPoly(pos, m_ext, &m_filter, &ref, nearest);
+	if (dtStatusFailed(status))
+	{
+		dtVcopy(nearest, pos);
+		ref = 0;
+	}
+	
+	ag->corridor.reset(ref, nearest);
+	ag->boundary.reset();
+
+	updateAgentParameters(idx, params);
+	
+	ag->topologyOptTime = 0;
+	ag->targetReplanTime = 0;
+	ag->nneis = 0;
+	
+	dtVset(ag->dvel, 0,0,0);
+	dtVset(ag->nvel, 0,0,0);
+	dtVset(ag->vel, 0,0,0);
+	dtVcopy(ag->npos, nearest);
+	
+	ag->desiredSpeed = 0;
+
+	// Without a polygon under the agent it cannot be simulated.
+	if (ref)
+		ag->state = DT_CROWDAGENT_STATE_WALKING;
+	else
+		ag->state = DT_CROWDAGENT_STATE_INVALID;
+	
+	ag->targetState = DT_CROWDAGENT_TARGET_NONE;
+	
+	ag->active = 1;
+
+	return idx;
+}
+
+/// @par
+///
+/// Marks the agent as inactive so its slot can be handed out again. The #dtCrowdAgent
+/// object itself stays in the pool. An out-of-range @p idx is ignored.
+void dtCrowd::removeAgent(const int idx)
+{
+	if (idx < 0 || idx >= m_maxAgents)
+		return;
+	
+	m_agents[idx].active = 0;
+}
+
+/// Submits a move request that is flagged as a replan of the agent's current path.
+/// Unlike requestMoveTarget(), a zero @p ref is accepted; the agent's target state is
+/// then set to #DT_CROWDAGENT_TARGET_FAILED.
+///  @param[in]		idx		The agent index. [Limits: 0 <= value < #getAgentCount()]
+///  @param[in]		ref		The polygon reference of the target.
+///  @param[in]		pos		The position of the target. [(x, y, z)]
+/// @return False if @p idx is out of range, otherwise true.
+bool dtCrowd::requestMoveTargetReplan(const int idx, dtPolyRef ref, const float* pos)
+{
+	// The pool holds m_maxAgents slots, so m_maxAgents itself is already out of range.
+	if (idx < 0 || idx >= m_maxAgents)
+		return false;
+	
+	dtCrowdAgent* ag = &m_agents[idx];
+	
+	// Initialize request.
+	ag->targetRef = ref;
+	dtVcopy(ag->targetPos, pos);
+	ag->targetPathqRef = DT_PATHQ_INVALID;
+	ag->targetReplan = true;
+	if (ag->targetRef)
+		ag->targetState = DT_CROWDAGENT_TARGET_REQUESTING;
+	else
+		ag->targetState = DT_CROWDAGENT_TARGET_FAILED;
+	
+	return true;
+}
+
+/// @par
+/// 
+/// This method is used when a new target is set.
+/// 
+/// The position will be constrained to the surface of the navigation mesh.
+///
+/// The request will be processed during the next #update().
+///  @param[in]		idx		The agent index. [Limits: 0 <= value < #getAgentCount()]
+///  @param[in]		ref		The polygon reference of the target. Must not be zero.
+///  @param[in]		pos		The position of the target. [(x, y, z)]
+/// @return False if @p idx is out of range or @p ref is zero, otherwise true.
+bool dtCrowd::requestMoveTarget(const int idx, dtPolyRef ref, const float* pos)
+{
+	// The pool holds m_maxAgents slots, so m_maxAgents itself is already out of range.
+	if (idx < 0 || idx >= m_maxAgents)
+		return false;
+	if (!ref)
+		return false;
+
+	dtCrowdAgent* ag = &m_agents[idx];
+	
+	// Initialize request. ref is known to be non-zero here, so the request always
+	// starts in the REQUESTING state.
+	ag->targetRef = ref;
+	dtVcopy(ag->targetPos, pos);
+	ag->targetPathqRef = DT_PATHQ_INVALID;
+	ag->targetReplan = false;
+	ag->targetState = DT_CROWDAGENT_TARGET_REQUESTING;
+
+	return true;
+}
+
+/// Submits a request to move the agent by velocity instead of toward a target.
+/// The velocity is stored in the agent's targetPos field and the target state is set
+/// to #DT_CROWDAGENT_TARGET_VELOCITY.
+///  @param[in]		idx		The agent index. [Limits: 0 <= value < #getAgentCount()]
+///  @param[in]		vel		The requested velocity. [(x, y, z)]
+/// @return False if @p idx is out of range, otherwise true.
+bool dtCrowd::requestMoveVelocity(const int idx, const float* vel)
+{
+	// The pool holds m_maxAgents slots, so m_maxAgents itself is already out of range.
+	if (idx < 0 || idx >= m_maxAgents)
+		return false;
+	
+	dtCrowdAgent* ag = &m_agents[idx];
+	
+	// Initialize request.
+	ag->targetRef = 0;
+	dtVcopy(ag->targetPos, vel);
+	ag->targetPathqRef = DT_PATHQ_INVALID;
+	ag->targetReplan = false;
+	ag->targetState = DT_CROWDAGENT_TARGET_VELOCITY;
+	
+	return true;
+}
+
+/// Clears the agent's current move request, leaving it with no target.
+///  @param[in]		idx		The agent index. [Limits: 0 <= value < #getAgentCount()]
+/// @return False if @p idx is out of range, otherwise true.
+bool dtCrowd::resetMoveTarget(const int idx)
+{
+	// The pool holds m_maxAgents slots, so m_maxAgents itself is already out of range.
+	if (idx < 0 || idx >= m_maxAgents)
+		return false;
+	
+	dtCrowdAgent* ag = &m_agents[idx];
+	
+	// Initialize request.
+	ag->targetRef = 0;
+	dtVset(ag->targetPos, 0,0,0);
+	ag->targetPathqRef = DT_PATHQ_INVALID;
+	ag->targetReplan = false;
+	ag->targetState = DT_CROWDAGENT_TARGET_NONE;
+	
+	return true;
+}
+
+/// Collects pointers to the agents that are currently active, in pool order.
+///  @param[out]	agents		Buffer receiving the active agents.
+///  @param[in]		maxAgents	The capacity of @p agents.
+/// @return The number of agents written to @p agents. [Limit: <= @p maxAgents]
+int dtCrowd::getActiveAgents(dtCrowdAgent** agents, const int maxAgents)
+{
+	int count = 0;
+	// Once the output buffer is full nothing more can be stored, so stop scanning.
+	for (int i = 0; i < m_maxAgents && count < maxAgents; ++i)
+	{
+		dtCrowdAgent* candidate = &m_agents[i];
+		if (candidate->active)
+			agents[count++] = candidate;
+	}
+	return count;
+}
+
+
+void dtCrowd::updateMoveRequest(const float /*dt*/)
+{
+	const int PATH_MAX_AGENTS = 8;
+	dtCrowdAgent* queue[PATH_MAX_AGENTS];
+	int nqueue = 0;
+	
+	// Fire off new requests.
+	for (int i = 0; i < m_maxAgents; ++i)
+	{
+		dtCrowdAgent* ag = &m_agents[i];
+		if (!ag->active)
+			continue;
+		if (ag->state == DT_CROWDAGENT_STATE_INVALID)
+			continue;
+		if (ag->targetState == DT_CROWDAGENT_TARGET_NONE || ag->targetState == DT_CROWDAGENT_TARGET_VELOCITY)
+			continue;
+
+		if (ag->targetState == DT_CROWDAGENT_TARGET_REQUESTING)
+		{
+			const dtPolyRef* path = ag->corridor.getPath();
+			const int npath = ag->corridor.getPathCount();
+			dtAssert(npath);
+
+			static const int MAX_RES = 32;
+			float reqPos[3];
+			dtPolyRef reqPath[MAX_RES];	// The path to the request location
+			int reqPathCount = 0;
+
+			// Quick seach towards the goal.
+			static const int MAX_ITER = 20;
+			m_navquery->initSlicedFindPath(path[0], ag->targetRef, ag->npos, ag->targetPos, &m_filter);
+			m_navquery->updateSlicedFindPath(MAX_ITER, 0);
+			dtStatus status = 0;
+			if (ag->targetReplan) // && npath > 10)
+			{
+				// Try to use existing steady path during replan if possible.
+				status = m_navquery->finalizeSlicedFindPathPartial(path, npath, reqPath, &reqPathCount, MAX_RES);
+			}
+			else
+			{
+				// Try to move towards target when goal changes.
+				status = m_navquery->finalizeSlicedFindPath(reqPath, &reqPathCount, MAX_RES);
+			}
+
+			if (!dtStatusFailed(status) && reqPathCount > 0)
+			{
+				// In progress or succeed.
+				if (reqPath[reqPathCount-1] != ag->targetRef)
+				{
+					// Partial path, constrain target position inside the last polygon.
+					status = m_navquery->closestPointOnPoly(reqPath[reqPathCount-1], ag->targetPos, reqPos);
+					if (dtStatusFailed(status))
+						reqPathCount = 0;
+				}
+				else
+				{
+					dtVcopy(reqPos, ag->targetPos);
+				}
+			}
+			else
+			{
+				reqPathCount = 0;
+			}
+				
+			if (!reqPathCount)
+			{
+				// Could not find path, start the request from current location.
+				dtVcopy(reqPos, ag->npos);
+				reqPath[0] = path[0];
+				reqPathCount = 1;
+			}
+
+			ag->corridor.setCorridor(reqPos, reqPath, reqPathCount);
+			ag->boundary.reset();
+
+			if (reqPath[reqPathCount-1] == ag->targetRef)
+			{
+				ag->targetState = DT_CROWDAGENT_TARGET_VALID;
+				ag->targetReplanTime = 0.0;
+			}
+			else
+			{
+				// The path is longer or potentially unreachable, full plan.
+				ag->targetState = DT_CROWDAGENT_TARGET_WAITING_FOR_QUEUE;
+			}
+		}
+		
+		if (ag->targetState == DT_CROWDAGENT_TARGET_WAITING_FOR_QUEUE)
+		{
+			nqueue = addToPathQueue(ag, queue, nqueue, PATH_MAX_AGENTS);
+		}
+	}
+
+	for (int i = 0; i < nqueue; ++i)
+	{
+		dtCrowdAgent* ag = queue[i];
+		ag->targetPathqRef = m_pathq.request(ag->corridor.getLastPoly(), ag->targetRef,
+											 ag->corridor.getTarget(), ag->targetPos, &m_filter);
+		if (ag->targetPathqRef != DT_PATHQ_INVALID)
+			ag->targetState = DT_CROWDAGENT_TARGET_WAITING_FOR_PATH;
+	}
+
+	
+	// Update requests.
+	m_pathq.update(MAX_ITERS_PER_UPDATE);
+
+	dtStatus status;
+
+	// Process path results.
+	for (int i = 0; i < m_maxAgents; ++i)
+	{
+		dtCrowdAgent* ag = &m_agents[i];
+		if (!ag->active)
+			continue;
+		if (ag->targetState == DT_CROWDAGENT_TARGET_NONE || ag->targetState == DT_CROWDAGENT_TARGET_VELOCITY)
+			continue;
+		
+		if (ag->targetState == DT_CROWDAGENT_TARGET_WAITING_FOR_PATH)
+		{
+			// Poll path queue.
+			status = m_pathq.getRequestStatus(ag->targetPathqRef);
+			if (dtStatusFailed(status))
+			{
+				// Path find failed, retry if the target location is still valid.
+				ag->targetPathqRef = DT_PATHQ_INVALID;
+				if (ag->targetRef)
+					ag->targetState = DT_CROWDAGENT_TARGET_REQUESTING;
+				else
+					ag->targetState = DT_CROWDAGENT_TARGET_FAILED;
+				ag->targetReplanTime = 0.0;
+			}
+			else if (dtStatusSucceed(status))
+			{
+				const dtPolyRef* path = ag->corridor.getPath();
+				const int npath = ag->corridor.getPathCount();
+				dtAssert(npath);
+				
+				// Apply results.
+				float targetPos[3];
+				dtVcopy(targetPos, ag->targetPos);
+				
+				dtPolyRef* res = m_pathResult;
+				bool valid = true;
+				int nres = 0;
+				status = m_pathq.getPathResult(ag->targetPathqRef, res, &nres, m_maxPathResult);
+				if (dtStatusFailed(status) || !nres)
+					valid = false;
+				
+				// Merge result and existing path.
+				// The agent might have moved whilst the request is
+				// being processed, so the path may have changed.
+				// We assume that the end of the path is at the same location
+				// where the request was issued.
+				
+				// The last ref in the old path should be the same as
+				// the location where the request was issued..
+				if (valid && path[npath-1] != res[0])
+					valid = false;
+				
+				if (valid)
+				{
+					// Put the old path infront of the old path.
+					if (npath > 1)
+					{
+						// Make space for the old path.
+						if ((npath-1)+nres > m_maxPathResult)
+							nres = m_maxPathResult - (npath-1);
+						
+						memmove(res+npath-1, res, sizeof(dtPolyRef)*nres);
+						// Copy old path in the beginning.
+						memcpy(res, path, sizeof(dtPolyRef)*(npath-1));
+						nres += npath-1;
+						
+						// Remove trackbacks
+						for (int j = 0; j < nres; ++j)
+						{
+							if (j-1 >= 0 && j+1 < nres)
+							{
+								if (res[j-1] == res[j+1])
+								{
+									memmove(res+(j-1), res+(j+1), sizeof(dtPolyRef)*(nres-(j+1)));
+									nres -= 2;
+									j -= 2;
+								}
+							}
+						}
+						
+					}
+					
+					// Check for partial path.
+					if (res[nres-1] != ag->targetRef)
+					{
+						// Partial path, constrain target position inside the last polygon.
+						float nearest[3];
+						status = m_navquery->closestPointOnPoly(res[nres-1], targetPos, nearest);
+						if (dtStatusSucceed(status))
+							dtVcopy(targetPos, nearest);
+						else
+							valid = false;
+					}
+				}
+				
+				if (valid)
+				{
+					// Set current corridor.
+					ag->corridor.setCorridor(targetPos, res, nres);
+					// Force to update boundary.
+					ag->boundary.reset();
+					ag->targetState = DT_CROWDAGENT_TARGET_VALID;
+				}
+				else
+				{
+					// Something went wrong.
+					ag->targetState = DT_CROWDAGENT_TARGET_FAILED;
+				}
+
+				ag->targetReplanTime = 0.0;
+			}
+		}
+	}
+	
+}
+
+
+void dtCrowd::updateTopologyOptimization(dtCrowdAgent** agents, const int nagents, const float dt)
+{
+	// Accumulates per-agent waiting time and optimizes the corridors of the agents queued via addToOptQueue().
+	if (nagents == 0)
+		return;
+
+	const float minInterval = 0.5f;	// Seconds an agent must accumulate before it becomes a candidate.
+	const int maxPerUpdate = 1;		// Capacity of the candidate queue.
+	dtCrowdAgent* pending[maxPerUpdate];
+	int npending = 0;
+
+	// Advance the timers of eligible agents and queue the ones that are due.
+	for (int i = 0; i < nagents; ++i)
+	{
+		dtCrowdAgent* agent = agents[i];
+		const bool walking = agent->state == DT_CROWDAGENT_STATE_WALKING;
+		const bool hasPath = agent->targetState != DT_CROWDAGENT_TARGET_NONE && agent->targetState != DT_CROWDAGENT_TARGET_VELOCITY;
+		const bool wantsTopo = (agent->params.updateFlags & DT_CROWD_OPTIMIZE_TOPO) != 0;
+		if (!walking || !hasPath || !wantsTopo)
+			continue;
+		agent->topologyOptTime += dt;
+		if (agent->topologyOptTime >= minInterval)
+			npending = addToOptQueue(agent, pending, npending, maxPerUpdate);
+	}
+
+	// Optimize the queued agents and restart their timers.
+	for (int k = 0; k < npending; ++k)
+	{
+		pending[k]->corridor.optimizePathTopology(m_navquery, &m_filter);
+		pending[k]->topologyOptTime = 0;
+	}
+}
+
+void dtCrowd::checkPathValidity(dtCrowdAgent** agents, const int nagents, const float dt)
+{
+	static const int CHECK_LOOKAHEAD = 10; // look-ahead handed to corridor.isValid(); also the path-count limit for the goal replan below
+	static const float TARGET_REPLAN_DELAY = 1.0; // seconds
+	// For every walking agent with a path target: revalidate its position, target and corridor, and request a replan if any is stale.
+	for (int i = 0; i < nagents; ++i)
+	{
+		dtCrowdAgent* ag = agents[i];
+		
+		if (ag->state != DT_CROWDAGENT_STATE_WALKING)
+			continue;
+
+		if (ag->targetState == DT_CROWDAGENT_TARGET_NONE || ag->targetState == DT_CROWDAGENT_TARGET_VELOCITY)
+			continue;
+		// Accumulate elapsed time for the TARGET_REPLAN_DELAY check further down.
+		ag->targetReplanTime += dt;
+
+		bool replan = false;
+
+		// First check that the current location is valid.
+		const int idx = getAgentIndex(ag);
+		float agentPos[3];
+		dtPolyRef agentRef = ag->corridor.getFirstPoly();
+		dtVcopy(agentPos, ag->npos);
+		if (!m_navquery->isValidPolyRef(agentRef, &m_filter))
+		{
+			// Current location is not valid, try to reposition.
+			// TODO: this can snap agents, how to handle that?
+			float nearest[3];
+			agentRef = 0;
+			m_navquery->findNearestPoly(ag->npos, m_ext, &m_filter, &agentRef, nearest);
+			dtVcopy(agentPos, nearest);
+
+			if (!agentRef)
+			{
+				// Could not find location in navmesh, set state to invalid.
+				ag->corridor.reset(0, agentPos);
+				ag->boundary.reset();
+				ag->state = DT_CROWDAGENT_STATE_INVALID;
+				continue;
+			}
+
+			// Make sure the first polygon is valid, but leave other valid
+			// polygons in the path so that replanner can adjust the path better.
+			ag->corridor.fixPathStart(agentRef, agentPos);
+//			ag->corridor.trimInvalidPath(agentRef, agentPos, m_navquery, &m_filter);
+			ag->boundary.reset();
+			dtVcopy(ag->npos, agentPos);
+
+			replan = true;
+		}
+
+		// Try to recover move request position.
+		if (ag->targetState != DT_CROWDAGENT_TARGET_NONE && ag->targetState != DT_CROWDAGENT_TARGET_FAILED)
+		{
+			if (!m_navquery->isValidPolyRef(ag->targetRef, &m_filter))
+			{
+				// Current target is not valid, try to reposition.
+				float nearest[3];
+				m_navquery->findNearestPoly(ag->targetPos, m_ext, &m_filter, &ag->targetRef, nearest);
+				dtVcopy(ag->targetPos, nearest);
+				replan = true;
+			}
+			if (!ag->targetRef)
+			{
+				// Failed to reposition target, fail moverequest.
+				// Setting the state to NONE also suppresses the replan request at the end of this iteration.
+				ag->corridor.reset(agentRef, agentPos);
+				ag->targetState = DT_CROWDAGENT_TARGET_NONE;
+			}
+		}
+
+		// If nearby corridor is not valid, replan.
+		if (!ag->corridor.isValid(CHECK_LOOKAHEAD, m_navquery, &m_filter))
+		{
+			// Fix current path.
+//			ag->corridor.trimInvalidPath(agentRef, agentPos, m_navquery, &m_filter);
+//			ag->boundary.reset();
+			replan = true;
+		}
+		
+		// If the end of the path is near and it is not the requested location, replan.
+		if (ag->targetState == DT_CROWDAGENT_TARGET_VALID)
+		{
+			if (ag->targetReplanTime > TARGET_REPLAN_DELAY &&
+				ag->corridor.getPathCount() < CHECK_LOOKAHEAD &&
+				ag->corridor.getLastPoly() != ag->targetRef)
+				replan = true;
+		}
+
+		// Try to replan path to goal.
+		if (replan)
+		{
+			if (ag->targetState != DT_CROWDAGENT_TARGET_NONE)
+			{
+				requestMoveTargetReplan(idx, ag->targetRef, ag->targetPos);
+			}
+		}
+	}
+}
+	
+void dtCrowd::update(const float dt, dtCrowdAgentDebugInfo* debug)
+{
+	// Advances the crowd by dt seconds. 'debug' may be null; otherwise debug->idx is matched against the active-list index.
+	m_velocitySampleCount = 0;
+	const int debugIdx = debug ? debug->idx : -1;
+	
+	dtCrowdAgent** agents = m_activeAgents;
+	int nagents = getActiveAgents(agents, m_maxAgents);
+	
+	// Check that all agents still have valid paths.
+	checkPathValidity(agents, nagents, dt);
+	
+	// Update async move request and path finder.
+	updateMoveRequest(dt);
+
+	// Optimize path topology.
+	updateTopologyOptimization(agents, nagents, dt);
+	
+	// Register agents to proximity grid.
+	m_grid->clear();
+	for (int i = 0; i < nagents; ++i)
+	{
+		dtCrowdAgent* ag = agents[i];
+		const float* p = ag->npos;
+		const float r = ag->params.radius;
+		m_grid->addItem((unsigned short)i, p[0]-r, p[2]-r, p[0]+r, p[2]+r);
+	}
+	
+	// Get nearby navmesh segments and agents to collide with.
+	for (int i = 0; i < nagents; ++i)
+	{
+		dtCrowdAgent* ag = agents[i];
+		if (ag->state != DT_CROWDAGENT_STATE_WALKING)
+			continue;
+
+		// Update the collision boundary after certain distance has been passed or
+		// if it has become invalid.
+		const float updateThr = ag->params.collisionQueryRange*0.25f;
+		if (dtVdist2DSqr(ag->npos, ag->boundary.getCenter()) > dtSqr(updateThr) ||
+			!ag->boundary.isValid(m_navquery, &m_filter))
+		{
+			ag->boundary.update(ag->corridor.getFirstPoly(), ag->npos, ag->params.collisionQueryRange,
+								m_navquery, &m_filter);
+		}
+		// Query neighbour agents
+		ag->nneis = getNeighbours(ag->npos, ag->params.height, ag->params.collisionQueryRange,
+								  ag, ag->neis, DT_CROWDAGENT_MAX_NEIGHBOURS,
+								  agents, nagents, m_grid);
+		// Convert the neighbour indices from active-list positions to agent slots in m_agents.
+		for (int j = 0; j < ag->nneis; j++)
+			ag->neis[j].idx = getAgentIndex(agents[ag->neis[j].idx]);
+	}
+	
+	// Find next corner to steer to.
+	for (int i = 0; i < nagents; ++i)
+	{
+		dtCrowdAgent* ag = agents[i];
+		
+		if (ag->state != DT_CROWDAGENT_STATE_WALKING)
+			continue;
+		if (ag->targetState == DT_CROWDAGENT_TARGET_NONE || ag->targetState == DT_CROWDAGENT_TARGET_VELOCITY)
+			continue;
+		
+		// Find corners for steering
+		ag->ncorners = ag->corridor.findCorners(ag->cornerVerts, ag->cornerFlags, ag->cornerPolys,
+												DT_CROWDAGENT_MAX_CORNERS, m_navquery, &m_filter);
+		
+		// Check to see if the corner after the next corner is directly visible,
+		// and short cut to there.
+		if ((ag->params.updateFlags & DT_CROWD_OPTIMIZE_VIS) && ag->ncorners > 0)
+		{
+			const float* target = &ag->cornerVerts[dtMin(1,ag->ncorners-1)*3];
+			ag->corridor.optimizePathVisibility(target, ag->params.pathOptimizationRange, m_navquery, &m_filter);
+			
+			// Copy data for debug purposes.
+			if (debugIdx == i)
+			{
+				dtVcopy(debug->optStart, ag->corridor.getPos());
+				dtVcopy(debug->optEnd, target);
+			}
+		}
+		else
+		{
+			// Copy data for debug purposes.
+			if (debugIdx == i)
+			{
+				dtVset(debug->optStart, 0,0,0);
+				dtVset(debug->optEnd, 0,0,0);
+			}
+		}
+	}
+	
+	// Trigger off-mesh connections (depends on corners).
+	for (int i = 0; i < nagents; ++i)
+	{
+		dtCrowdAgent* ag = agents[i];
+		
+		if (ag->state != DT_CROWDAGENT_STATE_WALKING)
+			continue;
+		if (ag->targetState == DT_CROWDAGENT_TARGET_NONE || ag->targetState == DT_CROWDAGENT_TARGET_VELOCITY)
+			continue;
+		
+		// Check if the agent is within trigger range of an off-mesh connection.
+		const float triggerRadius = ag->params.radius*2.25f;
+		if (overOffmeshConnection(ag, triggerRadius))
+		{
+			// Prepare to off-mesh connection.
+			// The animation slot is addressed by the agent's position in m_agents.
+			const int idx = ag - m_agents;
+			dtCrowdAgentAnimation* anim = &m_agentAnims[idx];
+			// Adjust the path over the off-mesh connection.
+			dtPolyRef refs[2];
+			if (ag->corridor.moveOverOffmeshConnection(ag->cornerPolys[ag->ncorners-1], refs,
+													   anim->startPos, anim->endPos, m_navquery))
+			{
+				dtVcopy(anim->initPos, ag->npos);
+				anim->polyRef = refs[1];
+				anim->active = 1;
+				anim->t = 0.0f;
+				anim->tmax = (dtVdist2D(anim->startPos, anim->endPos) / ag->params.maxSpeed) * 0.5f;
+				
+				ag->state = DT_CROWDAGENT_STATE_OFFMESH;
+				ag->ncorners = 0;
+				ag->nneis = 0;
+				continue;
+			}
+			else
+			{
+				// Path validity check will ensure that bad/blocked connections will be replanned.
+			}
+		}
+	}
+		
+	// Calculate steering.
+	for (int i = 0; i < nagents; ++i)
+	{
+		dtCrowdAgent* ag = agents[i];
+
+		if (ag->state != DT_CROWDAGENT_STATE_WALKING)
+			continue;
+		if (ag->targetState == DT_CROWDAGENT_TARGET_NONE)
+			continue;
+		
+		float dvel[3] = {0,0,0};
+
+		if (ag->targetState == DT_CROWDAGENT_TARGET_VELOCITY)
+		{
+			// In velocity mode targetPos holds the requested velocity rather than a position.
+			dtVcopy(dvel, ag->targetPos);
+			ag->desiredSpeed = dtVlen(ag->targetPos);
+		}
+		else
+		{
+			// Calculate steering direction.
+			if (ag->params.updateFlags & DT_CROWD_ANTICIPATE_TURNS)
+				calcSmoothSteerDirection(ag, dvel);
+			else
+				calcStraightSteerDirection(ag, dvel);
+			
+			// Calculate speed scale, which tells the agent to slowdown at the end of the path.
+			const float slowDownRadius = ag->params.radius*2;	// TODO: make less hacky.
+			const float speedScale = getDistanceToGoal(ag, slowDownRadius) / slowDownRadius;
+				
+			ag->desiredSpeed = ag->params.maxSpeed;
+			dtVscale(dvel, dvel, ag->desiredSpeed * speedScale);
+		}
+
+		// Separation
+		if (ag->params.updateFlags & DT_CROWD_SEPARATION)
+		{
+			const float separationDist = ag->params.collisionQueryRange; 
+			const float invSeparationDist = 1.0f / separationDist; 
+			const float separationWeight = ag->params.separationWeight;
+			
+			float w = 0;
+			float disp[3] = {0,0,0};
+			
+			for (int j = 0; j < ag->nneis; ++j)
+			{
+				const dtCrowdAgent* nei = &m_agents[ag->neis[j].idx];
+				
+				float diff[3];
+				dtVsub(diff, ag->npos, nei->npos);
+				diff[1] = 0;
+				
+				const float distSqr = dtVlenSqr(diff);
+				if (distSqr < 0.00001f)
+					continue;
+				if (distSqr > dtSqr(separationDist))
+					continue;
+				const float dist = sqrtf(distSqr);
+				const float weight = separationWeight * (1.0f - dtSqr(dist*invSeparationDist));
+				
+				dtVmad(disp, disp, diff, weight/dist);
+				w += 1.0f;
+			}
+			
+			if (w > 0.0001f)
+			{
+				// Adjust desired velocity.
+				dtVmad(dvel, dvel, disp, 1.0f/w);
+				// Clamp desired velocity to desired speed.
+				const float speedSqr = dtVlenSqr(dvel);
+				const float desiredSqr = dtSqr(ag->desiredSpeed);
+				if (speedSqr > desiredSqr)
+					dtVscale(dvel, dvel, desiredSqr/speedSqr);	// NOTE(review): squared ratio yields a speed below desiredSpeed - confirm intent.
+			}
+		}
+		
+		// Set the desired velocity.
+		dtVcopy(ag->dvel, dvel);
+	}
+	
+	// Velocity planning.	
+	for (int i = 0; i < nagents; ++i)
+	{
+		dtCrowdAgent* ag = agents[i];
+		
+		if (ag->state != DT_CROWDAGENT_STATE_WALKING)
+			continue;
+		
+		if (ag->params.updateFlags & DT_CROWD_OBSTACLE_AVOIDANCE)
+		{
+			m_obstacleQuery->reset();
+			
+			// Add neighbours as obstacles.
+			for (int j = 0; j < ag->nneis; ++j)
+			{
+				const dtCrowdAgent* nei = &m_agents[ag->neis[j].idx];
+				m_obstacleQuery->addCircle(nei->npos, nei->params.radius, nei->vel, nei->dvel);
+			}
+
+			// Append neighbour segments as obstacles.
+			for (int j = 0; j < ag->boundary.getSegmentCount(); ++j)
+			{
+				const float* s = ag->boundary.getSegment(j);
+				if (dtTriArea2D(ag->npos, s, s+3) < 0.0f)
+					continue;
+				m_obstacleQuery->addSegment(s, s+3);
+			}
+
+			dtObstacleAvoidanceDebugData* vod = 0;
+			if (debugIdx == i) 
+				vod = debug->vod;
+			
+			// Sample new safe velocity.
+			bool adaptive = true;
+			int ns = 0;
+
+			const dtObstacleAvoidanceParams* params = &m_obstacleQueryParams[ag->params.obstacleAvoidanceType];
+				
+			if (adaptive)
+			{
+				ns = m_obstacleQuery->sampleVelocityAdaptive(ag->npos, ag->params.radius, ag->desiredSpeed,
+															 ag->vel, ag->dvel, ag->nvel, params, vod);
+			}
+			else
+			{
+				ns = m_obstacleQuery->sampleVelocityGrid(ag->npos, ag->params.radius, ag->desiredSpeed,
+														 ag->vel, ag->dvel, ag->nvel, params, vod);
+			}
+			m_velocitySampleCount += ns;
+		}
+		else
+		{
+			// If not using velocity planning, new velocity is directly the desired velocity.
+			dtVcopy(ag->nvel, ag->dvel);
+		}
+	}
+
+	// Integrate.
+	for (int i = 0; i < nagents; ++i)
+	{
+		dtCrowdAgent* ag = agents[i];
+		if (ag->state != DT_CROWDAGENT_STATE_WALKING)
+			continue;
+		integrate(ag, dt);
+	}
+	
+	// Handle collisions.
+	static const float COLLISION_RESOLVE_FACTOR = 0.7f;
+	
+	for (int iter = 0; iter < 4; ++iter)
+	{
+		for (int i = 0; i < nagents; ++i)
+		{
+			dtCrowdAgent* ag = agents[i];
+			const int idx0 = getAgentIndex(ag);
+			
+			if (ag->state != DT_CROWDAGENT_STATE_WALKING)
+				continue;
+
+			dtVset(ag->disp, 0,0,0);
+			
+			float w = 0;
+
+			for (int j = 0; j < ag->nneis; ++j)
+			{
+				const dtCrowdAgent* nei = &m_agents[ag->neis[j].idx];
+				const int idx1 = getAgentIndex(nei);
+
+				float diff[3];
+				dtVsub(diff, ag->npos, nei->npos);
+				diff[1] = 0;
+				
+				float dist = dtVlenSqr(diff);
+				if (dist > dtSqr(ag->params.radius + nei->params.radius))
+					continue;
+				dist = sqrtf(dist);
+				float pen = (ag->params.radius + nei->params.radius) - dist;
+				if (dist < 0.0001f)
+				{
+					// Agents on top of each other, try to choose diverging separation directions.
+					if (idx0 > idx1)
+						dtVset(diff, -ag->dvel[2],0,ag->dvel[0]);
+					else
+						dtVset(diff, ag->dvel[2],0,-ag->dvel[0]);
+					pen = 0.01f;
+				}
+				else
+				{
+					pen = (1.0f/dist) * (pen*0.5f) * COLLISION_RESOLVE_FACTOR;
+				}
+				
+				dtVmad(ag->disp, ag->disp, diff, pen);			
+				
+				w += 1.0f;
+			}
+			
+			if (w > 0.0001f)
+			{
+				const float iw = 1.0f / w;
+				dtVscale(ag->disp, ag->disp, iw);
+			}
+		}
+		
+		// Apply the displacements only after all of them were computed for this iteration.
+		for (int i = 0; i < nagents; ++i)
+		{
+			dtCrowdAgent* ag = agents[i];
+			if (ag->state != DT_CROWDAGENT_STATE_WALKING)
+				continue;
+			
+			dtVadd(ag->npos, ag->npos, ag->disp);
+		}
+	}
+	
+	for (int i = 0; i < nagents; ++i)
+	{
+		dtCrowdAgent* ag = agents[i];
+		if (ag->state != DT_CROWDAGENT_STATE_WALKING)
+			continue;
+		
+		// Move along navmesh.
+		ag->corridor.movePosition(ag->npos, m_navquery, &m_filter);
+		// Get valid constrained position back.
+		dtVcopy(ag->npos, ag->corridor.getPos());
+
+		// If not using path, truncate the corridor to just one poly.
+		if (ag->targetState == DT_CROWDAGENT_TARGET_NONE || ag->targetState == DT_CROWDAGENT_TARGET_VELOCITY)
+		{
+			ag->corridor.reset(ag->corridor.getFirstPoly(), ag->npos);
+		}
+
+	}
+	
+	// Update agents using off-mesh connection.
+	for (int i = 0; i < m_maxAgents; ++i)
+	{
+		dtCrowdAgentAnimation* anim = &m_agentAnims[i];
+		if (!anim->active)
+			continue;
+		dtCrowdAgent* ag = &m_agents[i];	// Animations are indexed by agent slot, not by position in the active list.
+
+		anim->t += dt;
+		if (anim->t > anim->tmax)
+		{
+			// Reset animation
+			anim->active = 0;
+			// Prepare agent for walking.
+			ag->state = DT_CROWDAGENT_STATE_WALKING;
+			continue;
+		}
+		
+		// Update position
+		// The first 15% of the animation moves from initPos to startPos, the rest from startPos to endPos.
+		const float ta = anim->tmax*0.15f;
+		const float tb = anim->tmax;
+		if (anim->t < ta)
+		{
+			const float u = tween(anim->t, 0.0, ta);
+			dtVlerp(ag->npos, anim->initPos, anim->startPos, u);
+		}
+		else
+		{
+			const float u = tween(anim->t, ta, tb);
+			dtVlerp(ag->npos, anim->startPos, anim->endPos, u);
+		}
+		// Update velocity.
+		dtVset(ag->vel, 0,0,0);
+		dtVset(ag->dvel, 0,0,0);
+	}
+	
+}
+
+
diff --git a/Engine/lib/recast/DetourCrowd/Source/DetourLocalBoundary.cpp b/Engine/lib/recast/DetourCrowd/Source/DetourLocalBoundary.cpp
new file mode 100644
index 000000000..79d432206
--- /dev/null
+++ b/Engine/lib/recast/DetourCrowd/Source/DetourLocalBoundary.cpp
@@ -0,0 +1,137 @@
+//
+// Copyright (c) 2009-2010 Mikko Mononen memon@inside.org
+//
+// This software is provided 'as-is', without any express or implied
+// warranty.  In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source distribution.
+//
+
+#include <float.h>
+#include <string.h>
+#include "DetourLocalBoundary.h"
+#include "DetourNavMeshQuery.h"
+#include "DetourCommon.h"
+#include "DetourAssert.h"
+
+
+dtLocalBoundary::dtLocalBoundary() :
+	m_nsegs(0),	// No segments stored yet.
+	m_npolys(0)	// No polygons stored yet.
+{
+	dtVset(m_center, FLT_MAX,FLT_MAX,FLT_MAX);	// Same center value that reset() uses.
+}
+
+dtLocalBoundary::~dtLocalBoundary()
+{	// Intentionally empty: this file never allocates memory for the boundary.
+}
+
+void dtLocalBoundary::reset()
+{
+	// Forget every cached segment and polygon, then park the center at FLT_MAX.
+	m_nsegs = m_npolys = 0;
+	dtVset(m_center, FLT_MAX,FLT_MAX,FLT_MAX);
+}
+
+void dtLocalBoundary::addSegment(const float dist, const float* s)
+{
+	// Inserts segment 's' (6 floats) keyed by 'dist' so that m_segs stays sorted by ascending d, capped at MAX_LOCAL_SEGS.
+	Segment* seg = 0;
+	if (!m_nsegs)
+	{
+		// The list is empty: the new segment becomes the first entry.
+		seg = &m_segs[0];
+	}
+	else if (dist >= m_segs[m_nsegs-1].d)
+	{
+		// Not closer than the current last entry: when the list is full the segment is dropped.
+		if (m_nsegs >= MAX_LOCAL_SEGS)
+			return;
+		// Otherwise append it after the last entry.
+		seg = &m_segs[m_nsegs];
+	}
+	else
+	{
+		// Insert in between: find the first entry at least as far away and shift the tail one slot up.
+		int i;
+		for (i = 0; i < m_nsegs; ++i)
+			if (dist <= m_segs[i].d)
+				break;
+		const int tgt = i+1;
+		const int n = dtMin(m_nsegs-i, MAX_LOCAL_SEGS-tgt);
+		dtAssert(tgt+n <= MAX_LOCAL_SEGS);
+		if (n > 0)
+			memmove(&m_segs[tgt], &m_segs[i], sizeof(Segment)*n);
+		seg = &m_segs[i];
+	}
+	// Fill the chosen slot with the key and the six segment floats.
+	seg->d = dist;
+	memcpy(seg->s, s, sizeof(float)*6);
+	// The count only grows until capacity; a full list had its last entry pushed out by the shift above.
+	if (m_nsegs < MAX_LOCAL_SEGS)
+		m_nsegs++;
+}
+
+void dtLocalBoundary::update(dtPolyRef ref, const float* pos, const float collisionQueryRange,
+							 dtNavMeshQuery* navquery, const dtQueryFilter* filter)
+{
+	// Rebuilds the cached boundary around 'pos': gathers the polygons near 'ref' and keeps
+	// their wall segments whose 2D distance to 'pos' is within collisionQueryRange.
+	static const int MAX_SEGS_PER_POLY = DT_VERTS_PER_POLYGON*3;
+
+	// Without a start polygon there is nothing to query; fall back to the empty state.
+	if (!ref)
+	{
+		reset();
+		return;
+	}
+
+	dtVcopy(m_center, pos);	// Remember where this boundary was built.
+
+	// Step 1: collect the neighbourhood polygons around the position.
+	navquery->findLocalNeighbourhood(ref, pos, collisionQueryRange,
+									 filter, m_polys, 0, &m_npolys, MAX_LOCAL_POLYS);
+
+	// Step 2: gather each polygon's wall segments; addSegment() keeps them ordered by distance.
+	m_nsegs = 0;
+	const float maxDistSqr = dtSqr(collisionQueryRange);
+	float wallSegs[MAX_SEGS_PER_POLY*6];
+	int nwallSegs = 0;
+	for (int p = 0; p < m_npolys; ++p)
+	{
+		navquery->getPolyWallSegments(m_polys[p], filter, wallSegs, 0, &nwallSegs, MAX_SEGS_PER_POLY);
+		for (int w = 0; w < nwallSegs; ++w)
+		{
+			const float* seg = &wallSegs[w*6];
+			float tseg;	// Output of the distance query; unused here.
+			const float distSqr = dtDistancePtSegSqr2D(pos, seg, seg+3, tseg);
+			if (!(distSqr > maxDistSqr))
+				addSegment(distSqr, seg);
+		}
+	}
+}
+
+bool dtLocalBoundary::isValid(dtNavMeshQuery* navquery, const dtQueryFilter* filter)
+{
+	// A boundary holding no polygons is reported as invalid.
+	if (m_npolys == 0)
+		return false;
+
+	// Valid only while every cached polygon is still accepted by isValidPolyRef().
+	int idx = 0;
+	while (idx < m_npolys && navquery->isValidPolyRef(m_polys[idx], filter))
+		++idx;
+
+	// The scan stops early at the first rejected polygon.
+	return idx >= m_npolys;
+}
+
diff --git a/Engine/lib/recast/DetourCrowd/Source/DetourObstacleAvoidance.cpp b/Engine/lib/recast/DetourCrowd/Source/DetourObstacleAvoidance.cpp
new file mode 100644
index 000000000..d3f90b7ab
--- /dev/null
+++ b/Engine/lib/recast/DetourCrowd/Source/DetourObstacleAvoidance.cpp
@@ -0,0 +1,544 @@
+//
+// Copyright (c) 2009-2010 Mikko Mononen memon@inside.org
+//
+// This software is provided 'as-is', without any express or implied
+// warranty.  In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source distribution.
+//
+
+#include "DetourObstacleAvoidance.h"
+#include "DetourCommon.h"
+#include "DetourAlloc.h"
+#include "DetourAssert.h"
+#include <string.h>
+#include <math.h>
+#include <float.h>
+#include <new>
+
+static const float DT_PI = 3.14159265f;
+
+static int sweepCircleCircle(const float* c0, const float r0, const float* v,
+							 const float* c1, const float r1,
+							 float& tmin, float& tmax)
+{
+	// Sweeps circle (c0,r0) along velocity v against circle (c1,r1) using the 2D helpers.
+	// Returns 1 and writes the two roots to tmin/tmax, or 0 (outputs untouched) when there is no hit.
+	static const float EPS = 0.0001f;
+	const float speedSqr = dtVdot2D(v,v);
+	if (speedSqr < EPS) return 0;	// Velocity is (nearly) zero.
+	float delta[3];
+	dtVsub(delta,c1,c0);
+	const float radSum = r0+r1;
+	const float c = dtVdot2D(delta,delta) - radSum*radSum;
+	const float b = dtVdot2D(v,delta);
+	const float disc = b*b - speedSqr*c;
+	if (disc < 0.0f) return 0;	// Negative discriminant: the circles never touch.
+	const float invSpeedSqr = 1.0f / speedSqr;
+	const float root = dtSqrt(disc);
+	tmin = (b - root) * invSpeedSqr;
+	tmax = (b + root) * invSpeedSqr;
+	return 1;
+}
+
+static int isectRaySeg(const float* ap, const float* u,
+					   const float* bp, const float* bq,
+					   float& t)
+{
+	// 2D test of the ray ap + t*u against segment bp-bq; returns 1 only when t and the segment parameter are in [0,1].
+	// Note that t is overwritten whenever the ray and the segment are not (nearly) parallel, even on a miss.
+	float segDir[3], toRay[3];
+	dtVsub(segDir,bq,bp);
+	dtVsub(toRay,ap,bp);
+	const float denom = dtVperp2D(u,segDir);
+	if (fabsf(denom) < 1e-6f) return 0;	// Parallel or degenerate case.
+	const float invDenom = 1.0f/denom;
+	t = dtVperp2D(segDir,toRay) * invDenom;
+	const float s = dtVperp2D(u,toRay) * invDenom;	// Parameter along the segment.
+	return (t < 0 || t > 1 || s < 0 || s > 1) ? 0 : 1;
+}
+
+
+
+dtObstacleAvoidanceDebugData* dtAllocObstacleAvoidanceDebugData()
+{
+	// Take raw storage from dtAlloc and placement-construct the object in it; yields 0 when allocation fails.
+	void* storage = dtAlloc(sizeof(dtObstacleAvoidanceDebugData), DT_ALLOC_PERM);
+	return storage ? new(storage) dtObstacleAvoidanceDebugData : 0;
+}
+
+void dtFreeObstacleAvoidanceDebugData(dtObstacleAvoidanceDebugData* ptr)
+{
+	if (!ptr) return;	// Freeing a null pointer is a no-op.
+	ptr->~dtObstacleAvoidanceDebugData();	// Explicit destructor call, matching the placement new in dtAllocObstacleAvoidanceDebugData().
+	dtFree(ptr);	// Return the raw storage to the Detour allocator.
+}
+
+
+dtObstacleAvoidanceDebugData::dtObstacleAvoidanceDebugData() :
+	m_nsamples(0),	// No samples recorded.
+	m_maxSamples(0),	// Capacity is set by init().
+	m_vel(0),	// All sample buffers below stay null until init() allocates them.
+	m_ssize(0),
+	m_pen(0),
+	m_vpen(0),
+	m_vcpen(0),
+	m_spen(0),
+	m_tpen(0)
+{
+}
+
+dtObstacleAvoidanceDebugData::~dtObstacleAvoidanceDebugData()
+{	// Releases every sample buffer; ones init() never allocated are still null (dtFree presumably tolerates that - confirm).
+	dtFree(m_vel);
+	dtFree(m_ssize);
+	dtFree(m_pen);
+	dtFree(m_vpen);
+	dtFree(m_vcpen);
+	dtFree(m_spen);
+	dtFree(m_tpen);
+}
+		
+bool dtObstacleAvoidanceDebugData::init(const int maxSamples)
+{
+	// Allocates the sample buffers for up to maxSamples entries.
+	// Returns false as soon as one allocation fails; buffers obtained before the
+	// failure stay assigned to their members.
+	dtAssert(maxSamples);
+	m_maxSamples = maxSamples;
+
+	// Velocities take three floats per sample.
+	m_vel = (float*)dtAlloc(sizeof(float)*3*m_maxSamples, DT_ALLOC_PERM);
+	if (!m_vel)
+		return false;
+
+	// Every other channel stores a single float per sample. The table keeps the
+	// allocation order: pen, ssize, vpen, vcpen, spen, tpen.
+	float** const channels[6] =
+	{
+		&m_pen, &m_ssize, &m_vpen, &m_vcpen, &m_spen, &m_tpen
+	};
+	for (int c = 0; c < 6; ++c)
+	{
+		float* buf = (float*)dtAlloc(sizeof(float)*m_maxSamples, DT_ALLOC_PERM);
+		*channels[c] = buf;
+		if (!buf)
+			return false;
+	}
+
+	return true;
+}
+
+void dtObstacleAvoidanceDebugData::reset()
+{
+	m_nsamples = 0;	// Discard recorded samples; the buffers and m_maxSamples are left as they are.
+}
+
+void dtObstacleAvoidanceDebugData::addSample(const float* vel, const float ssize, const float pen,
+											 const float vpen, const float vcpen, const float spen, const float tpen)
+{
+	// Appends one sample to the debug buffers; samples beyond m_maxSamples are silently dropped.
+	if (m_nsamples >= m_maxSamples) return;
+	dtAssert(m_vel);
+	dtAssert(m_ssize);
+	dtAssert(m_pen);
+	dtAssert(m_vpen);
+	dtAssert(m_vcpen);
+	dtAssert(m_spen);
+	dtAssert(m_tpen);
+	const int slot = m_nsamples++;	// Claim the next free index.
+	dtVcopy(&m_vel[slot*3], vel);
+	m_tpen[slot] = tpen;
+	m_spen[slot] = spen;
+	m_vcpen[slot] = vcpen;
+	m_vpen[slot] = vpen;
+	m_pen[slot] = pen;
+	m_ssize[slot] = ssize;
+}
+
+static void normalizeArray(float* arr, const int n)
+{
+	// Rescales arr[0..n) in place into [0,1]; when the value range is at most 0.001 the values are only shifted and clamped.
+	float lo = FLT_MAX, hi = -FLT_MAX;
+	for (int k = 0; k < n; ++k)
+	{
+		const float value = arr[k];
+		lo = dtMin(lo, value);
+		hi = dtMax(hi, value);
+	}
+	const float span = hi-lo;
+	const float scale = (span > 0.001f) ? (1.0f / span) : 1;
+	for (int k = 0; k < n; ++k)
+		arr[k] = dtClamp((arr[k]-lo)*scale, 0.0f, 1.0f);
+}
+
+void dtObstacleAvoidanceDebugData::normalizeSamples()
+{
+	// Each penalty channel is normalized independently over the samples recorded so far.
+	float* const channels[5] = { m_pen, m_vpen, m_vcpen, m_spen, m_tpen };
+
+	for (int c = 0; c < 5; ++c)
+		normalizeArray(channels[c], m_nsamples);
+}
+
+
+dtObstacleAvoidanceQuery* dtAllocObstacleAvoidanceQuery()
+{
+	// Take raw storage from dtAlloc and placement-construct the query in it; yields 0 when allocation fails.
+	void* storage = dtAlloc(sizeof(dtObstacleAvoidanceQuery), DT_ALLOC_PERM);
+	return storage ? new(storage) dtObstacleAvoidanceQuery : 0;
+}
+
+void dtFreeObstacleAvoidanceQuery(dtObstacleAvoidanceQuery* ptr)
+{
+	if (!ptr) return;	// Freeing a null pointer is a no-op.
+	ptr->~dtObstacleAvoidanceQuery();	// Explicit destructor call, matching the placement new in dtAllocObstacleAvoidanceQuery().
+	dtFree(ptr);	// Return the raw storage to the Detour allocator.
+}
+
+
+dtObstacleAvoidanceQuery::dtObstacleAvoidanceQuery() :
+	m_maxCircles(0),	// Capacities are set by init().
+	m_circles(0),	// Obstacle arrays stay null until init() allocates them.
+	m_ncircles(0),
+	m_maxSegments(0),
+	m_segments(0),
+	m_nsegments(0)
+{
+}
+
+dtObstacleAvoidanceQuery::~dtObstacleAvoidanceQuery()
+{	// Releases both obstacle arrays; either may still be null (dtFree presumably tolerates that - confirm).
+	dtFree(m_circles);
+	dtFree(m_segments);
+}
+
+bool dtObstacleAvoidanceQuery::init(const int maxCircles, const int maxSegments)
+{
+	// Allocates zero-filled storage for maxCircles circles and maxSegments segments; false if either allocation fails.
+	m_ncircles = 0;
+	m_maxCircles = maxCircles;
+	const size_t circleBytes = sizeof(dtObstacleCircle)*m_maxCircles;
+	m_circles = (dtObstacleCircle*)dtAlloc(circleBytes, DT_ALLOC_PERM);
+	if (!m_circles) return false;
+	memset(m_circles, 0, circleBytes);
+
+	m_nsegments = 0;
+	m_maxSegments = maxSegments;
+	const size_t segmentBytes = sizeof(dtObstacleSegment)*m_maxSegments;
+	m_segments = (dtObstacleSegment*)dtAlloc(segmentBytes, DT_ALLOC_PERM);
+	if (!m_segments) return false;
+	memset(m_segments, 0, segmentBytes);
+	return true;
+}
+
+void dtObstacleAvoidanceQuery::reset()
+{
+	// Forget all obstacles added so far; the allocated arrays and capacities are untouched.
+	m_nsegments = m_ncircles = 0;
+}
+
+void dtObstacleAvoidanceQuery::addCircle(const float* pos, const float rad,
+										 const float* vel, const float* dvel)
+{
+	// Stores a circular obstacle in the next free slot; circles beyond m_maxCircles are dropped.
+	if (m_ncircles >= m_maxCircles) return;
+
+	dtObstacleCircle& slot = m_circles[m_ncircles++];
+	slot.rad = rad;
+	dtVcopy(slot.p, pos);
+	dtVcopy(slot.vel, vel);
+	dtVcopy(slot.dvel, dvel);
+}
+
+void dtObstacleAvoidanceQuery::addSegment(const float* p, const float* q)
+{
+	// Stores segment p-q in the next free slot; dropped once all m_maxSegments slots are used ('>' here would write one past the end).
+	if (m_nsegments >= m_maxSegments)
+		return;
+	dtObstacleSegment* seg = &m_segments[m_nsegments++];
+	dtVcopy(seg->p, p);
+	dtVcopy(seg->q, q);
+}
+
+void dtObstacleAvoidanceQuery::prepare(const float* pos, const float* dvel)
+{
+	// Precomputes per-obstacle data (circle dp/np, segment touch flag) for an agent at 'pos' with desired velocity 'dvel'.
+	for (int i = 0; i < m_ncircles; ++i)
+	{
+		dtObstacleCircle* cir = &m_circles[i];
+		
+		// Side: dp becomes the normalized direction from the agent to the circle center.
+		const float* pa = pos;
+		const float* pb = cir->p;
+		
+		const float orig[3] = {0,0};
+		float dv[3];
+		dtVsub(cir->dp,pb,pa);
+		dtVnormalize(cir->dp);
+		dtVsub(dv, cir->dvel, dvel);
+		// The 2D area of (origin, dp, dv) decides which perpendicular of dp is stored in np; np[1] is not written here.
+		const float a = dtTriArea2D(orig, cir->dp,dv);
+		if (a < 0.01f)
+		{
+			cir->np[0] = -cir->dp[2];
+			cir->np[2] = cir->dp[0];
+		}
+		else
+		{
+			cir->np[0] = cir->dp[2];
+			cir->np[2] = -cir->dp[0];
+		}
+	}	
+
+	for (int i = 0; i < m_nsegments; ++i)
+	{
+		dtObstacleSegment* seg = &m_segments[i];
+		
+		// Precalc if the agent is really close to the segment.
+		const float r = 0.01f;
+		float t;
+		seg->touch = dtDistancePtSegSqr2D(pos, seg->p, seg->q, t) < dtSqr(r);
+	}	
+}
+
+float dtObstacleAvoidanceQuery::processSample(const float* vcand, const float cs,
+											  const float* pos, const float rad,
+											  const float* vel, const float* dvel,
+											  dtObstacleAvoidanceDebugData* debug)
+{
+	// Returns the summed penalty of candidate velocity vcand. First find the min time of impact amongst all obstacles.
+	float tmin = m_params.horizTime;
+	float side = 0;
+	int nside = 0;
+	
+	for (int i = 0; i < m_ncircles; ++i)
+	{
+		const dtObstacleCircle* cir = &m_circles[i];
+			
+		// RVO: vab = 2*vcand - vel - cir->vel.
+		float vab[3];
+		dtVscale(vab, vcand, 2);
+		dtVsub(vab, vab, vel);
+		dtVsub(vab, vab, cir->vel);
+		
+		// Side: accumulate a [0,1] bias from vab projected on the dp/np directions computed in prepare().
+		side += dtClamp(dtMin(dtVdot2D(cir->dp,vab)*0.5f+0.5f, dtVdot2D(cir->np,vab)*2), 0.0f, 1.0f);
+		nside++;
+		
+		float htmin = 0, htmax = 0;
+		if (!sweepCircleCircle(pos,rad, vab, cir->p,cir->rad, htmin, htmax))
+			continue;
+		
+		// Handle overlapping obstacles.
+		if (htmin < 0.0f && htmax > 0.0f)
+		{
+			// Avoid more when overlapped.
+			htmin = -htmin * 0.5f;
+		}
+		
+		if (htmin >= 0.0f)
+		{
+			// The closest obstacle is somewhere ahead of us, keep track of nearest obstacle.
+			if (htmin < tmin)
+				tmin = htmin;
+		}
+	}
+
+	for (int i = 0; i < m_nsegments; ++i)
+	{
+		const dtObstacleSegment* seg = &m_segments[i];
+		float htmin = 0;
+		
+		if (seg->touch)
+		{
+			// Special case when the agent is very close to the segment.
+			float sdir[3], snorm[3];
+			dtVsub(sdir, seg->q, seg->p);
+			snorm[0] = -sdir[2];
+			snorm[2] = sdir[0];
+			// Skip the segment when vcand has a negative 2D dot product with snorm (snorm[1] is never set).
+			if (dtVdot2D(snorm, vcand) < 0.0f)
+				continue;
+			// Else immediate collision.
+			htmin = 0.0f;
+		}
+		else
+		{
+			if (!isectRaySeg(pos, vcand, seg->p, seg->q, htmin))
+				continue;
+		}
+		
+		// Avoid less when facing walls.
+		htmin *= 2.0f;
+		
+		// The closest obstacle is somewhere ahead of us, keep track of nearest obstacle.
+		if (htmin < tmin)
+			tmin = htmin;
+	}
+	
+	// Normalize side bias, to prevent it dominating too much.
+	if (nside)
+		side /= nside;
+	// Four weighted terms: distance to desired velocity, distance to current velocity, side bias, time of impact.
+	const float vpen = m_params.weightDesVel * (dtVdist2D(vcand, dvel) * m_invVmax);
+	const float vcpen = m_params.weightCurVel * (dtVdist2D(vcand, vel) * m_invVmax);
+	const float spen = m_params.weightSide * side;
+	const float tpen = m_params.weightToi * (1.0f/(0.1f+tmin*m_invHorizTime));
+	
+	const float penalty = vpen + vcpen + spen + tpen;
+	
+	// Store different penalties for debug viewing
+	if (debug)
+		debug->addSample(vcand, cs, penalty, vpen, vcpen, spen, tpen);
+	
+	return penalty;
+}
+
+int dtObstacleAvoidanceQuery::sampleVelocityGrid(const float* pos, const float rad, const float vmax,
+												 const float* vel, const float* dvel, float* nvel,
+												 const dtObstacleAvoidanceParams* params,
+												 dtObstacleAvoidanceDebugData* debug)
+{
+	// Scores a gridSize x gridSize lattice of candidate velocities centred on dvel*velBias.
+	// The lowest-penalty candidate is copied to nvel; the return value is the number scored.
+	prepare(pos, dvel);
+
+	memcpy(&m_params, params, sizeof(dtObstacleAvoidanceParams));
+	m_invHorizTime = 1.0f / m_params.horizTime;
+	m_vmax = vmax;
+	m_invVmax = 1.0f / vmax;
+	dtVset(nvel, 0,0,0);
+	if (debug)
+		debug->reset();
+
+	// Lattice centre (biased desired velocity), cell size and half extent.
+	const float centerX = dvel[0] * m_params.velBias;
+	const float centerZ = dvel[2] * m_params.velBias;
+	const float cs = vmax * 2 * (1 - m_params.velBias) / (float)(m_params.gridSize-1);
+	const float half = (m_params.gridSize-1)*cs*0.5f;
+	// Candidates above this squared speed are rejected without being scored.
+	const float maxSpeedSqr = dtSqr(vmax+cs/2);
+
+	float bestPenalty = FLT_MAX;
+	int nsamples = 0;
+	for (int row = 0; row < m_params.gridSize; ++row)
+	{
+		for (int col = 0; col < m_params.gridSize; ++col)
+		{
+			float vcand[3];
+			vcand[0] = centerX + col*cs - half;
+			vcand[1] = 0;
+			vcand[2] = centerZ + row*cs - half;
+			if (dtSqr(vcand[0])+dtSqr(vcand[2]) > maxSpeedSqr)
+				continue;
+			const float penalty = processSample(vcand, cs, pos,rad,vel,dvel, debug);
+			nsamples++;
+			if (penalty < bestPenalty)
+			{
+				bestPenalty = penalty;
+				dtVcopy(nvel, vcand);
+			}
+		}
+	}
+	return nsamples;
+}
+
+/// Scores candidates on a ring pattern that is refined over several passes; writes the best one to nvel and returns the number scored.
+int dtObstacleAvoidanceQuery::sampleVelocityAdaptive(const float* pos, const float rad, const float vmax,
+													 const float* vel, const float* dvel, float* nvel,
+													 const dtObstacleAvoidanceParams* params,
+													 dtObstacleAvoidanceDebugData* debug)
+{
+	prepare(pos, dvel);
+	
+	memcpy(&m_params, params, sizeof(dtObstacleAvoidanceParams));
+	m_invHorizTime = 1.0f / m_params.horizTime;
+	m_vmax = vmax;
+	m_invVmax = 1.0f / vmax;
+	
+	dtVset(nvel, 0,0,0);
+	
+	if (debug)
+		debug->reset();
+
+	// Build sampling pattern aligned to desired velocity.
+	float pat[(DT_MAX_PATTERN_DIVS*DT_MAX_PATTERN_RINGS+1)*2];	// (x,z) pairs: the centre sample plus the ring samples.
+	int npat = 0;
+
+	const int ndivs = (int)m_params.adaptiveDivs;
+	const int nrings= (int)m_params.adaptiveRings;
+	const int depth = (int)m_params.adaptiveDepth;
+	
+	const int nd = dtClamp(ndivs, 1, DT_MAX_PATTERN_DIVS);	// Clamped so the pattern always fits into pat[].
+	const int nr = dtClamp(nrings, 1, DT_MAX_PATTERN_RINGS);
+	const float da = (1.0f/nd) * DT_PI*2;	// Angular step between two samples on a ring.
+	const float dang = atan2f(dvel[2], dvel[0]);	// Heading of the desired velocity in the xz-plane.
+	
+	// Always add sample at zero
+	pat[npat*2+0] = 0;
+	pat[npat*2+1] = 0;
+	npat++;
+	
+	for (int j = 0; j < nr; ++j)
+	{
+		const float r = (float)(nr-j)/(float)nr;	// Ring radius as a fraction: 1 for j == 0, shrinking with j.
+		float a = dang + (j&1)*0.5f*da;	// Odd rings are rotated by half an angular step.
+		for (int i = 0; i < nd; ++i)
+		{
+			pat[npat*2+0] = cosf(a)*r;
+			pat[npat*2+1] = sinf(a)*r;
+			npat++;
+			a += da;
+		}
+	}
+
+	// Start sampling.
+	float cr = vmax * (1.0f - m_params.velBias);	// Pattern radius; halved after every pass.
+	float res[3];	// Centre of the current pass, initially the biased desired velocity.
+	dtVset(res, dvel[0] * m_params.velBias, 0, dvel[2] * m_params.velBias);
+	int ns = 0;
+
+	for (int k = 0; k < depth; ++k)
+	{
+		float minPenalty = FLT_MAX;
+		float bvel[3];
+		dtVset(bvel, 0,0,0);
+		
+		for (int i = 0; i < npat; ++i)
+		{
+			float vcand[3];
+			vcand[0] = res[0] + pat[i*2+0]*cr;
+			vcand[1] = 0;
+			vcand[2] = res[2] + pat[i*2+1]*cr;
+			
+			if (dtSqr(vcand[0])+dtSqr(vcand[2]) > dtSqr(vmax+0.001f)) continue;	// Faster than vmax: not scored.
+			
+			const float penalty = processSample(vcand,cr/10, pos,rad,vel,dvel, debug);
+			ns++;
+			if (penalty < minPenalty)
+			{
+				minPenalty = penalty;
+				dtVcopy(bvel, vcand);
+			}
+		}
+
+		dtVcopy(res, bvel);	// Next pass is centred on the best candidate (zero if none was scored).
+
+		cr *= 0.5f;
+	}	
+	
+	dtVcopy(nvel, res);
+	
+	return ns;
+}
+
diff --git a/Engine/lib/recast/DetourCrowd/Source/DetourPathCorridor.cpp b/Engine/lib/recast/DetourCrowd/Source/DetourPathCorridor.cpp
new file mode 100644
index 000000000..a1bfe0d41
--- /dev/null
+++ b/Engine/lib/recast/DetourCrowd/Source/DetourPathCorridor.cpp
@@ -0,0 +1,588 @@
+//
+// Copyright (c) 2009-2010 Mikko Mononen memon@inside.org
+//
+// This software is provided 'as-is', without any express or implied
+// warranty.  In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source distribution.
+//
+
+#include <string.h>
+#include "DetourPathCorridor.h"
+#include "DetourNavMeshQuery.h"
+#include "DetourCommon.h"
+#include "DetourAssert.h"
+#include "DetourAlloc.h"
+
+
+int dtMergeCorridorStartMoved(dtPolyRef* path, const int npath, const int maxPath,
+							  const dtPolyRef* visited, const int nvisited)
+{
+	// Merges the polygons visited while the corridor start moved into the front of path.
+	// Returns the new path length, or npath unchanged when path and visited share no polygon.
+	int furthestPath = -1;
+	int furthestVisited = -1;
+	
+	// Find furthest common polygon.
+	for (int i = npath-1; i >= 0; --i)
+	{
+		bool found = false;
+		for (int j = nvisited-1; j >= 0; --j)
+		{
+			if (path[i] == visited[j])
+			{
+				furthestPath = i;
+				furthestVisited = j;
+				found = true;
+			}
+		}
+		if (found)
+			break;
+	}
+	
+	// If no intersection found just return current path. 
+	if (furthestPath == -1 || furthestVisited == -1)
+		return npath;
+	
+	// Concatenate paths: the visited polygons (reversed) go in front of the kept tail.
+	// Both parts are clamped to maxPath so that an oversized visited list can neither
+	// overrun the buffer nor hand memmove() a negative size.
+	const int req = dtMin(nvisited - furthestVisited, maxPath);
+	const int orig = dtMin(furthestPath+1, npath);
+	const int size = dtMax(0, dtMin(npath-orig, maxPath-req));
+	if (size > 0)
+		memmove(path+req, path+orig, size*sizeof(dtPolyRef));
+	
+	// Store visited
+	for (int i = 0; i < req; ++i)
+		path[i] = visited[(nvisited-1)-i];
+	
+	return req+size;
+}
+
+int dtMergeCorridorEndMoved(dtPolyRef* path, const int npath, const int maxPath,
+							const dtPolyRef* visited, const int nvisited)
+{
+	// Appends the polygons visited while the corridor end moved onto the path.
+	// Returns the new path length, or npath when path and visited share no polygon.
+	int pathIdx = -1;
+	int visitedIdx = -1;
+
+	// Locate the first path polygon that also occurs in visited, together with the
+	// lowest visited index holding it.
+	for (int p = 0; p < npath && pathIdx == -1; ++p)
+	{
+		for (int v = 0; v < nvisited; ++v)
+		{
+			if (visited[v] != path[p])
+				continue;
+			pathIdx = p;
+			visitedIdx = v;
+			break;
+		}
+	}
+
+	// Nothing in common: the path is left untouched.
+	if (pathIdx == -1)
+		return npath;
+
+	// Keep the path up to and including the common polygon, then copy as many of the
+	// following visited polygons as the buffer can take.
+	const int keep = pathIdx+1;
+	const int from = visitedIdx+1;
+	const int ncopy = dtMin(nvisited-from, maxPath-keep);
+	dtAssert(keep+ncopy <= maxPath);
+	if (ncopy != 0)
+		memcpy(path+keep, visited+from, sizeof(dtPolyRef)*ncopy);
+
+	return keep+ncopy;
+}
+
+int dtMergeCorridorStartShortcut(dtPolyRef* path, const int npath, const int maxPath,
+								 const dtPolyRef* visited, const int nvisited)
+{
+	// Replaces the start of path with a shortcut: the visited polygons that precede the
+	// furthest common polygon, followed by the old path from that polygon onwards.
+	// Returns the new path length, or npath unchanged when there is nothing to merge.
+	int furthestPath = -1;
+	int furthestVisited = -1;
+	
+	// Find furthest common polygon.
+	for (int i = npath-1; i >= 0; --i)
+	{
+		bool found = false;
+		for (int j = nvisited-1; j >= 0; --j)
+		{
+			if (path[i] == visited[j])
+			{
+				furthestPath = i;
+				furthestVisited = j;
+				found = true;
+			}
+		}
+		if (found)
+			break;
+	}
+	
+	// If no intersection found just return current path. 
+	if (furthestPath == -1 || furthestVisited == -1)
+		return npath;
+	
+	// Adjust beginning of the buffer to include the visited. The prefix is clamped to
+	// maxPath so the copy below stays inside the buffer and memmove() never gets a negative size.
+	const int req = dtMin(furthestVisited, maxPath);
+	if (req <= 0)
+		return npath;
+	
+	const int orig = furthestPath;
+	const int size = dtMax(0, dtMin(npath-orig, maxPath-req));
+	if (size > 0)
+		memmove(path+req, path+orig, size*sizeof(dtPolyRef));
+	
+	// Store visited
+	for (int i = 0; i < req; ++i)
+		path[i] = visited[i];
+	
+	return req+size;
+}
+
+/**
+@class dtPathCorridor
+@par
+
+The corridor is loaded with a path, usually obtained from a #dtNavMeshQuery::findPath() query. The corridor
+is then used to plan local movement, with the corridor automatically updating as needed to deal with inaccurate 
+agent locomotion.
+
+Example of a common use case:
+
+-# Construct the corridor object and call #init() to allocate its path buffer.
+-# Obtain a path from a #dtNavMeshQuery object.
+-# Use #reset() to set the agent's current position. (At the beginning of the path.)
+-# Use #setCorridor() to load the path and target.
+-# Use #findCorners() to plan movement. (This handles dynamic path straightening.)
+-# Use #movePosition() to feed agent movement back into the corridor. (The corridor will automatically adjust as needed.)
+-# If the target is moving, use #moveTargetPosition() to update the end of the corridor. 
+   (The corridor will automatically adjust as needed.)
+-# Repeat the previous 3 steps to continue to move the agent.
+
+The corridor position and target are always constrained to the navigation mesh.
+
+One of the difficulties in maintaining a path is that floating point errors, locomotion inaccuracies, and/or local 
+steering can result in the agent crossing the boundary of the path corridor, temporarily invalidating the path. 
+This class uses local mesh queries to detect and update the corridor as needed to handle these types of issues. 
+
+The fact that local mesh queries are used to move the position and target locations results in two behaviors that 
+need to be considered:
+
+Every time a move function is used there is a chance that the path will become non-optimal. Basically, the further 
+the target is moved from its original location, and the further the position is moved outside the original corridor, 
+the more likely the path will become non-optimal. This issue can be addressed by periodically running the 
+#optimizePathTopology() and #optimizePathVisibility() methods.
+
+All local mesh queries have distance limitations. (Review the #dtNavMeshQuery methods for details.) So the most accurate 
+use case is to move the position and target in small increments. If a large increment is used, then the corridor 
+may not be able to accurately find the new location.  Because of this limitation, if a position is moved in a large
+increment, then compare the desired and resulting polygon references. If the two do not match, then path replanning 
+may be needed.  E.g. If you move the target, check #getLastPoly() to see if it is the expected polygon.
+
+*/
+
+dtPathCorridor::dtPathCorridor() :
+	m_path(0),	// No path buffer yet; init() allocates it.
+	m_npath(0),
+	m_maxPath(0)
+{
+}
+
+dtPathCorridor::~dtPathCorridor()
+{
+	dtFree(m_path);	// Hands the buffer obtained from dtAlloc() in init() back to the allocator.
+}
+
+/// @par
+///
+/// Allocates the path buffer for @p maxPath polygon references; returns false when the allocation fails.
+/// @warning Cannot be called more than once.
+bool dtPathCorridor::init(const int maxPath)
+{
+	dtAssert(!m_path);
+	m_path = (dtPolyRef*)dtAlloc(sizeof(dtPolyRef)*maxPath, DT_ALLOC_PERM);
+	if (m_path == 0) return false;
+	m_npath = 0;
+	m_maxPath = maxPath;
+	return true;
+}
+
+/// @par
+///
+/// Collapses the corridor to the single polygon @p ref, with both the position
+/// and the target placed at @p pos.
+void dtPathCorridor::reset(dtPolyRef ref, const float* pos)
+{
+	dtAssert(m_path);
+	m_npath = 1;
+	m_path[0] = ref;
+	dtVcopy(m_target, pos);
+	dtVcopy(m_pos, pos);
+}
+
+/**
+@par
+
+Plans local movement inside the corridor by detecting the next corner or corners along it.
+It performs essentially the same function as #dtNavMeshQuery::findStraightPath.
+
+Due to internal optimizations, the maximum number of corners returned will be (@p maxCorners - 1).
+For example: If the buffers are sized to hold 10 corners, the function will never return more than 9 corners. 
+So if 10 corners are needed, the buffers should be sized for 11 corners.
+
+If the target is within range, it will be the last corner and have a polygon reference id of zero.
+*/
+int dtPathCorridor::findCorners(float* cornerVerts, unsigned char* cornerFlags,
+							  dtPolyRef* cornerPolys, const int maxCorners,
+							  dtNavMeshQuery* navquery, const dtQueryFilter* /*filter*/)
+{
+	dtAssert(m_path);
+	dtAssert(m_npath);
+
+	static const float MIN_TARGET_DIST = 0.01f;
+	const float minDistSqr = dtSqr(MIN_TARGET_DIST);
+
+	// Let the navmesh query produce the raw corner list first.
+	int ncorners = 0;
+	navquery->findStraightPath(m_pos, m_target, m_path, m_npath,
+							   cornerVerts, cornerFlags, cornerPolys, &ncorners, maxCorners);
+
+	// Drop leading corners that lie within MIN_TARGET_DIST of the position, unless the
+	// corner is flagged as an off-mesh connection.
+	while (ncorners)
+	{
+		const bool offMesh = (cornerFlags[0] & DT_STRAIGHTPATH_OFFMESH_CONNECTION) != 0;
+		if (offMesh || dtVdist2DSqr(&cornerVerts[0], m_pos) > minDistSqr)
+			break;
+		--ncorners;
+		if (!ncorners)
+			break;
+		// Shift the remaining entries of all three arrays down by one corner.
+		memmove(cornerVerts, cornerVerts+3, sizeof(float)*3*ncorners);
+		memmove(cornerPolys, cornerPolys+1, sizeof(dtPolyRef)*ncorners);
+		memmove(cornerFlags, cornerFlags+1, sizeof(unsigned char)*ncorners);
+	}
+
+	// Cut the list right after the first off-mesh connection corner, if there is one.
+	int cut = 0;
+	while (cut < ncorners && !(cornerFlags[cut] & DT_STRAIGHTPATH_OFFMESH_CONNECTION))
+		++cut;
+	if (cut < ncorners)
+		ncorners = cut+1;
+
+	return ncorners;
+}
+
+/** 
+@par
+
+Inaccurate locomotion or dynamic obstacle avoidance can force the agent position significantly outside the 
+original corridor. Over time this can result in the formation of a non-optimal corridor. Non-optimal paths can 
+also form near the corners of tiles.
+
+This function uses an efficient local visibility search to try to optimize the corridor 
+between the current position and @p next.
+
+The corridor will change only if @p next is visible from the current position and moving directly toward the point 
+is better than following the existing path.
+
+The more inaccurate the agent movement, the more beneficial this function becomes. Simply adjust the frequency 
+of the call to match the needs of the agent.
+
+This function is not suitable for long distance searches.
+*/
+void dtPathCorridor::optimizePathVisibility(const float* next, const float pathOptimizationRange,
+										  dtNavMeshQuery* navquery, const dtQueryFilter* filter)
+{
+	dtAssert(m_path);
+
+	// Nothing to optimize when the position is practically at the goal already.
+	float rayLen = dtVdist2D(m_pos, next);
+	if (rayLen < 0.01f)
+		return;
+
+	// Overshoot a little (this helps to optimize open fields in tiled meshes), but never
+	// beyond the optimization range.
+	rayLen = dtMin(rayLen+0.01f, pathOptimizationRange);
+
+	// Build the ray end point from the direction towards next.
+	float toNext[3];
+	float rayEnd[3];
+	dtVsub(toNext, next, m_pos);
+	dtVmad(rayEnd, m_pos, toNext, pathOptimizationRange/rayLen);
+
+	static const int MAX_RES = 32;
+	dtPolyRef hitPolys[MAX_RES];
+	int nhit = 0;
+	float t, norm[3];
+	navquery->raycast(m_path[0], m_pos, rayEnd, filter, &t, norm, hitPolys, &nhit, MAX_RES);
+
+	// Only shortcut when the ray visited more than one polygon and the reported t
+	// exceeds 0.99.
+	if (nhit <= 1 || !(t > 0.99f))
+		return;
+	m_npath = dtMergeCorridorStartShortcut(m_path, m_npath, m_maxPath, hitPolys, nhit);
+}
+
+/**
+@par
+
+Inaccurate locomotion or dynamic obstacle avoidance can force the agent position significantly outside the 
+original corridor. Over time this can result in the formation of a non-optimal corridor. This function will use a 
+local area path search to try to re-optimize the corridor.
+
+The more inaccurate the agent movement, the more beneficial this function becomes. Simply adjust the frequency of 
+the call to match the needs of the agent.
+*/
+bool dtPathCorridor::optimizePathTopology(dtNavMeshQuery* navquery, const dtQueryFilter* filter)
+{
+	dtAssert(navquery);
+	dtAssert(filter);
+	dtAssert(m_path);
+
+	// A corridor shorter than three polygons is left alone.
+	if (m_npath < 3)
+		return false;
+
+	static const int MAX_ITER = 32;
+	static const int MAX_RES = 32;
+
+	// Run a sliced path search from the first to the last corridor polygon, limited to
+	// MAX_ITER iterations, and fetch the partial result.
+	int nfound = 0;
+	dtPolyRef found[MAX_RES];
+	navquery->initSlicedFindPath(m_path[0], m_path[m_npath-1], m_pos, m_target, filter);
+	navquery->updateSlicedFindPath(MAX_ITER, 0);
+	const dtStatus status = navquery->finalizeSlicedFindPathPartial(m_path, m_npath, found, &nfound, MAX_RES);
+
+	const bool usable = dtStatusSucceed(status) && nfound > 0;
+	if (usable)
+		m_npath = dtMergeCorridorStartShortcut(m_path, m_npath, m_maxPath, found, nfound);
+	return usable;
+}
+
+bool dtPathCorridor::moveOverOffmeshConnection(dtPolyRef offMeshConRef, dtPolyRef* refs,
+											   float* startPos, float* endPos,
+											   dtNavMeshQuery* navquery)
+{
+	dtAssert(navquery);
+	dtAssert(m_path);
+	dtAssert(m_npath);
+	// Consumes the corridor up to offMeshConRef, reports the connection in refs/startPos/endPos and moves m_pos to endPos.
+	// Advance the path up to and over the off-mesh connection.
+	dtPolyRef prevRef = 0, polyRef = m_path[0];
+	int npos = 0;
+	while (npos < m_npath && polyRef != offMeshConRef)
+	{
+		prevRef = polyRef;
+		polyRef = m_path[npos];
+		npos++;
+	}
+	if (npos == m_npath)
+	{
+		// Could not find offMeshConRef
+		return false;	// NOTE(review): this branch is also taken when offMeshConRef is the last path entry.
+	}
+	
+	// Prune path
+	for (int i = npos; i < m_npath; ++i)
+		m_path[i-npos] = m_path[i];
+	m_npath -= npos;
+
+	refs[0] = prevRef;	// Polygon scanned just before the connection (0 if the connection is m_path[0]).
+	refs[1] = polyRef;	// The off-mesh connection polygon itself.
+	
+	const dtNavMesh* nav = navquery->getAttachedNavMesh();
+	dtAssert(nav);
+
+	dtStatus status = nav->getOffMeshConnectionPolyEndPoints(refs[0], refs[1], startPos, endPos);
+	if (dtStatusSucceed(status))
+	{
+		dtVcopy(m_pos, endPos);
+		return true;
+	}
+
+	return false;	// The path has already been pruned at this point, even though the end points could not be resolved.
+}
+
+/**
+@par
+
+Behavior:
+
+- The movement is constrained to the surface of the navigation mesh. 
+- The corridor is automatically adjusted (shortened or lengthened) in order to remain valid. 
+- The new position will be located in the adjusted corridor's first polygon.
+
+The expected use case is that the desired position will be 'near' the current corridor. What is considered 'near' 
+depends on local polygon density, query search extents, etc.
+
+The resulting position will differ from the desired position if the desired position is not on the navigation mesh, 
+or it can't be reached using a local search.
+*/
+void dtPathCorridor::movePosition(const float* npos, dtNavMeshQuery* navquery, const dtQueryFilter* filter)
+{
+	dtAssert(m_path);
+	dtAssert(m_npath);
+
+	static const int MAX_VISITED = 16;
+	dtPolyRef crossed[MAX_VISITED];
+	int ncrossed = 0;
+	float moved[3];
+	// Slide towards npos and fold the crossed polygons into the start of the corridor.
+	navquery->moveAlongSurface(m_path[0], m_pos, npos, filter,
+							   moved, crossed, &ncrossed, MAX_VISITED);
+	m_npath = dtMergeCorridorStartMoved(m_path, m_npath, m_maxPath, crossed, ncrossed);
+
+	// Query the height on the first corridor polygon; height starts at the old y value.
+	float height = m_pos[1];
+	navquery->getPolyHeight(m_path[0], moved, &height);
+	moved[1] = height;
+	dtVcopy(m_pos, moved);
+}
+
+/**
+@par
+
+Behavior:
+
+- The movement is constrained to the surface of the navigation mesh. 
+- The corridor is automatically adjusted (shortened or lengthened) in order to remain valid. 
+- The new target will be located in the adjusted corridor's last polygon.
+
+The expected use case is that the desired target will be 'near' the current corridor. What is considered 'near' depends on local polygon density, query search extents, etc.
+
+The resulting target will differ from the desired target if the desired target is not on the navigation mesh, or it can't be reached using a local search.
+*/
+void dtPathCorridor::moveTargetPosition(const float* npos, dtNavMeshQuery* navquery, const dtQueryFilter* filter)
+{
+	dtAssert(m_path);
+	dtAssert(m_npath);
+
+	static const int MAX_VISITED = 16;
+	dtPolyRef crossed[MAX_VISITED];
+	int ncrossed = 0;
+	float moved[3];
+	// Slide the target towards npos, starting in the last corridor polygon, and append
+	// the crossed polygons to the end of the corridor.
+	navquery->moveAlongSurface(m_path[m_npath-1], m_target, npos, filter,
+							   moved, crossed, &ncrossed, MAX_VISITED);
+	m_npath = dtMergeCorridorEndMoved(m_path, m_npath, m_maxPath, crossed, ncrossed);
+
+	// TODO: should we do that?
+	// Adjust the position to stay on top of the navmesh.
+	/*	float h = m_target[1];
+	 navquery->getPolyHeight(m_path[m_npath-1], moved, &h);
+	 moved[1] = h;*/
+	dtVcopy(m_target, moved);
+}
+
+/// @par
+///
+/// Copies @p npath polygon references from @p path into the corridor and stores @p target as the new target.
+/// The current corridor position is expected to be within the first polygon in the path, the target in the last.
+/// 
+/// @warning The size of the path must not exceed the size of corridor's path buffer set during #init().
+void dtPathCorridor::setCorridor(const float* target, const dtPolyRef* path, const int npath)
+{
+	dtAssert(m_path);
+	dtAssert(npath > 0);
+	dtAssert(npath <= m_maxPath);	// A path that exactly fills the buffer is valid, as documented above.
+	
+	dtVcopy(m_target, target);
+	memcpy(m_path, path, sizeof(dtPolyRef)*npath);
+	m_npath = npath;
+}
+
+bool dtPathCorridor::fixPathStart(dtPolyRef safeRef, const float* safePos)
+{
+	// Restarts the corridor at safeRef/safePos and sets the second path entry to 0.
+	// Always returns true.
+	dtAssert(m_path);
+
+	dtVcopy(m_pos, safePos);
+
+	const bool shortPath = (m_npath > 0 && m_npath < 3);
+	if (shortPath)
+	{
+		// Grow a one- or two-polygon corridor to three entries, keeping its last polygon
+		// as the final entry. This has to happen before slots 0 and 1 are overwritten.
+		m_path[2] = m_path[m_npath-1];
+		m_npath = 3;
+	}
+	m_path[0] = safeRef;
+	m_path[1] = 0;
+	return true;
+}
+
+bool dtPathCorridor::trimInvalidPath(dtPolyRef safeRef, const float* safePos,
+									 dtNavMeshQuery* navquery, const dtQueryFilter* filter)
+{
+	dtAssert(navquery);
+	dtAssert(filter);
+	dtAssert(m_path);
+
+	// Count the leading polygons that are still valid for the filter.
+	int nvalid = 0;
+	for (; nvalid < m_npath; ++nvalid)
+	{
+		if (!navquery->isValidPolyRef(m_path[nvalid], filter))
+			break;
+	}
+
+	// All valid, no need to fix.
+	if (nvalid == m_npath)
+		return true;
+
+	if (nvalid > 0)
+	{
+		// The path is partially usable: keep the valid prefix.
+		m_npath = nvalid;
+	}
+	else
+	{
+		// The first polyref is bad, restart from the safe values.
+		dtVcopy(m_pos, safePos);
+		m_path[0] = safeRef;
+		m_npath = 1;
+	}
+
+	// Clamp target pos to last poly
+	float oldTarget[3];
+	dtVcopy(oldTarget, m_target);
+	navquery->closestPointOnPolyBoundary(m_path[m_npath-1], oldTarget, m_target);
+	return true;
+}
+
+/// @par
+///
+/// The path can be invalidated if there are structural changes to the underlying navigation mesh, or the state of 
+/// a polygon within the path changes resulting in it being filtered out. (E.g. An exclusion or inclusion flag changes.)
+bool dtPathCorridor::isValid(const int maxLookAhead, dtNavMeshQuery* navquery, const dtQueryFilter* filter)
+{
+	// Only the first maxLookAhead polygons (or the whole corridor, if it is shorter)
+	// are examined.
+	const int ncheck = dtMin(m_npath, maxLookAhead);
+	int idx = 0;
+	while (idx < ncheck && navquery->isValidPolyRef(m_path[idx], filter))
+		++idx;
+
+	// Valid only if the scan was not stopped by a rejected polygon.
+	return idx >= ncheck;
+}
diff --git a/Engine/lib/recast/DetourCrowd/Source/DetourPathQueue.cpp b/Engine/lib/recast/DetourCrowd/Source/DetourPathQueue.cpp
new file mode 100644
index 000000000..de1862ab3
--- /dev/null
+++ b/Engine/lib/recast/DetourCrowd/Source/DetourPathQueue.cpp
@@ -0,0 +1,199 @@
+//
+// Copyright (c) 2009-2010 Mikko Mononen memon@inside.org
+//
+// This software is provided 'as-is', without any express or implied
+// warranty.  In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source distribution.
+//
+
+#include <string.h>
+#include "DetourPathQueue.h"
+#include "DetourNavMesh.h"
+#include "DetourNavMeshQuery.h"
+#include "DetourAlloc.h"
+#include "DetourCommon.h"
+
+
+dtPathQueue::dtPathQueue() :
+	m_nextHandle(1),	// First handle value that request() hands out.
+	m_maxPathSize(0),
+	m_queueHead(0),
+	m_navquery(0)	// Created by init().
+{
+	for (int i = 0; i < MAX_QUEUE; ++i)	// No slot has a path buffer until init() allocates one.
+		m_queue[i].path = 0;
+}
+
+dtPathQueue::~dtPathQueue()
+{
+	purge();	// Frees the query object and every slot's path buffer.
+}
+
+void dtPathQueue::purge()
+{
+	for (int slot = 0; slot < MAX_QUEUE; ++slot)	// Release the per-slot path buffers...
+	{
+		dtFree(m_queue[slot].path);
+		m_queue[slot].path = 0;
+	}
+	dtFreeNavMeshQuery(m_navquery);	// ...and the query object; all pointers are reset to 0.
+	m_navquery = 0;
+}
+
+bool dtPathQueue::init(const int maxPathSize, const int maxSearchNodeCount, dtNavMesh* nav)
+{
+	// Start from a clean state: drop the previous query object and path buffers.
+	purge();
+
+	// Create and initialise the query object used by update().
+	m_navquery = dtAllocNavMeshQuery();
+	if (!m_navquery || dtStatusFailed(m_navquery->init(nav, maxSearchNodeCount)))
+		return false;
+
+	// Mark every slot as free and give it a buffer of maxPathSize polygon references.
+	m_maxPathSize = maxPathSize;
+	for (int slot = 0; slot < MAX_QUEUE; ++slot)
+	{
+		PathQuery& q = m_queue[slot];
+		q.ref = DT_PATHQ_INVALID;
+		q.path = (dtPolyRef*)dtAlloc(sizeof(dtPolyRef)*m_maxPathSize, DT_ALLOC_PERM);
+		if (!q.path)
+			return false;
+	}
+	m_queueHead = 0;
+	return true;
+}
+
+void dtPathQueue::update(const int maxIters)
+{
+	static const int MAX_KEEP_ALIVE = 2; // in update ticks.
+
+	// Update path requests until there is nothing to update
+	// or up to maxIters pathfinder iterations have been consumed.
+	int iterCount = maxIters;	// Remaining iteration budget for this call.
+	
+	for (int i = 0; i < MAX_QUEUE; ++i)
+	{
+		PathQuery& q = m_queue[m_queueHead % MAX_QUEUE];	// Slots are visited round-robin, starting at m_queueHead.
+		
+		// Skip inactive requests.
+		if (q.ref == DT_PATHQ_INVALID)
+		{
+			m_queueHead++;
+			continue;
+		}
+		
+		// Handle completed request.
+		if (dtStatusSucceed(q.status) || dtStatusFailed(q.status))
+		{
+			// If the path result has not been read in few frames, free the slot.
+			q.keepAlive++;
+			if (q.keepAlive > MAX_KEEP_ALIVE)
+			{
+				q.ref = DT_PATHQ_INVALID;
+				q.status = 0;
+			}
+			
+			m_queueHead++;
+			continue;
+		}
+		
+		// Handle query start.
+		if (q.status == 0)
+		{
+			q.status = m_navquery->initSlicedFindPath(q.startRef, q.endRef, q.startPos, q.endPos, q.filter);
+		}		
+		// Handle query in progress.
+		if (dtStatusInProgress(q.status))
+		{
+			int iters = 0;
+			q.status = m_navquery->updateSlicedFindPath(iterCount, &iters);
+			iterCount -= iters;
+		}
+		if (dtStatusSucceed(q.status))	// Search finished in this call: copy the result into the slot's buffer.
+		{
+			q.status = m_navquery->finalizeSlicedFindPath(q.path, &q.npath, m_maxPathSize);
+		}
+
+		if (iterCount <= 0)
+			break;	// Budget used up: m_queueHead is not advanced, so the next call revisits this slot.
+
+		m_queueHead++;
+	}
+}
+
+dtPathQueueRef dtPathQueue::request(dtPolyRef startRef, dtPolyRef endRef,
+									const float* startPos, const float* endPos,
+									const dtQueryFilter* filter)
+{
+	// Queues a path request in the first free slot and returns its handle, or
+	// DT_PATHQ_INVALID when every slot is in use.
+	int slot = 0;
+	while (slot < MAX_QUEUE && m_queue[slot].ref != DT_PATHQ_INVALID)
+		++slot;
+
+	if (slot == MAX_QUEUE)
+		return DT_PATHQ_INVALID;
+
+	// Hand out the next handle, skipping the value reserved for "invalid".
+	const dtPathQueueRef handle = m_nextHandle;
+	++m_nextHandle;
+	if (m_nextHandle == DT_PATHQ_INVALID)
+		++m_nextHandle;
+
+	// Record the request; update() starts the search for slots whose status is 0.
+	PathQuery& q = m_queue[slot];
+	q.ref = handle;
+
+	q.startRef = startRef;
+	q.endRef = endRef;
+	dtVcopy(q.startPos, startPos);
+	dtVcopy(q.endPos, endPos);
+
+	q.filter = filter;
+	q.npath = 0;
+	q.keepAlive = 0;
+	q.status = 0;
+
+	return handle;
+}
+
+dtStatus dtPathQueue::getRequestStatus(dtPathQueueRef ref) const
+{
+	// Returns the status of the slot holding ref, or DT_FAILURE if no slot does.
+	int slot = 0;
+	while (slot < MAX_QUEUE && m_queue[slot].ref != ref)
+		++slot;
+	if (slot == MAX_QUEUE) return DT_FAILURE;
+	return m_queue[slot].status;
+}
+
+dtStatus dtPathQueue::getPathResult(dtPathQueueRef ref, dtPolyRef* path, int* pathSize, const int maxPath)
+{
+	// Copies the stored path of request ref into path (at most maxPath entries), frees the
+	// slot for reuse and returns DT_SUCCESS; returns DT_FAILURE when ref is not queued.
+	for (int slot = 0; slot < MAX_QUEUE; ++slot)
+	{
+		PathQuery& q = m_queue[slot];
+		if (q.ref != ref)
+			continue;
+		const int ncopy = dtMin(q.npath, maxPath);
+		memcpy(path, q.path, sizeof(dtPolyRef)*ncopy);
+		*pathSize = ncopy;
+		// Free request for reuse.
+		q.ref = DT_PATHQ_INVALID;
+		q.status = 0;
+		return DT_SUCCESS;
+	}
+	return DT_FAILURE;
+}
diff --git a/Engine/lib/recast/DetourCrowd/Source/DetourProximityGrid.cpp b/Engine/lib/recast/DetourCrowd/Source/DetourProximityGrid.cpp
new file mode 100644
index 000000000..d8226a4e5
--- /dev/null
+++ b/Engine/lib/recast/DetourCrowd/Source/DetourProximityGrid.cpp
@@ -0,0 +1,194 @@
+//
+// Copyright (c) 2009-2010 Mikko Mononen memon@inside.org
+//
+// This software is provided 'as-is', without any express or implied
+// warranty.  In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source distribution.
+//
+
+#include <math.h>
+#include <string.h>
+#include <new>
+#include "DetourProximityGrid.h"
+#include "DetourCommon.h"
+#include "DetourAlloc.h"
+#include "DetourAssert.h"
+
+
+dtProximityGrid* dtAllocProximityGrid()
+{
+	// Placement-construct the grid in dtAlloc() memory; yields 0 when the allocation fails.
+	void* raw = dtAlloc(sizeof(dtProximityGrid), DT_ALLOC_PERM);
+	return raw ? new(raw) dtProximityGrid : 0;
+}
+
+void dtFreeProximityGrid(dtProximityGrid* ptr)
+{
+	if (!ptr) return;	// A null pointer is ignored.
+	ptr->~dtProximityGrid();	// Explicit destructor call, matching the placement new in dtAllocProximityGrid().
+	dtFree(ptr);
+}
+
+// Hashes cell (x,y) into [0,n) for a power-of-two n; unsigned math avoids signed-overflow UB.
+inline int hashPos2(int x, int y, int n)
+{
+	return (int)((((unsigned int)x*73856093u) ^ ((unsigned int)y*19349663u)) & (unsigned int)(n-1));
+}
+
+
+dtProximityGrid::dtProximityGrid() :
+	m_maxItems(0),
+	m_cellSize(0),	// m_invCellSize and m_bounds are not set here; init() and clear() assign them.
+	m_pool(0),	// Allocated by init().
+	m_poolHead(0),
+	m_poolSize(0),
+	m_buckets(0),	// Allocated by init().
+	m_bucketsSize(0)
+{
+}
+
+dtProximityGrid::~dtProximityGrid()
+{
+	dtFree(m_buckets);	// Both arrays come from dtAlloc() in init().
+	dtFree(m_pool);
+}
+
+bool dtProximityGrid::init(const int poolSize, const float cellSize)
+{
+	dtAssert(poolSize > 0);
+	dtAssert(cellSize > 0.0f);
+
+	// Cell size and its reciprocal; the reciprocal maps coordinates to cell indices.
+	m_cellSize = cellSize;
+	m_invCellSize = 1.0f / m_cellSize;
+
+	// Hash buckets: the bucket count is dtNextPow2(poolSize).
+	m_bucketsSize = dtNextPow2(poolSize);
+	m_buckets = (unsigned short*)dtAlloc(sizeof(unsigned short)*m_bucketsSize, DT_ALLOC_PERM);
+	if (!m_buckets)
+		return false;
+
+	// Item pool with room for poolSize entries.
+	m_poolHead = 0;
+	m_poolSize = poolSize;
+	m_pool = (Item*)dtAlloc(sizeof(Item)*m_poolSize, DT_ALLOC_PERM);
+	if (!m_pool)
+		return false;
+	// Mark all buckets empty and reset the bounds.
+	clear();
+	return true;
+}
+
+void dtProximityGrid::clear()
+{
+	// Filling every byte with 0xff leaves 0xffff in each bucket, the end-of-chain marker.
+	memset(m_buckets, 0xff, sizeof(unsigned short)*m_bucketsSize);
+	m_poolHead = 0;
+	// Inverted bounds (min above max) for the dtMin/dtMax updates in addItem().
+	m_bounds[0] = m_bounds[1] = 0xffff;
+	m_bounds[2] = m_bounds[3] = -0xffff;
+}
+
+void dtProximityGrid::addItem(const unsigned short id,
+							  const float minx, const float miny,
+							  const float maxx, const float maxy)
+{
+	// Registers id in every grid cell overlapped by the rectangle [minx,maxx] x [miny,maxy].
+	// Cells that no longer fit into the item pool are silently skipped.
+	const int cellMinX = (int)floorf(minx * m_invCellSize);
+	const int cellMinY = (int)floorf(miny * m_invCellSize);
+	const int cellMaxX = (int)floorf(maxx * m_invCellSize);
+	const int cellMaxY = (int)floorf(maxy * m_invCellSize);
+	m_bounds[0] = dtMin(m_bounds[0], cellMinX);
+	m_bounds[1] = dtMin(m_bounds[1], cellMinY);
+	m_bounds[2] = dtMax(m_bounds[2], cellMaxX);
+	m_bounds[3] = dtMax(m_bounds[3], cellMaxY);
+
+	for (int cy = cellMinY; cy <= cellMaxY; ++cy)
+	{
+		for (int cx = cellMinX; cx <= cellMaxX; ++cx)
+		{
+			if (m_poolHead >= m_poolSize)
+				continue;
+			// Take the next pool entry and push it onto the front of the cell's bucket chain.
+			const int bucket = hashPos2(cx, cy, m_bucketsSize);
+			const unsigned short entry = (unsigned short)m_poolHead++;
+			Item& item = m_pool[entry];
+			item.id = id;
+			item.x = (short)cx;
+			item.y = (short)cy;
+			item.next = m_buckets[bucket];
+			m_buckets[bucket] = entry;
+		}
+	}
+}
+
+int dtProximityGrid::queryItems(const float minx, const float miny,
+								const float maxx, const float maxy,
+								unsigned short* ids, const int maxIds) const
+{
+	// Collects the distinct ids stored in the cells overlapped by the query rectangle.
+	// Returns the number written to ids; the search stops as soon as a further new id
+	// turns up while ids already holds maxIds entries.
+	const int cellMinX = (int)floorf(minx * m_invCellSize);
+	const int cellMinY = (int)floorf(miny * m_invCellSize);
+	const int cellMaxX = (int)floorf(maxx * m_invCellSize);
+	const int cellMaxY = (int)floorf(maxy * m_invCellSize);
+
+	int nfound = 0;
+	for (int cy = cellMinY; cy <= cellMaxY; ++cy)
+	{
+		for (int cx = cellMinX; cx <= cellMaxX; ++cx)
+		{
+			// Walk the bucket chain; 0xffff terminates it.
+			const int bucket = hashPos2(cx, cy, m_bucketsSize);
+			for (unsigned short entry = m_buckets[bucket]; entry != 0xffff; entry = m_pool[entry].next)
+			{
+				const Item& item = m_pool[entry];
+				// Different cells can share a bucket, so compare the coordinates.
+				if ((int)item.x != cx || (int)item.y != cy)
+					continue;
+
+				// Skip ids that were reported already.
+				int k = 0;
+				while (k < nfound && ids[k] != item.id)
+					++k;
+				if (k < nfound)
+					continue;
+
+				if (nfound >= maxIds)
+					return nfound;
+				ids[nfound++] = item.id;
+			}
+		}
+	}
+
+	return nfound;
+}
+
+int dtProximityGrid::getItemCountAt(const int x, const int y) const
+{
+	// Counts the pool entries registered for cell (x,y). Entries of other cells that
+	// share the same bucket are skipped.
+	int count = 0;
+
+	const int bucket = hashPos2(x, y, m_bucketsSize);
+	for (unsigned short entry = m_buckets[bucket]; entry != 0xffff; entry = m_pool[entry].next)
+	{
+		const Item& item = m_pool[entry];
+		if ((int)item.x == x && (int)item.y == y)
+			++count;
+	}
+
+	return count;
+}
diff --git a/Engine/lib/recast/DetourTileCache/CMakeLists.txt b/Engine/lib/recast/DetourTileCache/CMakeLists.txt
new file mode 100644
index 000000000..dd481a436
--- /dev/null
+++ b/Engine/lib/recast/DetourTileCache/CMakeLists.txt
@@ -0,0 +1,18 @@
+cmake_minimum_required(VERSION 2.6)
+
+# Implementation files of the DetourTileCache library.
+set(detourtilecache_SRCS
+	Source/DetourTileCache.cpp
+	Source/DetourTileCacheBuilder.cpp)
+
+# Headers handed to add_library() together with the sources.
+set(detourtilecache_HDRS
+	Include/DetourTileCache.h
+	Include/DetourTileCacheBuilder.h)
+
+# Own headers plus the sibling Detour and Recast include directories.
+include_directories(Include
+	../Detour/Include
+	../Recast/Include)
+
+add_library(DetourTileCache ${detourtilecache_SRCS} ${detourtilecache_HDRS})
diff --git a/Engine/lib/recast/DetourTileCache/Include/DetourTileCache.h b/Engine/lib/recast/DetourTileCache/Include/DetourTileCache.h
new file mode 100644
index 000000000..21ee25e74
--- /dev/null
+++ b/Engine/lib/recast/DetourTileCache/Include/DetourTileCache.h
@@ -0,0 +1,210 @@
+#ifndef DETOURTILECACHE_H
+#define DETOURTILECACHE_H
+
+#include "DetourStatus.h"
+
+
+
+typedef unsigned int dtObstacleRef;
+
+typedef unsigned int dtCompressedTileRef;
+
+/// Flags for addTile
+enum dtCompressedTileFlags
+{
+	DT_COMPRESSEDTILE_FREE_DATA = 0x01,					///< Navmesh owns the tile memory and should free it.
+};
+
+struct dtCompressedTile
+{
+	unsigned int salt;						///< Counter describing modifications to the tile.
+	struct dtTileCacheLayerHeader* header;
+	unsigned char* compressed;
+	int compressedSize;
+	unsigned char* data;
+	int dataSize;
+	unsigned int flags;
+	dtCompressedTile* next;
+};
+
+enum ObstacleState
+{
+	DT_OBSTACLE_EMPTY,
+	DT_OBSTACLE_PROCESSING,
+	DT_OBSTACLE_PROCESSED,
+	DT_OBSTACLE_REMOVING,
+};
+
+static const int DT_MAX_TOUCHED_TILES = 8;
+struct dtTileCacheObstacle
+{
+	float pos[3], radius, height;
+	dtCompressedTileRef touched[DT_MAX_TOUCHED_TILES];
+	dtCompressedTileRef pending[DT_MAX_TOUCHED_TILES];
+	unsigned short salt;
+	unsigned char state;
+	unsigned char ntouched;
+	unsigned char npending;
+	dtTileCacheObstacle* next;
+};
+
+struct dtTileCacheParams
+{
+	float orig[3];
+	float cs, ch;
+	int width, height;
+	float walkableHeight;
+	float walkableRadius;
+	float walkableClimb;
+	float maxSimplificationError;
+	int maxTiles;
+	int maxObstacles;
+};
+
+struct dtTileCacheMeshProcess
+{
+	virtual ~dtTileCacheMeshProcess() {}	///< Lets implementations be destroyed through a base pointer.
+	virtual void process(struct dtNavMeshCreateParams* params, unsigned char* polyAreas, unsigned short* polyFlags) = 0;	///< Called with the navmesh create params and the per-polygon area/flag arrays.
+};
+
+
+class dtTileCache
+{
+public:
+	dtTileCache();
+	~dtTileCache();
+	
+	struct dtTileCacheAlloc* getAlloc() { return m_talloc; }
+	struct dtTileCacheCompressor* getCompressor() { return m_tcomp; }
+	const dtTileCacheParams* getParams() const { return &m_params; }
+	
+	inline int getTileCount() const { return m_params.maxTiles; }
+	inline const dtCompressedTile* getTile(const int i) const { return &m_tiles[i]; }
+	
+	inline int getObstacleCount() const { return m_params.maxObstacles; }
+	inline const dtTileCacheObstacle* getObstacle(const int i) const { return &m_obstacles[i]; }
+	
+	const dtTileCacheObstacle* getObstacleByRef(dtObstacleRef ref);
+	
+	dtObstacleRef getObstacleRef(const dtTileCacheObstacle* obmin) const;
+	
+	dtStatus init(const dtTileCacheParams* params,
+				  struct dtTileCacheAlloc* talloc,
+				  struct dtTileCacheCompressor* tcomp,
+				  struct dtTileCacheMeshProcess* tmproc);
+	
+	int getTilesAt(const int tx, const int ty, dtCompressedTileRef* tiles, const int maxTiles) const ;
+	
+	dtCompressedTile* getTileAt(const int tx, const int ty, const int tlayer);
+	dtCompressedTileRef getTileRef(const dtCompressedTile* tile) const;
+	const dtCompressedTile* getTileByRef(dtCompressedTileRef ref) const;
+	
+	dtStatus addTile(unsigned char* data, const int dataSize, unsigned char flags, dtCompressedTileRef* result);
+	
+	dtStatus removeTile(dtCompressedTileRef ref, unsigned char** data, int* dataSize);
+	
+	dtStatus addObstacle(const float* pos, const float radius, const float height, dtObstacleRef* result);
+	dtStatus removeObstacle(const dtObstacleRef ref);
+	
+	dtStatus queryTiles(const float* bmin, const float* bmax,
+						dtCompressedTileRef* results, int* resultCount, const int maxResults) const;
+	
+	dtStatus update(const float /*dt*/, class dtNavMesh* navmesh);
+	
+	dtStatus buildNavMeshTilesAt(const int tx, const int ty, class dtNavMesh* navmesh);
+	
+	dtStatus buildNavMeshTile(const dtCompressedTileRef ref, class dtNavMesh* navmesh);
+	
+	void calcTightTileBounds(const struct dtTileCacheLayerHeader* header, float* bmin, float* bmax) const;
+	
+	void getObstacleBounds(const struct dtTileCacheObstacle* ob, float* bmin, float* bmax) const;
+	
+
+	/// Encodes a tile id.
+	inline dtCompressedTileRef encodeTileId(unsigned int salt, unsigned int it) const
+	{
+		return ((dtCompressedTileRef)salt << m_tileBits) | (dtCompressedTileRef)it;
+	}
+	
+	/// Decodes a tile salt.
+	inline unsigned int decodeTileIdSalt(dtCompressedTileRef ref) const
+	{
+		const dtCompressedTileRef saltMask = ((dtCompressedTileRef)1<<m_saltBits)-1;
+		return (unsigned int)((ref >> m_tileBits) & saltMask);
+	}
+	
+	/// Decodes a tile id.
+	inline unsigned int decodeTileIdTile(dtCompressedTileRef ref) const
+	{
+		const dtCompressedTileRef tileMask = ((dtCompressedTileRef)1<<m_tileBits)-1;
+		return (unsigned int)(ref & tileMask);
+	}
+
+	/// Encodes an obstacle id.
+	inline dtObstacleRef encodeObstacleId(unsigned int salt, unsigned int it) const
+	{
+		return ((dtObstacleRef)salt << 16) | (dtObstacleRef)it;
+	}
+	
+	/// Decodes an obstacle salt.
+	inline unsigned int decodeObstacleIdSalt(dtObstacleRef ref) const
+	{
+		const dtObstacleRef saltMask = ((dtObstacleRef)1<<16)-1;
+		return (unsigned int)((ref >> 16) & saltMask);
+	}
+	
+	/// Decodes an obstacle id.
+	inline unsigned int decodeObstacleIdObstacle(dtObstacleRef ref) const
+	{
+		const dtObstacleRef tileMask = ((dtObstacleRef)1<<16)-1;
+		return (unsigned int)(ref & tileMask);
+	}
+	
+	
+private:
+	
+	enum ObstacleRequestAction
+	{
+		REQUEST_ADD,
+		REQUEST_REMOVE,
+	};
+	
+	struct ObstacleRequest
+	{
+		int action;
+		dtObstacleRef ref;
+	};
+	
+	int m_tileLutSize;						///< Tile hash lookup size (must be pot).
+	int m_tileLutMask;						///< Tile hash lookup mask.
+	
+	dtCompressedTile** m_posLookup;			///< Tile hash lookup.
+	dtCompressedTile* m_nextFreeTile;		///< Freelist of tiles.
+	dtCompressedTile* m_tiles;				///< List of tiles.
+	
+	unsigned int m_saltBits;				///< Number of salt bits in the tile ID.
+	unsigned int m_tileBits;				///< Number of tile bits in the tile ID.
+	
+	dtTileCacheParams m_params;
+	
+	dtTileCacheAlloc* m_talloc;
+	dtTileCacheCompressor* m_tcomp;
+	dtTileCacheMeshProcess* m_tmproc;
+	
+	dtTileCacheObstacle* m_obstacles;
+	dtTileCacheObstacle* m_nextFreeObstacle;
+	
+	static const int MAX_REQUESTS = 64;
+	ObstacleRequest m_reqs[MAX_REQUESTS];
+	int m_nreqs;
+	
+	static const int MAX_UPDATE = 64;
+	dtCompressedTileRef m_update[MAX_UPDATE];
+	int m_nupdate;
+	
+};
+
+dtTileCache* dtAllocTileCache();
+void dtFreeTileCache(dtTileCache* tc);
+
+#endif
diff --git a/Engine/lib/recast/DetourTileCache/Include/DetourTileCacheBuilder.h b/Engine/lib/recast/DetourTileCache/Include/DetourTileCacheBuilder.h
new file mode 100644
index 000000000..e2b798406
--- /dev/null
+++ b/Engine/lib/recast/DetourTileCache/Include/DetourTileCacheBuilder.h
@@ -0,0 +1,148 @@
+//
+// Copyright (c) 2009-2010 Mikko Mononen memon@inside.org
+//
+// This software is provided 'as-is', without any express or implied
+// warranty.  In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source distribution.
+//
+
+#ifndef DETOURTILECACHEBUILDER_H
+#define DETOURTILECACHEBUILDER_H
+
+#include "DetourAlloc.h"
+#include "DetourStatus.h"
+
+static const int DT_TILECACHE_MAGIC = 'D'<<24 | 'T'<<16 | 'L'<<8 | 'R'; ///< 'DTLR';
+static const int DT_TILECACHE_VERSION = 1;
+
+static const unsigned char DT_TILECACHE_NULL_AREA = 0;
+static const unsigned char DT_TILECACHE_WALKABLE_AREA = 63;
+static const unsigned short DT_TILECACHE_NULL_IDX = 0xffff;
+
+struct dtTileCacheLayerHeader
+{
+	int magic;								///< Data magic
+	int version;							///< Data version
+	int tx,ty,tlayer;
+	float bmin[3], bmax[3];
+	unsigned short hmin, hmax;				///< Height min/max range
+	unsigned char width, height;			///< Dimension of the layer.
+	unsigned char minx, maxx, miny, maxy;	///< Usable sub-region.
+};
+
+struct dtTileCacheLayer
+{
+	dtTileCacheLayerHeader* header;
+	unsigned char regCount;					///< Region count.
+	unsigned char* heights;
+	unsigned char* areas;
+	unsigned char* cons;
+	unsigned char* regs;
+};
+
+struct dtTileCacheContour
+{
+	int nverts;
+	unsigned char* verts;
+	unsigned char reg;
+	unsigned char area;
+};
+
+struct dtTileCacheContourSet
+{
+	int nconts;
+	dtTileCacheContour* conts;
+};
+
+struct dtTileCachePolyMesh
+{
+	int nvp;
+	int nverts;				///< Number of vertices.
+	int npolys;				///< Number of polygons.
+	unsigned short* verts;	///< Vertices of the mesh, 3 elements per vertex.
+	unsigned short* polys;	///< Polygons of the mesh, nvp*2 elements per polygon.
+	unsigned short* flags;	///< Per polygon flags.
+	unsigned char* areas;	///< Area ID of polygons.
+};
+
+
+/// Allocator interface handed to the tile cache build functions.
+/// The default implementation forwards to dtAlloc()/dtFree().
+struct dtTileCacheAlloc
+{
+	/// Virtual destructor so implementations can be destroyed through a base pointer.
+	virtual ~dtTileCacheAlloc() {}
+	
+	/// Reset hook; the default implementation does nothing.
+	virtual void reset() {}
+	
+	/// Returns size bytes obtained from dtAlloc() with the DT_ALLOC_TEMP hint.
+	virtual void* alloc(const int size) { return dtAlloc(size, DT_ALLOC_TEMP); }
+	
+	/// Releases ptr through dtFree().
+	virtual void free(void* ptr) { dtFree(ptr); }
+};
+
+struct dtTileCacheCompressor
+{
+	virtual ~dtTileCacheCompressor() {}	///< Lets implementations be destroyed through a base pointer.
+	virtual int maxCompressedSize(const int bufferSize) = 0;
+	virtual dtStatus compress(const unsigned char* buffer, const int bufferSize, unsigned char* compressed, const int maxCompressedSize, int* compressedSize) = 0;
+	virtual dtStatus decompress(const unsigned char* compressed, const int compressedSize,
+								unsigned char* buffer, const int maxBufferSize, int* bufferSize) = 0;
+};
+
+
+dtStatus dtBuildTileCacheLayer(dtTileCacheCompressor* comp,
+							   dtTileCacheLayerHeader* header,
+							   const unsigned char* heights,
+							   const unsigned char* areas,
+							   const unsigned char* cons,
+							   unsigned char** outData, int* outDataSize);
+
+void dtFreeTileCacheLayer(dtTileCacheAlloc* alloc, dtTileCacheLayer* layer);
+
+dtStatus dtDecompressTileCacheLayer(dtTileCacheAlloc* alloc, dtTileCacheCompressor* comp,
+									unsigned char* compressed, const int compressedSize,
+									dtTileCacheLayer** layerOut);
+
+dtTileCacheContourSet* dtAllocTileCacheContourSet(dtTileCacheAlloc* alloc);
+void dtFreeTileCacheContourSet(dtTileCacheAlloc* alloc, dtTileCacheContourSet* cset);
+
+dtTileCachePolyMesh* dtAllocTileCachePolyMesh(dtTileCacheAlloc* alloc);
+void dtFreeTileCachePolyMesh(dtTileCacheAlloc* alloc, dtTileCachePolyMesh* lmesh);
+
+dtStatus dtMarkCylinderArea(dtTileCacheLayer& layer, const float* orig, const float cs, const float ch,
+							const float* pos, const float radius, const float height, const unsigned char areaId);
+
+dtStatus dtBuildTileCacheRegions(dtTileCacheAlloc* alloc,
+								 dtTileCacheLayer& layer,
+								 const int walkableClimb);
+
+dtStatus dtBuildTileCacheContours(dtTileCacheAlloc* alloc,
+								  dtTileCacheLayer& layer,
+								  const int walkableClimb, 	const float maxError,
+								  dtTileCacheContourSet& lcset);
+
+dtStatus dtBuildTileCachePolyMesh(dtTileCacheAlloc* alloc,
+								  dtTileCacheContourSet& lcset,
+								  dtTileCachePolyMesh& mesh);
+
+/// Swaps the endianness of the compressed tile data's header (#dtTileCacheLayerHeader).
+/// Tile layer data does not need endian swapping as it consists only of bytes.
+///  @param[in,out]	data		The tile data array.
+///  @param[in]		dataSize	The size of the data array.
+bool dtTileCacheHeaderSwapEndian(unsigned char* data, const int dataSize);
+
+
+#endif // DETOURTILECACHEBUILDER_H
diff --git a/Engine/lib/recast/DetourTileCache/Source/DetourTileCache.cpp b/Engine/lib/recast/DetourTileCache/Source/DetourTileCache.cpp
new file mode 100644
index 000000000..8933f6985
--- /dev/null
+++ b/Engine/lib/recast/DetourTileCache/Source/DetourTileCache.cpp
@@ -0,0 +1,704 @@
+#include "DetourTileCache.h"
+#include "DetourTileCacheBuilder.h"
+#include "DetourNavMeshBuilder.h"
+#include "DetourNavMesh.h"
+#include "DetourCommon.h"
+#include "DetourAlloc.h"
+#include "DetourAssert.h"
+#include <math.h>
+#include <string.h>
+#include <new>
+
+dtTileCache* dtAllocTileCache()
+{
+	// Raw storage comes from dtAlloc(); the object is placement-constructed into it.
+	void* storage = dtAlloc(sizeof(dtTileCache), DT_ALLOC_PERM);
+	return storage ? (new(storage) dtTileCache) : 0;
+}
+
+void dtFreeTileCache(dtTileCache* tc)
+{
+	if (!tc) return;	// A null pointer is accepted and ignored.
+	tc->~dtTileCache();	// Explicit destructor call; the storage itself is released by dtFree() below.
+	dtFree(tc);
+}
+
+static bool contains(const dtCompressedTileRef* a, const int n, const dtCompressedTileRef v)
+{
+	// Advance past every entry that differs from v; stopping before n means a match.
+	int pos = 0;
+	while (pos < n && a[pos] != v) ++pos;
+	return pos < n;
+}
+
+inline int computeTileHash(int x, int y, const int mask)
+{
+	// Multiply each coordinate by its own large constant (unsigned arithmetic, so the
+	// products wrap), add them, and keep only the bits selected by mask.
+	const unsigned int mixed = 0x8da6b343u * (unsigned int)x + 0xd8163841u * (unsigned int)y;
+	return (int)(mixed & (unsigned int)mask);
+}
+
+
+struct BuildContext
+{
+	inline BuildContext(struct dtTileCacheAlloc* a) : layer(0), lcset(0), lmesh(0), alloc(a) {}
+	inline ~BuildContext() { purge(); }
+	void purge()
+	{
+		dtFreeTileCacheLayer(alloc, layer);
+		layer = 0;
+		dtFreeTileCacheContourSet(alloc, lcset);
+		lcset = 0;
+		dtFreeTileCachePolyMesh(alloc, lmesh);
+		lmesh = 0;
+	}
+	struct dtTileCacheLayer* layer;
+	struct dtTileCacheContourSet* lcset;
+	struct dtTileCachePolyMesh* lmesh;
+	struct dtTileCacheAlloc* alloc;
+};
+
+
+dtTileCache::dtTileCache() :
+	m_tileLutSize(0),
+	m_tileLutMask(0),
+	m_posLookup(0),
+	m_nextFreeTile(0),	
+	m_tiles(0),	
+	m_saltBits(0),
+	m_tileBits(0),
+	m_talloc(0),
+	m_tcomp(0),
+	m_tmproc(0),
+	m_obstacles(0),
+	m_nextFreeObstacle(0),
+	m_nreqs(0),
+	m_nupdate(0)
+{
+	memset(&m_params, 0, sizeof(m_params));
+}
+	
+dtTileCache::~dtTileCache()
+{
+	// init() copies m_params before allocating, so after a failed init() maxTiles can be
+	// non-zero while m_tiles is still null; the loop therefore also checks the pointer.
+	for (int i = 0; m_tiles && i < m_params.maxTiles; ++i)
+		if (m_tiles[i].flags & DT_COMPRESSEDTILE_FREE_DATA)
+		{
+			dtFree(m_tiles[i].data);	// Only tiles flagged as owned are freed here.
+			m_tiles[i].data = 0;
+		}
+	dtFree(m_obstacles);
+	m_obstacles = 0;
+	dtFree(m_posLookup);
+	m_posLookup = 0;
+	dtFree(m_tiles);
+	m_tiles = 0;
+	m_nreqs = 0;
+	m_nupdate = 0;
+}
+
+const dtCompressedTile* dtTileCache::getTileByRef(dtCompressedTileRef ref) const
+{
+	// A zero reference never names a tile.
+	if (!ref)
+		return 0;
+	// The decoded tile index selects a slot in m_tiles; reject indices past maxTiles.
+	const unsigned int slot = decodeTileIdTile(ref);
+	if ((int)slot >= m_params.maxTiles)
+		return 0;
+	// The salt carried by the reference must equal the slot's current salt.
+	const dtCompressedTile* candidate = &m_tiles[slot];
+	return (candidate->salt == decodeTileIdSalt(ref)) ? candidate : 0;
+}
+
+
+dtStatus dtTileCache::init(const dtTileCacheParams* params,
+						   dtTileCacheAlloc* talloc,
+						   dtTileCacheCompressor* tcomp,
+						   dtTileCacheMeshProcess* tmproc)
+{	// Stores the collaborators, copies params, allocates the obstacle/tile arrays and derives the ref bit layout.
+	m_talloc = talloc;
+	m_tcomp = tcomp;
+	m_tmproc = tmproc;
+	m_nreqs = 0;
+	memcpy(&m_params, params, sizeof(m_params));	// params is copied by value; the other three pointers are only stored.
+	
+	// Alloc space for obstacles.
+	m_obstacles = (dtTileCacheObstacle*)dtAlloc(sizeof(dtTileCacheObstacle)*m_params.maxObstacles, DT_ALLOC_PERM);
+	if (!m_obstacles)
+		return DT_FAILURE | DT_OUT_OF_MEMORY;
+	memset(m_obstacles, 0, sizeof(dtTileCacheObstacle)*m_params.maxObstacles);
+	m_nextFreeObstacle = 0;
+	for (int i = m_params.maxObstacles-1; i >= 0; --i)	// Pushed back to front, so slot 0 ends up at the head of the free list.
+	{
+		m_obstacles[i].salt = 1;
+		m_obstacles[i].next = m_nextFreeObstacle;
+		m_nextFreeObstacle = &m_obstacles[i];
+	}
+	
+	// Init tiles
+	m_tileLutSize = dtNextPow2(m_params.maxTiles/4);
+	if (!m_tileLutSize) m_tileLutSize = 1;	// Keep at least one lookup bucket.
+	m_tileLutMask = m_tileLutSize-1;
+	
+	m_tiles = (dtCompressedTile*)dtAlloc(sizeof(dtCompressedTile)*m_params.maxTiles, DT_ALLOC_PERM);
+	if (!m_tiles)
+		return DT_FAILURE | DT_OUT_OF_MEMORY;	// m_obstacles stays allocated and assigned on this path.
+	m_posLookup = (dtCompressedTile**)dtAlloc(sizeof(dtCompressedTile*)*m_tileLutSize, DT_ALLOC_PERM);
+	if (!m_posLookup)
+		return DT_FAILURE | DT_OUT_OF_MEMORY;	// m_tiles is allocated here but has not been zeroed yet.
+	memset(m_tiles, 0, sizeof(dtCompressedTile)*m_params.maxTiles);
+	memset(m_posLookup, 0, sizeof(dtCompressedTile*)*m_tileLutSize);
+	m_nextFreeTile = 0;
+	for (int i = m_params.maxTiles-1; i >= 0; --i)	// Same back-to-front push as for the obstacles.
+	{
+		m_tiles[i].salt = 1;
+		m_tiles[i].next = m_nextFreeTile;
+		m_nextFreeTile = &m_tiles[i];
+	}
+	
+	// Init ID generator values.
+	m_tileBits = dtIlog2(dtNextPow2((unsigned int)m_params.maxTiles));
+	// Only allow 31 salt bits, since the salt mask is calculated using 32bit uint and it will overflow.
+	m_saltBits = dtMin((unsigned int)31, 32 - m_tileBits);
+	if (m_saltBits < 10)
+		return DT_FAILURE | DT_INVALID_PARAM;	// Checked only after the allocations above have been made.
+	
+	return DT_SUCCESS;
+}
+
+int dtTileCache::getTilesAt(const int tx, const int ty, dtCompressedTileRef* tiles, const int maxTiles) const 
+{
+	int n = 0;
+	// Writes a reference for each tile stored at (tx, ty) into tiles and returns the count written; tlayer is not compared.
+	// Find tile based on hash.
+	int h = computeTileHash(tx,ty,m_tileLutMask);
+	dtCompressedTile* tile = m_posLookup[h];
+	while (tile)
+	{
+		if (tile->header &&
+			tile->header->tx == tx &&
+			tile->header->ty == ty)
+		{
+			if (n < maxTiles)	// Matches beyond maxTiles are skipped without any error.
+				tiles[n++] = getTileRef(tile);
+		}
+		tile = tile->next;
+	}
+	
+	return n;
+}
+
+dtCompressedTile* dtTileCache::getTileAt(const int tx, const int ty, const int tlayer)
+{	// Returns the tile whose header matches (tx, ty, tlayer), or 0 when the bucket chain holds none.
+	// Find tile based on hash.
+	int h = computeTileHash(tx,ty,m_tileLutMask);
+	dtCompressedTile* tile = m_posLookup[h];
+	while (tile)
+	{
+		if (tile->header &&
+			tile->header->tx == tx &&
+			tile->header->ty == ty &&
+			tile->header->tlayer == tlayer)
+		{
+			return tile;
+		}
+		tile = tile->next;	// Next entry of the same bucket chain.
+	}
+	return 0;
+}
+
+dtCompressedTileRef dtTileCache::getTileRef(const dtCompressedTile* tile) const
+{
+	// Null maps to the zero reference; otherwise pack the tile's salt with its m_tiles index.
+	if (tile == 0)
+		return 0;
+	return encodeTileId(tile->salt, (unsigned int)(tile - m_tiles));
+}
+
+dtObstacleRef dtTileCache::getObstacleRef(const dtTileCacheObstacle* ob) const
+{
+	// Null maps to the zero reference; otherwise pack the salt with the m_obstacles index.
+	if (ob == 0)
+		return 0;
+	return encodeObstacleId(ob->salt, (unsigned int)(ob - m_obstacles));
+}
+
+const dtTileCacheObstacle* dtTileCache::getObstacleByRef(dtObstacleRef ref)
+{
+	// A zero reference never names an obstacle.
+	if (!ref)
+		return 0;
+	// The decoded obstacle index selects a slot; reject indices past maxObstacles.
+	const unsigned int slot = decodeObstacleIdObstacle(ref);
+	if ((int)slot >= m_params.maxObstacles)
+		return 0;
+	// The salt carried by the reference must equal the slot's current salt.
+	const dtTileCacheObstacle* candidate = &m_obstacles[slot];
+	return (candidate->salt == decodeObstacleIdSalt(ref)) ? candidate : 0;
+}
+
+dtStatus dtTileCache::addTile(unsigned char* data, const int dataSize, unsigned char flags, dtCompressedTileRef* result)
+{
+	// Make sure the data is in right format.
+	dtTileCacheLayerHeader* header = (dtTileCacheLayerHeader*)data;	// NOTE(review): data and dataSize are not validated before the header is read.
+	if (header->magic != DT_TILECACHE_MAGIC)
+		return DT_FAILURE | DT_WRONG_MAGIC;
+	if (header->version != DT_TILECACHE_VERSION)
+		return DT_FAILURE | DT_WRONG_VERSION;
+	
+	// Make sure the location is free.
+	if (getTileAt(header->tx, header->ty, header->tlayer))
+		return DT_FAILURE;
+	
+	// Allocate a tile.
+	dtCompressedTile* tile = 0;
+	if (m_nextFreeTile)
+	{
+		tile = m_nextFreeTile;
+		m_nextFreeTile = tile->next;
+		tile->next = 0;
+	}
+	
+	// Make sure we could allocate a tile.
+	if (!tile)
+		return DT_FAILURE | DT_OUT_OF_MEMORY;
+	
+	// Insert tile into the position lut.
+	int h = computeTileHash(header->tx, header->ty, m_tileLutMask);
+	tile->next = m_posLookup[h];
+	m_posLookup[h] = tile;
+	
+	// Init tile.
+	const int headerSize = dtAlign4(sizeof(dtTileCacheLayerHeader));
+	tile->header = (dtTileCacheLayerHeader*)data;
+	tile->data = data;	// The buffer is referenced, not copied.
+	tile->dataSize = dataSize;
+	tile->compressed = tile->data + headerSize;	// The compressed payload starts right after the aligned header.
+	tile->compressedSize = tile->dataSize - headerSize;
+	tile->flags = flags;	// DT_COMPRESSEDTILE_FREE_DATA makes removeTile() and the destructor free data.
+	
+	if (result)
+		*result = getTileRef(tile);
+	
+	return DT_SUCCESS;
+}
+
+dtStatus dtTileCache::removeTile(dtCompressedTileRef ref, unsigned char** data, int* dataSize)
+{
+	// Unlinks the tile named by ref from the position lookup and returns its slot to the free
+	// list. A zero, out-of-range, stale or unoccupied ref yields DT_FAILURE | DT_INVALID_PARAM.
+	if (!ref)
+		return DT_FAILURE | DT_INVALID_PARAM;
+	unsigned int tileIndex = decodeTileIdTile(ref);
+	unsigned int tileSalt = decodeTileIdSalt(ref);
+	if ((int)tileIndex >= m_params.maxTiles)
+		return DT_FAILURE | DT_INVALID_PARAM;
+	dtCompressedTile* tile = &m_tiles[tileIndex];
+	if (tile->salt != tileSalt || !tile->header)	// A slot without a header holds no tile (it is on the free list).
+		return DT_FAILURE | DT_INVALID_PARAM;
+	
+	// Remove tile from hash lookup.
+	const int h = computeTileHash(tile->header->tx,tile->header->ty,m_tileLutMask);
+	dtCompressedTile* prev = 0;
+	dtCompressedTile* cur = m_posLookup[h];
+	while (cur)
+	{
+		if (cur == tile)
+		{
+			if (prev)
+				prev->next = cur->next;
+			else
+				m_posLookup[h] = cur->next;
+			break;
+		}
+		prev = cur;
+		cur = cur->next;
+	}
+	
+	// Reset tile.
+	if (tile->flags & DT_COMPRESSEDTILE_FREE_DATA)
+	{
+		// Owns data
+		dtFree(tile->data);
+		if (data) *data = 0;	// Nothing is handed back: the buffer was just freed.
+		if (dataSize) *dataSize = 0;
+	}
+	else
+	{
+		if (data) *data = tile->data;	// The caller gets its buffer back.
+		if (dataSize) *dataSize = tile->dataSize;
+	}
+	
+	tile->header = 0;
+	tile->data = 0;
+	tile->dataSize = 0;
+	tile->compressed = 0;
+	tile->compressedSize = 0;
+	tile->flags = 0;
+	
+	// Update salt, salt should never be zero. The mask is built unsigned: m_saltBits may be 31.
+	tile->salt = (tile->salt+1) & ((1u<<m_saltBits)-1);
+	if (tile->salt == 0)
+		tile->salt++;
+	
+	// Add to free list.
+	tile->next = m_nextFreeTile;
+	m_nextFreeTile = tile;
+	
+	return DT_SUCCESS;
+}
+
+
+dtStatus dtTileCache::addObstacle(const float* pos, const float radius, const float height, dtObstacleRef* result)
+{
+	// Reserves an obstacle slot and queues a REQUEST_ADD for update() to process; the new
+	// reference is stored in *result when result is non-null. Returns a dtStatus code,
+	// as declared in DetourTileCache.h.
+	if (m_nreqs >= MAX_REQUESTS)
+		return DT_FAILURE | DT_BUFFER_TOO_SMALL;
+	
+	// Pop a slot off the free list; an empty list means every obstacle slot is in use.
+	dtTileCacheObstacle* ob = m_nextFreeObstacle;
+	if (!ob)
+		return DT_FAILURE | DT_OUT_OF_MEMORY;
+	m_nextFreeObstacle = ob->next;
+	
+	// Clear the slot (this also resets ob->next) but keep its salt.
+	unsigned short salt = ob->salt;
+	memset(ob, 0, sizeof(dtTileCacheObstacle));
+	ob->salt = salt;
+	ob->state = DT_OBSTACLE_PROCESSING;
+	dtVcopy(ob->pos, pos);
+	ob->radius = radius;
+	ob->height = height;
+	
+	ObstacleRequest* req = &m_reqs[m_nreqs++];
+	memset(req, 0, sizeof(ObstacleRequest));
+	req->action = REQUEST_ADD;
+	req->ref = getObstacleRef(ob);
+	
+	if (result)
+		*result = req->ref;
+	
+	return DT_SUCCESS;
+}
+
+dtStatus dtTileCache::removeObstacle(const dtObstacleRef ref)
+{
+	// Queues a REQUEST_REMOVE for update() to process. Returns a dtStatus code, as declared
+	// in DetourTileCache.h; a zero ref is accepted and reported as success.
+	if (!ref)
+		return DT_SUCCESS;
+	if (m_nreqs >= MAX_REQUESTS)
+		return DT_FAILURE | DT_BUFFER_TOO_SMALL;
+	ObstacleRequest* req = &m_reqs[m_nreqs++];
+	memset(req, 0, sizeof(ObstacleRequest));
+	req->action = REQUEST_REMOVE;
+	req->ref = ref;
+	return DT_SUCCESS;
+}
+
+dtStatus dtTileCache::queryTiles(const float* bmin, const float* bmax,
+								 dtCompressedTileRef* results, int* resultCount, const int maxResults) const 
+{	// Collects references of tiles whose tight bounds overlap [bmin, bmax]; always returns DT_SUCCESS.
+	const int MAX_TILES = 32;	// Upper limit on the tiles examined per (tx, ty) cell.
+	dtCompressedTileRef tiles[MAX_TILES];
+	
+	int n = 0;
+	
+	const float tw = m_params.width * m_params.cs;
+	const float th = m_params.height * m_params.cs;
+	const int tx0 = (int)floorf((bmin[0]-m_params.orig[0]) / tw);
+	const int tx1 = (int)floorf((bmax[0]-m_params.orig[0]) / tw);
+	const int ty0 = (int)floorf((bmin[2]-m_params.orig[2]) / th);	// Tile rows are taken from component 2 of the bounds.
+	const int ty1 = (int)floorf((bmax[2]-m_params.orig[2]) / th);
+	
+	for (int ty = ty0; ty <= ty1; ++ty)
+	{
+		for (int tx = tx0; tx <= tx1; ++tx)
+		{
+			const int ntiles = getTilesAt(tx,ty,tiles,MAX_TILES);
+			
+			for (int i = 0; i < ntiles; ++i)
+			{
+				const dtCompressedTile* tile = &m_tiles[decodeTileIdTile(tiles[i])];
+				float tbmin[3], tbmax[3];
+				calcTightTileBounds(tile->header, tbmin, tbmax);
+				
+				if (dtOverlapBounds(bmin,bmax, tbmin,tbmax))
+				{
+					if (n < maxResults)	// Overlapping tiles beyond maxResults are dropped without an error.
+						results[n++] = tiles[i];
+				}
+			}
+		}
+	}
+	
+	*resultCount = n;	// resultCount is written unconditionally, so it must be non-null.
+	
+	return DT_SUCCESS;
+}
+
+dtStatus dtTileCache::update(const float /*dt*/, dtNavMesh* navmesh)
+{	// Consumes queued obstacle requests, then rebuilds at most one pending tile per call.
+	if (m_nupdate == 0)	// New requests are only consumed once the previous update list has drained.
+	{
+		// Process requests.
+		for (int i = 0; i < m_nreqs; ++i)
+		{
+			ObstacleRequest* req = &m_reqs[i];
+			
+			unsigned int idx = decodeObstacleIdObstacle(req->ref);
+			if ((int)idx >= m_params.maxObstacles)
+				continue;	// Index out of range: the request is dropped.
+			dtTileCacheObstacle* ob = &m_obstacles[idx];
+			unsigned int salt = decodeObstacleIdSalt(req->ref);
+			if (ob->salt != salt)
+				continue;	// Salt mismatch (stale reference): the request is dropped.
+			
+			if (req->action == REQUEST_ADD)
+			{
+				// Find touched tiles.
+				float bmin[3], bmax[3];
+				getObstacleBounds(ob, bmin, bmax);
+
+				int ntouched = 0;
+				queryTiles(bmin, bmax, ob->touched, &ntouched, DT_MAX_TOUCHED_TILES);
+				ob->ntouched = (unsigned char)ntouched;
+				// Add tiles to update list.
+				ob->npending = 0;
+				for (int j = 0; j < ob->ntouched; ++j)
+				{
+					if (m_nupdate < MAX_UPDATE)	// When the update list is full the tile is neither queued nor marked pending.
+					{
+						if (!contains(m_update, m_nupdate, ob->touched[j]))
+							m_update[m_nupdate++] = ob->touched[j];
+						ob->pending[ob->npending++] = ob->touched[j];
+					}
+				}
+			}
+			else if (req->action == REQUEST_REMOVE)
+			{
+				// Prepare to remove obstacle.
+				ob->state = DT_OBSTACLE_REMOVING;
+				// Add tiles to update list.
+				ob->npending = 0;
+				for (int j = 0; j < ob->ntouched; ++j)
+				{
+					if (m_nupdate < MAX_UPDATE)	// Same limit as in the add branch.
+					{
+						if (!contains(m_update, m_nupdate, ob->touched[j]))
+							m_update[m_nupdate++] = ob->touched[j];
+						ob->pending[ob->npending++] = ob->touched[j];
+					}
+				}
+			}
+		}
+		
+		m_nreqs = 0;	// Every request is consumed, including the ones skipped above.
+	}
+	
+	// Process updates
+	if (m_nupdate)
+	{
+		// Build mesh
+		const dtCompressedTileRef ref = m_update[0];
+		dtStatus status = buildNavMeshTile(ref, navmesh);
+		m_nupdate--;
+		if (m_nupdate > 0)
+			memmove(m_update, m_update+1, m_nupdate*sizeof(dtCompressedTileRef));	// Shift the remaining entries to the front.
+
+		// Update obstacle states.
+		for (int i = 0; i < m_params.maxObstacles; ++i)
+		{
+			dtTileCacheObstacle* ob = &m_obstacles[i];
+			if (ob->state == DT_OBSTACLE_PROCESSING || ob->state == DT_OBSTACLE_REMOVING)
+			{
+				// Remove handled tile from pending list.
+				for (int j = 0; j < (int)ob->npending; j++)
+				{
+					if (ob->pending[j] == ref)
+					{
+						ob->pending[j] = ob->pending[(int)ob->npending-1];	// Swap-remove: order of pending is not kept.
+						ob->npending--;
+						break;
+					}
+				}
+				
+				// If all pending tiles processed, change state.
+				if (ob->npending == 0)
+				{
+					if (ob->state == DT_OBSTACLE_PROCESSING)
+					{
+						ob->state = DT_OBSTACLE_PROCESSED;
+					}
+					else if (ob->state == DT_OBSTACLE_REMOVING)
+					{
+						ob->state = DT_OBSTACLE_EMPTY;
+						// Update salt, salt should never be zero.
+						ob->salt = (ob->salt+1) & ((1<<16)-1);
+						if (ob->salt == 0)
+							ob->salt++;
+						// Return obstacle to free list.
+						ob->next = m_nextFreeObstacle;
+						m_nextFreeObstacle = ob;
+					}
+				}
+			}
+		}
+			
+		if (dtStatusFailed(status))	// A build failure is reported only after the obstacle bookkeeping above has run.
+			return status;
+	}
+	
+	return DT_SUCCESS;
+}
+
+
+dtStatus dtTileCache::buildNavMeshTilesAt(const int tx, const int ty, dtNavMesh* navmesh)
+{
+	// Gather up to 32 tile references stored at (tx, ty).
+	const int kMaxRefs = 32;
+	dtCompressedTileRef refs[kMaxRefs];
+	const int count = getTilesAt(tx, ty, refs, kMaxRefs);
+	
+	// Rebuild each tile in turn. The first failing status stops the loop and is
+	// returned unchanged; otherwise the result is plain DT_SUCCESS.
+	dtStatus result = DT_SUCCESS;
+	for (int k = 0; k < count && !dtStatusFailed(result); ++k)
+		result = buildNavMeshTile(refs[k], navmesh);
+	
+	return dtStatusFailed(result) ? result : DT_SUCCESS;
+}
+
+dtStatus dtTileCache::buildNavMeshTile(const dtCompressedTileRef ref, dtNavMesh* navmesh)
+{	
+	dtAssert(m_talloc);
+	dtAssert(m_tcomp);
+	// Rebuilds the navmesh tile for ref from its compressed layer and the current obstacles.
+	unsigned int idx = decodeTileIdTile(ref);
+	if (idx >= (unsigned int)m_params.maxTiles)	// m_tiles holds maxTiles entries, so idx == maxTiles is out of range.
+		return DT_FAILURE | DT_INVALID_PARAM;
+	const dtCompressedTile* tile = &m_tiles[idx];
+	unsigned int salt = decodeTileIdSalt(ref);
+	if (tile->salt != salt)
+		return DT_FAILURE | DT_INVALID_PARAM;
+	
+	m_talloc->reset();
+	
+	BuildContext bc(m_talloc);	// Frees layer, contour set and poly mesh when it goes out of scope.
+	const int walkableClimbVx = (int)(m_params.walkableClimb / m_params.ch);
+	dtStatus status;
+	
+	// Decompress tile layer data. 
+	status = dtDecompressTileCacheLayer(m_talloc, m_tcomp, tile->data, tile->dataSize, &bc.layer);
+	if (dtStatusFailed(status))
+		return status;
+	
+	// Rasterize obstacles that touch this tile; empty and removing obstacles are skipped.
+	for (int i = 0; i < m_params.maxObstacles; ++i)
+	{
+		const dtTileCacheObstacle* ob = &m_obstacles[i];
+		if (ob->state == DT_OBSTACLE_EMPTY || ob->state == DT_OBSTACLE_REMOVING)
+			continue;
+		if (contains(ob->touched, ob->ntouched, ref))
+			dtMarkCylinderArea(*bc.layer, tile->header->bmin, m_params.cs, m_params.ch,
+							   ob->pos, ob->radius, ob->height, 0);
+	}
+	
+	// Build navmesh
+	status = dtBuildTileCacheRegions(m_talloc, *bc.layer, walkableClimbVx);
+	if (dtStatusFailed(status))
+		return status;
+	
+	bc.lcset = dtAllocTileCacheContourSet(m_talloc);
+	if (!bc.lcset)	// status still holds the previous step's success value, so report the failure explicitly.
+		return DT_FAILURE | DT_OUT_OF_MEMORY;
+	status = dtBuildTileCacheContours(m_talloc, *bc.layer, walkableClimbVx,
+									  m_params.maxSimplificationError, *bc.lcset);
+	if (dtStatusFailed(status))
+		return status;
+	
+	bc.lmesh = dtAllocTileCachePolyMesh(m_talloc);
+	if (!bc.lmesh)	// Same reasoning as for the contour set above.
+		return DT_FAILURE | DT_OUT_OF_MEMORY;
+	status = dtBuildTileCachePolyMesh(m_talloc, *bc.lcset, *bc.lmesh);
+	if (dtStatusFailed(status))
+		return status;
+	
+	// The rebuilt tile contains no polygons: drop any navmesh tile left over from an
+	// earlier build at this location so stale polygons do not remain, then stop.
+	if (!bc.lmesh->npolys)
+	{
+		navmesh->removeTile(navmesh->getTileRefAt(tile->header->tx,tile->header->ty,tile->header->tlayer),0,0);
+		return DT_SUCCESS;
+	}
+	
+	dtNavMeshCreateParams params;
+	memset(&params, 0, sizeof(params));
+	params.verts = bc.lmesh->verts;
+	params.vertCount = bc.lmesh->nverts;
+	params.polys = bc.lmesh->polys;
+	params.polyAreas = bc.lmesh->areas;
+	params.polyFlags = bc.lmesh->flags;
+	params.polyCount = bc.lmesh->npolys;
+	params.nvp = DT_VERTS_PER_POLYGON;
+	params.walkableHeight = m_params.walkableHeight;
+	params.walkableRadius = m_params.walkableRadius;
+	params.walkableClimb = m_params.walkableClimb;
+	params.tileX = tile->header->tx;
+	params.tileY = tile->header->ty;
+	params.tileLayer = tile->header->tlayer;
+	params.cs = m_params.cs;
+	params.ch = m_params.ch;
+	params.buildBvTree = false;
+	dtVcopy(params.bmin, tile->header->bmin);
+	dtVcopy(params.bmax, tile->header->bmax);
+	
+	if (m_tmproc)	// The mesh process hook is optional.
+		m_tmproc->process(&params, bc.lmesh->areas, bc.lmesh->flags);
+	
+	unsigned char* navData = 0;
+	int navDataSize = 0;
+	if (!dtCreateNavMeshData(&params, &navData, &navDataSize))
+		return DT_FAILURE;
+
+	// Remove existing tile.
+	navmesh->removeTile(navmesh->getTileRefAt(tile->header->tx,tile->header->ty,tile->header->tlayer),0,0);
+
+	// Add new tile, or leave the location empty.
+	if (navData)
+	{
+		// Let the navmesh own the data.
+		status = navmesh->addTile(navData,navDataSize,DT_TILE_FREE_DATA,0,0);
+		if (dtStatusFailed(status))
+		{
+			dtFree(navData);	// Ownership was not taken, so release the buffer here.
+			return status;
+		}
+	}
+	
+	return DT_SUCCESS;
+}
+
+void dtTileCache::calcTightTileBounds(const dtTileCacheLayerHeader* header, float* bmin, float* bmax) const
+{	// Shrinks the header's bounds in components 0 and 2 to the [minx..maxx] x [miny..maxy] cell range.
+	const float cs = m_params.cs;
+	bmin[0] = header->bmin[0] + header->minx*cs;
+	bmin[1] = header->bmin[1];	// Component 1 is copied from the header unchanged.
+	bmin[2] = header->bmin[2] + header->miny*cs;
+	bmax[0] = header->bmin[0] + (header->maxx+1)*cs;	// +1 because maxx is treated as inclusive.
+	bmax[1] = header->bmax[1];
+	bmax[2] = header->bmin[2] + (header->maxy+1)*cs;
+}
+
+void dtTileCache::getObstacleBounds(const struct dtTileCacheObstacle* ob, float* bmin, float* bmax) const
+{	// Axis-aligned box around the obstacle: +/- radius in components 0 and 2, pos[1] .. pos[1]+height in component 1.
+	bmin[0] = ob->pos[0] - ob->radius;
+	bmin[1] = ob->pos[1];	// The box starts at pos[1]; it is not centred on it.
+	bmin[2] = ob->pos[2] - ob->radius;
+	bmax[0] = ob->pos[0] + ob->radius;
+	bmax[1] = ob->pos[1] + ob->height;
+	bmax[2] = ob->pos[2] + ob->radius;	
+}
diff --git a/Engine/lib/recast/DetourTileCache/Source/DetourTileCacheBuilder.cpp b/Engine/lib/recast/DetourTileCache/Source/DetourTileCacheBuilder.cpp
new file mode 100644
index 000000000..ca336a0e8
--- /dev/null
+++ b/Engine/lib/recast/DetourTileCache/Source/DetourTileCacheBuilder.cpp
@@ -0,0 +1,2150 @@
+//
+// Copyright (c) 2009-2010 Mikko Mononen memon@inside.org
+//
+// This software is provided 'as-is', without any express or implied
+// warranty.  In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source distribution.
+//
+
+#include "DetourCommon.h"
+#include "DetourStatus.h"
+#include "DetourAssert.h"
+#include "DetourTileCacheBuilder.h"
+#include <string.h>
+#include <math.h>
+
+
+template<class T> class dtFixedArray	// Scoped, fixed-size buffer of T obtained from a dtTileCacheAlloc.
+{
+	dtTileCacheAlloc* m_alloc;
+	T* m_ptr;
+	const int m_size;
+	inline T* operator=(T* p);	// private and not defined in this file: assignment from a raw pointer is disabled
+	inline void operator=(dtFixedArray<T>& p);	// likewise disabled; NOTE(review): the implicit copy constructor is not, and a copy would free m_ptr twice
+	inline dtFixedArray();	// private: a buffer must always be created with an allocator and a size
+public:
+	inline dtFixedArray(dtTileCacheAlloc* a, const int s) : m_alloc(a), m_ptr((T*)a->alloc(sizeof(T)*s)), m_size(s) {}	// requests s elements; the memory is not initialised here and callers test the result against null
+	inline ~dtFixedArray() { if (m_alloc) m_alloc->free(m_ptr); }	// hands the buffer back to the allocator
+	inline operator T*() { return m_ptr; }	// lets the object be indexed, passed and null-tested like a plain T*
+	inline int size() const { return m_size; }	// element count passed to the constructor
+};
+
+inline int getDirOffsetX(int dir)
+{
+	static const int kStepX[4] = { -1, 0, 1, 0 };	// x step for directions 0..3
+	return kStepX[dir & 0x03];	// the mask folds any int into the table range
+}
+
+inline int getDirOffsetY(int dir)
+{
+	static const int kStepY[4] = { 0, 1, 0, -1 };	// y step for directions 0..3
+	return kStepY[dir & 0x03];	// the mask folds any int into the table range
+}
+
+static const int MAX_VERTS_PER_POLY = 6;	// TODO: use the DT_VERTS_PER_POLYGON
+static const int MAX_REM_EDGES = 48;		// TODO: make this an expression.
+
+
+
+dtTileCacheContourSet* dtAllocTileCacheContourSet(dtTileCacheAlloc* alloc)
+{
+	dtAssert(alloc);
+	// Returns a zero-initialised contour set taken from 'alloc', or null when the allocation fails.
+	dtTileCacheContourSet* cset = (dtTileCacheContourSet*)alloc->alloc(sizeof(dtTileCacheContourSet));
+	if (cset) memset(cset, 0, sizeof(dtTileCacheContourSet));	// never memset through a failed allocation
+	return cset;
+}
+
+void dtFreeTileCacheContourSet(dtTileCacheAlloc* alloc, dtTileCacheContourSet* cset)
+{
+	dtAssert(alloc);
+	// Frees each contour's vertex buffer, then the contour array, then the set itself; a null set is ignored.
+	if (!cset) return;
+	for (int i = 0; cset->conts && i < cset->nconts; ++i)	// conts can be null while nconts > 0 when the contour build ran out of memory
+		alloc->free(cset->conts[i].verts);
+	alloc->free(cset->conts);
+	alloc->free(cset);
+}
+
+dtTileCachePolyMesh* dtAllocTileCachePolyMesh(dtTileCacheAlloc* alloc)
+{
+	dtAssert(alloc);
+	// Returns a zero-initialised poly mesh taken from 'alloc', or null when the allocation fails.
+	dtTileCachePolyMesh* lmesh = (dtTileCachePolyMesh*)alloc->alloc(sizeof(dtTileCachePolyMesh));
+	if (lmesh) memset(lmesh, 0, sizeof(dtTileCachePolyMesh));	// never memset through a failed allocation
+	return lmesh;
+}
+
+void dtFreeTileCachePolyMesh(dtTileCacheAlloc* alloc, dtTileCachePolyMesh* lmesh)
+{
+	dtAssert(alloc);
+	// Frees the mesh's four arrays and then the mesh itself; a null mesh is ignored.
+	if (!lmesh) return;
+	alloc->free(lmesh->verts);
+	alloc->free(lmesh->polys);
+	alloc->free(lmesh->flags);
+	alloc->free(lmesh->areas);
+	alloc->free(lmesh);
+}
+
+
+
+struct dtLayerSweepSpan
+{
+	unsigned short ns;	// number of samples (cells) of this sweep connected to 'nei' in the previous row
+	unsigned char id;	// region id assigned to the sweep once the row is finished
+	unsigned char nei;	// neighbour region id from the previous row; 0xff when there is none or more than one
+};
+
+static const int DT_LAYER_MAX_NEIS = 16;
+
+struct dtLayerMonotoneRegion
+{
+	int area;	// number of cells labelled with this region
+	unsigned char neis[DT_LAYER_MAX_NEIS];	// indices of regions connected to this one across adjacent rows
+	unsigned char nneis;	// number of used entries in neis
+	unsigned char regId;	// region id; regions that get merged end up sharing one id
+	unsigned char areaId;	// area id copied from layer.areas for the region's cells
+};
+
+struct dtTempContour
+{
+	inline dtTempContour(unsigned char* vbuf, const int nvbuf,
+						 unsigned short* pbuf, const int npbuf) :
+		verts(vbuf), nverts(0), cverts(nvbuf),
+		poly(pbuf), npoly(0), cpoly(npbuf) 
+	{
+	}
+	unsigned char* verts;	// vertex buffer supplied by the caller, 4 bytes per vertex
+	int nverts;	// number of vertices currently stored
+	int cverts;	// capacity of verts, counted in vertices
+	unsigned short* poly;	// index buffer supplied by the caller; entries index into verts
+	int npoly;	// number of indices currently stored
+	int cpoly;	// capacity of poly, counted in indices
+};
+
+
+
+
+inline bool overlapRangeExl(const unsigned short amin, const unsigned short amax,
+							const unsigned short bmin, const unsigned short bmax)
+{
+	return amin < bmax && amax > bmin;	// exclusive test: ranges that only touch at an end point do not overlap
+}
+
+static void addUniqueLast(unsigned char* a, unsigned char& an, unsigned char v)
+{
+	const int n = (int)an;	// appends v to a[0..an) unless it repeats the most recently added entry
+	if (n > 0 && a[n-1] == v) return;
+	if (n >= DT_LAYER_MAX_NEIS) return;	// list is full: drop the entry rather than write past neis[DT_LAYER_MAX_NEIS]
+	a[an++] = v;
+}
+
+inline bool isConnected(const dtTileCacheLayer& layer,
+						const int ia, const int ib, const int walkableClimb)
+{
+	// Two cells connect when they share an area id and their heights differ by at most walkableClimb.
+	const int heightDelta = (int)layer.heights[ia] - (int)layer.heights[ib];
+	return layer.areas[ia] == layer.areas[ib] && dtAbs(heightDelta) <= walkableClimb;
+}
+
+static bool canMerge(unsigned char oldRegId, unsigned char newRegId, const dtLayerMonotoneRegion* regs, const int nregs)
+{
+	// Counts the neighbour links that lead from regions labelled oldRegId to regions
+	// labelled newRegId; the merge is accepted only when there is exactly one such link.
+	int links = 0;
+	for (int r = 0; r < nregs; ++r)
+	{
+		const dtLayerMonotoneRegion& cur = regs[r];
+		if (cur.regId == oldRegId)
+		{
+			for (int k = 0; k < (int)cur.nneis; ++k)
+				links += (regs[cur.neis[k]].regId == newRegId) ? 1 : 0;
+		}
+	}
+	return links == 1;
+}
+
+
+dtStatus dtBuildTileCacheRegions(dtTileCacheAlloc* alloc,
+								 dtTileCacheLayer& layer,
+								 const int walkableClimb)
+{
+	dtAssert(alloc);
+	// Labels every walkable cell in layer.regs with a region id (0xff = no region) and stores the region count in layer.regCount.
+	const int w = (int)layer.header->width;
+	const int h = (int)layer.header->height;
+	
+	memset(layer.regs,0xff,sizeof(unsigned char)*w*h);
+	
+	const int nsweeps = w;
+	dtFixedArray<dtLayerSweepSpan> sweeps(alloc, nsweeps);
+	if (!sweeps)
+		return DT_FAILURE | DT_OUT_OF_MEMORY;
+	memset(sweeps,0,sizeof(dtLayerSweepSpan)*nsweeps);
+	
+	// Partition walkable area into monotone regions.
+	unsigned char prevCount[256];	// per previous-row region: cells of the current row connected to it
+	unsigned char regId = 0;
+	
+	for (int y = 0; y < h; ++y)
+	{
+		if (regId > 0)
+			memset(prevCount,0,sizeof(unsigned char)*regId);
+		unsigned char sweepId = 0;
+		
+		for (int x = 0; x < w; ++x)
+		{
+			const int idx = x + y*w;
+			if (layer.areas[idx] == DT_TILECACHE_NULL_AREA) continue;
+			
+			unsigned char sid = 0xff;
+			
+			// -x
+			const int xidx = (x-1)+y*w;
+			if (x > 0 && isConnected(layer, idx, xidx, walkableClimb))
+			{
+				if (layer.regs[xidx] != 0xff)
+					sid = layer.regs[xidx];
+			}
+			
+			if (sid == 0xff)
+			{
+				sid = sweepId++;
+				sweeps[sid].nei = 0xff;
+				sweeps[sid].ns = 0;
+			}
+			
+			// -y
+			const int yidx = x+(y-1)*w;
+			if (y > 0 && isConnected(layer, idx, yidx, walkableClimb))
+			{
+				const unsigned char nr = layer.regs[yidx];
+				if (nr != 0xff)
+				{
+					// Set neighbour when first valid neighbour is encountered.
+					if (sweeps[sid].ns == 0)
+						sweeps[sid].nei = nr;
+					
+					if (sweeps[sid].nei == nr)
+					{
+						// Update existing neighbour
+						sweeps[sid].ns++;
+						prevCount[nr]++;
+					}
+					else
+					{
+						// This is hit if there is more than one neighbour.
+						// Invalidate the neighbour.
+						sweeps[sid].nei = 0xff;
+					}
+				}
+			}
+			
+			layer.regs[idx] = sid;
+		}
+		
+		// Create unique ID.
+		for (int i = 0; i < sweepId; ++i)
+		{
+			// If the neighbour is set and there is only one continuous connection to it,
+			// the sweep will be merged with the previous one, else new region is created.
+			if (sweeps[i].nei != 0xff && (unsigned short)prevCount[sweeps[i].nei] == sweeps[i].ns)
+			{
+				sweeps[i].id = sweeps[i].nei;
+			}
+			else
+			{
+				if (regId == 255)
+				{
+					// Region ID's overflow.
+					return DT_FAILURE | DT_BUFFER_TOO_SMALL;
+				}
+				sweeps[i].id = regId++;
+			}
+		}
+		
+		// Remap local sweep ids to region ids.
+		for (int x = 0; x < w; ++x)
+		{
+			const int idx = x+y*w;
+			if (layer.regs[idx] != 0xff)
+				layer.regs[idx] = sweeps[layer.regs[idx]].id;
+		}
+	}
+	
+	// Allocate and init layer regions.
+	const int nregs = (int)regId;
+	dtFixedArray<dtLayerMonotoneRegion> regs(alloc, nregs);
+	if (!regs)
+		return DT_FAILURE | DT_OUT_OF_MEMORY;
+
+	memset(regs, 0, sizeof(dtLayerMonotoneRegion)*nregs);
+	for (int i = 0; i < nregs; ++i)
+		regs[i].regId = 0xff;
+	
+	// Find region neighbours.
+	for (int y = 0; y < h; ++y)
+	{
+		for (int x = 0; x < w; ++x)
+		{
+			const int idx = x+y*w;
+			const unsigned char ri = layer.regs[idx];
+			if (ri == 0xff)
+				continue;
+			
+			// Update area.
+			regs[ri].area++;
+			regs[ri].areaId = layer.areas[idx];
+			
+			// Update neighbours
+			const int ymi = x+(y-1)*w;
+			if (y > 0 && isConnected(layer, idx, ymi, walkableClimb))
+			{
+				const unsigned char rai = layer.regs[ymi];
+				if (rai != 0xff && rai != ri)
+				{
+					addUniqueLast(regs[ri].neis, regs[ri].nneis, rai);
+					addUniqueLast(regs[rai].neis, regs[rai].nneis, ri);
+				}
+			}
+		}
+	}
+	
+	for (int i = 0; i < nregs; ++i)
+		regs[i].regId = (unsigned char)i;
+	
+	for (int i = 0; i < nregs; ++i)	// merge each region into its largest same-area neighbour that canMerge() accepts
+	{
+		dtLayerMonotoneRegion& reg = regs[i];
+		
+		int merge = -1;
+		int mergea = 0;
+		for (int j = 0; j < (int)reg.nneis; ++j)
+		{
+			const unsigned char nei = reg.neis[j];
+			dtLayerMonotoneRegion& regn = regs[nei];
+			if (reg.regId == regn.regId)
+				continue;
+			if (reg.areaId != regn.areaId)
+				continue;
+			if (regn.area > mergea)
+			{
+				if (canMerge(reg.regId, regn.regId, regs, nregs))
+				{
+					mergea = regn.area;
+					merge = (int)nei;
+				}
+			}
+		}
+		if (merge != -1)
+		{
+			const unsigned char oldId = reg.regId;
+			const unsigned char newId = regs[merge].regId;
+			for (int j = 0; j < nregs; ++j)
+				if (regs[j].regId == oldId)
+					regs[j].regId = newId;
+		}
+	}
+	
+	// Compact ids.
+	unsigned char remap[256];
+	memset(remap, 0, 256);
+	// Find number of unique regions.
+	regId = 0;
+	for (int i = 0; i < nregs; ++i)
+		remap[regs[i].regId] = 1;
+	for (int i = 0; i < 256; ++i)
+		if (remap[i])
+			remap[i] = regId++;
+	// Remap ids.
+	for (int i = 0; i < nregs; ++i)
+		regs[i].regId = remap[regs[i].regId];
+	
+	layer.regCount = regId;
+	
+	for (int i = 0; i < w*h; ++i)
+	{
+		if (layer.regs[i] != 0xff)
+			layer.regs[i] = regs[layer.regs[i]].regId;
+	}
+	
+	return DT_SUCCESS;
+}
+
+
+
+static bool appendVertex(dtTempContour& cont, const int x, const int y, const int z, const int r)
+{
+	// Try to merge with existing segments.
+	if (cont.nverts > 1)
+	{
+		unsigned char* pa = &cont.verts[(cont.nverts-2)*4];
+		unsigned char* pb = &cont.verts[(cont.nverts-1)*4];
+		if ((int)pb[3] == r)
+		{
+			if (pa[0] == pb[0] && (int)pb[0] == x)
+			{
+				// The last two verts and the new point share the same x: move the last vert instead of adding one.
+				pb[1] = (unsigned char)y;
+				pb[2] = (unsigned char)z;
+				return true;
+			}
+			else if (pa[2] == pb[2] && (int)pb[2] == z)
+			{
+				// The last two verts and the new point share the same z: move the last vert instead of adding one.
+				pb[0] = (unsigned char)x;
+				pb[1] = (unsigned char)y;
+				return true;
+			}
+		}
+	}
+	
+	// Add new point; fails (returns false) when the vertex buffer is full.
+	if (cont.nverts+1 > cont.cverts)
+		return false;
+	
+	unsigned char* v = &cont.verts[cont.nverts*4];
+	v[0] = (unsigned char)x;
+	v[1] = (unsigned char)y;
+	v[2] = (unsigned char)z;
+	v[3] = (unsigned char)r;	// fourth byte carries the value passed as r
+	cont.nverts++;
+	
+	return true;
+}
+
+
+static unsigned char getNeighbourReg(dtTileCacheLayer& layer,
+									 const int ax, const int ay, const int dir)
+{
+	const int w = (int)layer.header->width;
+	const int ia = ax + ay*w;
+	// Low nibble of cons: one connection bit per direction; high nibble: one portal bit per direction.
+	const unsigned char con = layer.cons[ia] & 0xf;
+	const unsigned char portal = layer.cons[ia] >> 4;
+	const unsigned char mask = (unsigned char)(1<<dir);
+	
+	if ((con & mask) == 0)
+	{
+		// No connection, return portal or hard edge.
+		if (portal & mask)
+			return 0xf8 + (unsigned char)dir;	// portal edges are encoded as 0xf8 + direction
+		return 0xff;	// hard edge
+	}
+	
+	const int bx = ax + getDirOffsetX(dir);
+	const int by = ay + getDirOffsetY(dir);
+	const int ib = bx + by*w;
+	
+	return layer.regs[ib];	// connected: report the region id of the neighbouring cell
+}
+
+static bool walkContour(dtTileCacheLayer& layer, int x, int y, dtTempContour& cont)
+{
+	const int w = (int)layer.header->width;
+	const int h = (int)layer.header->height;
+	// Traces the region boundary starting at cell (x,y) into cont; returns false when cont's vertex buffer is too small.
+	cont.nverts = 0;
+	
+	int startX = x;
+	int startY = y;
+	int startDir = -1;
+	
+	for (int i = 0; i < 4; ++i)	// pick the first direction (tried in order 3,0,1,2) that faces a different region
+	{
+		const int dir = (i+3)&3;
+		unsigned char rn = getNeighbourReg(layer, x, y, dir);
+		if (rn != layer.regs[x+y*w])
+		{
+			startDir = dir;
+			break;
+		}
+	}
+	if (startDir == -1)
+		return true;	// no boundary edge at the start cell: cont stays empty
+	
+	int dir = startDir;
+	const int maxIter = w*h;	// upper bound on the number of walk steps
+	
+	int iter = 0;
+	while (iter < maxIter)
+	{
+		unsigned char rn = getNeighbourReg(layer, x, y, dir);
+		
+		int nx = x;
+		int ny = y;
+		int ndir = dir;
+		
+		if (rn != layer.regs[x+y*w])
+		{
+			// Solid edge.
+			int px = x;
+			int pz = y;
+			switch(dir)	// choose the cell corner to emit for this edge direction (dir 3 keeps x,y)
+			{
+				case 0: pz++; break;
+				case 1: px++; pz++; break;
+				case 2: px++; break;
+			}
+			
+			// Try to merge with previous vertex.
+			if (!appendVertex(cont, px, (int)layer.heights[x+y*w], pz,rn))
+				return false;
+			
+			ndir = (dir+1) & 0x3;  // Rotate CW
+		}
+		else
+		{
+			// Move to next.
+			nx = x + getDirOffsetX(dir);
+			ny = y + getDirOffsetY(dir);
+			ndir = (dir+3) & 0x3;	// Rotate CCW
+		}
+		
+		if (iter > 0 && x == startX && y == startY && dir == startDir)
+			break;	// back at the starting cell and direction: the loop is closed
+		
+		x = nx;
+		y = ny;
+		dir = ndir;
+		
+		iter++;
+	}
+	
+	// Remove last vertex if it is duplicate of the first one.
+	unsigned char* pa = &cont.verts[(cont.nverts-1)*4];
+	unsigned char* pb = &cont.verts[0];
+	if (pa[0] == pb[0] && pa[2] == pb[2])
+		cont.nverts--;
+	
+	return true;
+}	
+
+
+static float distancePtSeg(const int x, const int z,
+						   const int px, const int pz,
+						   const int qx, const int qz)
+{
+	// Squared distance, in the x/z plane, from point (x,z) to the closest point of segment p-q.
+	const float segX = (float)(qx - px);
+	const float segZ = (float)(qz - pz);
+	float offX = (float)(x - px);
+	float offZ = (float)(z - pz);
+	const float segLenSq = segX*segX + segZ*segZ;
+	float t = segX*offX + segZ*offZ;
+	// Normalise the projection parameter unless the segment is degenerate (p == q).
+	if (segLenSq > 0)
+		t /= segLenSq;
+	// Clamp the parameter to the segment's end points.
+	if (t < 0) t = 0;
+	else if (t > 1) t = 1;
+	
+	offX = px + t*segX - x;
+	offZ = pz + t*segZ - z;
+	return offX*offX + offZ*offZ;
+}
+
+static void simplifyContour(dtTempContour& cont, const float maxError)
+{
+	// Simplifies the raw contour in cont.verts in place: keeps the vertices where the fourth byte changes
+	// (or two extreme vertices when it never does), then re-adds raw vertices until none deviates by more than maxError.
+	cont.npoly = 0;
+	if (cont.nverts <= 0) return;	// empty contour: nothing to simplify, and the "% cont.nverts" steps below would divide by zero
+	for (int i = 0; i < cont.nverts; ++i)
+	{
+		int j = (i+1) % cont.nverts;
+		// Check for start of a wall segment.
+		unsigned char ra = cont.verts[j*4+3];
+		unsigned char rb = cont.verts[i*4+3];
+		if (ra != rb)
+			cont.poly[cont.npoly++] = (unsigned short)i;
+	}
+	if (cont.npoly < 2)
+	{
+		// If there is no transitions at all,
+		// create some initial points for the simplification process. 
+		// Find lower-left and upper-right vertices of the contour.
+		int llx = cont.verts[0];
+		int llz = cont.verts[2];
+		int lli = 0;
+		int urx = cont.verts[0];
+		int urz = cont.verts[2];
+		int uri = 0;
+		for (int i = 1; i < cont.nverts; ++i)
+		{
+			int x = cont.verts[i*4+0];
+			int z = cont.verts[i*4+2];
+			if (x < llx || (x == llx && z < llz))
+			{
+				llx = x;
+				llz = z;
+				lli = i;
+			}
+			if (x > urx || (x == urx && z > urz))
+			{
+				urx = x;
+				urz = z;
+				uri = i;
+			}
+		}
+		cont.npoly = 0;
+		cont.poly[cont.npoly++] = (unsigned short)lli;
+		cont.poly[cont.npoly++] = (unsigned short)uri;
+	}
+	
+	// Add points until all raw points are within
+	// error tolerance to the simplified shape.
+	for (int i = 0; i < cont.npoly; )
+	{
+		int ii = (i+1) % cont.npoly;
+		
+		const int ai = (int)cont.poly[i];
+		const int ax = (int)cont.verts[ai*4+0];
+		const int az = (int)cont.verts[ai*4+2];
+		
+		const int bi = (int)cont.poly[ii];
+		const int bx = (int)cont.verts[bi*4+0];
+		const int bz = (int)cont.verts[bi*4+2];
+		
+		// Find maximum deviation from the segment.
+		float maxd = 0;
+		int maxi = -1;
+		int ci, cinc, endi;
+		
+		// Traverse the segment in lexicographical order so that the
+		// max deviation is calculated similarly when traversing
+		// opposite segments.
+		if (bx > ax || (bx == ax && bz > az))
+		{
+			cinc = 1;
+			ci = (ai+cinc) % cont.nverts;
+			endi = bi;
+		}
+		else
+		{
+			cinc = cont.nverts-1;	// stepping by nverts-1 modulo nverts walks the contour backwards
+			ci = (bi+cinc) % cont.nverts;
+			endi = ai;
+		}
+		
+		// Measure every raw vertex between the two end points (maxd holds a squared distance).
+		while (ci != endi)
+		{
+			float d = distancePtSeg(cont.verts[ci*4+0], cont.verts[ci*4+2], ax, az, bx, bz);
+			if (d > maxd)
+			{
+				maxd = d;
+				maxi = ci;
+			}
+			ci = (ci+cinc) % cont.nverts;
+		}
+		
+		// If the max deviation is larger than accepted error,
+		// add new point, else continue to next segment.
+		if (maxi != -1 && maxd > (maxError*maxError))
+		{
+			cont.npoly++;
+			for (int j = cont.npoly-1; j > i; --j)
+				cont.poly[j] = cont.poly[j-1];
+			cont.poly[i+1] = (unsigned short)maxi;
+		}
+		else
+		{
+			++i;
+		}
+	}
+	
+	// Remap vertices
+	int start = 0;
+	for (int i = 1; i < cont.npoly; ++i)
+		if (cont.poly[i] < cont.poly[start])
+			start = i;
+	
+	cont.nverts = 0;
+	for (int i = 0; i < cont.npoly; ++i)
+	{
+		const int j = (start+i) % cont.npoly;
+		unsigned char* src = &cont.verts[cont.poly[j]*4];
+		unsigned char* dst = &cont.verts[cont.nverts*4];
+		dst[0] = src[0];
+		dst[1] = src[1];
+		dst[2] = src[2];
+		dst[3] = src[3];
+		cont.nverts++;
+	}
+}
+
+static unsigned char getCornerHeight(dtTileCacheLayer& layer,
+									 const int x, const int y, const int z,
+									 const int walkableClimb,
+									 bool& shouldRemove)
+{
+	const int w = (int)layer.header->width;
+	const int h = (int)layer.header->height;
+	// Returns the highest height among the up-to-four cells meeting at corner (x,z) that lie within walkableClimb of y.
+	int n = 0;
+	
+	unsigned char portal = 0xf;	// portal bits common to all accepted cells (AND-accumulated)
+	unsigned char height = 0;
+	unsigned char preg = 0xff;	// region of the previously accepted cell
+	bool allSameReg = true;
+	
+	for (int dz = -1; dz <= 0; ++dz)
+	{
+		for (int dx = -1; dx <= 0; ++dx)
+		{
+			const int px = x+dx;
+			const int pz = z+dz;
+			if (px >= 0 && pz >= 0 && px < w && pz < h)
+			{
+				const int idx  = px + pz*w;
+				const int lh = (int)layer.heights[idx];
+				if (dtAbs(lh-y) <= walkableClimb && layer.areas[idx] != DT_TILECACHE_NULL_AREA)
+				{
+					height = dtMax(height, (unsigned char)lh);
+					portal &= (layer.cons[idx] >> 4);
+					if (preg != 0xff && preg != layer.regs[idx])
+						allSameReg = false;
+					preg = layer.regs[idx]; 
+					n++;
+				}
+			}
+		}
+	}
+	
+	int portalCount = 0;
+	for (int dir = 0; dir < 4; ++dir)
+		if (portal & (1<<dir))
+			portalCount++;
+	
+	shouldRemove = false;	// set only when several same-region cells share exactly one portal direction
+	if (n > 1 && portalCount == 1 && allSameReg)
+	{
+		shouldRemove = true;
+	}
+	
+	return height;
+}
+
+
+// TODO: move this somewhere else, once the layer meshing is done.
+dtStatus dtBuildTileCacheContours(dtTileCacheAlloc* alloc,
+								  dtTileCacheLayer& layer,
+								  const int walkableClimb, 	const float maxError,
+								  dtTileCacheContourSet& lcset)
+{
+	dtAssert(alloc);
+	// Builds one simplified contour per region of 'layer' into lcset; returns DT_SUCCESS or a DT_FAILURE status.
+	const int w = (int)layer.header->width;
+	const int h = (int)layer.header->height;
+	
+	lcset.nconts = 0;	// published only after the array exists, so a failed build leaves a set that is safe to free
+	lcset.conts = (dtTileCacheContour*)alloc->alloc(sizeof(dtTileCacheContour)*layer.regCount);
+	if (!lcset.conts)
+		return DT_FAILURE | DT_OUT_OF_MEMORY;
+	memset(lcset.conts, 0, sizeof(dtTileCacheContour)*layer.regCount);
+	lcset.nconts = layer.regCount;
+	// Allocate temp buffer for contour tracing.
+	const int maxTempVerts = (w+h)*2 * 2; // Twice around the layer.
+	
+	dtFixedArray<unsigned char> tempVerts(alloc, maxTempVerts*4);
+	if (!tempVerts)
+		return DT_FAILURE | DT_OUT_OF_MEMORY;
+	
+	dtFixedArray<unsigned short> tempPoly(alloc, maxTempVerts);
+	if (!tempPoly)
+		return DT_FAILURE | DT_OUT_OF_MEMORY;
+
+	dtTempContour temp(tempVerts, maxTempVerts, tempPoly, maxTempVerts);
+	
+	// Find contours.
+	for (int y = 0; y < h; ++y)
+	{
+		for (int x = 0; x < w; ++x)
+		{
+			const int idx = x+y*w;
+			const unsigned char ri = layer.regs[idx];
+			if (ri == 0xff)
+				continue;
+			
+			dtTileCacheContour& cont = lcset.conts[ri];
+			
+			if (cont.nverts > 0)	// this region's contour has already been stored
+				continue;
+			
+			cont.reg = ri;
+			cont.area = layer.areas[idx];
+			
+			if (!walkContour(layer, x, y, temp))
+			{
+				// Too complex contour.
+				// Note: If you hit here often, try increasing 'maxTempVerts'.
+				return DT_FAILURE | DT_BUFFER_TOO_SMALL;
+			}
+			
+			simplifyContour(temp, maxError);
+			
+			// Store contour.
+			cont.nverts = temp.nverts;
+			if (cont.nverts > 0)
+			{
+				cont.verts = (unsigned char*)alloc->alloc(sizeof(unsigned char)*4*temp.nverts);
+				if (!cont.verts)
+					return DT_FAILURE | DT_OUT_OF_MEMORY;
+				
+				for (int i = 0, j = temp.nverts-1; i < temp.nverts; j=i++)
+				{
+					unsigned char* dst = &cont.verts[j*4];
+					unsigned char* v = &temp.verts[j*4];
+					unsigned char* vn = &temp.verts[i*4];
+					unsigned char nei = vn[3]; // The neighbour value for segment j is read from the following vertex i.
+					bool shouldRemove = false;
+					unsigned char lh = getCornerHeight(layer, (int)v[0], (int)v[1], (int)v[2],
+													   walkableClimb, shouldRemove);
+					
+					dst[0] = v[0];
+					dst[1] = lh;
+					dst[2] = v[2];
+					
+					// Store portal direction and remove status to the fourth component.
+					dst[3] = 0x0f;
+					if (nei != 0xff && nei >= 0xf8)
+						dst[3] = nei - 0xf8;
+					if (shouldRemove)
+						dst[3] |= 0x80;
+				}
+			}
+		}
+	}
+	
+	return DT_SUCCESS;
+}	
+
+
+
+static const int VERTEX_BUCKET_COUNT2 = (1<<8);
+// Hashes integer coordinates into a bucket index in [0, VERTEX_BUCKET_COUNT2).
+inline int computeVertexHash2(int x, int y, int z)
+{
+	// Large multiplicative constants; here arbitrarily chosen primes.
+	const unsigned int kMulX = 0x8da6b343;
+	const unsigned int kMulY = 0xd8163841;
+	const unsigned int kMulZ = 0xcb1ab31f;
+	const unsigned int mixed = kMulX * x + kMulY * y + kMulZ * z;	// unsigned arithmetic wraps modulo 2^32
+	return (int)(mixed & (VERTEX_BUCKET_COUNT2-1));	// the bucket count is a power of two, so masking selects the bucket
+}
+
+static unsigned short addVertex(unsigned short x, unsigned short y, unsigned short z,
+								unsigned short* verts, unsigned short* firstVert, unsigned short* nextVert, int& nv)
+{
+	int bucket = computeVertexHash2(x, 0, z);	// y is left out of the hash so vertices differing only in height share a bucket
+	unsigned short i = firstVert[bucket];
+	// Returns the index of an existing vertex with the same x/z and a y within 2 units, else appends a new one.
+	while (i != DT_TILECACHE_NULL_IDX)
+	{
+		const unsigned short* v = &verts[i*3];
+		if (v[0] == x && v[2] == z && (dtAbs(v[1] - y) <= 2))
+			return i;
+		i = nextVert[i]; // next
+	}
+	
+	// Could not find, create new.
+	i = (unsigned short)nv; nv++;
+	unsigned short* v = &verts[i*3];
+	v[0] = x;
+	v[1] = y;
+	v[2] = z;
+	nextVert[i] = firstVert[bucket];	// push the new vertex at the head of the bucket's chain
+	firstVert[bucket] = i;
+	
+	return (unsigned short)i;
+}
+
+
+struct rcEdge
+{
+	unsigned short vert[2];	// end point vertex indices; stored with vert[0] < vert[1]
+	unsigned short polyEdge[2];	// edge slot inside each polygon; [1] is 0xff until matched, or reused to hold a portal direction
+	unsigned short poly[2];	// polygons sharing the edge; poly[1] == poly[0] while the edge is open
+};
+
+static bool buildMeshAdjacency(dtTileCacheAlloc* alloc,
+							   unsigned short* polys, const int npolys,
+							   const unsigned short* verts, const int nverts,
+							   const dtTileCacheContourSet& lcset)
+{
+	// Based on code by Eric Lengyel from:
+	// http://www.terathon.com/code/edges.php
+	// Fills the second half of each polygon record with neighbour polygon indices, or 0x8000|dir for portal edges; returns false on allocation failure.
+	const int maxEdgeCount = npolys*MAX_VERTS_PER_POLY;
+	dtFixedArray<unsigned short> firstEdge(alloc, nverts + maxEdgeCount);
+	if (!firstEdge)
+		return false;
+	unsigned short* nextEdge = firstEdge + nverts;	// the same allocation holds both the per-vertex heads and the edge chain links
+	int edgeCount = 0;
+	
+	dtFixedArray<rcEdge> edges(alloc, maxEdgeCount);
+	if (!edges)
+		return false;
+	
+	for (int i = 0; i < nverts; i++)
+		firstEdge[i] = DT_TILECACHE_NULL_IDX;
+	
+	for (int i = 0; i < npolys; ++i)	// pass 1: register every polygon edge whose first vertex index is the smaller one
+	{
+		unsigned short* t = &polys[i*MAX_VERTS_PER_POLY*2];
+		for (int j = 0; j < MAX_VERTS_PER_POLY; ++j)
+		{
+			if (t[j] == DT_TILECACHE_NULL_IDX) break;
+			unsigned short v0 = t[j];
+			unsigned short v1 = (j+1 >= MAX_VERTS_PER_POLY || t[j+1] == DT_TILECACHE_NULL_IDX) ? t[0] : t[j+1];
+			if (v0 < v1)
+			{
+				rcEdge& edge = edges[edgeCount];
+				edge.vert[0] = v0;
+				edge.vert[1] = v1;
+				edge.poly[0] = (unsigned short)i;
+				edge.polyEdge[0] = (unsigned short)j;
+				edge.poly[1] = (unsigned short)i;
+				edge.polyEdge[1] = 0xff;
+				// Insert edge
+				nextEdge[edgeCount] = firstEdge[v0];
+				firstEdge[v0] = (unsigned short)edgeCount;
+				edgeCount++;
+			}
+		}
+	}
+	
+	for (int i = 0; i < npolys; ++i)	// pass 2: pair the reversed edges with the ones registered above
+	{
+		unsigned short* t = &polys[i*MAX_VERTS_PER_POLY*2];
+		for (int j = 0; j < MAX_VERTS_PER_POLY; ++j)
+		{
+			if (t[j] == DT_TILECACHE_NULL_IDX) break;
+			unsigned short v0 = t[j];
+			unsigned short v1 = (j+1 >= MAX_VERTS_PER_POLY || t[j+1] == DT_TILECACHE_NULL_IDX) ? t[0] : t[j+1];
+			if (v0 > v1)
+			{
+				bool found = false;
+				for (unsigned short e = firstEdge[v1]; e != DT_TILECACHE_NULL_IDX; e = nextEdge[e])
+				{
+					rcEdge& edge = edges[e];
+					if (edge.vert[1] == v0 && edge.poly[0] == edge.poly[1])
+					{
+						edge.poly[1] = (unsigned short)i;
+						edge.polyEdge[1] = (unsigned short)j;
+						found = true;
+						break;
+					}
+				}
+				if (!found)
+				{
+					// Matching edge not found, it is an open edge, add it.
+					rcEdge& edge = edges[edgeCount];
+					edge.vert[0] = v1;
+					edge.vert[1] = v0;
+					edge.poly[0] = (unsigned short)i;
+					edge.polyEdge[0] = (unsigned short)j;
+					edge.poly[1] = (unsigned short)i;
+					edge.polyEdge[1] = 0xff;
+					// Insert edge
+					nextEdge[edgeCount] = firstEdge[v1];
+					firstEdge[v1] = (unsigned short)edgeCount;
+					edgeCount++;
+				}
+			}
+		}
+	}
+	
+	// Mark portal edges.
+	for (int i = 0; i < lcset.nconts; ++i)
+	{
+		dtTileCacheContour& cont = lcset.conts[i];
+		if (cont.nverts < 3)
+			continue;
+		
+		for (int j = 0, k = cont.nverts-1; j < cont.nverts; k=j++)
+		{
+			const unsigned char* va = &cont.verts[k*4];
+			const unsigned char* vb = &cont.verts[j*4];
+			const unsigned char dir = va[3] & 0xf;
+			if (dir == 0xf)	// 0xf marks a contour segment that is not a portal
+				continue;
+			
+			if (dir == 0 || dir == 2)
+			{
+				// Find matching vertical edge
+				const unsigned short x = (unsigned short)va[0];
+				unsigned short zmin = (unsigned short)va[2];
+				unsigned short zmax = (unsigned short)vb[2];
+				if (zmin > zmax)
+					dtSwap(zmin, zmax);
+				
+				for (int m = 0; m < edgeCount; ++m)
+				{
+					rcEdge& e = edges[m];
+					// Skip connected edges.
+					if (e.poly[0] != e.poly[1])
+						continue;
+					const unsigned short* eva = &verts[e.vert[0]*3];
+					const unsigned short* evb = &verts[e.vert[1]*3];
+					if (eva[0] == x && evb[0] == x)
+					{
+						unsigned short ezmin = eva[2];
+						unsigned short ezmax = evb[2];
+						if (ezmin > ezmax)
+							dtSwap(ezmin, ezmax);
+						if (overlapRangeExl(zmin,zmax, ezmin, ezmax))
+						{
+							// Reuse the other polyedge to store dir.
+							e.polyEdge[1] = dir;
+						}
+					}
+				}
+			}
+			else
+			{
+				// Find matching horizontal edge
+				const unsigned short z = (unsigned short)va[2];
+				unsigned short xmin = (unsigned short)va[0];
+				unsigned short xmax = (unsigned short)vb[0];
+				if (xmin > xmax)
+					dtSwap(xmin, xmax);
+				for (int m = 0; m < edgeCount; ++m)
+				{
+					rcEdge& e = edges[m];
+					// Skip connected edges.
+					if (e.poly[0] != e.poly[1])
+						continue;
+					const unsigned short* eva = &verts[e.vert[0]*3];
+					const unsigned short* evb = &verts[e.vert[1]*3];
+					if (eva[2] == z && evb[2] == z)
+					{
+						unsigned short exmin = eva[0];
+						unsigned short exmax = evb[0];
+						if (exmin > exmax)
+							dtSwap(exmin, exmax);
+						if (overlapRangeExl(xmin,xmax, exmin, exmax))
+						{
+							// Reuse the other polyedge to store dir.
+							e.polyEdge[1] = dir;
+						}
+					}
+				}
+			}
+		}
+	}
+	
+	
+	// Store adjacency
+	for (int i = 0; i < edgeCount; ++i)
+	{
+		const rcEdge& e = edges[i];
+		if (e.poly[0] != e.poly[1])
+		{
+			unsigned short* p0 = &polys[e.poly[0]*MAX_VERTS_PER_POLY*2];
+			unsigned short* p1 = &polys[e.poly[1]*MAX_VERTS_PER_POLY*2];
+			p0[MAX_VERTS_PER_POLY + e.polyEdge[0]] = e.poly[1];
+			p1[MAX_VERTS_PER_POLY + e.polyEdge[1]] = e.poly[0];
+		}
+		else if (e.polyEdge[1] != 0xff)
+		{
+			unsigned short* p0 = &polys[e.poly[0]*MAX_VERTS_PER_POLY*2];
+			p0[MAX_VERTS_PER_POLY + e.polyEdge[0]] = 0x8000 | (unsigned short)e.polyEdge[1];
+		}
+		
+	}
+	
+	return true;
+}
+
+
+inline int prev(int i, int n) { return (i > 0) ? i-1 : n-1; }	// previous index in a ring of n, wrapping 0 to n-1
+inline int next(int i, int n) { return (i+1 >= n) ? 0 : i+1; }	// next index in a ring of n, wrapping n-1 to 0
+
+inline int area2(const unsigned char* a, const unsigned char* b, const unsigned char* c)
+{
+	return ((int)b[0] - (int)a[0]) * ((int)c[2] - (int)a[2]) - ((int)c[0] - (int)a[0]) * ((int)b[2] - (int)a[2]);	// 2D cross product of (b-a) and (c-a) using components 0 and 2; zero when a, b, c are collinear
+}
+
+//	Exclusive or: true iff exactly one argument is true.
+//	Both parameters are already bool, so they are guaranteed to
+//	be 0/1 values and a plain inequality test gives the same
+//	result as xor-ing their negations.
+inline bool xorb(bool x, bool y)
+{
+	return x != y;
+}
+
+// Returns true iff c is strictly to the left of the directed
+// line through a to b.
+inline bool left(const unsigned char* a, const unsigned char* b, const unsigned char* c)
+{
+	return area2(a, b, c) < 0;	// strict: collinear points (area2 == 0) are not "left"
+}
+
+inline bool leftOn(const unsigned char* a, const unsigned char* b, const unsigned char* c)
+{
+	return area2(a, b, c) <= 0;	// like left(), but collinear points (area2 == 0) also qualify
+}
+
+inline bool collinear(const unsigned char* a, const unsigned char* b, const unsigned char* c)
+{
+	return area2(a, b, c) == 0;	// exact integer test: the three points lie on one line in the x/z plane
+}
+
+//	Returns true iff ab properly intersects cd: they share
+//	a point interior to both segments.  The properness of the
+//	intersection is ensured by using strict leftness.
+static bool intersectProp(const unsigned char* a, const unsigned char* b,
+						  const unsigned char* c, const unsigned char* d)
+{
+	// Eliminate improper cases: an end point lying on the line through the other segment.
+	const bool touching = collinear(a,b,c) || collinear(a,b,d) ||
+						  collinear(c,d,a) || collinear(c,d,b);
+	if (touching) return false;
+	// Proper crossing: c and d on opposite sides of ab, and a and b on opposite sides of cd.
+	return xorb(left(a,b,c), left(a,b,d)) && xorb(left(c,d,a), left(c,d,b));
+}
+
+// Returns T iff (a,b,c) are collinear and point c lies 
+// on the closed segment ab.
+static bool between(const unsigned char* a, const unsigned char* b, const unsigned char* c)
+{
+	if (!collinear(a, b, c))
+		return false;
+	// Compare along x (component 0) unless ab has constant x, in which case compare along z (component 2).
+	const int axis = (a[0] != b[0]) ? 0 : 2;
+	const bool ascending = (a[axis] <= c[axis]) && (c[axis] <= b[axis]);
+	const bool descending = (a[axis] >= c[axis]) && (c[axis] >= b[axis]);
+	return ascending || descending;
+}
+
+// Returns true iff segments ab and cd intersect, properly or improperly.
+static bool intersect(const unsigned char* a, const unsigned char* b,
+					  const unsigned char* c, const unsigned char* d)
+{
+	// A proper crossing counts, and so does any end point resting on the other segment.
+	if (intersectProp(a, b, c, d))
+		return true;
+	const bool endpointOnAB = between(a, b, c) || between(a, b, d);
+	if (endpointOnAB)
+		return true;
+	return between(c, d, a) || between(c, d, b);
+}
+
+static bool vequal(const unsigned char* a, const unsigned char* b)
+{
+	return (a[0] == b[0]) && (a[2] == b[2]);	// compares components 0 and 2 only; components 1 and 3 are ignored
+}
+
+// Returns T iff (v_i, v_j) is a proper internal *or* external
+// diagonal of P, *ignoring edges incident to v_i and v_j*.
+static bool diagonalie(int i, int j, int n, const unsigned char* verts, const unsigned short* indices)
+{
+	// End points of the candidate diagonal (bit 15 of an index is a flag and is masked off).
+	const unsigned char* diagA = &verts[(indices[i] & 0x7fff) * 4];
+	const unsigned char* diagB = &verts[(indices[j] & 0x7fff) * 4];
+	
+	// Test the diagonal against every polygon edge (e0,e1).
+	for (int e0 = 0; e0 < n; e0++)
+	{
+		const int e1 = next(e0, n);
+		// Edges incident to i or j are skipped.
+		if (e0 == i || e1 == i || e0 == j || e1 == j)
+			continue;
+		
+		const unsigned char* edgeA = &verts[(indices[e0] & 0x7fff) * 4];
+		const unsigned char* edgeB = &verts[(indices[e1] & 0x7fff) * 4];
+		// Edges that share a position with one of the diagonal's end points are skipped as well.
+		if (vequal(diagA, edgeA) || vequal(diagB, edgeA) || vequal(diagA, edgeB) || vequal(diagB, edgeB))
+			continue;
+		if (intersect(diagA, diagB, edgeA, edgeB))
+			return false;
+	}
+	return true;
+}
+
+// Returns true iff the diagonal (i,j) is strictly internal to the 
+// polygon P in the neighborhood of the i endpoint.
+static bool	inCone(int i, int j, int n, const unsigned char* verts, const unsigned short* indices)
+{
+	const unsigned char* cur = &verts[(indices[i] & 0x7fff) * 4];
+	const unsigned char* other = &verts[(indices[j] & 0x7fff) * 4];
+	const unsigned char* after = &verts[(indices[next(i, n)] & 0x7fff) * 4];
+	const unsigned char* before = &verts[(indices[prev(i, n)] & 0x7fff) * 4];
+	
+	// Convex corner at i [ i+1 left or on (i-1,i) ]: both strict left tests must pass.
+	const bool convex = leftOn(before, cur, after);
+	if (convex)
+		return left(cur, other, before) && left(other, cur, after);
+	// Otherwise the corner is reflex (assuming i-1, i, i+1 are not collinear).
+	return !(leftOn(cur, other, after) && leftOn(other, cur, before));
+}
+
+// Returns T iff (v_i, v_j) is a proper internal
+// diagonal of P.
+static bool diagonal(int i, int j, int n, const unsigned char* verts, const unsigned short* indices)
+{
+	return inCone(i, j, n, verts, indices) && diagonalie(i, j, n, verts, indices);	// the local cone test runs first; the loop over all edges runs only if it passes
+}
+
+static int triangulate(int n, const unsigned char* verts, unsigned short* indices, unsigned short* tris)
+{
+	int ntris = 0;
+	unsigned short* dst = tris;
+	// Cuts the n-gon in 'indices' into triangles written to 'tris' (3 indices each); returns their count, or -ntris if it gets stuck.
+	// The last bit of the index is used to indicate if the vertex can be removed.
+	for (int i = 0; i < n; i++)
+	{
+		int i1 = next(i, n);
+		int i2 = next(i1, n);
+		if (diagonal(i, i2, n, verts, indices))
+			indices[i1] |= 0x8000;
+	}
+	
+	while (n > 3)
+	{
+		int minLen = -1;
+		int mini = -1;
+		for (int i = 0; i < n; i++)	// among removable vertices, pick the one whose cutting diagonal is shortest
+		{
+			int i1 = next(i, n);
+			if (indices[i1] & 0x8000)
+			{
+				const unsigned char* p0 = &verts[(indices[i] & 0x7fff) * 4];
+				const unsigned char* p2 = &verts[(indices[next(i1, n)] & 0x7fff) * 4];
+				
+				const int dx = (int)p2[0] - (int)p0[0];
+				const int dz = (int)p2[2] - (int)p0[2];
+				const int len = dx*dx + dz*dz;
+				if (minLen < 0 || len < minLen)
+				{
+					minLen = len;
+					mini = i;
+				}
+			}
+		}
+		
+		if (mini == -1)
+		{
+			// Should not happen.
+			/*			printf("mini == -1 ntris=%d n=%d\n", ntris, n);
+			 for (int i = 0; i < n; i++)
+			 {
+			 printf("%d ", indices[i] & 0x0fffffff);
+			 }
+			 printf("\n");*/
+			return -ntris;
+		}
+		
+		int i = mini;
+		int i1 = next(i, n);
+		int i2 = next(i1, n);
+		
+		*dst++ = indices[i] & 0x7fff;
+		*dst++ = indices[i1] & 0x7fff;
+		*dst++ = indices[i2] & 0x7fff;
+		ntris++;
+		
+		// Removes P[i1] by copying P[i+1]...P[n-1] left one index.
+		n--;
+		for (int k = i1; k < n; k++)
+			indices[k] = indices[k+1];
+		
+		if (i1 >= n) i1 = 0;
+		i = prev(i1,n);
+		// Update diagonal flags.
+		if (diagonal(prev(i, n), i1, n, verts, indices))
+			indices[i] |= 0x8000;
+		else
+			indices[i] &= 0x7fff;
+		
+		if (diagonal(i, next(i1, n), n, verts, indices))
+			indices[i1] |= 0x8000;
+		else
+			indices[i1] &= 0x7fff;
+	}
+	
+	// Append the remaining triangle.
+	*dst++ = indices[0] & 0x7fff;
+	*dst++ = indices[1] & 0x7fff;
+	*dst++ = indices[2] & 0x7fff;
+	ntris++;
+	
+	return ntris;
+}
+
+
+static int countPolyVerts(const unsigned short* p)
+{
+	int count = 0;	// Slots in use before the first null index, capped at a full polygon.
+	while (count < MAX_VERTS_PER_POLY && p[count] != DT_TILECACHE_NULL_IDX)
+		++count;
+	return count;
+}
+
+inline bool uleft(const unsigned short* a, const unsigned short* b, const unsigned short* c)
+{
+	const int cross = ((int)b[0] - (int)a[0]) * ((int)c[2] - (int)a[2]) - ((int)c[0] - (int)a[0]) * ((int)b[2] - (int)a[2]);
+	return cross < 0;	// Sign of the 2D cross product (b-a) x (c-a) taken on components 0 and 2.
+}
+
+static int getPolyMergeValue(unsigned short* pa, unsigned short* pb,
+							 const unsigned short* verts, int& ea, int& eb)
+{
+	const int na = countPolyVerts(pa);
+	const int nb = countPolyVerts(pb);
+	// Returns the squared length of the edge shared by pa and pb, or -1 when they cannot be merged.
+	// If the merged polygon would be too big, do not merge.
+	if (na+nb-2 > MAX_VERTS_PER_POLY)
+		return -1;
+	
+	// Check if the polygons share an edge.
+	ea = -1;
+	eb = -1;
+	// Edge endpoints are sorted before comparing, so edge direction does not matter.
+	for (int i = 0; i < na; ++i)
+	{
+		unsigned short va0 = pa[i];
+		unsigned short va1 = pa[(i+1) % na];
+		if (va0 > va1)
+			dtSwap(va0, va1);
+		for (int j = 0; j < nb; ++j)
+		{
+			unsigned short vb0 = pb[j];
+			unsigned short vb1 = pb[(j+1) % nb];
+			if (vb0 > vb1)
+				dtSwap(vb0, vb1);
+			if (va0 == vb0 && va1 == vb1)
+			{
+				ea = i;
+				eb = j;
+				break;	// Leaves the inner loop only; the outer scan continues.
+			}
+		}
+	}
+	
+	// No common edge, cannot merge.
+	if (ea == -1 || eb == -1)
+		return -1;
+	
+	// Check to see if the merged polygon would be convex.
+	unsigned short va, vb, vc;
+	// First junction: vertex before ea in pa, pa[ea], and the vertex two past eb in pb.
+	va = pa[(ea+na-1) % na];
+	vb = pa[ea];
+	vc = pb[(eb+2) % nb];
+	if (!uleft(&verts[va*3], &verts[vb*3], &verts[vc*3]))
+		return -1;
+	// Second junction: the same test with the roles of pa and pb exchanged.
+	va = pb[(eb+nb-1) % nb];
+	vb = pb[eb];
+	vc = pa[(ea+2) % na];
+	if (!uleft(&verts[va*3], &verts[vb*3], &verts[vc*3]))
+		return -1;
+	// Merge value: squared length of the shared edge, using components 0 and 2.
+	va = pa[ea];
+	vb = pa[(ea+1)%na];
+	
+	int dx = (int)verts[va*3+0] - (int)verts[vb*3+0];
+	int dy = (int)verts[va*3+2] - (int)verts[vb*3+2];
+	
+	return dx*dx + dy*dy;
+}
+
+static void mergePolys(unsigned short* pa, unsigned short* pb, int ea, int eb)
+{
+	// Scratch polygon, pre-filled with 0xff bytes so that unused slots stay null.
+	unsigned short merged[MAX_VERTS_PER_POLY*2];
+	memset(merged, 0xff, sizeof(merged));
+	
+	const int countA = countPolyVerts(pa);
+	const int countB = countPolyVerts(pb);
+	unsigned short* out = merged;
+	
+	// Copy countA-1 vertices of pa, starting just after ea (pa[ea] itself is left out).
+	for (int step = 1; step < countA; ++step)
+		*out++ = pa[(ea+step) % countA];
+	// Then countB-1 vertices of pb, starting just after eb.
+	for (int step = 1; step < countB; ++step)
+		*out++ = pb[(eb+step) % countB];
+	
+	memcpy(pa, merged, sizeof(unsigned short)*MAX_VERTS_PER_POLY);	// The result replaces pa.
+}
+
+
+static void pushFront(unsigned short v, unsigned short* arr, int& an)
+{
+	for (int slot = an; slot > 0; --slot)	// Shift the existing entries up by one.
+		arr[slot] = arr[slot-1];
+	arr[0] = v;
+	++an;	// One more element stored; no bounds check is done here.
+}
+
+static void pushBack(unsigned short v, unsigned short* arr, int& an)
+{
+	// Store v after the last element and bump the count; no bounds check is done here.
+	arr[an++] = v;
+}
+
+static bool canRemoveVertex(dtTileCachePolyMesh& mesh, const unsigned short rem)
+{
+	// Decides whether vertex 'rem' can be removed: enough edges must remain to form a
+	// polygon, the scratch buffer must suffice, and at most two open edges may touch it.
+	int numTouchedVerts = 0;
+	int numRemainingEdges = 0;
+	for (int i = 0; i < mesh.npolys; ++i)
+	{
+		unsigned short* p = &mesh.polys[i*MAX_VERTS_PER_POLY*2];
+		const int nv = countPolyVerts(p);
+		int numRemoved = 0;
+		int numVerts = 0;
+		for (int j = 0; j < nv; ++j)
+		{
+			if (p[j] == rem)
+			{
+				numTouchedVerts++;
+				numRemoved++;
+			}
+			numVerts++;
+		}
+		if (numRemoved)
+		{
+			// Edges of this polygon that are left once 'rem' is taken out.
+			numRemainingEdges += numVerts-(numRemoved+1);
+		}
+	}
+	
+	// There would be too few edges remaining to create a polygon.
+	// This can happen for example when a tip of a triangle is marked
+	// as deletion, but there are no other polys that share the vertex.
+	// In this case, the vertex should not be removed.
+	if (numRemainingEdges <= 2)
+		return false;
+	
+	// Check that there is enough memory for the test.
+	const int maxEdges = numTouchedVerts*2;
+	if (maxEdges > MAX_REM_EDGES)
+		return false;
+	
+	// Find edges which share the removed vertex.
+	// Each edge uses three slots: [0] = rem, [1] = other endpoint, [2] = share count,
+	// so the buffer must hold 3 entries for each of up to MAX_REM_EDGES edges.
+	unsigned short edges[MAX_REM_EDGES*3];
+	int nedges = 0;
+	for (int i = 0; i < mesh.npolys; ++i)
+	{
+		unsigned short* p = &mesh.polys[i*MAX_VERTS_PER_POLY*2];
+		const int nv = countPolyVerts(p);
+		
+		// Collect edges which touches the removed vertex.
+		for (int j = 0, k = nv-1; j < nv; k = j++)
+		{
+			if (p[j] == rem || p[k] == rem)
+			{
+				// Arrange edge so that a=rem.
+				int a = p[j], b = p[k];
+				if (b == rem)
+					dtSwap(a,b);
+				
+				// Check if the edge exists
+				bool exists = false;
+				for (int m = 0; m < nedges; ++m)
+				{
+					unsigned short* e = &edges[m*3];
+					if (e[1] == b)
+					{
+						// Exists, increment vertex share count.
+						e[2]++;
+						exists = true;
+					}
+				}
+				// Add new edge.
+				if (!exists)
+				{
+					unsigned short* e = &edges[nedges*3];
+					e[0] = (unsigned short)a;
+					e[1] = (unsigned short)b;
+					e[2] = 1;
+					nedges++;
+				}
+			}
+		}
+	}
+	
+	// There should be no more than 2 open edges.
+	// This catches the case that two non-adjacent polygons
+	// share the removed vertex. In that case, do not remove the vertex.
+	int numOpenEdges = 0;
+	for (int i = 0; i < nedges; ++i)
+	{
+		if (edges[i*3+2] < 2)
+			numOpenEdges++;
+	}
+	if (numOpenEdges > 2)
+		return false;
+	
+	return true;
+}
+
+static dtStatus removeVertex(dtTileCachePolyMesh& mesh, const unsigned short rem, const int maxTris)
+{
+	// Removes vertex 'rem' from the mesh and re-fills the hole it leaves.
+	//
+	// Steps:
+	//  1. Every polygon that references 'rem' is deleted; its edges that do not
+	//     touch 'rem' are collected together with the polygon's area id.
+	//  2. The vertex is erased from mesh.verts and all higher indices shift down.
+	//  3. The collected edges are chained into the hole boundary, which is
+	//     triangulated, merged into larger polygons and appended to the mesh.
+	//
+	// Returns DT_SUCCESS, or DT_FAILURE | DT_BUFFER_TOO_SMALL when the hole needs
+	// more than MAX_REM_EDGES edges, boundary vertices or triangles. Once the mesh
+	// holds maxTris polygons, further new polygons are silently dropped.
+	
+	int nedges = 0;
+	unsigned short edges[MAX_REM_EDGES*3];
+	int nhole = 0;
+	unsigned short hole[MAX_REM_EDGES];
+	int nharea = 0;
+	unsigned short harea[MAX_REM_EDGES];
+	
+	for (int i = 0; i < mesh.npolys; ++i)
+	{
+		unsigned short* p = &mesh.polys[i*MAX_VERTS_PER_POLY*2];
+		const int nv = countPolyVerts(p);
+		bool hasRem = false;
+		for (int j = 0; j < nv; ++j)
+			if (p[j] == rem) hasRem = true;
+		if (hasRem)
+		{
+			// Collect edges which does not touch the removed vertex.
+			for (int j = 0, k = nv-1; j < nv; k = j++)
+			{
+				if (p[j] != rem && p[k] != rem)
+				{
+					if (nedges >= MAX_REM_EDGES)
+						return DT_FAILURE | DT_BUFFER_TOO_SMALL;
+					unsigned short* e = &edges[nedges*3];
+					e[0] = p[k];
+					e[1] = p[j];
+					e[2] = mesh.areas[i];
+					nedges++;
+				}
+			}
+			// Remove the polygon.
+			unsigned short* p2 = &mesh.polys[(mesh.npolys-1)*MAX_VERTS_PER_POLY*2];
+			memcpy(p,p2,sizeof(unsigned short)*MAX_VERTS_PER_POLY);
+			memset(p+MAX_VERTS_PER_POLY,0xff,sizeof(unsigned short)*MAX_VERTS_PER_POLY);
+			mesh.areas[i] = mesh.areas[mesh.npolys-1];
+			mesh.npolys--;
+			--i;	// Re-examine slot i, which now holds the former last polygon.
+		}
+	}
+	
+	// Remove vertex. The loop stops one short of the end so that it never reads past the last valid vertex.
+	for (int i = (int)rem; i < mesh.nverts-1; ++i)
+	{
+		mesh.verts[i*3+0] = mesh.verts[(i+1)*3+0];
+		mesh.verts[i*3+1] = mesh.verts[(i+1)*3+1];
+		mesh.verts[i*3+2] = mesh.verts[(i+1)*3+2];
+	}
+	mesh.nverts--;
+	
+	// Adjust indices to match the removed vertex layout.
+	for (int i = 0; i < mesh.npolys; ++i)
+	{
+		unsigned short* p = &mesh.polys[i*MAX_VERTS_PER_POLY*2];
+		const int nv = countPolyVerts(p);
+		for (int j = 0; j < nv; ++j)
+			if (p[j] > rem) p[j]--;
+	}
+	for (int i = 0; i < nedges; ++i)
+	{
+		if (edges[i*3+0] > rem) edges[i*3+0]--;
+		if (edges[i*3+1] > rem) edges[i*3+1]--;
+	}
+	
+	if (nedges == 0)
+		return DT_SUCCESS;
+	
+	// Start with one vertex, keep appending connected
+	// segments to the start and end of the hole.
+	pushBack(edges[0], hole, nhole);
+	pushBack(edges[2], harea, nharea);
+	
+	while (nedges)
+	{
+		bool match = false;
+		
+		for (int i = 0; i < nedges; ++i)
+		{
+			const unsigned short ea = edges[i*3+0];
+			const unsigned short eb = edges[i*3+1];
+			const unsigned short a = edges[i*3+2];
+			bool add = false;
+			if (hole[0] == eb)
+			{
+				// The segment matches the beginning of the hole boundary.
+				if (nhole >= MAX_REM_EDGES)
+					return DT_FAILURE | DT_BUFFER_TOO_SMALL;
+				pushFront(ea, hole, nhole);
+				pushFront(a, harea, nharea);
+				add = true;
+			}
+			else if (hole[nhole-1] == ea)
+			{
+				// The segment matches the end of the hole boundary.
+				if (nhole >= MAX_REM_EDGES)
+					return DT_FAILURE | DT_BUFFER_TOO_SMALL;
+				pushBack(eb, hole, nhole);
+				pushBack(a, harea, nharea);
+				add = true;
+			}
+			if (add)
+			{
+				// The edge segment was added, remove it.
+				edges[i*3+0] = edges[(nedges-1)*3+0];
+				edges[i*3+1] = edges[(nedges-1)*3+1];
+				edges[i*3+2] = edges[(nedges-1)*3+2];
+				--nedges;
+				match = true;
+				--i;
+			}
+		}
+		
+		if (!match)
+			break;
+	}
+	
+	// tverts holds 4 bytes per hole vertex (written at i*4+0..3 below), hence the *4.
+	unsigned short tris[MAX_REM_EDGES*3];
+	unsigned char tverts[MAX_REM_EDGES*4];
+	unsigned short tpoly[MAX_REM_EDGES*3];
+	
+	// Generate temp vertex array for triangulation.
+	for (int i = 0; i < nhole; ++i)
+	{
+		const unsigned short pi = hole[i];
+		tverts[i*4+0] = (unsigned char)mesh.verts[pi*3+0];
+		tverts[i*4+1] = (unsigned char)mesh.verts[pi*3+1];
+		tverts[i*4+2] = (unsigned char)mesh.verts[pi*3+2];
+		tverts[i*4+3] = 0;
+		tpoly[i] = (unsigned short)i;
+	}
+	
+	// Triangulate the hole.
+	int ntris = triangulate(nhole, tverts, tpoly, tris);
+	if (ntris < 0)
+	{
+		// TODO: issue warning!
+		ntris = -ntris;
+	}
+	
+	if (ntris > MAX_REM_EDGES)
+		return DT_FAILURE | DT_BUFFER_TOO_SMALL;
+	
+	unsigned short polys[MAX_REM_EDGES*MAX_VERTS_PER_POLY];
+	unsigned char pareas[MAX_REM_EDGES];
+	
+	// Build initial polygons.
+	int npolys = 0;
+	memset(polys, 0xff, ntris*MAX_VERTS_PER_POLY*sizeof(unsigned short));
+	for (int j = 0; j < ntris; ++j)
+	{
+		unsigned short* t = &tris[j*3];
+		if (t[0] != t[1] && t[0] != t[2] && t[1] != t[2])
+		{
+			polys[npolys*MAX_VERTS_PER_POLY+0] = hole[t[0]];
+			polys[npolys*MAX_VERTS_PER_POLY+1] = hole[t[1]];
+			polys[npolys*MAX_VERTS_PER_POLY+2] = hole[t[2]];
+			pareas[npolys] = (unsigned char)harea[t[0]];
+			npolys++;
+		}
+	}
+	if (!npolys)
+		return DT_SUCCESS;
+	
+	// Merge polygons.
+	int maxVertsPerPoly = MAX_VERTS_PER_POLY;
+	if (maxVertsPerPoly > 3)
+	{
+		for (;;)
+		{
+			// Find best polygons to merge.
+			int bestMergeVal = 0;
+			int bestPa = 0, bestPb = 0, bestEa = 0, bestEb = 0;
+			
+			for (int j = 0; j < npolys-1; ++j)
+			{
+				unsigned short* pj = &polys[j*MAX_VERTS_PER_POLY];
+				for (int k = j+1; k < npolys; ++k)
+				{
+					unsigned short* pk = &polys[k*MAX_VERTS_PER_POLY];
+					int ea, eb;
+					int v = getPolyMergeValue(pj, pk, mesh.verts, ea, eb);
+					if (v > bestMergeVal)
+					{
+						bestMergeVal = v;
+						bestPa = j;
+						bestPb = k;
+						bestEa = ea;
+						bestEb = eb;
+					}
+				}
+			}
+			
+			if (bestMergeVal > 0)
+			{
+				// Found best, merge.
+				unsigned short* pa = &polys[bestPa*MAX_VERTS_PER_POLY];
+				unsigned short* pb = &polys[bestPb*MAX_VERTS_PER_POLY];
+				mergePolys(pa, pb, bestEa, bestEb);
+				memcpy(pb, &polys[(npolys-1)*MAX_VERTS_PER_POLY], sizeof(unsigned short)*MAX_VERTS_PER_POLY);
+				pareas[bestPb] = pareas[npolys-1];
+				npolys--;
+			}
+			else
+			{
+				// Could not merge any polygons, stop.
+				break;
+			}
+		}
+	}
+	
+	// Store polygons.
+	for (int i = 0; i < npolys; ++i)
+	{
+		if (mesh.npolys >= maxTris) break;
+		unsigned short* p = &mesh.polys[mesh.npolys*MAX_VERTS_PER_POLY*2];
+		memset(p,0xff,sizeof(unsigned short)*MAX_VERTS_PER_POLY*2);
+		for (int j = 0; j < MAX_VERTS_PER_POLY; ++j)
+			p[j] = polys[i*MAX_VERTS_PER_POLY+j];
+		mesh.areas[mesh.npolys] = pareas[i];
+		mesh.npolys++;
+		if (mesh.npolys > maxTris)
+			return DT_FAILURE | DT_BUFFER_TOO_SMALL;
+	}
+	
+	return DT_SUCCESS;
+}
+
+
+dtStatus dtBuildTileCachePolyMesh(dtTileCacheAlloc* alloc,
+								  dtTileCacheContourSet& lcset,
+								  dtTileCachePolyMesh& mesh)
+{
+	dtAssert(alloc);
+	// Builds 'mesh' from the contours in 'lcset': triangulate, merge into polygons, remove flagged vertices, build adjacency.
+	int maxVertices = 0;
+	int maxTris = 0;
+	int maxVertsPerCont = 0;
+	for (int i = 0; i < lcset.nconts; ++i)
+	{
+		// Skip null contours.
+		if (lcset.conts[i].nverts < 3) continue;
+		maxVertices += lcset.conts[i].nverts;
+		maxTris += lcset.conts[i].nverts - 2;
+		maxVertsPerCont = dtMax(maxVertsPerCont, lcset.conts[i].nverts);
+	}
+	// The sums above are upper bounds used to size every buffer allocated below.
+	// TODO: warn about too many vertices?
+	
+	mesh.nvp = MAX_VERTS_PER_POLY;
+	// Per-vertex flag, set to 1 for vertices that should be removed after meshing.
+	dtFixedArray<unsigned char> vflags(alloc, maxVertices);
+	if (!vflags)
+		return DT_FAILURE | DT_OUT_OF_MEMORY;
+	memset(vflags, 0, maxVertices);
+	
+	mesh.verts = (unsigned short*)alloc->alloc(sizeof(unsigned short)*maxVertices*3);
+	if (!mesh.verts)
+		return DT_FAILURE | DT_OUT_OF_MEMORY;
+	// Each polygon gets MAX_VERTS_PER_POLY*2 slots; this function writes only the first half.
+	mesh.polys = (unsigned short*)alloc->alloc(sizeof(unsigned short)*maxTris*MAX_VERTS_PER_POLY*2);
+	if (!mesh.polys)
+		return DT_FAILURE | DT_OUT_OF_MEMORY;
+
+	mesh.areas = (unsigned char*)alloc->alloc(sizeof(unsigned char)*maxTris);
+	if (!mesh.areas)
+		return DT_FAILURE | DT_OUT_OF_MEMORY;
+
+	mesh.flags = (unsigned short*)alloc->alloc(sizeof(unsigned short)*maxTris);
+	if (!mesh.flags)
+		return DT_FAILURE | DT_OUT_OF_MEMORY;
+
+	// Just allocate and clean the mesh flags array. The user is responsible for filling it.
+	memset(mesh.flags, 0, sizeof(unsigned short) * maxTris);
+		
+	mesh.nverts = 0;
+	mesh.npolys = 0;
+	
+	memset(mesh.verts, 0, sizeof(unsigned short)*maxVertices*3);
+	memset(mesh.polys, 0xff, sizeof(unsigned short)*maxTris*MAX_VERTS_PER_POLY*2);
+	memset(mesh.areas, 0, sizeof(unsigned char)*maxTris);
+	// Lookup tables handed to addVertex(); presumably bucket heads and per-vertex chain links (confirm in addVertex).
+	unsigned short firstVert[VERTEX_BUCKET_COUNT2];
+	for (int i = 0; i < VERTEX_BUCKET_COUNT2; ++i)
+		firstVert[i] = DT_TILECACHE_NULL_IDX;
+	
+	dtFixedArray<unsigned short> nextVert(alloc, maxVertices);
+	if (!nextVert)
+		return DT_FAILURE | DT_OUT_OF_MEMORY;
+	memset(nextVert, 0, sizeof(unsigned short)*maxVertices);
+	// Scratch buffers sized for the largest contour.
+	dtFixedArray<unsigned short> indices(alloc, maxVertsPerCont);
+	if (!indices)
+		return DT_FAILURE | DT_OUT_OF_MEMORY;
+	
+	dtFixedArray<unsigned short> tris(alloc, maxVertsPerCont*3);
+	if (!tris)
+		return DT_FAILURE | DT_OUT_OF_MEMORY;
+
+	dtFixedArray<unsigned short> polys(alloc, maxVertsPerCont*MAX_VERTS_PER_POLY);
+	if (!polys)
+		return DT_FAILURE | DT_OUT_OF_MEMORY;
+	
+	for (int i = 0; i < lcset.nconts; ++i)
+	{
+		dtTileCacheContour& cont = lcset.conts[i];
+		
+		// Skip null contours.
+		if (cont.nverts < 3)
+			continue;
+		
+		// Triangulate contour
+		for (int j = 0; j < cont.nverts; ++j)
+			indices[j] = (unsigned short)j;
+		
+		int ntris = triangulate(cont.nverts, cont.verts, &indices[0], &tris[0]);
+		if (ntris <= 0)
+		{
+			// TODO: issue warning!
+			ntris = -ntris;	// Non-positive count: continue with the partial triangulation.
+		}
+		
+		// Add and merge vertices.
+		for (int j = 0; j < cont.nverts; ++j)
+		{
+			const unsigned char* v = &cont.verts[j*4];
+			indices[j] = addVertex((unsigned short)v[0], (unsigned short)v[1], (unsigned short)v[2],
+								   mesh.verts, firstVert, nextVert, mesh.nverts);
+			if (v[3] & 0x80)
+			{
+				// This vertex should be removed.
+				vflags[indices[j]] = 1;
+			}
+		}
+		
+		// Build initial polygons.
+		int npolys = 0;
+		memset(polys, 0xff, sizeof(unsigned short) * maxVertsPerCont * MAX_VERTS_PER_POLY);
+		for (int j = 0; j < ntris; ++j)
+		{
+			const unsigned short* t = &tris[j*3];
+			if (t[0] != t[1] && t[0] != t[2] && t[1] != t[2])
+			{
+				polys[npolys*MAX_VERTS_PER_POLY+0] = indices[t[0]];
+				polys[npolys*MAX_VERTS_PER_POLY+1] = indices[t[1]];
+				polys[npolys*MAX_VERTS_PER_POLY+2] = indices[t[2]];
+				npolys++;
+			}
+		}
+		if (!npolys)
+			continue;
+		
+		// Merge polygons.
+		int maxVertsPerPoly =MAX_VERTS_PER_POLY ;
+		if (maxVertsPerPoly > 3)
+		{
+			for(;;)
+			{
+				// Find best polygons to merge.
+				int bestMergeVal = 0;
+				int bestPa = 0, bestPb = 0, bestEa = 0, bestEb = 0;
+				
+				for (int j = 0; j < npolys-1; ++j)
+				{
+					unsigned short* pj = &polys[j*MAX_VERTS_PER_POLY];
+					for (int k = j+1; k < npolys; ++k)
+					{
+						unsigned short* pk = &polys[k*MAX_VERTS_PER_POLY];
+						int ea, eb;
+						int v = getPolyMergeValue(pj, pk, mesh.verts, ea, eb);
+						if (v > bestMergeVal)
+						{
+							bestMergeVal = v;
+							bestPa = j;
+							bestPb = k;
+							bestEa = ea;
+							bestEb = eb;
+						}
+					}
+				}
+				
+				if (bestMergeVal > 0)
+				{
+					// Found best, merge.
+					unsigned short* pa = &polys[bestPa*MAX_VERTS_PER_POLY];
+					unsigned short* pb = &polys[bestPb*MAX_VERTS_PER_POLY];
+					mergePolys(pa, pb, bestEa, bestEb);
+					memcpy(pb, &polys[(npolys-1)*MAX_VERTS_PER_POLY], sizeof(unsigned short)*MAX_VERTS_PER_POLY);
+					npolys--;
+				}
+				else
+				{
+					// Could not merge any polygons, stop.
+					break;
+				}
+			}
+		}
+		
+		// Store polygons.
+		for (int j = 0; j < npolys; ++j)
+		{
+			unsigned short* p = &mesh.polys[mesh.npolys*MAX_VERTS_PER_POLY*2];
+			unsigned short* q = &polys[j*MAX_VERTS_PER_POLY];
+			for (int k = 0; k < MAX_VERTS_PER_POLY; ++k)
+				p[k] = q[k];
+			mesh.areas[mesh.npolys] = cont.area;
+			mesh.npolys++;
+			if (mesh.npolys > maxTris)
+				return DT_FAILURE | DT_BUFFER_TOO_SMALL;
+		}
+	}
+	
+	// vflags is indexed by mesh vertex, so it is compacted after every successful removal.
+	// Remove edge vertices.
+	for (int i = 0; i < mesh.nverts; ++i)
+	{
+		if (vflags[i])
+		{
+			if (!canRemoveVertex(mesh, (unsigned short)i))
+				continue;
+			dtStatus status = removeVertex(mesh, (unsigned short)i, maxTris);
+			if (dtStatusFailed(status))
+				return status;
+			// Remove vertex
+			// Note: mesh.nverts is already decremented inside removeVertex()!
+			for (int j = i; j < mesh.nverts; ++j)
+				vflags[j] = vflags[j+1];
+			--i;	// Index i now holds the next vertex; visit it again.
+		}
+	}
+	
+	// Calculate adjacency.
+	if (!buildMeshAdjacency(alloc, mesh.polys, mesh.npolys, mesh.verts, mesh.nverts, lcset))
+		return DT_FAILURE | DT_OUT_OF_MEMORY;
+		
+	return DT_SUCCESS;
+}
+
+dtStatus dtMarkCylinderArea(dtTileCacheLayer& layer, const float* orig, const float cs, const float ch,
+							const float* pos, const float radius, const float height, const unsigned char areaId)
+{
+	// Writes areaId into every layer cell covered by the cylinder (base centre pos, radius, height).
+	// orig is the layer origin; cs and ch are the cell size and cell height. Always returns DT_SUCCESS.
+	const int w = (int)layer.header->width;
+	const int h = (int)layer.header->height;
+	const float ics = 1.0f/cs;
+	const float ich = 1.0f/ch;
+	
+	// Bounding box of the cylinder, first in world units and then in cell coordinates.
+	const float lo[3] = { pos[0] - radius, pos[1], pos[2] - radius };
+	const float hi[3] = { pos[0] + radius, pos[1] + height, pos[2] + radius };
+	int minx = (int)floorf((lo[0]-orig[0])*ics);
+	const int miny = (int)floorf((lo[1]-orig[1])*ich);
+	int minz = (int)floorf((lo[2]-orig[2])*ics);
+	int maxx = (int)floorf((hi[0]-orig[0])*ics);
+	const int maxy = (int)floorf((hi[1]-orig[1])*ich);
+	int maxz = (int)floorf((hi[2]-orig[2])*ics);
+	
+	// Nothing to mark when the box misses the layer on x or z.
+	if (maxx < 0 || minx >= w || maxz < 0 || minz >= h)
+		return DT_SUCCESS;
+	
+	// Clamp the x/z range to the layer.
+	if (minx < 0) minx = 0;
+	if (minz < 0) minz = 0;
+	if (maxx >= w) maxx = w-1;
+	if (maxz >= h) maxz = h-1;
+	
+	// Cylinder centre and squared radius in cell units (radius padded by half a cell).
+	const float px = (pos[0]-orig[0])*ics;
+	const float pz = (pos[2]-orig[2])*ics;
+	const float r2 = dtSqr(radius/cs + 0.5f);
+	
+	for (int z = minz; z <= maxz; ++z)
+	{
+		const float dz = (float)(z+0.5f) - pz;
+		for (int x = minx; x <= maxx; ++x)
+		{
+			const float dx = (float)(x+0.5f) - px;
+			const int cell = x+z*w;
+			// The cell centre must not lie outside the radius and the cell height must be in [miny, maxy].
+			if (!(dx*dx + dz*dz > r2))
+			{
+				const int y = layer.heights[cell];
+				if (y >= miny && y <= maxy)
+					layer.areas[cell] = areaId;
+			}
+		}
+	}
+
+	return DT_SUCCESS;
+}
+
+
+dtStatus dtBuildTileCacheLayer(dtTileCacheCompressor* comp,
+							   dtTileCacheLayerHeader* header,
+							   const unsigned char* heights,
+							   const unsigned char* areas,
+							   const unsigned char* cons,
+							   unsigned char** outData, int* outDataSize)
+{
+	// Builds one blob: the header (padded to 4 bytes) followed by the compressed heights, areas and cons grids; on success it is handed out via *outData.
+	const int headerSize = dtAlign4(sizeof(dtTileCacheLayerHeader));
+	const int gridSize = (int)header->width * (int)header->height;
+	const int bufferSize = gridSize*3;
+	const int maxDataSize = headerSize + comp->maxCompressedSize(bufferSize);
+	unsigned char* data = (unsigned char*)dtAlloc(maxDataSize, DT_ALLOC_PERM);
+	if (!data)
+		return DT_FAILURE | DT_OUT_OF_MEMORY;
+	memset(data, 0, maxDataSize);
+	memcpy(data, header, sizeof(dtTileCacheLayerHeader));	// Store header
+	
+	// Concatenate grid data for compression.
+	unsigned char* buffer = (unsigned char*)dtAlloc(bufferSize, DT_ALLOC_TEMP);
+	if (!buffer)
+	{
+		dtFree(data);	// Do not leak the output blob.
+		return DT_FAILURE | DT_OUT_OF_MEMORY;
+	}
+	memcpy(buffer, heights, gridSize);
+	memcpy(buffer+gridSize, areas, gridSize);
+	memcpy(buffer+gridSize*2, cons, gridSize);
+	
+	// Compress into the space after the header; the scratch buffer is released on every path.
+	int compressedSize = 0;
+	const dtStatus status = comp->compress(buffer, bufferSize, data + headerSize, maxDataSize - headerSize, &compressedSize);
+	dtFree(buffer);
+	if (dtStatusFailed(status))
+	{
+		dtFree(data);	// Outputs stay untouched on failure.
+		return status;
+	}
+	*outData = data;
+	*outDataSize = headerSize + compressedSize;
+	return DT_SUCCESS;
+}
+
+void dtFreeTileCacheLayer(dtTileCacheAlloc* alloc, dtTileCacheLayer* layer)
+{
+	dtAssert(alloc);
+	// The layer is allocated as one contiguous blob of data, so a single free through the same allocator releases it.
+	alloc->free(layer);
+}
+
+dtStatus dtDecompressTileCacheLayer(dtTileCacheAlloc* alloc, dtTileCacheCompressor* comp,
+									unsigned char* compressed, const int compressedSize,
+									dtTileCacheLayer** layerOut)
+{
+	dtAssert(alloc);
+	dtAssert(comp);
+	// Decompresses a layer blob into one allocation: [dtTileCacheLayer][header][heights|areas|cons|regs].
+	if (!layerOut)
+		return DT_FAILURE | DT_INVALID_PARAM;
+	if (!compressed)
+		return DT_FAILURE | DT_INVALID_PARAM;
+
+	*layerOut = 0;
+	// Reject blobs whose header carries an unexpected magic or version.
+	dtTileCacheLayerHeader* compressedHeader = (dtTileCacheLayerHeader*)compressed;
+	if (compressedHeader->magic != DT_TILECACHE_MAGIC)
+		return DT_FAILURE | DT_WRONG_MAGIC;
+	if (compressedHeader->version != DT_TILECACHE_VERSION)
+		return DT_FAILURE | DT_WRONG_VERSION;
+	
+	const int layerSize = dtAlign4(sizeof(dtTileCacheLayer));
+	const int headerSize = dtAlign4(sizeof(dtTileCacheLayerHeader));
+	const int gridSize = (int)compressedHeader->width * (int)compressedHeader->height;
+	const int bufferSize = layerSize + headerSize + gridSize*4;
+	
+	unsigned char* buffer = (unsigned char*)alloc->alloc(bufferSize);
+	if (!buffer)
+		return DT_FAILURE | DT_OUT_OF_MEMORY;
+	memset(buffer, 0, bufferSize);
+
+	dtTileCacheLayer* layer = (dtTileCacheLayer*)buffer;
+	dtTileCacheLayerHeader* header = (dtTileCacheLayerHeader*)(buffer + layerSize);
+	unsigned char* grids = buffer + layerSize + headerSize;
+	const int gridsSize = bufferSize - (layerSize + headerSize); 
+	
+	// Copy header
+	memcpy(header, compressedHeader, headerSize);
+	// Decompress grid.
+	int size = 0;
+	dtStatus status = comp->decompress(compressed+headerSize, compressedSize-headerSize,
+									   grids, gridsSize, &size);
+	if (dtStatusFailed(status))
+	{
+		alloc->free(buffer);	// Release through the allocator that provided the buffer.
+		return status;
+	}
+	// Point the layer at the four consecutive gridSize-byte grids.
+	layer->header = header;
+	layer->heights = grids;
+	layer->areas = grids + gridSize;
+	layer->cons = grids + gridSize*2;
+	layer->regs = grids + gridSize*3;
+	
+	*layerOut = layer;
+	
+	return DT_SUCCESS;
+}
+
+
+
+bool dtTileCacheHeaderSwapEndian(unsigned char* data, const int dataSize)
+{
+	// Checks that data starts with a tile cache layer header in either byte order, then
+	// byte-swaps the header's multi-byte fields in place. dataSize is not consulted.
+	dtTileCacheLayerHeader* header = (dtTileCacheLayerHeader*)data;
+	
+	int swappedMagic = DT_TILECACHE_MAGIC;
+	dtSwapEndian(&swappedMagic);
+	int swappedVersion = DT_TILECACHE_VERSION;
+	dtSwapEndian(&swappedVersion);
+	
+	// Accept only a matching magic/version pair, either as-is or byte-swapped.
+	const bool isNative = header->magic == DT_TILECACHE_MAGIC && header->version == DT_TILECACHE_VERSION;
+	const bool isSwapped = header->magic == swappedMagic && header->version == swappedVersion;
+	if (!isNative && !isSwapped)
+		return false;
+	
+	dtSwapEndian(&header->magic);
+	dtSwapEndian(&header->version);
+	dtSwapEndian(&header->tx);
+	dtSwapEndian(&header->ty);
+	dtSwapEndian(&header->tlayer);
+	for (int axis = 0; axis < 3; ++axis)
+		dtSwapEndian(&header->bmin[axis]);
+	for (int axis = 0; axis < 3; ++axis)
+		dtSwapEndian(&header->bmax[axis]);
+	dtSwapEndian(&header->hmin);
+	dtSwapEndian(&header->hmax);
+	
+	// width, height, minx, maxx, miny, maxy are unsigned char, no need to swap.
+	
+	return true;
+}
+
diff --git a/Engine/lib/recast/License.txt b/Engine/lib/recast/License.txt
new file mode 100644
index 000000000..95f4bfc96
--- /dev/null
+++ b/Engine/lib/recast/License.txt
@@ -0,0 +1,18 @@
+Copyright (c) 2009 Mikko Mononen memon@inside.org
+
+This software is provided 'as-is', without any express or implied
+warranty.  In no event will the authors be held liable for any damages
+arising from the use of this software.
+
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it
+freely, subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not
+claim that you wrote the original software. If you use this software
+in a product, an acknowledgment in the product documentation would be
+appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be
+misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+
diff --git a/Engine/lib/recast/Readme.txt b/Engine/lib/recast/Readme.txt
new file mode 100644
index 000000000..0c2f7b167
--- /dev/null
+++ b/Engine/lib/recast/Readme.txt
@@ -0,0 +1,120 @@
+
+Recast & Detour Version 1.4
+
+
+Recast
+
+Recast is a state-of-the-art navigation mesh construction toolset for games.
+
+    * It is automatic, which means that you can throw any level geometry
+      at it and you will get a robust mesh out
+    * It is fast, which means swift turnaround times for level designers
+    * It is open source, so it comes with full source and you can
+      customize it to your heart's content.
+
+The Recast process starts with constructing a voxel mold from a level geometry 
+and then casting a navigation mesh over it. The process consists of three steps, 
+building the voxel mold, partitioning the mold into simple regions, peeling off 
+the regions as simple polygons.
+
+   1. The voxel mold is built from the input triangle mesh by rasterizing 
+      the triangles into a multi-layer heightfield. Some simple filters are 
+      then applied to the mold to prune out locations where the character 
+      would not be able to move.
+   2. The walkable areas described by the mold are divided into simple 
+      overlayed 2D regions. The resulting regions have only one non-overlapping 
+      contour, which simplifies the final step of the process tremendously.
+   3. The navigation polygons are peeled off from the regions by first tracing 
+      the boundaries and then simplifying them. The resulting polygons are 
+      finally converted to convex polygons which makes them perfect for 
+      pathfinding and spatial reasoning about the level. 
+
+The toolset code is located in the Recast folder and demo application using the Recast
+toolset is located in the RecastDemo folder.
+
+The project files with this distribution can be compiled with Microsoft Visual C++ 2008
+(you can download it for free) and XCode 3.1.
+
+
+Detour
+
+Recast is accompanied by Detour, a path-finding and spatial reasoning toolkit. You can use any navigation mesh with Detour, but of course the data generated with Recast fits perfectly.
+
+Detour offers a simple static navigation mesh, which is suitable for many simple cases, as well as a tiled navigation mesh, which allows you to plug pieces of the mesh in and out. The tiled mesh allows you to create systems where you stream new navigation data in and out as the player progresses through the level, or you may regenerate tiles as the world changes.
+
+
+Latest code available at http://code.google.com/p/recastnavigation/
+
+
+--
+
+Release Notes
+
+----------------
+* Recast 1.4
+  Released August 24th, 2009
+
+- Added detail height mesh generation (RecastDetailMesh.cpp) for single,
+  tiled statmeshes as well as tilemesh.
+- Added feature to contour tracing which detects extra vertices along
+  tile edges which should be removed later.
+- Changed the tiled stat mesh preprocess, so that it first generated
+  polymeshes per tile and finally combines them.
+- Fixed bug in the GUI code where invisible buttons could be pressed.
+
+----------------
+* Recast 1.31
+  Released July 24th, 2009
+
+- Better cost and heuristic functions.
+- Fixed tile navmesh raycast on tile borders.
+
+----------------
+* Recast 1.3
+  Released July 14th, 2009
+
+- Added dtTileNavMesh which allows to dynamically add and remove navmesh pieces at runtime.
+- Renamed stat navmesh types to dtStat* (i.e. dtPoly is now dtStatPoly).
+- Moved common code used by tile and stat navmesh to DetourNode.h/cpp and DetourCommon.h/cpp.
+- Refactored the demo code.
+
+----------------
+* Recast 1.2
+  Released June 17th, 2009
+
+- Added tiled mesh generation. The tiled generation allows generating navigation for
+  much larger worlds, it removes some of the artifacts that come from distance fields
+  in open areas, and allows later streaming and dynamic runtime generation
+- Improved and added some debug draw modes
+- API change: The helper function rcBuildNavMesh does not exist anymore,
+  had to change few internal things to cope with the tiled processing,
+  similar API functionality will be added later once the tiled process matures
+- The demo is getting way too complicated, need to split demos
+- Fixed several filtering functions so that the mesh is tighter to the geometry,
+  sometimes there could be an error of up to two voxel units close to walls,
+  now it should be just one.
+
+----------------
+* Recast 1.1
+  Released April 11th, 2009
+
+This is the first release of Detour.
+
+----------------
+* Recast 1.0
+  Released March 29th, 2009
+
+This is the first release of Recast.
+
+The process is not always as robust as I would wish. The watershed phase sometimes swallows tiny islands
+which are close to edges. These droppings are handled in rcBuildContours, but the code is not
+particularly robust either.
+
+Another non-robust case is when portal contours (contours shared between two regions) are always
+assumed to be straight. That can lead to overlapping contours specially when the level has
+large open areas.
+
+
+
+Mikko Mononen
+memon@inside.org
diff --git a/Engine/lib/recast/Recast/CMakeLists.txt b/Engine/lib/recast/Recast/CMakeLists.txt
new file mode 100644
index 000000000..202304897
--- /dev/null
+++ b/Engine/lib/recast/Recast/CMakeLists.txt
@@ -0,0 +1,24 @@
+cmake_minimum_required(VERSION 2.6)
+# Translation units that are compiled into the Recast library.
+set(recast_SRCS
+	Source/Recast.cpp
+	Source/RecastArea.cpp
+	Source/RecastAlloc.cpp
+	Source/RecastContour.cpp
+	Source/RecastFilter.cpp
+	Source/RecastLayers.cpp
+	Source/RecastMesh.cpp
+	Source/RecastMeshDetail.cpp
+	Source/RecastRasterization.cpp
+	Source/RecastRegion.cpp
+)
+# Headers added to the target alongside the sources.
+set(recast_HDRS
+	Include/Recast.h
+	Include/RecastAlloc.h
+	Include/RecastAssert.h
+)
+# Header search path for this directory, relative to this CMakeLists.txt.
+include_directories(Include)
+# No STATIC/SHARED keyword is given, so the library type follows BUILD_SHARED_LIBS.
+add_library(Recast ${recast_SRCS} ${recast_HDRS})
diff --git a/Engine/lib/recast/Recast/Include/Recast.h b/Engine/lib/recast/Recast/Include/Recast.h
new file mode 100644
index 000000000..1ea40a3f4
--- /dev/null
+++ b/Engine/lib/recast/Recast/Include/Recast.h
@@ -0,0 +1,1130 @@
+//
+// Copyright (c) 2009-2010 Mikko Mononen memon@inside.org
+//
+// This software is provided 'as-is', without any express or implied
+// warranty.  In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source distribution.
+//
+ 
+#ifndef RECAST_H
+#define RECAST_H
+
+/// The value of PI used by Recast.
+static const float RC_PI = 3.14159265f;
+
+/// Recast log categories.
+/// @see rcContext
+enum rcLogCategory
+{
+	RC_LOG_PROGRESS = 1,	///< A progress log entry.
+	RC_LOG_WARNING,			///< A warning log entry.
+	RC_LOG_ERROR,			///< An error log entry.
+};
+
+/// Recast performance timer categories.
+/// @see rcContext
+enum rcTimerLabel
+{
+	/// The user defined total time of the build.
+	RC_TIMER_TOTAL,
+	/// A user defined build time.
+	RC_TIMER_TEMP,
+	/// The time to rasterize the triangles. (See: #rcRasterizeTriangle)
+	RC_TIMER_RASTERIZE_TRIANGLES,
+	/// The time to build the compact heightfield. (See: #rcBuildCompactHeightfield)
+	RC_TIMER_BUILD_COMPACTHEIGHTFIELD,
+	/// The total time to build the contours. (See: #rcBuildContours)
+	RC_TIMER_BUILD_CONTOURS,
+	/// The time to trace the boundaries of the contours. (See: #rcBuildContours)
+	RC_TIMER_BUILD_CONTOURS_TRACE,
+	/// The time to simplify the contours. (See: #rcBuildContours)
+	RC_TIMER_BUILD_CONTOURS_SIMPLIFY,
+	/// The time to filter ledge spans. (See: #rcFilterLedgeSpans)
+	RC_TIMER_FILTER_BORDER,
+	/// The time to filter low height spans. (See: #rcFilterWalkableLowHeightSpans)
+	RC_TIMER_FILTER_WALKABLE,
+	/// The time to apply the median filter. (See: #rcMedianFilterWalkableArea)
+	RC_TIMER_MEDIAN_AREA,
+	/// The time to filter low obstacles. (See: #rcFilterLowHangingWalkableObstacles)
+	RC_TIMER_FILTER_LOW_OBSTACLES,
+	/// The time to build the polygon mesh. (See: #rcBuildPolyMesh)
+	RC_TIMER_BUILD_POLYMESH,
+	/// The time to merge polygon meshes. (See: #rcMergePolyMeshes)
+	RC_TIMER_MERGE_POLYMESH,
+	/// The time to erode the walkable area. (See: #rcErodeWalkableArea)
+	RC_TIMER_ERODE_AREA,
+	/// The time to mark a box area. (See: #rcMarkBoxArea)
+	RC_TIMER_MARK_BOX_AREA,
+	/// The time to mark a cylinder area. (See: #rcMarkCylinderArea)
+	RC_TIMER_MARK_CYLINDER_AREA,
+	/// The time to mark a convex polygon area. (See: #rcMarkConvexPolyArea)
+	RC_TIMER_MARK_CONVEXPOLY_AREA,
+	/// The total time to build the distance field. (See: #rcBuildDistanceField)
+	RC_TIMER_BUILD_DISTANCEFIELD,
+	/// The time to build the distances of the distance field. (See: #rcBuildDistanceField)
+	RC_TIMER_BUILD_DISTANCEFIELD_DIST,
+	/// The time to blur the distance field. (See: #rcBuildDistanceField)
+	RC_TIMER_BUILD_DISTANCEFIELD_BLUR,
+	/// The total time to build the regions. (See: #rcBuildRegions, #rcBuildRegionsMonotone)
+	RC_TIMER_BUILD_REGIONS,
+	/// The total time to apply the watershed algorithm. (See: #rcBuildRegions)
+	RC_TIMER_BUILD_REGIONS_WATERSHED,
+	/// The time to expand regions while applying the watershed algorithm. (See: #rcBuildRegions)
+	RC_TIMER_BUILD_REGIONS_EXPAND,
+	/// The time to flood regions while applying the watershed algorithm. (See: #rcBuildRegions)
+	RC_TIMER_BUILD_REGIONS_FLOOD,
+	/// The time to filter out small regions. (See: #rcBuildRegions, #rcBuildRegionsMonotone)
+	RC_TIMER_BUILD_REGIONS_FILTER,
+	/// The time to build heightfield layers. (See: #rcBuildHeightfieldLayers)
+	RC_TIMER_BUILD_LAYERS, 
+	/// The time to build the polygon mesh detail. (See: #rcBuildPolyMeshDetail)
+	RC_TIMER_BUILD_POLYMESHDETAIL,
+	/// The time to merge polygon mesh details. (See: #rcMergePolyMeshDetails)
+	RC_TIMER_MERGE_POLYMESHDETAIL,
+	/// The maximum number of timers.  (Used for iterating timers.)
+	RC_MAX_TIMERS
+};
+
+/// Provides an interface for optional logging and performance tracking of the Recast 
+/// build process.
+/// @ingroup recast
+class rcContext
+{
+public:
+
+	/// Constructor.
+	///  @param[in]		state	TRUE if the logging and performance timers should be enabled.  [Default: true]
+	inline rcContext(bool state = true) : m_logEnabled(state), m_timerEnabled(state) {}
+	virtual ~rcContext() {}
+
+	/// Enables or disables logging.
+	///  @param[in]		state	TRUE if logging should be enabled.
+	inline void enableLog(bool state) { m_logEnabled = state; }
+
+	/// Clears all log entries.
+	inline void resetLog() { if (m_logEnabled) doResetLog(); }
+
+	/// Logs a message.
+	///  @param[in]		category	The category of the message.
+	///  @param[in]		format		The message.
+	void log(const rcLogCategory category, const char* format, ...);
+
+	/// Enables or disables the performance timers.
+	///  @param[in]		state	TRUE if timers should be enabled.
+	inline void enableTimer(bool state) { m_timerEnabled = state; }
+
+	/// Clears all performance timers. (Resets all to unused.)
+	inline void resetTimers() { if (m_timerEnabled) doResetTimers(); }
+
+	/// Starts the specified performance timer.
+	///  @param	label	The category of timer.
+	inline void startTimer(const rcTimerLabel label) { if (m_timerEnabled) doStartTimer(label); }
+
+	/// Stops the specified performance timer.
+	///  @param	label	The category of the timer.
+	inline void stopTimer(const rcTimerLabel label) { if (m_timerEnabled) doStopTimer(label); }
+
+	/// Returns the total accumulated time of the specified performance timer.
+	///  @param	label	The category of the timer.
+	///  @return The accumulated time of the timer, or -1 if timers are disabled or the timer has never been started.
+	inline int getAccumulatedTime(const rcTimerLabel label) const { return m_timerEnabled ? doGetAccumulatedTime(label) : -1; }
+
+protected:
+
+	/// Clears all log entries.
+	virtual void doResetLog() {}
+
+	/// Logs a message.
+	///  @param[in]		category	The category of the message.
+	///  @param[in]		msg			The formatted message.
+	///  @param[in]		len			The length of the formatted message.
+	virtual void doLog(const rcLogCategory /*category*/, const char* /*msg*/, const int /*len*/) {}
+
+	/// Clears all timers. (Resets all to unused.)
+	virtual void doResetTimers() {}
+
+	/// Starts the specified performance timer.
+	///  @param[in]		label	The category of timer.
+	virtual void doStartTimer(const rcTimerLabel /*label*/) {}
+
+	/// Stops the specified performance timer.
+	///  @param[in]		label	The category of the timer.
+	virtual void doStopTimer(const rcTimerLabel /*label*/) {}
+
+	/// Returns the total accumulated time of the specified performance timer.
+	///  @param[in]		label	The category of the timer.
+	///  @return The accumulated time of the timer, or -1 if timers are disabled or the timer has never been started.
+	virtual int doGetAccumulatedTime(const rcTimerLabel /*label*/) const { return -1; }
+	
+	/// True if logging is enabled.
+	bool m_logEnabled;
+
+	/// True if the performance timers are enabled.
+	bool m_timerEnabled;
+};
+
+/// Specifies a configuration to use when performing Recast builds.
+/// @ingroup recast
+struct rcConfig
+{
+	/// The width of the field along the x-axis. [Limit: >= 0] [Units: vx]
+	int width;
+
+	/// The height of the field along the z-axis. [Limit: >= 0] [Units: vx]
+	int height;
+	
+	/// The width/height size of tiles on the xz-plane. [Limit: >= 0] [Units: vx]
+	int tileSize;
+	
+	/// The size of the non-navigable border around the heightfield. [Limit: >=0] [Units: vx]
+	int borderSize;
+
+	/// The xz-plane cell size to use for fields. [Limit: > 0] [Units: wu] 
+	float cs;
+
+	/// The y-axis cell size to use for fields. [Limit: > 0] [Units: wu]
+	float ch;
+
+	/// The minimum bounds of the field's AABB. [(x, y, z)] [Units: wu]
+	float bmin[3]; 
+
+	/// The maximum bounds of the field's AABB. [(x, y, z)] [Units: wu]
+	float bmax[3];
+
+	/// The maximum slope that is considered walkable. [Limits: 0 <= value < 90] [Units: Degrees] 
+	float walkableSlopeAngle;
+
+	/// Minimum floor to 'ceiling' height that will still allow the floor area to 
+	/// be considered walkable. [Limit: >= 3] [Units: vx] 
+	int walkableHeight;
+	
+	/// Maximum ledge height that is considered to still be traversable. [Limit: >=0] [Units: vx] 
+	int walkableClimb;
+	
+	/// The distance to erode/shrink the walkable area of the heightfield away from 
+	/// obstructions.  [Limit: >=0] [Units: vx] 
+	int walkableRadius;
+	
+	/// The maximum allowed length for contour edges along the border of the mesh. [Limit: >=0] [Units: vx] 
+	int maxEdgeLen;
+	
+	/// The maximum distance a simplified contour's border edges should deviate 
+	/// from the original raw contour. [Limit: >=0] [Units: wu]
+	float maxSimplificationError;
+	
+	/// The minimum number of cells allowed to form isolated island areas. [Limit: >=0] [Units: vx] 
+	int minRegionArea;
+	
+	/// Any regions with a span count smaller than this value will, if possible, 
+	/// be merged with larger regions. [Limit: >=0] [Units: vx] 
+	int mergeRegionArea;
+	
+	/// The maximum number of vertices allowed for polygons generated during the 
+	/// contour to polygon conversion process. [Limit: >= 3] 
+	int maxVertsPerPoly;
+	
+	/// Sets the sampling distance to use when generating the detail mesh.
+	/// (For height detail only.) [Limits: 0 or >= 0.9] [Units: wu] 
+	float detailSampleDist;
+	
+	/// The maximum distance the detail mesh surface should deviate from heightfield
+	/// data. (For height detail only.) [Limit: >=0] [Units: wu] 
+	float detailSampleMaxError;
+};
+
+/// Defines the number of bits allocated to rcSpan::smin and rcSpan::smax.
+static const int RC_SPAN_HEIGHT_BITS = 13;
+/// Defines the maximum value for rcSpan::smin and rcSpan::smax.
+static const int RC_SPAN_MAX_HEIGHT = (1<<RC_SPAN_HEIGHT_BITS)-1;
+
+/// The number of spans allocated per span pool.
+/// @see rcSpanPool
+static const int RC_SPANS_PER_POOL = 2048;
+
+/// Represents a span in a heightfield. The smin/smax bit-field widths are taken from
+/// #RC_SPAN_HEIGHT_BITS so they cannot drift from #RC_SPAN_MAX_HEIGHT. @see rcHeightfield
+struct rcSpan
+{
+	unsigned int smin : RC_SPAN_HEIGHT_BITS;	///< The lower limit of the span. [Limit: < #smax]
+	unsigned int smax : RC_SPAN_HEIGHT_BITS;	///< The upper limit of the span. [Limit: <= #RC_SPAN_MAX_HEIGHT]
+	unsigned int area : 6;						///< The area id assigned to the span. (6 bits: values 0-63.)
+	rcSpan* next;								///< The next span higher up in column.
+};
+
+/// A memory pool used for quick allocation of spans within a heightfield.
+/// @see rcHeightfield
+struct rcSpanPool
+{
+	rcSpanPool* next;					///< The next span pool.
+	rcSpan items[RC_SPANS_PER_POOL];	///< Array of spans in the pool.
+};
+
+/// A dynamic heightfield representing obstructed space.
+/// @ingroup recast
+struct rcHeightfield
+{
+	int width;			///< The width of the heightfield. (Along the x-axis in cell units.)
+	int height;			///< The height of the heightfield. (Along the z-axis in cell units.)
+	float bmin[3];  	///< The minimum bounds in world space. [(x, y, z)]
+	float bmax[3];		///< The maximum bounds in world space. [(x, y, z)]
+	float cs;			///< The size of each cell. (On the xz-plane.)
+	float ch;			///< The height of each cell. (The minimum increment along the y-axis.)
+	rcSpan** spans;		///< Heightfield of spans (width*height).
+	rcSpanPool* pools;	///< Linked list of span pools.
+	rcSpan* freelist;	///< The next free span.
+};
+
+/// Provides information on the content of a cell column in a compact heightfield. 
+struct rcCompactCell
+{
+	unsigned int index : 24;	///< Index to the first span in the column.
+	unsigned int count : 8;		///< Number of spans in the column.
+};
+
+/// Represents a span of unobstructed space within a compact heightfield.
+struct rcCompactSpan
+{
+	unsigned short y;			///< The lower extent of the span. (Measured from the heightfield's base.)
+	unsigned short reg;			///< The id of the region the span belongs to. (Or zero if not in a region.)
+	unsigned int con : 24;		///< Packed neighbor connection data.
+	unsigned int h : 8;			///< The height of the span.  (Measured from #y.)
+};
+
+/// A compact, static heightfield representing unobstructed space.
+/// @ingroup recast
+struct rcCompactHeightfield
+{
+	int width;					///< The width of the heightfield. (Along the x-axis in cell units.)
+	int height;					///< The height of the heightfield. (Along the z-axis in cell units.)
+	int spanCount;				///< The number of spans in the heightfield.
+	int walkableHeight;			///< The walkable height used during the build of the field.  (See: rcConfig::walkableHeight)
+	int walkableClimb;			///< The walkable climb used during the build of the field. (See: rcConfig::walkableClimb)
+	int borderSize;				///< The AABB border size used during the build of the field. (See: rcConfig::borderSize)
+	unsigned short maxDistance;	///< The maximum distance value of any span within the field. 
+	unsigned short maxRegions;	///< The maximum region id of any span within the field. 
+	float bmin[3];				///< The minimum bounds in world space. [(x, y, z)]
+	float bmax[3];				///< The maximum bounds in world space. [(x, y, z)]
+	float cs;					///< The size of each cell. (On the xz-plane.)
+	float ch;					///< The height of each cell. (The minimum increment along the y-axis.)
+	rcCompactCell* cells;		///< Array of cells. [Size: #width*#height]
+	rcCompactSpan* spans;		///< Array of spans. [Size: #spanCount]
+	unsigned short* dist;		///< Array containing border distance data. [Size: #spanCount]
+	unsigned char* areas;		///< Array containing area id data. [Size: #spanCount]
+};
+
+/// Represents a heightfield layer within a layer set.
+/// @see rcHeightfieldLayerSet
+struct rcHeightfieldLayer
+{
+	float bmin[3];				///< The minimum bounds in world space. [(x, y, z)]
+	float bmax[3];				///< The maximum bounds in world space. [(x, y, z)]
+	float cs;					///< The size of each cell. (On the xz-plane.)
+	float ch;					///< The height of each cell. (The minimum increment along the y-axis.)
+	int width;					///< The width of the heightfield. (Along the x-axis in cell units.)
+	int height;					///< The height of the heightfield. (Along the z-axis in cell units.)
+	int minx;					///< The minimum x-bounds of usable data.
+	int maxx;					///< The maximum x-bounds of usable data.
+	int miny;					///< The minimum y-bounds of usable data. (Along the z-axis.)
+	int maxy;					///< The maximum y-bounds of usable data. (Along the z-axis.)
+	int hmin;					///< The minimum height bounds of usable data. (Along the y-axis.)
+	int hmax;					///< The maximum height bounds of usable data. (Along the y-axis.)
+	unsigned char* heights;		///< The heightfield. [Size: (width - borderSize*2) * (height - borderSize*2)]
+	unsigned char* areas;		///< Area ids. [Size: Same as #heights]
+	unsigned char* cons;		///< Packed neighbor connection information. [Size: Same as #heights]
+};
+
+/// Represents a set of heightfield layers.
+/// @ingroup recast
+/// @see rcAllocHeightfieldLayerSet, rcFreeHeightfieldLayerSet 
+struct rcHeightfieldLayerSet
+{
+	rcHeightfieldLayer* layers;			///< The layers in the set. [Size: #nlayers]
+	int nlayers;						///< The number of layers in the set.
+};
+
+/// Represents a simple, non-overlapping contour in field space.
+struct rcContour
+{
+	int* verts;			///< Simplified contour vertex and connection data. [Size: 4 * #nverts]
+	int nverts;			///< The number of vertices in the simplified contour. 
+	int* rverts;		///< Raw contour vertex and connection data. [Size: 4 * #nrverts]
+	int nrverts;		///< The number of vertices in the raw contour. 
+	unsigned short reg;	///< The region id of the contour.
+	unsigned char area;	///< The area id of the contour.
+};
+
+/// Represents a group of related contours.
+/// @ingroup recast
+struct rcContourSet
+{
+	rcContour* conts;	///< An array of the contours in the set. [Size: #nconts]
+	int nconts;			///< The number of contours in the set.
+	float bmin[3];  	///< The minimum bounds in world space. [(x, y, z)]
+	float bmax[3];		///< The maximum bounds in world space. [(x, y, z)]
+	float cs;			///< The size of each cell. (On the xz-plane.)
+	float ch;			///< The height of each cell. (The minimum increment along the y-axis.)
+	int width;			///< The width of the set. (Along the x-axis in cell units.) 
+	int height;			///< The height of the set. (Along the z-axis in cell units.) 
+	int borderSize;		///< The AABB border size used to generate the source data from which the contours were derived.
+};
+
+/// Represents a polygon mesh suitable for use in building a navigation mesh. 
+/// @ingroup recast
+struct rcPolyMesh
+{
+	unsigned short* verts;	///< The mesh vertices. [Form: (x, y, z) * #nverts]
+	unsigned short* polys;	///< Polygon and neighbor data. [Length: #maxpolys * 2 * #nvp]
+	unsigned short* regs;	///< The region id assigned to each polygon. [Length: #maxpolys]
+	unsigned short* flags;	///< The user defined flags for each polygon. [Length: #maxpolys]
+	unsigned char* areas;	///< The area id assigned to each polygon. [Length: #maxpolys]
+	int nverts;				///< The number of vertices.
+	int npolys;				///< The number of polygons.
+	int maxpolys;			///< The number of allocated polygons.
+	int nvp;				///< The maximum number of vertices per polygon.
+	float bmin[3];			///< The minimum bounds in world space. [(x, y, z)]
+	float bmax[3];			///< The maximum bounds in world space. [(x, y, z)]
+	float cs;				///< The size of each cell. (On the xz-plane.)
+	float ch;				///< The height of each cell. (The minimum increment along the y-axis.)
+	int borderSize;			///< The AABB border size used to generate the source data from which the mesh was derived.
+};
+
+/// Contains triangle meshes that represent detailed height data associated 
+/// with the polygons in its associated polygon mesh object.
+/// @ingroup recast
+struct rcPolyMeshDetail
+{
+	unsigned int* meshes;	///< The sub-mesh data. [Size: 4*#nmeshes] 
+	float* verts;			///< The mesh vertices. [Size: 3*#nverts] 
+	unsigned char* tris;	///< The mesh triangles. [Size: 4*#ntris] 
+	int nmeshes;			///< The number of sub-meshes defined by #meshes.
+	int nverts;				///< The number of vertices in #verts.
+	int ntris;				///< The number of triangles in #tris.
+};
+
+/// @name Allocation Functions
+/// Functions used to allocate and de-allocate Recast objects.
+/// @see rcAllocSetCustom
+/// @{
+
+/// Allocates a heightfield object using the Recast allocator.
+///  @return A heightfield that is ready for initialization, or null on failure.
+///  @ingroup recast
+///  @see rcCreateHeightfield, rcFreeHeightField
+rcHeightfield* rcAllocHeightfield();
+
+/// Frees the specified heightfield object using the Recast allocator.
+///  @param[in]		hf	A heightfield allocated using #rcAllocHeightfield
+///  @ingroup recast
+///  @see rcAllocHeightfield
+void rcFreeHeightField(rcHeightfield* hf);
+
+/// Allocates a compact heightfield object using the Recast allocator.
+///  @return A compact heightfield that is ready for initialization, or null on failure.
+///  @ingroup recast
+///  @see rcBuildCompactHeightfield, rcFreeCompactHeightfield
+rcCompactHeightfield* rcAllocCompactHeightfield();
+
+/// Frees the specified compact heightfield object using the Recast allocator.
+///  @param[in]		chf		A compact heightfield allocated using #rcAllocCompactHeightfield
+///  @ingroup recast
+///  @see rcAllocCompactHeightfield
+void rcFreeCompactHeightfield(rcCompactHeightfield* chf);
+
+/// Allocates a heightfield layer set using the Recast allocator.
+///  @return A heightfield layer set that is ready for initialization, or null on failure.
+///  @ingroup recast
+///  @see rcBuildHeightfieldLayers, rcFreeHeightfieldLayerSet
+rcHeightfieldLayerSet* rcAllocHeightfieldLayerSet();
+
+/// Frees the specified heightfield layer set using the Recast allocator.
+///  @param[in]		lset	A heightfield layer set allocated using #rcAllocHeightfieldLayerSet
+///  @ingroup recast
+///  @see rcAllocHeightfieldLayerSet
+void rcFreeHeightfieldLayerSet(rcHeightfieldLayerSet* lset);
+
+/// Allocates a contour set object using the Recast allocator.
+///  @return A contour set that is ready for initialization, or null on failure.
+///  @ingroup recast
+///  @see rcBuildContours, rcFreeContourSet
+rcContourSet* rcAllocContourSet();
+
+/// Frees the specified contour set using the Recast allocator.
+///  @param[in]		cset	A contour set allocated using #rcAllocContourSet
+///  @ingroup recast
+///  @see rcAllocContourSet
+void rcFreeContourSet(rcContourSet* cset);
+
+/// Allocates a polygon mesh object using the Recast allocator.
+///  @return A polygon mesh that is ready for initialization, or null on failure.
+///  @ingroup recast
+///  @see rcBuildPolyMesh, rcFreePolyMesh
+rcPolyMesh* rcAllocPolyMesh();
+
+/// Frees the specified polygon mesh using the Recast allocator.
+///  @param[in]		pmesh	A polygon mesh allocated using #rcAllocPolyMesh
+///  @ingroup recast
+///  @see rcAllocPolyMesh
+void rcFreePolyMesh(rcPolyMesh* pmesh);
+
+/// Allocates a detail mesh object using the Recast allocator.
+///  @return A detail mesh that is ready for initialization, or null on failure.
+///  @ingroup recast
+///  @see rcBuildPolyMeshDetail, rcFreePolyMeshDetail
+rcPolyMeshDetail* rcAllocPolyMeshDetail();
+
+/// Frees the specified detail mesh using the Recast allocator.
+///  @param[in]		dmesh	A detail mesh allocated using #rcAllocPolyMeshDetail
+///  @ingroup recast
+///  @see rcAllocPolyMeshDetail
+void rcFreePolyMeshDetail(rcPolyMeshDetail* dmesh);
+
+/// @}
+
+/// Heightfield border flag.
+/// If a heightfield region ID has this bit set, then the region is a border 
+/// region and its spans are considered unwalkable.
+/// (Used during the region and contour build process.)
+/// @see rcCompactSpan::reg
+static const unsigned short RC_BORDER_REG = 0x8000;
+
+/// Border vertex flag.
+/// If a region ID has this bit set, then the associated element lies on
+/// a tile border. If a contour vertex's region ID has this bit set, the 
+/// vertex will later be removed in order to match the segments and vertices 
+/// at tile boundaries.
+/// (Used during the build process.)
+/// @see rcCompactSpan::reg, #rcContour::verts, #rcContour::rverts
+static const int RC_BORDER_VERTEX = 0x10000;
+
+/// Area border flag.
+/// If a region ID has this bit set, then the associated element lies on
+/// the border of an area.
+/// (Used during the region and contour build process.)
+/// @see rcCompactSpan::reg, #rcContour::verts, #rcContour::rverts
+static const int RC_AREA_BORDER = 0x20000;
+
+/// Contour build flags.
+/// @see rcBuildContours
+enum rcBuildContoursFlags
+{
+	RC_CONTOUR_TESS_WALL_EDGES = 0x01,	///< Tessellate solid (impassable) edges during contour simplification.
+	RC_CONTOUR_TESS_AREA_EDGES = 0x02,	///< Tessellate edges between areas during contour simplification.
+};
+
+/// Applied to the region id field of contour vertices in order to extract the region id.
+/// The region id field of a vertex may have several flags applied to it.  So the
+/// field's value can't be used directly.
+/// @see rcContour::verts, rcContour::rverts
+static const int RC_CONTOUR_REG_MASK = 0xffff;
+
+/// A value which indicates an invalid index within a mesh.
+/// @note This does not necessarily indicate an error.
+/// @see rcPolyMesh::polys
+static const unsigned short RC_MESH_NULL_IDX = 0xffff;
+
+/// Represents the null area.
+/// When a data element is given this value it is considered to no longer be 
+/// assigned to a usable area.  (E.g. It is unwalkable.)
+static const unsigned char RC_NULL_AREA = 0;
+
+/// The default area id used to indicate a walkable polygon. 
+/// This is also the maximum allowed area id, and the only non-null area id 
+/// recognized by some steps in the build process. 
+static const unsigned char RC_WALKABLE_AREA = 63;
+
+/// The value returned by #rcGetCon if the specified direction is not connected
+/// to another span. (Has no neighbor.)
+static const int RC_NOT_CONNECTED = 0x3f;
+
+/// @name General helper functions
+/// @{
+
+/// Swaps the values of the two parameters.
+///  @param[in,out]	a	Value A
+///  @param[in,out]	b	Value B
+template<class T> inline void rcSwap(T& a, T& b) { T t = a; a = b; b = t; }
+
+/// Returns the minimum of two values.
+///  @param[in]		a	Value A
+///  @param[in]		b	Value B
+///  @return The minimum of the two values.
+template<class T> inline T rcMin(T a, T b) { return a < b ? a : b; }
+
+/// Returns the maximum of two values.
+///  @param[in]		a	Value A
+///  @param[in]		b	Value B
+///  @return The maximum of the two values.
+template<class T> inline T rcMax(T a, T b) { return a > b ? a : b; }
+
+/// Returns the absolute value.
+///  @param[in]		a	The value.
+///  @return The absolute value of the specified value.
+template<class T> inline T rcAbs(T a) { return a < 0 ? -a : a; }
+
+/// Returns the square of the value.
+///  @param[in]		a	The value.
+///  @return The square of the value.
+template<class T> inline T rcSqr(T a) { return a*a; }
+
+/// Clamps the value to the specified range.
+///  @param[in]		v	The value to clamp.
+///  @param[in]		mn	The minimum permitted return value.
+///  @param[in]		mx	The maximum permitted return value.
+///  @return The value, clamped to the specified range.
+template<class T> inline T rcClamp(T v, T mn, T mx) { return v < mn ? mn : (v > mx ? mx : v); }
+
+/// Returns the square root of the value.
+///  @param[in]		x	The value.
+///  @return The square root of the value.
+float rcSqrt(float x);
+
+/// @}
+/// @name Vector helper functions.
+/// @{
+
+/// Derives the cross product of two vectors. (@p v1 x @p v2)
+///  @param[out]	dest	The cross product. [(x, y, z)]
+///  @param[in]		v1		A Vector [(x, y, z)]
+///  @param[in]		v2		A vector [(x, y, z)]
+inline void rcVcross(float* dest, const float* v1, const float* v2)
+{
+	dest[0] = v1[1]*v2[2] - v1[2]*v2[1];
+	dest[1] = v1[2]*v2[0] - v1[0]*v2[2];
+	dest[2] = v1[0]*v2[1] - v1[1]*v2[0];
+}
+
+/// Derives the dot product of two vectors. (@p v1 . @p v2)
+///  @param[in]		v1	A Vector [(x, y, z)]
+///  @param[in]		v2	A vector [(x, y, z)]
+/// @return The dot product.
+inline float rcVdot(const float* v1, const float* v2)
+{
+	return v1[0]*v2[0] + v1[1]*v2[1] + v1[2]*v2[2];
+}
+
+/// Performs a scaled vector addition. (@p v1 + (@p v2 * @p s))
+///  @param[out]	dest	The result vector. [(x, y, z)]
+///  @param[in]		v1		The base vector. [(x, y, z)]
+///  @param[in]		v2		The vector to scale and add to @p v1. [(x, y, z)]
+///  @param[in]		s		The amount to scale @p v2 by before adding to @p v1.
+inline void rcVmad(float* dest, const float* v1, const float* v2, const float s)
+{
+	dest[0] = v1[0]+v2[0]*s;
+	dest[1] = v1[1]+v2[1]*s;
+	dest[2] = v1[2]+v2[2]*s;
+}
+
+/// Performs a vector addition. (@p v1 + @p v2)
+///  @param[out]	dest	The result vector. [(x, y, z)]
+///  @param[in]		v1		The base vector. [(x, y, z)]
+///  @param[in]		v2		The vector to add to @p v1. [(x, y, z)]
+inline void rcVadd(float* dest, const float* v1, const float* v2)
+{
+	dest[0] = v1[0]+v2[0];
+	dest[1] = v1[1]+v2[1];
+	dest[2] = v1[2]+v2[2];
+}
+
+/// Performs a vector subtraction. (@p v1 - @p v2)
+///  @param[out]	dest	The result vector. [(x, y, z)]
+///  @param[in]		v1		The base vector. [(x, y, z)]
+///  @param[in]		v2		The vector to subtract from @p v1. [(x, y, z)]
+inline void rcVsub(float* dest, const float* v1, const float* v2)
+{
+	dest[0] = v1[0]-v2[0];
+	dest[1] = v1[1]-v2[1];
+	dest[2] = v1[2]-v2[2];
+}
+
+/// Selects the minimum value of each element from the specified vectors.
+///  @param[in,out]	mn	A vector.  (Will be updated with the result.) [(x, y, z)]
+///  @param[in]		v	A vector. [(x, y, z)]
+inline void rcVmin(float* mn, const float* v)
+{
+	mn[0] = rcMin(mn[0], v[0]);
+	mn[1] = rcMin(mn[1], v[1]);
+	mn[2] = rcMin(mn[2], v[2]);
+}
+
+/// Selects the maximum value of each element from the specified vectors.
+///  @param[in,out]	mx	A vector.  (Will be updated with the result.) [(x, y, z)]
+///  @param[in]		v	A vector. [(x, y, z)]
+inline void rcVmax(float* mx, const float* v)
+{
+	mx[0] = rcMax(mx[0], v[0]);
+	mx[1] = rcMax(mx[1], v[1]);
+	mx[2] = rcMax(mx[2], v[2]);
+}
+
+/// Performs a vector copy.
+///  @param[out]	dest	The result. [(x, y, z)]
+///  @param[in]		v		The vector to copy. [(x, y, z)]
+inline void rcVcopy(float* dest, const float* v)
+{
+	dest[0] = v[0];
+	dest[1] = v[1];
+	dest[2] = v[2];
+}
+
+/// Returns the distance between two points.
+///  @param[in]		v1	A point. [(x, y, z)]
+///  @param[in]		v2	A point. [(x, y, z)]
+/// @return The distance between the two points.
+inline float rcVdist(const float* v1, const float* v2)
+{
+	float dx = v2[0] - v1[0];
+	float dy = v2[1] - v1[1];
+	float dz = v2[2] - v1[2];
+	return rcSqrt(dx*dx + dy*dy + dz*dz);
+}
+
+/// Returns the square of the distance between two points.
+///  @param[in]		v1	A point. [(x, y, z)]
+///  @param[in]		v2	A point. [(x, y, z)]
+/// @return The square of the distance between the two points.
+inline float rcVdistSqr(const float* v1, const float* v2)
+{
+	float dx = v2[0] - v1[0];
+	float dy = v2[1] - v1[1];
+	float dz = v2[2] - v1[2];
+	return dx*dx + dy*dy + dz*dz;
+}
+
+/// Normalizes the vector in place by scaling each component by 1 / sqrt(x*x + y*y + z*z).
+///  @param[in,out]	v	The vector to normalize. [(x, y, z)] NOTE(review): a zero-length @p v divides by zero here - confirm callers never pass one.
+inline void rcVnormalize(float* v)
+{
+	float d = 1.0f / rcSqrt(rcSqr(v[0]) + rcSqr(v[1]) + rcSqr(v[2]));
+	v[0] *= d;
+	v[1] *= d;
+	v[2] *= d;
+}
+
+/// @}
+/// @name Heightfield Functions
+/// @see rcHeightfield
+/// @{
+
+/// Calculates the bounding box of an array of vertices.
+///  @ingroup recast
+///  @param[in]		verts	An array of vertices. [(x, y, z) * @p nv]
+///  @param[in]		nv		The number of vertices in the @p verts array.
+///  @param[out]	bmin	The minimum bounds of the AABB. [(x, y, z)] [Units: wu]
+///  @param[out]	bmax	The maximum bounds of the AABB. [(x, y, z)] [Units: wu]
+void rcCalcBounds(const float* verts, int nv, float* bmin, float* bmax);
+
+/// Calculates the grid size based on the bounding box and grid cell size.
+///  @ingroup recast
+///  @param[in]		bmin	The minimum bounds of the AABB. [(x, y, z)] [Units: wu]
+///  @param[in]		bmax	The maximum bounds of the AABB. [(x, y, z)] [Units: wu]
+///  @param[in]		cs		The xz-plane cell size. [Limit: > 0] [Units: wu]
+///  @param[out]	w		The width along the x-axis. [Limit: >= 0] [Units: vx]
+///  @param[out]	h		The height along the z-axis. [Limit: >= 0] [Units: vx]
+void rcCalcGridSize(const float* bmin, const float* bmax, float cs, int* w, int* h);
+
+/// Initializes a new heightfield.
+///  @ingroup recast
+///  @param[in,out]	ctx		The build context to use during the operation.
+///  @param[in,out]	hf		The allocated heightfield to initialize.
+///  @param[in]		width	The width of the field along the x-axis. [Limit: >= 0] [Units: vx]
+///  @param[in]		height	The height of the field along the z-axis. [Limit: >= 0] [Units: vx]
+///  @param[in]		bmin	The minimum bounds of the field's AABB. [(x, y, z)] [Units: wu]
+///  @param[in]		bmax	The maximum bounds of the field's AABB. [(x, y, z)] [Units: wu]
+///  @param[in]		cs		The xz-plane cell size to use for the field. [Limit: > 0] [Units: wu]
+///  @param[in]		ch		The y-axis cell size to use for field. [Limit: > 0] [Units: wu]
+bool rcCreateHeightfield(rcContext* ctx, rcHeightfield& hf, int width, int height,
+						 const float* bmin, const float* bmax,
+						 float cs, float ch);
+
+/// Sets the area id of all triangles with a slope below the specified value
+/// to #RC_WALKABLE_AREA.
+///  @ingroup recast
+///  @param[in,out]	ctx					The build context to use during the operation.
+///  @param[in]		walkableSlopeAngle	The maximum slope that is considered walkable.
+///  									[Limits: 0 <= value < 90] [Units: Degrees]
+///  @param[in]		verts				The vertices. [(x, y, z) * @p nv]
+///  @param[in]		nv					The number of vertices.
+///  @param[in]		tris				The triangle vertex indices. [(vertA, vertB, vertC) * @p nt]
+///  @param[in]		nt					The number of triangles.
+///  @param[out]	areas				The triangle area ids. [Length: >= @p nt]
+void rcMarkWalkableTriangles(rcContext* ctx, const float walkableSlopeAngle, const float* verts, int nv,
+							 const int* tris, int nt, unsigned char* areas); 
+
+/// Sets the area id of all triangles with a slope greater than or equal to the specified value to #RC_NULL_AREA.
+///  @ingroup recast
+///  @param[in,out]	ctx					The build context to use during the operation.
+///  @param[in]		walkableSlopeAngle	The maximum slope that is considered walkable.
+///  									[Limits: 0 <= value < 90] [Units: Degrees]
+///  @param[in]		verts				The vertices. [(x, y, z) * @p nv]
+///  @param[in]		nv					The number of vertices.
+///  @param[in]		tris				The triangle vertex indices. [(vertA, vertB, vertC) * @p nt]
+///  @param[in]		nt					The number of triangles.
+///  @param[out]	areas				The triangle area ids. [Length: >= @p nt]
+void rcClearUnwalkableTriangles(rcContext* ctx, const float walkableSlopeAngle, const float* verts, int nv,
+								const int* tris, int nt, unsigned char* areas); 
+
+/// Adds a span to the specified heightfield.
+///  @ingroup recast
+///  @param[in,out]	ctx				The build context to use during the operation.
+///  @param[in,out]	hf				An initialized heightfield.
+///  @param[in]		x				The width index where the span is to be added.
+///  								[Limits: 0 <= value < rcHeightfield::width]
+///  @param[in]		y				The height index where the span is to be added.
+///  								[Limits: 0 <= value < rcHeightfield::height]
+///  @param[in]		smin			The minimum height of the span. [Limit: < @p smax] [Units: vx]
+///  @param[in]		smax			The maximum height of the span. [Limit: <= #RC_SPAN_MAX_HEIGHT] [Units: vx]
+///  @param[in]		area			The area id of the span. [Limit: <= #RC_WALKABLE_AREA]
+///  @param[in]		flagMergeThr	The merge threshold. [Limit: >= 0] [Units: vx]
+void rcAddSpan(rcContext* ctx, rcHeightfield& hf, const int x, const int y,
+			   const unsigned short smin, const unsigned short smax,
+			   const unsigned char area, const int flagMergeThr);
+
+/// Rasterizes a triangle into the specified heightfield.
+///  @ingroup recast
+///  @param[in,out]	ctx				The build context to use during the operation.
+///  @param[in]		v0				Triangle vertex 0 [(x, y, z)]
+///  @param[in]		v1				Triangle vertex 1 [(x, y, z)]
+///  @param[in]		v2				Triangle vertex 2 [(x, y, z)]
+///  @param[in]		area			The area id of the triangle. [Limit: <= #RC_WALKABLE_AREA]
+///  @param[in,out]	solid			An initialized heightfield.
+///  @param[in]		flagMergeThr	The distance where the walkable flag is favored over the non-walkable flag.
+///  								[Limit: >= 0] [Units: vx]
+void rcRasterizeTriangle(rcContext* ctx, const float* v0, const float* v1, const float* v2,
+						 const unsigned char area, rcHeightfield& solid,
+						 const int flagMergeThr = 1);
+
+/// Rasterizes an indexed triangle mesh into the specified heightfield.
+///  @ingroup recast
+///  @param[in,out]	ctx				The build context to use during the operation.
+///  @param[in]		verts			The vertices. [(x, y, z) * @p nv]
+///  @param[in]		nv				The number of vertices.
+///  @param[in]		tris			The triangle indices. [(vertA, vertB, vertC) * @p nt]
+///  @param[in]		areas			The area id's of the triangles. [Limit: <= #RC_WALKABLE_AREA] [Size: @p nt]
+///  @param[in]		nt				The number of triangles.
+///  @param[in,out]	solid			An initialized heightfield.
+///  @param[in]		flagMergeThr	The distance where the walkable flag is favored over the non-walkable flag. 
+///  								[Limit: >= 0] [Units: vx]
+void rcRasterizeTriangles(rcContext* ctx, const float* verts, const int nv,
+						  const int* tris, const unsigned char* areas, const int nt,
+						  rcHeightfield& solid, const int flagMergeThr = 1);
+
+/// Rasterizes an indexed triangle mesh into the specified heightfield.
+///  @ingroup recast
+///  @param[in,out]	ctx			The build context to use during the operation.
+///  @param[in]		verts		The vertices. [(x, y, z) * @p nv]
+///  @param[in]		nv			The number of vertices.
+///  @param[in]		tris		The triangle indices. [(vertA, vertB, vertC) * @p nt]
+///  @param[in]		areas		The area id's of the triangles. [Limit: <= #RC_WALKABLE_AREA] [Size: @p nt]
+///  @param[in]		nt			The number of triangles.
+///  @param[in,out]	solid		An initialized heightfield.
+///  @param[in]		flagMergeThr	The distance where the walkable flag is favored over the non-walkable flag. 
+///  							[Limit: >= 0] [Units: vx]
+void rcRasterizeTriangles(rcContext* ctx, const float* verts, const int nv,
+						  const unsigned short* tris, const unsigned char* areas, const int nt,
+						  rcHeightfield& solid, const int flagMergeThr = 1);
+
+/// Rasterizes triangles into the specified heightfield.
+///  @ingroup recast
+///  @param[in,out]	ctx				The build context to use during the operation.
+///  @param[in]		verts			The triangle vertices. [(ax, ay, az, bx, by, bz, cx, cy, cz) * @p nt]
+///  @param[in]		areas			The area id's of the triangles. [Limit: <= #RC_WALKABLE_AREA] [Size: @p nt]
+///  @param[in]		nt				The number of triangles.
+///  @param[in,out]	solid			An initialized heightfield.
+///  @param[in]		flagMergeThr	The distance where the walkable flag is favored over the non-walkable flag. 
+///  								[Limit: >= 0] [Units: vx]
+void rcRasterizeTriangles(rcContext* ctx, const float* verts, const unsigned char* areas, const int nt,
+						  rcHeightfield& solid, const int flagMergeThr = 1);
+
+/// Marks non-walkable spans as walkable if their maximum is within @p walkableClimb of a walkable neighbor.
+///  @ingroup recast
+///  @param[in,out]	ctx				The build context to use during the operation.
+///  @param[in]		walkableClimb	Maximum ledge height that is considered to still be traversable. 
+///  								[Limit: >=0] [Units: vx]
+///  @param[in,out]	solid			A fully built heightfield.  (All spans have been added.)
+void rcFilterLowHangingWalkableObstacles(rcContext* ctx, const int walkableClimb, rcHeightfield& solid);
+
+/// Marks spans that are ledges as not-walkable. 
+///  @ingroup recast
+///  @param[in,out]	ctx				The build context to use during the operation.
+///  @param[in]		walkableHeight	Minimum floor to 'ceiling' height that will still allow the floor area to 
+///  								be considered walkable. [Limit: >= 3] [Units: vx]
+///  @param[in]		walkableClimb	Maximum ledge height that is considered to still be traversable. 
+///  								[Limit: >=0] [Units: vx]
+///  @param[in,out]	solid			A fully built heightfield.  (All spans have been added.)
+void rcFilterLedgeSpans(rcContext* ctx, const int walkableHeight,
+						const int walkableClimb, rcHeightfield& solid);
+
+/// Marks walkable spans as not walkable if the clearance above the span is less than the specified height.
+///  @ingroup recast
+///  @param[in,out]	ctx				The build context to use during the operation.
+///  @param[in]		walkableHeight	Minimum floor to 'ceiling' height that will still allow the floor area to 
+///  								be considered walkable. [Limit: >= 3] [Units: vx]
+///  @param[in,out]	solid			A fully built heightfield.  (All spans have been added.)
+void rcFilterWalkableLowHeightSpans(rcContext* ctx, int walkableHeight, rcHeightfield& solid);
+
+/// Returns the number of spans contained in the specified heightfield.
+///  @ingroup recast
+///  @param[in,out]	ctx		The build context to use during the operation.
+///  @param[in]		hf		An initialized heightfield.
+///  @returns The number of spans in the heightfield.
+int rcGetHeightFieldSpanCount(rcContext* ctx, rcHeightfield& hf);
+
+/// @}
+/// @name Compact Heightfield Functions
+/// @see rcCompactHeightfield
+/// @{
+
+/// Builds a compact heightfield representing open space, from a heightfield representing solid space.
+///  @ingroup recast
+///  @param[in,out]	ctx				The build context to use during the operation.
+///  @param[in]		walkableHeight	Minimum floor to 'ceiling' height that will still allow the floor area 
+///  								to be considered walkable. [Limit: >= 3] [Units: vx]
+///  @param[in]		walkableClimb	Maximum ledge height that is considered to still be traversable. 
+///  								[Limit: >=0] [Units: vx]
+///  @param[in]		hf				The heightfield to be compacted.
+///  @param[out]	chf				The resulting compact heightfield. (Must be pre-allocated.)
+///  @returns True if the operation completed successfully.
+bool rcBuildCompactHeightfield(rcContext* ctx, const int walkableHeight, const int walkableClimb,
+							   rcHeightfield& hf, rcCompactHeightfield& chf);
+
+/// Erodes the walkable area within the heightfield by the specified radius. 
+///  @ingroup recast
+///  @param[in,out]	ctx		The build context to use during the operation.
+///  @param[in]		radius	The radius of erosion. [Limits: 0 < value < 255] [Units: vx]
+///  @param[in,out]	chf		The populated compact heightfield to erode.
+///  @returns True if the operation completed successfully.
+bool rcErodeWalkableArea(rcContext* ctx, int radius, rcCompactHeightfield& chf);
+
+/// Applies a median filter to walkable area types (based on area id), removing noise.
+///  @ingroup recast
+///  @param[in,out]	ctx		The build context to use during the operation.
+///  @param[in,out]	chf		A populated compact heightfield.
+///  @returns True if the operation completed successfully.
+bool rcMedianFilterWalkableArea(rcContext* ctx, rcCompactHeightfield& chf);
+
+/// Applies an area id to all spans within the specified bounding box. (AABB) 
+///  @ingroup recast
+///  @param[in,out]	ctx		The build context to use during the operation.
+///  @param[in]		bmin	The minimum of the bounding box. [(x, y, z)]
+///  @param[in]		bmax	The maximum of the bounding box. [(x, y, z)]
+///  @param[in]		areaId	The area id to apply. [Limit: <= #RC_WALKABLE_AREA]
+///  @param[in,out]	chf		A populated compact heightfield.
+void rcMarkBoxArea(rcContext* ctx, const float* bmin, const float* bmax, unsigned char areaId,
+				   rcCompactHeightfield& chf);
+
+/// Applies the area id to all spans within the specified convex polygon.
+///  @ingroup recast
+///  @param[in,out]	ctx		The build context to use during the operation.
+///  @param[in]		verts	The vertices of the polygon [Form: (x, y, z) * @p nverts]
+///  @param[in]		nverts	The number of vertices in the polygon.
+///  @param[in]		hmin	The height of the base of the polygon.
+///  @param[in]		hmax	The height of the top of the polygon.
+///  @param[in]		areaId	The area id to apply. [Limit: <= #RC_WALKABLE_AREA]
+///  @param[in,out]	chf		A populated compact heightfield.
+void rcMarkConvexPolyArea(rcContext* ctx, const float* verts, const int nverts,
+						  const float hmin, const float hmax, unsigned char areaId,
+						  rcCompactHeightfield& chf);
+
+/// Helper function to offset convex polygons for rcMarkConvexPolyArea.
+///  @ingroup recast
+///  @param[in]		verts		The vertices of the polygon [Form: (x, y, z) * @p nverts]
+///  @param[in]		nverts		The number of vertices in the polygon.
+///  @param[out]	outVerts	The offset vertices (should hold up to 2 * @p nverts) [Form: (x, y, z) * return value]
+///  @param[in]		maxOutVerts	The max number of vertices that can be stored to @p outVerts.
+///  @returns Number of vertices in the offset polygon or 0 if too few vertices in @p outVerts.
+int rcOffsetPoly(const float* verts, const int nverts, const float offset,
+				 float* outVerts, const int maxOutVerts);
+
+/// Applies the area id to all spans within the specified cylinder.
+///  @ingroup recast
+///  @param[in,out]	ctx		The build context to use during the operation.
+///  @param[in]		pos		The center of the base of the cylinder. [Form: (x, y, z)] 
+///  @param[in]		r		The radius of the cylinder.
+///  @param[in]		h		The height of the cylinder.
+///  @param[in]		areaId	The area id to apply. [Limit: <= #RC_WALKABLE_AREA]
+///  @param[in,out]	chf	A populated compact heightfield.
+void rcMarkCylinderArea(rcContext* ctx, const float* pos,
+						const float r, const float h, unsigned char areaId,
+						rcCompactHeightfield& chf);
+
+/// Builds the distance field for the specified compact heightfield. 
+///  @ingroup recast
+///  @param[in,out]	ctx		The build context to use during the operation.
+///  @param[in,out]	chf		A populated compact heightfield.
+///  @returns True if the operation completed successfully.
+bool rcBuildDistanceField(rcContext* ctx, rcCompactHeightfield& chf);
+
+/// Builds region data for the heightfield using watershed partitioning. 
+///  @ingroup recast
+///  @param[in,out]	ctx				The build context to use during the operation.
+///  @param[in,out]	chf				A populated compact heightfield.
+///  @param[in]		borderSize		The size of the non-navigable border around the heightfield.
+///  								[Limit: >=0] [Units: vx]
+///  @param[in]		minRegionArea	The minimum number of cells allowed to form isolated island areas.
+///  								[Limit: >=0] [Units: vx].
+///  @param[in]		mergeRegionArea		Any regions with a span count smaller than this value will, if possible,
+///  								be merged with larger regions. [Limit: >=0] [Units: vx] 
+///  @returns True if the operation completed successfully.
+bool rcBuildRegions(rcContext* ctx, rcCompactHeightfield& chf,
+					const int borderSize, const int minRegionArea, const int mergeRegionArea);
+
+/// Builds region data for the heightfield using simple monotone partitioning.
+///  @ingroup recast 
+///  @param[in,out]	ctx				The build context to use during the operation.
+///  @param[in,out]	chf				A populated compact heightfield.
+///  @param[in]		borderSize		The size of the non-navigable border around the heightfield.
+///  								[Limit: >=0] [Units: vx]
+///  @param[in]		minRegionArea	The minimum number of cells allowed to form isolated island areas.
+///  								[Limit: >=0] [Units: vx].
+///  @param[in]		mergeRegionArea	Any regions with a span count smaller than this value will, if possible, 
+///  								be merged with larger regions. [Limit: >=0] [Units: vx] 
+///  @returns True if the operation completed successfully.
+bool rcBuildRegionsMonotone(rcContext* ctx, rcCompactHeightfield& chf,
+							const int borderSize, const int minRegionArea, const int mergeRegionArea);
+
+
+/// Sets the neighbor connection data for the specified direction.
+///  @param[in,out]	s		The span to update; only the 6-bit field for @p dir in its @c con member changes.
+///  @param[in]		dir		The direction to set. [Limits: 0 <= value < 4]
+///  @param[in]		i		The index of the neighbor span. Only the low 6 bits (@p i & 0x3f) are stored.
+inline void rcSetCon(rcCompactSpan& s, int dir, int i)
+{
+	const unsigned int shift = (unsigned int)dir*6;	// Each direction owns a 6-bit field.
+	unsigned int con = s.con;
+	s.con = (con & ~(0x3f << shift)) | (((unsigned int)i & 0x3f) << shift);	// Clear the field, then write the masked index.
+}
+
+/// Gets neighbor connection data for the specified direction.
+///  @param[in]		s		The span to check.
+///  @param[in]		dir		The direction to check. [Limits: 0 <= value < 4]
+///  @return The 6-bit neighbor connection value stored for the specified direction,
+///  	or #RC_NOT_CONNECTED if there is no connection.
+inline int rcGetCon(const rcCompactSpan& s, int dir)
+{
+	const unsigned int shift = (unsigned int)dir*6;	// Each direction owns a 6-bit field.
+	return (s.con >> shift) & 0x3f;	// Move the field to bit 0 and mask off the other directions.
+}
+
+/// Gets the standard width (x-axis) offset for the specified direction.
+///  @param[in]		dir		The direction. [Limits: 0 <= value < 4] (Only the low two bits are used: @p dir & 0x03.)
+///  @return The width offset to apply to the current cell position to move
+///  	in the direction: -1, 0, 1, 0 for directions 0, 1, 2, 3 respectively.
+inline int rcGetDirOffsetX(int dir)
+{
+	const int offset[4] = { -1, 0, 1, 0, };
+	return offset[dir&0x03];	// Masking keeps the table index within 0..3.
+}
+
+/// Gets the standard height (z-axis) offset for the specified direction.
+///  @param[in]		dir		The direction. [Limits: 0 <= value < 4] (Only the low two bits are used: @p dir & 0x03.)
+///  @return The height offset to apply to the current cell position to move
+///  	in the direction: 0, 1, 0, -1 for directions 0, 1, 2, 3 respectively.
+inline int rcGetDirOffsetY(int dir)
+{
+	const int offset[4] = { 0, 1, 0, -1 };
+	return offset[dir&0x03];	// Masking keeps the table index within 0..3.
+}
+
+/// @}
+/// @name Layer, Contour, Polymesh, and Detail Mesh Functions
+/// @see rcHeightfieldLayer, rcContourSet, rcPolyMesh, rcPolyMeshDetail
+/// @{
+
+/// Builds a layer set from the specified compact heightfield.
+///  @ingroup recast
+///  @param[in,out]	ctx			The build context to use during the operation.
+///  @param[in]		chf			A fully built compact heightfield.
+///  @param[in]		borderSize	The size of the non-navigable border around the heightfield. [Limit: >=0] 
+///  							[Units: vx]
+///  @param[in]		walkableHeight	Minimum floor to 'ceiling' height that will still allow the floor area 
+///  							to be considered walkable. [Limit: >= 3] [Units: vx]
+///  @param[out]	lset		The resulting layer set. (Must be pre-allocated.)
+///  @returns True if the operation completed successfully.
+bool rcBuildHeightfieldLayers(rcContext* ctx, rcCompactHeightfield& chf, 
+							  const int borderSize, const int walkableHeight,
+							  rcHeightfieldLayerSet& lset);
+
+/// Builds a contour set from the region outlines in the provided compact heightfield.
+///  @ingroup recast
+///  @param[in,out]	ctx			The build context to use during the operation.
+///  @param[in]		chf			A fully built compact heightfield.
+///  @param[in]		maxError	The maximum distance a simplified contour's border edges should deviate 
+///  							from the original raw contour. [Limit: >=0] [Units: wu]
+///  @param[in]		maxEdgeLen	The maximum allowed length for contour edges along the border of the mesh. 
+///  							[Limit: >=0] [Units: vx]
+///  @param[out]	cset		The resulting contour set. (Must be pre-allocated.)
+///  @param[in]		flags		The build flags. (See: #rcBuildContoursFlags)
+///  @returns True if the operation completed successfully.
+bool rcBuildContours(rcContext* ctx, rcCompactHeightfield& chf,
+					 const float maxError, const int maxEdgeLen,
+					 rcContourSet& cset, const int flags = RC_CONTOUR_TESS_WALL_EDGES);
+
+/// Builds a polygon mesh from the provided contours.
+///  @ingroup recast
+///  @param[in,out]	ctx		The build context to use during the operation.
+///  @param[in]		cset	A fully built contour set.
+///  @param[in]		nvp		The maximum number of vertices allowed for polygons generated during the 
+///  						contour to polygon conversion process. [Limit: >= 3] 
+///  @param[out]	mesh	The resulting polygon mesh. (Must be pre-allocated.)
+///  @returns True if the operation completed successfully.
+bool rcBuildPolyMesh(rcContext* ctx, rcContourSet& cset, const int nvp, rcPolyMesh& mesh);
+
+/// Merges multiple polygon meshes into a single mesh.
+///  @ingroup recast
+///  @param[in,out]	ctx		The build context to use during the operation.
+///  @param[in]		meshes	An array of polygon meshes to merge. [Size: @p nmeshes]
+///  @param[in]		nmeshes	The number of polygon meshes in the meshes array.
+///  @param[out]	mesh	The resulting polygon mesh. (Must be pre-allocated.)
+///  @returns True if the operation completed successfully.
+bool rcMergePolyMeshes(rcContext* ctx, rcPolyMesh** meshes, const int nmeshes, rcPolyMesh& mesh);
+
+/// Builds a detail mesh from the provided polygon mesh.
+///  @ingroup recast
+///  @param[in,out]	ctx				The build context to use during the operation.
+///  @param[in]		mesh			A fully built polygon mesh.
+///  @param[in]		chf				The compact heightfield used to build the polygon mesh.
+///  @param[in]		sampleDist		Sets the distance to use when sampling the heightfield. [Limit: >=0] [Units: wu]
+///  @param[in]		sampleMaxError	The maximum distance the detail mesh surface should deviate from 
+///  								heightfield data. [Limit: >=0] [Units: wu]
+///  @param[out]	dmesh			The resulting detail mesh.  (Must be pre-allocated.)
+///  @returns True if the operation completed successfully.
+bool rcBuildPolyMeshDetail(rcContext* ctx, const rcPolyMesh& mesh, const rcCompactHeightfield& chf,
+						   const float sampleDist, const float sampleMaxError,
+						   rcPolyMeshDetail& dmesh);
+
+/// Copies the poly mesh data from src to dst.
+///  @ingroup recast
+///  @param[in,out]	ctx		The build context to use during the operation.
+///  @param[in]		src		The source mesh to copy from.
+///  @param[out]	dst		The resulting polygon mesh. (Must be pre-allocated, must be empty mesh.)
+///  @returns True if the operation completed successfully.
+bool rcCopyPolyMesh(rcContext* ctx, const rcPolyMesh& src, rcPolyMesh& dst);
+
+/// Merges multiple detail meshes into a single detail mesh.
+///  @ingroup recast
+///  @param[in,out]	ctx		The build context to use during the operation.
+///  @param[in]		meshes	An array of detail meshes to merge. [Size: @p nmeshes]
+///  @param[in]		nmeshes	The number of detail meshes in the meshes array.
+///  @param[out]	mesh	The resulting detail mesh. (Must be pre-allocated.)
+///  @returns True if the operation completed successfully.
+bool rcMergePolyMeshDetails(rcContext* ctx, rcPolyMeshDetail** meshes, const int nmeshes, rcPolyMeshDetail& mesh);
+
+/// @}
+
+#endif // RECAST_H
+
+///////////////////////////////////////////////////////////////////////////
+
+// Due to the large amount of detail documentation for this file, 
+// the content normally located at the end of the header file has been separated
+// out to a file in /Docs/Extern.
diff --git a/Engine/lib/recast/Recast/Include/RecastAlloc.h b/Engine/lib/recast/Recast/Include/RecastAlloc.h
new file mode 100644
index 000000000..438be9ea5
--- /dev/null
+++ b/Engine/lib/recast/Recast/Include/RecastAlloc.h
@@ -0,0 +1,124 @@
+//
+// Copyright (c) 2009-2010 Mikko Mononen memon@inside.org
+//
+// This software is provided 'as-is', without any express or implied
+// warranty.  In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source distribution.
+//
+
+#ifndef RECASTALLOC_H
+#define RECASTALLOC_H
+
+/// Lifetime hints passed to the memory allocator (the @c hint argument of #rcAlloc and #rcAllocFunc)
+/// describing how long the requested memory is expected to be used.
+enum rcAllocHint
+{
+	RC_ALLOC_PERM,		///< Memory will persist after a function call.
+	RC_ALLOC_TEMP		///< Memory used temporarily within a function.
+};
+
+/// A memory allocation function.
+///  @param[in]		size	The size, in bytes of memory, to allocate.
+///  @param[in]		hint	A hint to the allocator on how long the memory is expected to be in use.
+///  @return A pointer to the beginning of the allocated memory block, or null if the allocation failed.
+///  @see rcAllocSetCustom
+typedef void* (rcAllocFunc)(int size, rcAllocHint hint);
+
+/// A memory deallocation function.
+///  @param[in]		ptr		A pointer to a memory block previously allocated using #rcAllocFunc.
+/// @see rcAllocSetCustom
+typedef void (rcFreeFunc)(void* ptr);
+
+/// Sets the base custom allocation functions to be used by Recast.
+///  @param[in]		allocFunc	The memory allocation function to be used by #rcAlloc
+///  @param[in]		freeFunc	The memory de-allocation function to be used by #rcFree
+void rcAllocSetCustom(rcAllocFunc *allocFunc, rcFreeFunc *freeFunc);
+
+/// Allocates a memory block.
+///  @param[in]		size	The size, in bytes of memory, to allocate.
+///  @param[in]		hint	A hint to the allocator on how long the memory is expected to be in use.
+///  @return A pointer to the beginning of the allocated memory block, or null if the allocation failed.
+/// @see rcFree
+void* rcAlloc(int size, rcAllocHint hint);
+
+/// Deallocates a memory block.
+///  @param[in]		ptr		A pointer to a memory block previously allocated using #rcAlloc.
+/// @see rcAlloc
+void rcFree(void* ptr);
+
+
+/// A simple dynamic array of integers whose storage is released with #rcFree on destruction.
+class rcIntArray
+{
+	int* m_data;	///< Element storage; null until the first resize.
+	int m_size, m_cap;	///< m_size is the element count; m_cap is presumably the allocated capacity (managed by resize(), defined elsewhere).
+	inline rcIntArray(const rcIntArray&);	// Private and not defined in this header: copying is disabled.
+	inline rcIntArray& operator=(const rcIntArray&);	// Private and not defined in this header: assignment is disabled.
+public:
+
+	/// Constructs an empty instance (null storage, size and capacity of zero).
+	inline rcIntArray() : m_data(0), m_size(0), m_cap(0) {}
+
+	/// Constructs an instance and immediately resizes it to the specified size.
+	///  @param[in]		n	The initial size of the integer array.
+	inline rcIntArray(int n) : m_data(0), m_size(0), m_cap(0) { resize(n); }
+	inline ~rcIntArray() { rcFree(m_data); }	// Releases the element storage.
+
+	/// Specifies the new size of the integer array.
+	///  @param[in]		n	The new size of the integer array.
+	void resize(int n);
+
+	/// Pushes the specified integer onto the end of the array, increasing the size by one via resize().
+	///  @param[in]		item	The new value.
+	inline void push(int item) { resize(m_size+1); m_data[m_size-1] = item; }
+
+	/// Returns the value at the end of the array and reduces the size by one.
+	///  @return The value at the end of the array. @warning On an empty array the size stays zero and element 0 is still read; do not call on an empty array.
+	inline int pop() { if (m_size > 0) m_size--; return m_data[m_size]; }
+
+	/// Read-only access to the value at the specified array index.
+	/// @warning Does not provide overflow protection.
+	///  @param[in]		i	The index of the value.
+	inline const int& operator[](int i) const { return m_data[i]; }
+
+	/// Mutable access to the value at the specified array index.
+	/// @warning Does not provide overflow protection.
+	///  @param[in]		i	The index of the value.
+	inline int& operator[](int i) { return m_data[i]; }
+
+	/// The current size of the integer array.
+	inline int size() const { return m_size; }
+};
+
+/// A simple helper class that passes its array pointer to #rcFree when it goes out of scope.
+/// @note This class is rarely if ever used by the end user. NOTE(review): copy construction is not suppressed; a copy would free the same pointer twice.
+template<class T> class rcScopedDelete
+{
+	T* ptr;	///< The owned pointer; passed to rcFree() by the destructor.
+	inline T* operator=(T* p);	// Private and not defined here: the pointer cannot be reassigned.
+public:
+
+	/// Constructs an instance with a null pointer.
+	inline rcScopedDelete() : ptr(0) {}
+
+	/// Constructs an instance with the specified pointer.
+	///  @param[in]		p	A pointer to an allocated array; it is released with #rcFree on destruction.
+	inline rcScopedDelete(T* p) : ptr(p) {}
+	inline ~rcScopedDelete() { rcFree(ptr); }	// Releases the held pointer.
+
+	/// The root array pointer.
+	///  @return The root array pointer.
+	inline operator T*() { return ptr; }
+};
+
+#endif
diff --git a/Engine/lib/recast/Recast/Include/RecastAssert.h b/Engine/lib/recast/Recast/Include/RecastAssert.h
new file mode 100644
index 000000000..2aca0d9a1
--- /dev/null
+++ b/Engine/lib/recast/Recast/Include/RecastAssert.h
@@ -0,0 +1,33 @@
+//
+// Copyright (c) 2009-2010 Mikko Mononen memon@inside.org
+//
+// This software is provided 'as-is', without any express or implied
+// warranty.  In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source distribution.
+//
+
+#ifndef RECASTASSERT_H
+#define RECASTASSERT_H
+
+// Note: This header file's only purpose is to define rcAssert.
+// Feel free to change the file and include your own implementation instead.
+
+#ifdef NDEBUG
+// Release builds: rcAssert(x) only names x inside sizeof, so x is never evaluated. From http://cnicholson.net/2009/02/stupid-c-tricks-adventures-in-assert/
+#	define rcAssert(x) do { (void)sizeof(x); } while((void)(__LINE__==-1),false)  
+#else // !NDEBUG: forward to the standard assert.
+#	include <assert.h> 
+#	define rcAssert assert
+#endif // NDEBUG
+
+#endif // RECASTASSERT_H
diff --git a/Engine/lib/recast/Recast/Source/Recast.cpp b/Engine/lib/recast/Recast/Source/Recast.cpp
new file mode 100644
index 000000000..803daac3b
--- /dev/null
+++ b/Engine/lib/recast/Recast/Source/Recast.cpp
@@ -0,0 +1,493 @@
+//
+// Copyright (c) 2009-2010 Mikko Mononen memon@inside.org
+//
+// This software is provided 'as-is', without any express or implied
+// warranty.  In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source distribution.
+//
+
+#include <float.h>
+#define _USE_MATH_DEFINES
+#include <math.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include "Recast.h"
+#include "RecastAlloc.h"
+#include "RecastAssert.h"
+
+float rcSqrt(float x)	// Single-precision square root; thin wrapper over sqrtf().
+{
+	return sqrtf(x);
+}
+
+/// @class rcContext
+/// @par
+///
+/// This class does not provide logging or timer functionality on its 
+/// own.  Both must be provided by a concrete implementation 
+/// by overriding the protected member functions.  Also, this class does not 
+/// provide an interface for extracting log messages. (Only adding them.) 
+/// So concrete implementations must provide one.
+///
+/// If no logging or timers are required, just pass an instance of this 
+/// class through the Recast build process.
+///
+
+/// @par
+///
+/// Formats into a 512-byte stack buffer (longer output is truncated) and forwards to doLog(). Example:
+/// @code
+/// // Where ctx is an instance of rcContext and filepath is a char array.
+/// ctx->log(RC_LOG_ERROR, "buildTiledNavigation: Could not load '%s'", filepath);
+/// @endcode
+void rcContext::log(const rcLogCategory category, const char* format, ...)
+{
+	if (!m_logEnabled)
+		return;
+	static const int MSG_SIZE = 512;
+	char msg[MSG_SIZE] = { 0 };	// Zero-filled so the strlen() below is always bounded.
+	va_list ap;
+	va_start(ap, format);
+	int len = vsnprintf(msg, MSG_SIZE, format, ap);
+	va_end(ap);
+	if (len < 0 || len >= MSG_SIZE)	// Negative: formatting error, or truncation on older CRTs that skip the terminator.
+	{
+		msg[MSG_SIZE-1] = '\0';
+		len = (len < 0) ? (int)strlen(msg) : MSG_SIZE-1;	// Never hand doLog a negative length.
+	}
+	doLog(category, msg, len);
+}
+
+rcHeightfield* rcAllocHeightfield()	// Returns a zero-filled rcHeightfield, or null when rcAlloc fails.
+{
+	rcHeightfield* hf = (rcHeightfield*)rcAlloc(sizeof(rcHeightfield), RC_ALLOC_PERM);
+	if (hf) memset(hf, 0, sizeof(rcHeightfield));	// Do not memset through a failed allocation.
+	return hf;
+}
+
+void rcFreeHeightField(rcHeightfield* hf)
+{
+	if (hf == 0) return;	// Null is accepted and ignored.
+	// Release the span pointer array.
+	rcFree(hf->spans);
+	// Walk the pool list, unlinking each node before releasing it.
+	while (hf->pools != 0)
+	{
+		rcSpanPool* const doomed = hf->pools;
+		hf->pools = doomed->next;
+		rcFree(doomed);
+	}
+	rcFree(hf);
+}
+
+rcCompactHeightfield* rcAllocCompactHeightfield()	// Returns a zero-filled rcCompactHeightfield, or null when rcAlloc fails.
+{
+	rcCompactHeightfield* chf = (rcCompactHeightfield*)rcAlloc(sizeof(rcCompactHeightfield), RC_ALLOC_PERM);
+	if (chf) memset(chf, 0, sizeof(rcCompactHeightfield));	// Do not memset through a failed allocation.
+	return chf;
+}
+
+void rcFreeCompactHeightfield(rcCompactHeightfield* chf)
+{
+	if (!chf) return;	// Null is accepted and ignored.
+	rcFree(chf->cells);
+	rcFree(chf->spans);
+	rcFree(chf->dist);
+	rcFree(chf->areas);
+	rcFree(chf);	// The struct itself goes last, after the arrays it points to.
+}
+
+
+rcHeightfieldLayerSet* rcAllocHeightfieldLayerSet()	// Returns a zero-filled rcHeightfieldLayerSet, or null when rcAlloc fails.
+{
+	rcHeightfieldLayerSet* lset = (rcHeightfieldLayerSet*)rcAlloc(sizeof(rcHeightfieldLayerSet), RC_ALLOC_PERM);
+	if (lset) memset(lset, 0, sizeof(rcHeightfieldLayerSet));	// Do not memset through a failed allocation.
+	return lset;
+}
+
+void rcFreeHeightfieldLayerSet(rcHeightfieldLayerSet* lset)
+{
+	if (!lset) return;	// Null is accepted and ignored.
+	for (int i = 0; i < lset->nlayers; ++i)	// Release the three arrays held by each layer.
+	{
+		rcFree(lset->layers[i].heights);
+		rcFree(lset->layers[i].areas);
+		rcFree(lset->layers[i].cons);
+	}
+	rcFree(lset->layers);	// Then the layer array itself.
+	rcFree(lset);	// And finally the set.
+}
+
+
+rcContourSet* rcAllocContourSet()	// Returns a zero-filled rcContourSet, or null when rcAlloc fails.
+{
+	rcContourSet* cset = (rcContourSet*)rcAlloc(sizeof(rcContourSet), RC_ALLOC_PERM);
+	if (cset) memset(cset, 0, sizeof(rcContourSet));	// Do not memset through a failed allocation.
+	return cset;
+}
+
+void rcFreeContourSet(rcContourSet* cset)
+{
+	if (!cset) return;	// Null is accepted and ignored.
+	for (int i = 0; i < cset->nconts; ++i)	// Release both vertex arrays of every contour.
+	{
+		rcFree(cset->conts[i].verts);
+		rcFree(cset->conts[i].rverts);
+	}
+	rcFree(cset->conts);	// Then the contour array itself.
+	rcFree(cset);	// And finally the set.
+}
+
+rcPolyMesh* rcAllocPolyMesh()	// Returns a zero-filled rcPolyMesh, or null when rcAlloc fails.
+{
+	rcPolyMesh* pmesh = (rcPolyMesh*)rcAlloc(sizeof(rcPolyMesh), RC_ALLOC_PERM);
+	if (pmesh) memset(pmesh, 0, sizeof(rcPolyMesh));	// Do not memset through a failed allocation.
+	return pmesh;
+}
+
+void rcFreePolyMesh(rcPolyMesh* pmesh)
+{
+	if (!pmesh) return;	// Null is accepted and ignored.
+	rcFree(pmesh->verts);
+	rcFree(pmesh->polys);
+	rcFree(pmesh->regs);
+	rcFree(pmesh->flags);
+	rcFree(pmesh->areas);
+	rcFree(pmesh);	// The struct itself goes last, after the arrays it points to.
+}
+
+rcPolyMeshDetail* rcAllocPolyMeshDetail()	// Returns a zero-filled rcPolyMeshDetail, or null when rcAlloc fails.
+{
+	rcPolyMeshDetail* dmesh = (rcPolyMeshDetail*)rcAlloc(sizeof(rcPolyMeshDetail), RC_ALLOC_PERM);
+	if (dmesh) memset(dmesh, 0, sizeof(rcPolyMeshDetail));	// Do not memset through a failed allocation.
+	return dmesh;
+}
+
+void rcFreePolyMeshDetail(rcPolyMeshDetail* dmesh)
+{
+	if (!dmesh) return;	// Null is accepted and ignored.
+	rcFree(dmesh->meshes);
+	rcFree(dmesh->verts);
+	rcFree(dmesh->tris);
+	rcFree(dmesh);	// The struct itself goes last, after the arrays it points to.
+}
+
+void rcCalcBounds(const float* verts, int nv, float* bmin, float* bmax)
+{
+	// Seed both corners with the first vertex, then grow the box over the rest.
+	rcVcopy(bmin, verts);
+	rcVcopy(bmax, verts);
+	int vi = 1;
+	while (vi < nv)
+	{
+		rcVmin(bmin, verts + vi*3);
+		rcVmax(bmax, verts + vi*3);
+		++vi;
+	}
+}
+
+void rcCalcGridSize(const float* bmin, const float* bmax, float cs, int* w, int* h)
+{
+	*w = (int)(0.5f + (bmax[0] - bmin[0])/cs);	// x extent in cells; 0.5 is added before truncation.
+	*h = (int)(0.5f + (bmax[2] - bmin[2])/cs);	// z extent in cells; the y component is not used.
+}
+
+/// @par
+///
+/// See the #rcConfig documentation for more information on the configuration parameters.
+/// 
+/// @see rcAllocHeightfield, rcHeightfield 
+bool rcCreateHeightfield(rcContext* /*ctx*/, rcHeightfield& hf, int width, int height,
+						 const float* bmin, const float* bmax,
+						 float cs, float ch)
+{
+	// ctx is unused here; the assert on it stays disabled.
+//	rcAssert(ctx);
+	
+	rcVcopy(hf.bmin, bmin);
+	rcVcopy(hf.bmax, bmax);
+	hf.width = width;
+	hf.height = height;
+	hf.cs = cs;
+	hf.ch = ch;
+	// One span-list head per grid cell, all starting out null.
+	const size_t tableBytes = sizeof(rcSpan*)*hf.width*hf.height;
+	hf.spans = (rcSpan**)rcAlloc(tableBytes, RC_ALLOC_PERM);
+	if (hf.spans) memset(hf.spans, 0, tableBytes);
+	return hf.spans != 0;
+}
+
+static void calcTriNormal(const float* v0, const float* v1, const float* v2, float* norm)
+{
+	float ab[3], ac[3];	// The two edges leaving v0.
+	rcVsub(ac, v2, v0);
+	rcVsub(ab, v1, v0);
+	rcVcross(norm, ab, ac);	// norm = (v1-v0) x (v2-v0)
+	rcVnormalize(norm);
+}
+
+/// @par
+///
+/// Only sets the area id's for the walkable triangles.  Does not alter the
+/// area id's for unwalkable triangles.
+/// 
+/// See the #rcConfig documentation for more information on the configuration parameters.
+/// 
+/// @see rcHeightfield, rcClearUnwalkableTriangles, rcRasterizeTriangles
+void rcMarkWalkableTriangles(rcContext* /*ctx*/, const float walkableSlopeAngle,
+							 const float* verts, int /*nv*/,
+							 const int* tris, int nt,
+							 unsigned char* areas)
+{
+	// ctx and nv are unused here; the assert on ctx stays disabled.
+//	rcAssert(ctx);
+	
+	// A face counts as walkable when the y component of its normal exceeds cos(slope angle).
+	const float minNormalY = cosf(walkableSlopeAngle/180.0f*RC_PI);
+	float n[3];
+	
+	for (int t = 0; t < nt; ++t)
+	{
+		const int* idx = tris + t*3;
+		calcTriNormal(verts + idx[0]*3, verts + idx[1]*3, verts + idx[2]*3, n);
+		if (!(n[1] > minNormalY))
+			continue;
+		areas[t] = RC_WALKABLE_AREA;
+	}
+}
+
+/// @par
+///
+/// Only sets the area id's for the unwalkable triangles.  Does not alter the
+/// area id's for walkable triangles.
+/// 
+/// See the #rcConfig documentation for more information on the configuration parameters.
+/// 
+/// @see rcHeightfield, rcMarkWalkableTriangles, rcRasterizeTriangles
+void rcClearUnwalkableTriangles(rcContext* /*ctx*/, const float walkableSlopeAngle,
+								const float* verts, int /*nv*/,
+								const int* tris, int nt,
+								unsigned char* areas)
+{
+	// ctx and nv are unused here; the assert on ctx stays disabled.
+//	rcAssert(ctx);
+	
+	// A face counts as unwalkable when the y component of its normal is at most cos(slope angle).
+	const float minNormalY = cosf(walkableSlopeAngle/180.0f*RC_PI);
+	float n[3];
+	
+	for (int t = 0; t < nt; ++t)
+	{
+		const int* idx = tris + t*3;
+		calcTriNormal(verts + idx[0]*3, verts + idx[1]*3, verts + idx[2]*3, n);
+		if (!(n[1] <= minNormalY))
+			continue;
+		areas[t] = RC_NULL_AREA;
+	}
+}
+
+int rcGetHeightFieldSpanCount(rcContext* /*ctx*/, rcHeightfield& hf)
+{
+	// ctx is unused here; the assert on it stays disabled.
+//	rcAssert(ctx);
+	
+	// Visit every column of the grid and tally the spans whose area is not RC_NULL_AREA.
+	int tally = 0;
+	const int cols = hf.width, rows = hf.height;
+	for (int row = 0; row < rows; ++row)
+	{
+		for (int col = 0; col < cols; ++col)
+		{
+			const rcSpan* span = hf.spans[col + row*cols];
+			for (; span != 0; span = span->next)
+			{
+				tally += (span->area != RC_NULL_AREA) ? 1 : 0;
+			}
+		}
+	}
+	return tally;
+}
+
+/// @par
+///
+/// This is just the beginning of the process of fully building a compact heightfield.
+/// Various filters may be applied, then the distance field and regions built.
+/// E.g: #rcBuildDistanceField and #rcBuildRegions
+///
+/// See the #rcConfig documentation for more information on the configuration parameters.
+///
+/// @see rcAllocCompactHeightfield, rcHeightfield, rcCompactHeightfield, rcConfig
+bool rcBuildCompactHeightfield(rcContext* ctx, const int walkableHeight, const int walkableClimb,
+							   rcHeightfield& hf, rcCompactHeightfield& chf)
+{
+	rcAssert(ctx);
+	ctx->startTimer(RC_TIMER_BUILD_COMPACTHEIGHTFIELD);
+	
+	const int w = hf.width;
+	const int h = hf.height;
+	const int spanCount = rcGetHeightFieldSpanCount(ctx, hf);
+
+	// Fill in header.
+	chf.width = w;
+	chf.height = h;
+	chf.spanCount = spanCount;
+	chf.walkableHeight = walkableHeight;
+	chf.walkableClimb = walkableClimb;
+	chf.maxRegions = 0;
+	rcVcopy(chf.bmin, hf.bmin);
+	rcVcopy(chf.bmax, hf.bmax);
+	chf.bmax[1] += walkableHeight*hf.ch;
+	chf.cs = hf.cs;
+	chf.ch = hf.ch;
+	chf.cells = (rcCompactCell*)rcAlloc(sizeof(rcCompactCell)*w*h, RC_ALLOC_PERM);
+	if (!chf.cells)
+	{
+		ctx->log(RC_LOG_ERROR, "rcBuildCompactHeightfield: Out of memory 'chf.cells' (%d)", w*h);
+		ctx->stopTimer(RC_TIMER_BUILD_COMPACTHEIGHTFIELD);	// Keep start/stop balanced on failure.
+		return false;
+	}
+	memset(chf.cells, 0, sizeof(rcCompactCell)*w*h);
+	chf.spans = (rcCompactSpan*)rcAlloc(sizeof(rcCompactSpan)*spanCount, RC_ALLOC_PERM);
+	if (!chf.spans)
+	{
+		ctx->log(RC_LOG_ERROR, "rcBuildCompactHeightfield: Out of memory 'chf.spans' (%d)", spanCount);
+		ctx->stopTimer(RC_TIMER_BUILD_COMPACTHEIGHTFIELD);	// Keep start/stop balanced on failure.
+		return false;
+	}
+	memset(chf.spans, 0, sizeof(rcCompactSpan)*spanCount);
+	chf.areas = (unsigned char*)rcAlloc(sizeof(unsigned char)*spanCount, RC_ALLOC_PERM);
+	if (!chf.areas)
+	{
+		ctx->log(RC_LOG_ERROR, "rcBuildCompactHeightfield: Out of memory 'chf.areas' (%d)", spanCount);
+		ctx->stopTimer(RC_TIMER_BUILD_COMPACTHEIGHTFIELD);	// Keep start/stop balanced on failure.
+		return false;
+	}
+	memset(chf.areas, RC_NULL_AREA, sizeof(unsigned char)*spanCount);
+	const int MAX_HEIGHT = 0xffff;	// Used as the ceiling above the topmost span of a column.
+	
+	// Fill in cells and spans.
+	int idx = 0;
+	for (int y = 0; y < h; ++y)
+	{
+		for (int x = 0; x < w; ++x)
+		{
+			const rcSpan* s = hf.spans[x + y*w];
+			// If there are no spans at this cell, just leave the data to index=0, count=0.
+			if (!s) continue;
+			rcCompactCell& c = chf.cells[x+y*w];
+			c.index = idx;
+			c.count = 0;
+			while (s)
+			{
+				if (s->area != RC_NULL_AREA)
+				{
+					const int bot = (int)s->smax;
+					const int top = s->next ? (int)s->next->smin : MAX_HEIGHT;
+					chf.spans[idx].y = (unsigned short)rcClamp(bot, 0, 0xffff);
+					chf.spans[idx].h = (unsigned char)rcClamp(top - bot, 0, 0xff);
+					chf.areas[idx] = s->area;
+					idx++;
+					c.count++;
+				}
+				s = s->next;
+			}
+		}
+	}
+
+	// Find neighbour connections.
+	const int MAX_LAYERS = RC_NOT_CONNECTED-1;
+	int tooHighNeighbour = 0;
+	for (int y = 0; y < h; ++y)
+	{
+		for (int x = 0; x < w; ++x)
+		{
+			const rcCompactCell& c = chf.cells[x+y*w];
+			for (int i = (int)c.index, ni = (int)(c.index+c.count); i < ni; ++i)
+			{
+				rcCompactSpan& s = chf.spans[i];
+				
+				for (int dir = 0; dir < 4; ++dir)
+				{
+					rcSetCon(s, dir, RC_NOT_CONNECTED);
+					const int nx = x + rcGetDirOffsetX(dir);
+					const int ny = y + rcGetDirOffsetY(dir);
+					// First check that the neighbour cell is in bounds.
+					if (nx < 0 || ny < 0 || nx >= w || ny >= h)
+						continue;
+						
+					// Iterate over all neighbour spans and check if any of them is
+					// accessible from current cell.
+					const rcCompactCell& nc = chf.cells[nx+ny*w];
+					for (int k = (int)nc.index, nk = (int)(nc.index+nc.count); k < nk; ++k)
+					{
+						const rcCompactSpan& ns = chf.spans[k];
+						const int bot = rcMax(s.y, ns.y);
+						const int top = rcMin(s.y+s.h, ns.y+ns.h);
+
+						// Check that the gap between the spans is walkable,
+						// and that the climb height between the gaps is not too high.
+						if ((top - bot) >= walkableHeight && rcAbs((int)ns.y - (int)s.y) <= walkableClimb)
+						{
+							// Mark direction as walkable.
+							const int lidx = k - (int)nc.index;
+							if (lidx < 0 || lidx > MAX_LAYERS)
+							{
+								tooHighNeighbour = rcMax(tooHighNeighbour, lidx);
+								continue;
+							}
+							rcSetCon(s, dir, lidx);
+							break;
+						}
+					}
+					
+				}
+			}
+		}
+	}
+	
+	if (tooHighNeighbour > MAX_LAYERS)
+	{
+		ctx->log(RC_LOG_ERROR, "rcBuildCompactHeightfield: Heightfield has too many layers %d (max: %d)",
+				 tooHighNeighbour, MAX_LAYERS);
+	}
+	ctx->stopTimer(RC_TIMER_BUILD_COMPACTHEIGHTFIELD);
+	
+	return true;
+}
+
+/*
+static int getHeightfieldMemoryUsage(const rcHeightfield& hf)
+{
+	int size = 0;
+	size += sizeof(hf);
+	size += hf.width * hf.height * sizeof(rcSpan*);
+	
+	rcSpanPool* pool = hf.pools;
+	while (pool)
+	{
+		size += (sizeof(rcSpanPool) - sizeof(rcSpan)) + sizeof(rcSpan)*RC_SPANS_PER_POOL;
+		pool = pool->next;
+	}
+	return size;
+}
+
+static int getCompactHeightFieldMemoryusage(const rcCompactHeightfield& chf)
+{
+	int size = 0;
+	size += sizeof(rcCompactHeightfield);
+	size += sizeof(rcCompactSpan) * chf.spanCount;
+	size += sizeof(rcCompactCell) * chf.width * chf.height;
+	return size;
+}
+*/
\ No newline at end of file
diff --git a/Engine/lib/recast/Recast/Source/RecastAlloc.cpp b/Engine/lib/recast/Recast/Source/RecastAlloc.cpp
new file mode 100644
index 000000000..b5ec15161
--- /dev/null
+++ b/Engine/lib/recast/Recast/Source/RecastAlloc.cpp
@@ -0,0 +1,88 @@
+//
+// Copyright (c) 2009-2010 Mikko Mononen memon@inside.org
+//
+// This software is provided 'as-is', without any express or implied
+// warranty.  In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source distribution.
+//
+
+#include <stdlib.h>
+#include <string.h>
+#include "RecastAlloc.h"
+
+static void *rcAllocDefault(int size, rcAllocHint)	// Default allocation hook: plain malloc(); the hint is ignored.
+{
+	return malloc(size);
+}
+
+static void rcFreeDefault(void *ptr)	// Default release hook: plain free().
+{
+	free(ptr);
+}
+
+static rcAllocFunc* sRecastAllocFunc = rcAllocDefault;	// Hook called by rcAlloc(); replaced via rcAllocSetCustom().
+static rcFreeFunc* sRecastFreeFunc = rcFreeDefault;	// Hook called by rcFree(); replaced via rcAllocSetCustom().
+
+/// Installs custom allocation hooks; a null argument selects the built-in default for that hook. @see rcAlloc, rcFree
+void rcAllocSetCustom(rcAllocFunc *allocFunc, rcFreeFunc *freeFunc)
+{
+	sRecastAllocFunc = allocFunc ? allocFunc : rcAllocDefault;
+	sRecastFreeFunc = freeFunc ? freeFunc : rcFreeDefault;
+}
+
+/// Forwards the request to the currently installed allocation hook and returns its result. @see rcAllocSetCustom
+void* rcAlloc(int size, rcAllocHint hint)
+{
+	return sRecastAllocFunc(size, hint);
+}
+
+/// @par
+///
+/// @warning This function leaves the value of @p ptr unchanged.  So it still
+/// points to the same (now invalid) location, and not to null.
+/// 
+/// @see rcAllocSetCustom
+void rcFree(void* ptr)
+{
+	if (!ptr) return;	// Null never reaches the installed hook.
+	sRecastFreeFunc(ptr);
+}
+
+/// @class rcIntArray
+///
+/// While it is possible to pre-allocate a specific array size during 
+/// construction or by using the #resize method, certain methods will 
+/// automatically resize the array as needed.
+///
+/// @warning The array memory is not initialized to zero when the size is 
+/// manually set during construction or when using #resize.
+
+/// @par
+///
+/// Using this method ensures the array is at least large enough to hold
+/// the specified number of elements.  This can improve performance by
+/// avoiding auto-resizing during use.
+void rcIntArray::resize(int n)
+{
+	if (m_cap < n)
+	{
+		// Grow geometrically: start at n when empty, otherwise keep doubling until n fits.
+		for (m_cap = m_cap ? m_cap : n; m_cap < n; m_cap *= 2) {}
+		int* const grown = (int*)rcAlloc(m_cap*sizeof(int), RC_ALLOC_TEMP);
+		if (grown && m_size) memcpy(grown, m_data, m_size*sizeof(int));	// Carry the old contents over.
+		rcFree(m_data);
+		m_data = grown;
+	}
+	m_size = n;
+}
+
diff --git a/Engine/lib/recast/Recast/Source/RecastArea.cpp b/Engine/lib/recast/Recast/Source/RecastArea.cpp
new file mode 100644
index 000000000..1a338cd9b
--- /dev/null
+++ b/Engine/lib/recast/Recast/Source/RecastArea.cpp
@@ -0,0 +1,602 @@
+//
+// Copyright (c) 2009-2010 Mikko Mononen memon@inside.org
+//
+// This software is provided 'as-is', without any express or implied
+// warranty.  In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source distribution.
+//
+
+#include <float.h>
+#define _USE_MATH_DEFINES
+#include <math.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include "Recast.h"
+#include "RecastAlloc.h"
+#include "RecastAssert.h"
+
+/// @par 
+/// 
+/// Basically, any spans that are closer to a boundary or obstruction than the specified radius 
+/// are marked as unwalkable.
+///
+/// This method is usually called immediately after the heightfield has been built.
+///
+/// @see rcCompactHeightfield, rcBuildCompactHeightfield, rcConfig::walkableRadius
+bool rcErodeWalkableArea(rcContext* ctx, int radius, rcCompactHeightfield& chf)
+{
+	rcAssert(ctx);
+	
+	const int w = chf.width;
+	const int h = chf.height;
+	
+	ctx->startTimer(RC_TIMER_ERODE_AREA);
+	
+	unsigned char* dist = (unsigned char*)rcAlloc(sizeof(unsigned char)*chf.spanCount, RC_ALLOC_TEMP);
+	if (!dist)
+	{
+		ctx->log(RC_LOG_ERROR, "erodeWalkableArea: Out of memory 'dist' (%d).", chf.spanCount);
+		ctx->stopTimer(RC_TIMER_ERODE_AREA);	// Keep start/stop balanced on failure.
+		return false;
+	}
+	// Init distance to the maximum (255); the passes below only ever lower it.
+	memset(dist, 0xff, sizeof(unsigned char)*chf.spanCount);
+	
+	// Mark boundary cells.
+	for (int y = 0; y < h; ++y)
+	{
+		for (int x = 0; x < w; ++x)
+		{
+			const rcCompactCell& c = chf.cells[x+y*w];
+			for (int i = (int)c.index, ni = (int)(c.index+c.count); i < ni; ++i)
+			{
+				if (chf.areas[i] == RC_NULL_AREA)
+				{
+					dist[i] = 0;
+				}
+				else
+				{
+					const rcCompactSpan& s = chf.spans[i];
+					int nc = 0;
+					for (int dir = 0; dir < 4; ++dir)
+					{
+						if (rcGetCon(s, dir) != RC_NOT_CONNECTED)
+						{
+							const int nx = x + rcGetDirOffsetX(dir);
+							const int ny = y + rcGetDirOffsetY(dir);
+							const int nidx = (int)chf.cells[nx+ny*w].index + rcGetCon(s, dir);
+							if (chf.areas[nidx] != RC_NULL_AREA)
+							{
+								nc++;
+							}
+						}
+					}
+					// At least one missing neighbour.
+					if (nc != 4)
+						dist[i] = 0;
+				}
+			}
+		}
+	}
+	
+	unsigned char nd;
+	
+	// Pass 1: forward sweep; +2 per axis step, +3 per diagonal step, saturating at 255.
+	for (int y = 0; y < h; ++y)
+	{
+		for (int x = 0; x < w; ++x)
+		{
+			const rcCompactCell& c = chf.cells[x+y*w];
+			for (int i = (int)c.index, ni = (int)(c.index+c.count); i < ni; ++i)
+			{
+				const rcCompactSpan& s = chf.spans[i];
+				
+				if (rcGetCon(s, 0) != RC_NOT_CONNECTED)
+				{
+					// (-1,0)
+					const int ax = x + rcGetDirOffsetX(0);
+					const int ay = y + rcGetDirOffsetY(0);
+					const int ai = (int)chf.cells[ax+ay*w].index + rcGetCon(s, 0);
+					const rcCompactSpan& as = chf.spans[ai];
+					nd = (unsigned char)rcMin((int)dist[ai]+2, 255);
+					if (nd < dist[i])
+						dist[i] = nd;
+					
+					// (-1,-1)
+					if (rcGetCon(as, 3) != RC_NOT_CONNECTED)
+					{
+						const int aax = ax + rcGetDirOffsetX(3);
+						const int aay = ay + rcGetDirOffsetY(3);
+						const int aai = (int)chf.cells[aax+aay*w].index + rcGetCon(as, 3);
+						nd = (unsigned char)rcMin((int)dist[aai]+3, 255);
+						if (nd < dist[i])
+							dist[i] = nd;
+					}
+				}
+				if (rcGetCon(s, 3) != RC_NOT_CONNECTED)
+				{
+					// (0,-1)
+					const int ax = x + rcGetDirOffsetX(3);
+					const int ay = y + rcGetDirOffsetY(3);
+					const int ai = (int)chf.cells[ax+ay*w].index + rcGetCon(s, 3);
+					const rcCompactSpan& as = chf.spans[ai];
+					nd = (unsigned char)rcMin((int)dist[ai]+2, 255);
+					if (nd < dist[i])
+						dist[i] = nd;
+					
+					// (1,-1)
+					if (rcGetCon(as, 2) != RC_NOT_CONNECTED)
+					{
+						const int aax = ax + rcGetDirOffsetX(2);
+						const int aay = ay + rcGetDirOffsetY(2);
+						const int aai = (int)chf.cells[aax+aay*w].index + rcGetCon(as, 2);
+						nd = (unsigned char)rcMin((int)dist[aai]+3, 255);
+						if (nd < dist[i])
+							dist[i] = nd;
+					}
+				}
+			}
+		}
+	}
+	
+	// Pass 2: the same relaxation, sweeping backwards over the opposite neighbours.
+	for (int y = h-1; y >= 0; --y)
+	{
+		for (int x = w-1; x >= 0; --x)
+		{
+			const rcCompactCell& c = chf.cells[x+y*w];
+			for (int i = (int)c.index, ni = (int)(c.index+c.count); i < ni; ++i)
+			{
+				const rcCompactSpan& s = chf.spans[i];
+				
+				if (rcGetCon(s, 2) != RC_NOT_CONNECTED)
+				{
+					// (1,0)
+					const int ax = x + rcGetDirOffsetX(2);
+					const int ay = y + rcGetDirOffsetY(2);
+					const int ai = (int)chf.cells[ax+ay*w].index + rcGetCon(s, 2);
+					const rcCompactSpan& as = chf.spans[ai];
+					nd = (unsigned char)rcMin((int)dist[ai]+2, 255);
+					if (nd < dist[i])
+						dist[i] = nd;
+					
+					// (1,1)
+					if (rcGetCon(as, 1) != RC_NOT_CONNECTED)
+					{
+						const int aax = ax + rcGetDirOffsetX(1);
+						const int aay = ay + rcGetDirOffsetY(1);
+						const int aai = (int)chf.cells[aax+aay*w].index + rcGetCon(as, 1);
+						nd = (unsigned char)rcMin((int)dist[aai]+3, 255);
+						if (nd < dist[i])
+							dist[i] = nd;
+					}
+				}
+				if (rcGetCon(s, 1) != RC_NOT_CONNECTED)
+				{
+					// (0,1)
+					const int ax = x + rcGetDirOffsetX(1);
+					const int ay = y + rcGetDirOffsetY(1);
+					const int ai = (int)chf.cells[ax+ay*w].index + rcGetCon(s, 1);
+					const rcCompactSpan& as = chf.spans[ai];
+					nd = (unsigned char)rcMin((int)dist[ai]+2, 255);
+					if (nd < dist[i])
+						dist[i] = nd;
+					
+					// (-1,1)
+					if (rcGetCon(as, 0) != RC_NOT_CONNECTED)
+					{
+						const int aax = ax + rcGetDirOffsetX(0);
+						const int aay = ay + rcGetDirOffsetY(0);
+						const int aai = (int)chf.cells[aax+aay*w].index + rcGetCon(as, 0);
+						nd = (unsigned char)rcMin((int)dist[aai]+3, 255);
+						if (nd < dist[i])
+							dist[i] = nd;
+					}
+				}
+			}
+		}
+	}
+	
+	const unsigned char thr = (unsigned char)rcClamp(radius*2, 0, 255);	// Clamped so a large or negative radius cannot wrap around.
+	for (int i = 0; i < chf.spanCount; ++i)
+		if (dist[i] < thr)
+			chf.areas[i] = RC_NULL_AREA;
+	
+	rcFree(dist);
+	
+	ctx->stopTimer(RC_TIMER_ERODE_AREA);
+	
+	return true;
+}
+
+static void insertSort(unsigned char* a, const int n)
+{
+	// In-place ascending insertion sort; larger items shift right to open a slot for the key.
+	for (int sorted = 1; sorted < n; ++sorted)
+	{
+		const unsigned char key = a[sorted];
+		int slot = sorted;
+		while (slot > 0 && a[slot-1] > key) { a[slot] = a[slot-1]; --slot; }
+		a[slot] = key;
+	}
+}
+
+/// @par
+///
+/// This filter is usually applied after applying area id's using functions
+/// such as #rcMarkBoxArea, #rcMarkConvexPolyArea, and #rcMarkCylinderArea.
+/// 
+/// @see rcCompactHeightfield
+bool rcMedianFilterWalkableArea(rcContext* ctx, rcCompactHeightfield& chf)
+{
+	rcAssert(ctx);
+	
+	const int w = chf.width;
+	const int h = chf.height;
+	
+	ctx->startTimer(RC_TIMER_MEDIAN_AREA);
+	
+	unsigned char* areas = (unsigned char*)rcAlloc(sizeof(unsigned char)*chf.spanCount, RC_ALLOC_TEMP);
+	if (!areas)
+	{
+		ctx->log(RC_LOG_ERROR, "medianFilterWalkableArea: Out of memory 'areas' (%d).", chf.spanCount);
+		ctx->stopTimer(RC_TIMER_MEDIAN_AREA);	// Keep start/stop balanced on failure.
+		return false;
+	}
+	// Pre-fill the scratch area buffer with 0xff before the per-span pass.
+	memset(areas, 0xff, sizeof(unsigned char)*chf.spanCount);
+	
+	for (int y = 0; y < h; ++y)
+	{
+		for (int x = 0; x < w; ++x)
+		{
+			const rcCompactCell& c = chf.cells[x+y*w];
+			for (int i = (int)c.index, ni = (int)(c.index+c.count); i < ni; ++i)
+			{
+				const rcCompactSpan& s = chf.spans[i];
+				if (chf.areas[i] == RC_NULL_AREA)
+				{
+					areas[i] = chf.areas[i];
+					continue;
+				}
+				
+				// Nine samples: start with the span's own area, then overwrite with non-null neighbour areas.
+				unsigned char nei[9];
+				for (int j = 0; j < 9; ++j)
+					nei[j] = chf.areas[i];
+				
+				for (int dir = 0; dir < 4; ++dir)
+				{
+					if (rcGetCon(s, dir) != RC_NOT_CONNECTED)
+					{
+						const int ax = x + rcGetDirOffsetX(dir);
+						const int ay = y + rcGetDirOffsetY(dir);
+						const int ai = (int)chf.cells[ax+ay*w].index + rcGetCon(s, dir);
+						if (chf.areas[ai] != RC_NULL_AREA)
+							nei[dir*2+0] = chf.areas[ai];
+						
+						const rcCompactSpan& as = chf.spans[ai];
+						const int dir2 = (dir+1) & 0x3;
+						if (rcGetCon(as, dir2) != RC_NOT_CONNECTED)
+						{
+							const int ax2 = ax + rcGetDirOffsetX(dir2);
+							const int ay2 = ay + rcGetDirOffsetY(dir2);
+							const int ai2 = (int)chf.cells[ax2+ay2*w].index + rcGetCon(as, dir2);
+							if (chf.areas[ai2] != RC_NULL_AREA)
+								nei[dir*2+1] = chf.areas[ai2];
+						}
+					}
+				}
+				insertSort(nei, 9);
+				areas[i] = nei[4];	// Median of the nine sorted samples.
+			}
+		}
+	}
+	
+	memcpy(chf.areas, areas, sizeof(unsigned char)*chf.spanCount);
+	
+	rcFree(areas);
+
+	ctx->stopTimer(RC_TIMER_MEDIAN_AREA);
+	
+	return true;
+}
+
+/// @par
+///
+/// The values of the spatial parameters are in world units.
+/// 
+/// @see rcCompactHeightfield, rcMedianFilterWalkableArea
+void rcMarkBoxArea(rcContext* ctx, const float* bmin, const float* bmax, unsigned char areaId,
+				   rcCompactHeightfield& chf)
+{
+	rcAssert(ctx);
+	
+	ctx->startTimer(RC_TIMER_MARK_BOX_AREA);
+
+	// Convert the box corners to cell coordinates relative to the field's minimum corner.
+	int minx = (int)((bmin[0]-chf.bmin[0])/chf.cs);
+	int miny = (int)((bmin[1]-chf.bmin[1])/chf.ch);
+	int minz = (int)((bmin[2]-chf.bmin[2])/chf.cs);
+	int maxx = (int)((bmax[0]-chf.bmin[0])/chf.cs);
+	int maxy = (int)((bmax[1]-chf.bmin[1])/chf.ch);
+	int maxz = (int)((bmax[2]-chf.bmin[2])/chf.cs);
+	if (maxx < 0 || minx >= chf.width || maxz < 0 || minz >= chf.height)
+	{
+		ctx->stopTimer(RC_TIMER_MARK_BOX_AREA);	// Outside the grid: nothing to mark, but keep start/stop balanced.
+		return;
+	}
+	if (minx < 0) minx = 0;
+	if (maxx >= chf.width) maxx = chf.width-1;
+	if (minz < 0) minz = 0;
+	if (maxz >= chf.height) maxz = chf.height-1;
+	
+	for (int z = minz; z <= maxz; ++z)
+	{
+		for (int x = minx; x <= maxx; ++x)
+		{
+			const rcCompactCell& c = chf.cells[x+z*chf.width];
+			for (int i = (int)c.index, ni = (int)(c.index+c.count); i < ni; ++i)
+			{
+				rcCompactSpan& s = chf.spans[i];
+				if ((int)s.y >= miny && (int)s.y <= maxy)
+				{
+					// Spans already marked RC_NULL_AREA are left untouched.
+					if (chf.areas[i] != RC_NULL_AREA)
+						chf.areas[i] = areaId;
+				}
+			}
+		}
+	}
+
+	ctx->stopTimer(RC_TIMER_MARK_BOX_AREA);
+}
+
+
+static int pointInPoly(int nvert, const float* verts, const float* p)
+{
+	int inside = 0;	// Toggled for each polygon edge crossed by a ray from p towards +x in the xz-plane.
+	for (int cur = 0, prev = nvert-1; cur < nvert; prev = cur++)
+	{
+		const float* a = verts + cur*3;
+		const float* b = verts + prev*3;
+		const bool straddles = (a[2] > p[2]) != (b[2] > p[2]);	// Edge endpoints lie on opposite sides of p's z.
+		if (straddles && p[0] < (b[0]-a[0]) * (p[2]-a[2]) / (b[2]-a[2]) + a[0])
+			inside = !inside;
+	}
+	return inside;
+}
+
+/// @par
+///
+/// The values of the spatial parameters are in world units.
+/// 
+/// The y-values of the polygon vertices are ignored. So the polygon is effectively 
+/// projected onto the xz-plane at @p hmin, then extruded to @p hmax.
+/// 
+/// @see rcCompactHeightfield, rcMedianFilterWalkableArea
+void rcMarkConvexPolyArea(rcContext* ctx, const float* verts, const int nverts,
+						  const float hmin, const float hmax, unsigned char areaId,
+						  rcCompactHeightfield& chf)
+{
+	rcAssert(ctx);
+	
+	ctx->startTimer(RC_TIMER_MARK_CONVEXPOLY_AREA);
+
+	// Bounding box of the polygon; its y range is replaced by [hmin, hmax].
+	float bmin[3], bmax[3];
+	rcVcopy(bmin, verts);
+	rcVcopy(bmax, verts);
+	for (int i = 1; i < nverts; ++i)
+	{
+		rcVmin(bmin, &verts[i*3]);
+		rcVmax(bmax, &verts[i*3]);
+	}
+	bmin[1] = hmin;
+	bmax[1] = hmax;
+	int minx = (int)((bmin[0]-chf.bmin[0])/chf.cs);
+	int miny = (int)((bmin[1]-chf.bmin[1])/chf.ch);
+	int minz = (int)((bmin[2]-chf.bmin[2])/chf.cs);
+	int maxx = (int)((bmax[0]-chf.bmin[0])/chf.cs);
+	int maxy = (int)((bmax[1]-chf.bmin[1])/chf.ch);
+	int maxz = (int)((bmax[2]-chf.bmin[2])/chf.cs);
+	if (maxx < 0 || minx >= chf.width || maxz < 0 || minz >= chf.height)
+	{
+		ctx->stopTimer(RC_TIMER_MARK_CONVEXPOLY_AREA);	// Outside the grid: nothing to mark, but keep start/stop balanced.
+		return;
+	}
+	
+	if (minx < 0) minx = 0;
+	if (maxx >= chf.width) maxx = chf.width-1;
+	if (minz < 0) minz = 0;
+	if (maxz >= chf.height) maxz = chf.height-1;
+	
+	
+	// TODO: Optimize.
+	for (int z = minz; z <= maxz; ++z)
+	{
+		for (int x = minx; x <= maxx; ++x)
+		{
+			const rcCompactCell& c = chf.cells[x+z*chf.width];
+			for (int i = (int)c.index, ni = (int)(c.index+c.count); i < ni; ++i)
+			{
+				rcCompactSpan& s = chf.spans[i];
+				if (chf.areas[i] == RC_NULL_AREA)
+					continue;
+				if ((int)s.y >= miny && (int)s.y <= maxy)
+				{
+					// Test the centre of the cell against the polygon in the xz-plane.
+					float p[3];
+					p[0] = chf.bmin[0] + (x+0.5f)*chf.cs;
+					p[1] = 0;
+					p[2] = chf.bmin[2] + (z+0.5f)*chf.cs;
+					if (pointInPoly(nverts, verts, p))
+					{
+						chf.areas[i] = areaId;
+					}
+				}
+			}
+		}
+	}
+
+	ctx->stopTimer(RC_TIMER_MARK_CONVEXPOLY_AREA);
+}
+
+int rcOffsetPoly(const float* verts, const int nverts, const float offset,
+				 float* outVerts, const int maxOutVerts)
+{
+	const float	MITER_LIMIT = 1.20f;
+	// Returns the number of vertices written to outVerts, or 0 when maxOutVerts is too small.
+	int n = 0;
+
+	for (int i = 0; i < nverts; i++)
+	{
+		const int a = (i+nverts-1) % nverts;
+		const int b = i;
+		const int c = (i+1) % nverts;
+		const float* va = &verts[a*3];
+		const float* vb = &verts[b*3];
+		const float* vc = &verts[c*3];
+		float dx0 = vb[0] - va[0];
+		float dy0 = vb[2] - va[2];
+		float d0 = dx0*dx0 + dy0*dy0;
+		if (d0 > 1e-6f)
+		{
+			d0 = 1.0f/rcSqrt(d0);
+			dx0 *= d0;
+			dy0 *= d0;
+		}
+		float dx1 = vc[0] - vb[0];
+		float dy1 = vc[2] - vb[2];
+		float d1 = dx1*dx1 + dy1*dy1;
+		if (d1 > 1e-6f)
+		{
+			d1 = 1.0f/rcSqrt(d1);
+			dx1 *= d1;
+			dy1 *= d1;
+		}
+		const float dlx0 = -dy0;
+		const float dly0 = dx0;
+		const float dlx1 = -dy1;
+		const float dly1 = dx1;
+		float cross = dx1*dy0 - dx0*dy1;
+		float dmx = (dlx0 + dlx1) * 0.5f;
+		float dmy = (dly0 + dly1) * 0.5f;
+		float dmr2 = dmx*dmx + dmy*dmy;
+		bool bevel = dmr2 * MITER_LIMIT*MITER_LIMIT < 1.0f;
+		if (dmr2 > 1e-6f)
+		{
+			const float scale = 1.0f / dmr2;
+			dmx *= scale;
+			dmy *= scale;
+		}
+
+		if (bevel && cross < 0.0f)
+		{
+			if (n+2 > maxOutVerts)	// Two vertices are written here; an exact fit is allowed.
+				return 0;
+			float d = (1.0f - (dx0*dx1 + dy0*dy1))*0.5f;
+			outVerts[n*3+0] = vb[0] + (-dlx0+dx0*d)*offset;
+			outVerts[n*3+1] = vb[1];
+			outVerts[n*3+2] = vb[2] + (-dly0+dy0*d)*offset;
+			n++;
+			outVerts[n*3+0] = vb[0] + (-dlx1-dx1*d)*offset;
+			outVerts[n*3+1] = vb[1];
+			outVerts[n*3+2] = vb[2] + (-dly1-dy1*d)*offset;
+			n++;
+		}
+		else
+		{
+			if (n+1 > maxOutVerts)	// One vertex is written here; an exact fit is allowed.
+				return 0;
+			outVerts[n*3+0] = vb[0] - dmx*offset;
+			outVerts[n*3+1] = vb[1];
+			outVerts[n*3+2] = vb[2] - dmy*offset;
+			n++;
+		}
+	}
+	
+	return n;
+}
+
+
+/// @par
+///
+/// The values of the spatial parameters are in world units.
+/// 
+/// @see rcCompactHeightfield, rcMedianFilterWalkableArea
+void rcMarkCylinderArea(rcContext* ctx, const float* pos,
+						const float r, const float h, unsigned char areaId,
+						rcCompactHeightfield& chf)
+{
+	rcAssert(ctx);
+	
+	ctx->startTimer(RC_TIMER_MARK_CYLINDER_AREA);
+	
+	// Axis-aligned box around the cylinder: radius r in x/z, from pos[1] up to pos[1]+h in y.
+	float bmin[3], bmax[3];
+	bmin[0] = pos[0] - r;
+	bmin[1] = pos[1];
+	bmin[2] = pos[2] - r;
+	bmax[0] = pos[0] + r;
+	bmax[1] = pos[1] + h;
+	bmax[2] = pos[2] + r;
+	const float r2 = r*r;
+	int minx = (int)((bmin[0]-chf.bmin[0])/chf.cs);
+	int miny = (int)((bmin[1]-chf.bmin[1])/chf.ch);
+	int minz = (int)((bmin[2]-chf.bmin[2])/chf.cs);
+	int maxx = (int)((bmax[0]-chf.bmin[0])/chf.cs);
+	int maxy = (int)((bmax[1]-chf.bmin[1])/chf.ch);
+	int maxz = (int)((bmax[2]-chf.bmin[2])/chf.cs);
+	if (maxx < 0 || minx >= chf.width || maxz < 0 || minz >= chf.height)
+	{
+		ctx->stopTimer(RC_TIMER_MARK_CYLINDER_AREA);	// Outside the grid: nothing to mark, but keep start/stop balanced.
+		return;
+	}
+	
+	if (minx < 0) minx = 0;
+	if (maxx >= chf.width) maxx = chf.width-1;
+	if (minz < 0) minz = 0;
+	if (maxz >= chf.height) maxz = chf.height-1;
+	
+	
+	for (int z = minz; z <= maxz; ++z)
+	{
+		for (int x = minx; x <= maxx; ++x)
+		{
+			const rcCompactCell& c = chf.cells[x+z*chf.width];
+			for (int i = (int)c.index, ni = (int)(c.index+c.count); i < ni; ++i)
+			{
+				rcCompactSpan& s = chf.spans[i];
+				
+				if (chf.areas[i] == RC_NULL_AREA)
+					continue;
+				
+				if ((int)s.y >= miny && (int)s.y <= maxy)
+				{
+					// Compare the cell centre's squared xz distance from the axis against r squared.
+					const float sx = chf.bmin[0] + (x+0.5f)*chf.cs;
+					const float sz = chf.bmin[2] + (z+0.5f)*chf.cs;
+					const float dx = sx - pos[0];
+					const float dz = sz - pos[2];
+					if (dx*dx + dz*dz < r2)
+					{
+						chf.areas[i] = areaId;
+					}
+				}
+			}
+		}
+	}
+	
+	ctx->stopTimer(RC_TIMER_MARK_CYLINDER_AREA);
+}
diff --git a/Engine/lib/recast/Recast/Source/RecastContour.cpp b/Engine/lib/recast/Recast/Source/RecastContour.cpp
new file mode 100644
index 000000000..5c324bced
--- /dev/null
+++ b/Engine/lib/recast/Recast/Source/RecastContour.cpp
@@ -0,0 +1,851 @@
+//
+// Copyright (c) 2009-2010 Mikko Mononen memon@inside.org
+//
+// This software is provided 'as-is', without any express or implied
+// warranty.  In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source distribution.
+//
+
+#define _USE_MATH_DEFINES
+#include <math.h>
+#include <string.h>
+#include <stdio.h>
+#include "Recast.h"
+#include "RecastAlloc.h"
+#include "RecastAssert.h"
+
+
+// Returns the height (largest span y) of the corner of span i in cell (x,y)
+// that sits between edge 'dir' and edge 'dir+1'. Sets isBorderVertex to true
+// when the corner matches the pattern tested at the end; never clears it.
+static int getCornerHeight(int x, int y, int i, int dir,
+						   const rcCompactHeightfield& chf,
+						   bool& isBorderVertex)
+{
+	const rcCompactSpan& span = chf.spans[i];
+	const int nextDir = (dir+1) & 0x3;
+	int cornerY = (int)span.y;
+
+	// One code per span around the corner: region id in the low bits, area id
+	// shifted up by 16. Mixing in the area keeps vertices that separate two
+	// areas from being flagged for removal. Missing spans stay at zero.
+	unsigned int codes[4] = {0,0,0,0};
+	codes[0] = chf.spans[i].reg | (chf.areas[i] << 16);
+
+	// Neighbour across 'dir', then the diagonal span reached from it.
+	if (rcGetCon(span, dir) != RC_NOT_CONNECTED)
+	{
+		const int nx = x + rcGetDirOffsetX(dir);
+		const int ny = y + rcGetDirOffsetY(dir);
+		const int ni = (int)chf.cells[nx+ny*chf.width].index + rcGetCon(span, dir);
+		const rcCompactSpan& nspan = chf.spans[ni];
+		cornerY = rcMax(cornerY, (int)nspan.y);
+		codes[1] = chf.spans[ni].reg | (chf.areas[ni] << 16);
+		if (rcGetCon(nspan, nextDir) != RC_NOT_CONNECTED)
+		{
+			const int dx = nx + rcGetDirOffsetX(nextDir);
+			const int dy = ny + rcGetDirOffsetY(nextDir);
+			const int di = (int)chf.cells[dx+dy*chf.width].index + rcGetCon(nspan, nextDir);
+			cornerY = rcMax(cornerY, (int)chf.spans[di].y);
+			codes[2] = chf.spans[di].reg | (chf.areas[di] << 16);
+		}
+	}
+	// Neighbour across nextDir, then the same diagonal reached the other way.
+	if (rcGetCon(span, nextDir) != RC_NOT_CONNECTED)
+	{
+		const int nx = x + rcGetDirOffsetX(nextDir);
+		const int ny = y + rcGetDirOffsetY(nextDir);
+		const int ni = (int)chf.cells[nx+ny*chf.width].index + rcGetCon(span, nextDir);
+		const rcCompactSpan& nspan = chf.spans[ni];
+		cornerY = rcMax(cornerY, (int)nspan.y);
+		codes[3] = chf.spans[ni].reg | (chf.areas[ni] << 16);
+		if (rcGetCon(nspan, dir) != RC_NOT_CONNECTED)
+		{
+			const int dx = nx + rcGetDirOffsetX(dir);
+			const int dy = ny + rcGetDirOffsetY(dir);
+			const int di = (int)chf.cells[dx+dy*chf.width].index + rcGetCon(nspan, dir);
+			cornerY = rcMax(cornerY, (int)chf.spans[di].y);
+			codes[2] = chf.spans[di].reg | (chf.areas[di] << 16);
+		}
+	}
+
+	// A border vertex has, for some rotation of the four codes, two identical
+	// border-region codes in a row followed by two non-border codes of the
+	// same area, with none of the four codes equal to zero.
+	for (int rot = 0; rot < 4; ++rot)
+	{
+		const unsigned int e0 = codes[rot];
+		const unsigned int e1 = codes[(rot+1) & 0x3];
+		const unsigned int n0 = codes[(rot+2) & 0x3];
+		const unsigned int n1 = codes[(rot+3) & 0x3];
+		if (e0 == 0 || e1 == 0 || n0 == 0 || n1 == 0) continue;
+		if (e0 != e1 || (e0 & e1 & RC_BORDER_REG) == 0) continue;
+		if (((n0 | n1) & RC_BORDER_REG) != 0) continue;
+		if ((n0>>16) != (n1>>16)) continue;
+
+		isBorderVertex = true;
+		break;
+	}
+
+	return cornerY;
+}
+
+static void walkContour(int x, int y, int i,
+						rcCompactHeightfield& chf,
+						unsigned char* flags, rcIntArray& points)
+{
+	// Walks the boundary that starts at span i of cell (x,y), pushing one (x, y, z, r) tuple to points per flagged edge.
+	unsigned char dir = 0;
+	while ((flags[i] & (1 << dir)) == 0)	// Choose the first edge whose bit is set in flags[i].
+		dir++;
+	// NOTE(review): the scan above relies on flags[i] having at least one of bits 0-3 set.
+	unsigned char startDir = dir;
+	int starti = i;
+	// Area of the starting span; edges leading into a different area are flagged below.
+	const unsigned char area = chf.areas[i];
+	
+	int iter = 0;
+	while (++iter < 40000)	// Hard cap on steps; the walk simply stops once it is reached.
+	{
+		if (flags[i] & (1 << dir))
+		{
+			// Flagged edge: emit the corner vertex that belongs to it.
+			bool isBorderVertex = false;
+			bool isAreaBorder = false;
+			int px = x;
+			int py = getCornerHeight(x, y, i, dir, chf, isBorderVertex);
+			int pz = y;
+			switch(dir)	// Corner offset within the cell; dir 3 uses (x, y) unchanged.
+			{
+				case 0: pz++; break;
+				case 1: px++; pz++; break;
+				case 2: px++; break;
+			}
+			int r = 0;	// Region of the span across this edge; stays 0 when there is no connection.
+			const rcCompactSpan& s = chf.spans[i];
+			if (rcGetCon(s, dir) != RC_NOT_CONNECTED)
+			{
+				const int ax = x + rcGetDirOffsetX(dir);
+				const int ay = y + rcGetDirOffsetY(dir);
+				const int ai = (int)chf.cells[ax+ay*chf.width].index + rcGetCon(s, dir);
+				r = (int)chf.spans[ai].reg;
+				if (area != chf.areas[ai])
+					isAreaBorder = true;
+			}
+			if (isBorderVertex)
+				r |= RC_BORDER_VERTEX;
+			if (isAreaBorder)
+				r |= RC_AREA_BORDER;
+			points.push(px);
+			points.push(py);
+			points.push(pz);
+			points.push(r);
+			
+			flags[i] &= ~(1 << dir); // Remove visited edges
+			dir = (dir+1) & 0x3;  // Rotate CW
+		}
+		else
+		{
+			int ni = -1;	// Index of the span across 'dir'; -1 when not connected.
+			const int nx = x + rcGetDirOffsetX(dir);
+			const int ny = y + rcGetDirOffsetY(dir);
+			const rcCompactSpan& s = chf.spans[i];
+			if (rcGetCon(s, dir) != RC_NOT_CONNECTED)
+			{
+				const rcCompactCell& nc = chf.cells[nx+ny*chf.width];
+				ni = (int)nc.index + rcGetCon(s, dir);
+			}
+			if (ni == -1)
+			{
+				// Should not happen.
+				return;
+			}
+			x = nx;
+			y = ny;
+			i = ni;
+			dir = (dir+3) & 0x3;	// Rotate CCW
+		}
+		// Back at the starting span facing the starting direction: the outline is closed.
+		if (starti == i && startDir == dir)
+		{
+			break;
+		}
+	}
+}
+
+// Squared distance from a point to a line segment, measured on the xz-plane.
+//
+// The value is left squared (no square root is taken), so callers compare
+// it against a squared tolerance.
+//
+//  x, z    Point to measure from.
+//  px, pz  First end point of the segment.
+//  qx, qz  Second end point of the segment.
+//
+// Returns the squared distance to the closest point on the segment. When
+// both end points coincide, that is the squared distance to that one point.
+static float distancePtSeg(const int x, const int z,
+						   const int px, const int pz,
+						   const int qx, const int qz)
+{
+	// Segment direction, and the offset of the point from the segment start.
+	const float segX = (float)(qx - px);
+	const float segZ = (float)(qz - pz);
+	float offX = (float)(x - px);
+	float offZ = (float)(z - pz);
+
+	// Parameter of the point's projection onto the segment's line. It is
+	// normalised by the squared segment length unless that length is zero.
+	const float lenSqr = segX*segX + segZ*segZ;
+	float t = segX*offX + segZ*offZ;
+	if (lenSqr > 0)
+		t /= lenSqr;
+
+	// Clamp to [0, 1] so that points lying beyond either end of the
+	// segment are measured to that end point.
+	if (t < 0)
+	{
+		t = 0;
+	}
+	else if (t > 1)
+	{
+		t = 1;
+	}
+
+	offX = px + t*segX - x;
+	offZ = pz + t*segZ - z;
+	return offX*offX + offZ*offZ;
+}
+
+static void simplifyContour(rcIntArray& points, rcIntArray& simplified,
+							const float maxError, const int maxEdgeLen, const int buildFlags)
+{
+	// Builds 'simplified' from the raw contour 'points' (4 ints per vertex). Start by adding initial points.
+	bool hasConnections = false;
+	for (int i = 0; i < points.size(); i += 4)
+	{
+		if ((points[i+3] & RC_CONTOUR_REG_MASK) != 0)
+		{
+			hasConnections = true;
+			break;
+		}
+	}
+	
+	if (hasConnections)
+	{
+		// The contour has some portals to other regions.
+		// Add a new point to every location where the region changes.
+		for (int i = 0, ni = points.size()/4; i < ni; ++i)
+		{
+			int ii = (i+1) % ni;
+			const bool differentRegs = (points[i*4+3] & RC_CONTOUR_REG_MASK) != (points[ii*4+3] & RC_CONTOUR_REG_MASK);
+			const bool areaBorders = (points[i*4+3] & RC_AREA_BORDER) != (points[ii*4+3] & RC_AREA_BORDER);
+			if (differentRegs || areaBorders)
+			{
+				simplified.push(points[i*4+0]);
+				simplified.push(points[i*4+1]);
+				simplified.push(points[i*4+2]);
+				simplified.push(i);	// 4th value holds the raw point index until the final pass.
+			}
+		}       
+	}
+	
+	if (simplified.size() == 0)
+	{
+		// If there are no connections at all,
+		// create some initial points for the simplification process. 
+		// Find lower-left and upper-right vertices of the contour.
+		int llx = points[0];
+		int lly = points[1];
+		int llz = points[2];
+		int lli = 0;
+		int urx = points[0];
+		int ury = points[1];
+		int urz = points[2];
+		int uri = 0;
+		for (int i = 0; i < points.size(); i += 4)
+		{
+			int x = points[i+0];
+			int y = points[i+1];
+			int z = points[i+2];
+			if (x < llx || (x == llx && z < llz))
+			{
+				llx = x;
+				lly = y;
+				llz = z;
+				lli = i/4;
+			}
+			if (x > urx || (x == urx && z > urz))
+			{
+				urx = x;
+				ury = y;
+				urz = z;
+				uri = i/4;
+			}
+		}
+		simplified.push(llx);
+		simplified.push(lly);
+		simplified.push(llz);
+		simplified.push(lli);
+		
+		simplified.push(urx);
+		simplified.push(ury);
+		simplified.push(urz);
+		simplified.push(uri);
+	}
+	
+	// Add points until all raw points are within
+	// error tolerance to the simplified shape.
+	const int pn = points.size()/4;	// Number of raw points.
+	for (int i = 0; i < simplified.size()/4; )
+	{
+		int ii = (i+1) % (simplified.size()/4);
+		
+		const int ax = simplified[i*4+0];
+		const int az = simplified[i*4+2];
+		const int ai = simplified[i*4+3];
+		
+		const int bx = simplified[ii*4+0];
+		const int bz = simplified[ii*4+2];
+		const int bi = simplified[ii*4+3];
+
+		// Find maximum deviation from the segment.
+		float maxd = 0;
+		int maxi = -1;
+		int ci, cinc, endi;
+		
+		// Traverse the segment in lexicographical order so that the
+		// max deviation is calculated similarly when traversing
+		// opposite segments.
+		if (bx > ax || (bx == ax && bz > az))
+		{
+			cinc = 1;
+			ci = (ai+cinc) % pn;
+			endi = bi;
+		}
+		else
+		{
+			cinc = pn-1;	// Adding pn-1 modulo pn steps backwards by one raw point.
+			ci = (bi+cinc) % pn;
+			endi = ai;
+		}
+		
+		// Tessellate only outer edges or edges between areas.
+		if ((points[ci*4+3] & RC_CONTOUR_REG_MASK) == 0 ||
+			(points[ci*4+3] & RC_AREA_BORDER))
+		{
+			while (ci != endi)
+			{
+				float d = distancePtSeg(points[ci*4+0], points[ci*4+2], ax, az, bx, bz);
+				if (d > maxd)
+				{
+					maxd = d;
+					maxi = ci;
+				}
+				ci = (ci+cinc) % pn;
+			}
+		}
+		
+		// maxd is a squared distance, hence the squared tolerance in the test below.
+		// If the max deviation is larger than accepted error,
+		// add new point, else continue to next segment.
+		if (maxi != -1 && maxd > (maxError*maxError))
+		{
+			// Add space for the new point.
+			simplified.resize(simplified.size()+4);
+			const int n = simplified.size()/4;
+			for (int j = n-1; j > i; --j)
+			{
+				simplified[j*4+0] = simplified[(j-1)*4+0];
+				simplified[j*4+1] = simplified[(j-1)*4+1];
+				simplified[j*4+2] = simplified[(j-1)*4+2];
+				simplified[j*4+3] = simplified[(j-1)*4+3];
+			}
+			// Add the point.
+			simplified[(i+1)*4+0] = points[maxi*4+0];
+			simplified[(i+1)*4+1] = points[maxi*4+1];
+			simplified[(i+1)*4+2] = points[maxi*4+2];
+			simplified[(i+1)*4+3] = maxi;
+		}
+		else
+		{
+			++i;
+		}
+	}
+	
+	// Split too long edges.
+	if (maxEdgeLen > 0 && (buildFlags & (RC_CONTOUR_TESS_WALL_EDGES|RC_CONTOUR_TESS_AREA_EDGES)) != 0)
+	{
+		for (int i = 0; i < simplified.size()/4; )
+		{
+			const int ii = (i+1) % (simplified.size()/4);
+			
+			const int ax = simplified[i*4+0];
+			const int az = simplified[i*4+2];
+			const int ai = simplified[i*4+3];
+			
+			const int bx = simplified[ii*4+0];
+			const int bz = simplified[ii*4+2];
+			const int bi = simplified[ii*4+3];
+
+			// Raw point chosen to split this edge, or -1 when no split is needed.
+			int maxi = -1;
+			int ci = (ai+1) % pn;	// First raw point after a; its flags decide whether the edge is tessellated.
+
+			// Tessellate only outer edges or edges between areas.
+			bool tess = false;
+			// Wall edges.
+			if ((buildFlags & RC_CONTOUR_TESS_WALL_EDGES) && (points[ci*4+3] & RC_CONTOUR_REG_MASK) == 0)
+				tess = true;
+			// Edges between areas.
+			if ((buildFlags & RC_CONTOUR_TESS_AREA_EDGES) && (points[ci*4+3] & RC_AREA_BORDER))
+				tess = true;
+			
+			if (tess)
+			{
+				int dx = bx - ax;
+				int dz = bz - az;
+				if (dx*dx + dz*dz > maxEdgeLen*maxEdgeLen)
+				{
+					// Round based on the segments in lexicographical order so that the
+					// max tessellation is consistent regardless of the direction in
+					// which segments are traversed.
+					const int n = bi < ai ? (bi+pn - ai) : (bi - ai);	// Raw points from a to b, with wrap-around.
+					if (n > 1)
+					{
+						if (bx > ax || (bx == ax && bz > az))
+							maxi = (ai + n/2) % pn;
+						else
+							maxi = (ai + (n+1)/2) % pn;
+					}
+				}
+			}
+			
+			// If a split point was chosen above,
+			// add new point, else continue to next segment.
+			if (maxi != -1)
+			{
+				// Add space for the new point.
+				simplified.resize(simplified.size()+4);
+				const int n = simplified.size()/4;
+				for (int j = n-1; j > i; --j)
+				{
+					simplified[j*4+0] = simplified[(j-1)*4+0];
+					simplified[j*4+1] = simplified[(j-1)*4+1];
+					simplified[j*4+2] = simplified[(j-1)*4+2];
+					simplified[j*4+3] = simplified[(j-1)*4+3];
+				}
+				// Add the point.
+				simplified[(i+1)*4+0] = points[maxi*4+0];
+				simplified[(i+1)*4+1] = points[maxi*4+1];
+				simplified[(i+1)*4+2] = points[maxi*4+2];
+				simplified[(i+1)*4+3] = maxi;
+			}
+			else
+			{
+				++i;
+			}
+		}
+	}
+	
+	for (int i = 0; i < simplified.size()/4; ++i)
+	{
+		// The edge vertex flag is taken from the current raw point,
+		// and the neighbour region is taken from the next raw point.
+		const int ai = (simplified[i*4+3]+1) % pn;
+		const int bi = simplified[i*4+3];
+		simplified[i*4+3] = (points[ai*4+3] & (RC_CONTOUR_REG_MASK|RC_AREA_BORDER)) | (points[bi*4+3] & RC_BORDER_VERTEX);
+	}
+	
+}
+
+// Removes vertices whose successor (with wrap-around) has the same x and z,
+// since zero-length edges on the xz-plane confuse the triangulator.
+// Each vertex in 'simplified' occupies four consecutive ints.
+static void removeDegenerateSegments(rcIntArray& simplified)
+{
+	for (int i = 0; i < simplified.size()/4; )
+	{
+		const int nverts = simplified.size()/4;
+		const int ni = (i+1 < nverts) ? i+1 : 0;
+
+		if (simplified[i*4+0] != simplified[ni*4+0] ||
+			simplified[i*4+2] != simplified[ni*4+2])
+		{
+			++i;
+			continue;
+		}
+
+		// Degenerate segment: close the gap by shifting the tail down one
+		// vertex. Do not advance i, so the vertex that moved into slot i is
+		// compared with its own successor too (runs of 3+ equal vertices).
+		for (int j = i*4; j < simplified.size()-4; ++j)
+			simplified[j] = simplified[j+4];
+		simplified.resize(simplified.size()-4);
+	}
+}
+
+// Signed area of a polygon on the xz-plane. 'verts' holds nverts vertices of
+// four ints each, of which only x (index 0) and z (index 2) are read. The
+// doubled area is accumulated first, then one is added and the sum halved.
+static int calcAreaOfPolygon2D(const int* verts, const int nverts)
+{
+	int twiceArea = 0;
+	int prev = nverts-1;
+	for (int cur = 0; cur < nverts; prev = cur, ++cur)
+		twiceArea += verts[cur*4+0] * verts[prev*4+2] - verts[prev*4+0] * verts[cur*4+2];
+	return (twiceArea+1) / 2;
+}
+
+inline bool ileft(const int* a, const int* b, const int* c)	// True when the xz cross product of (b-a) and (c-a) is <= 0.
+{
+	return !((b[0] - a[0]) * (c[2] - a[2]) - (c[0] - a[0]) * (b[2] - a[2]) > 0);
+}
+
+// Finds the closest pair (by squared xz distance) of a vertex of contour A and
+// a vertex of contour B for which the B vertex passes both ileft() tests
+// against the two edges meeting at the A vertex. Outputs -1, -1 if none.
+static void getClosestIndices(const int* vertsa, const int nvertsa,
+							  const int* vertsb, const int nvertsb,
+							  int& ia, int& ib)
+{
+	int best = 0xfffffff;
+	ia = -1;
+	ib = -1;
+	for (int a = 0; a < nvertsa; ++a)
+	{
+		const int* cur = &vertsa[a*4];
+		const int* next = &vertsa[((a+1) % nvertsa)*4];
+		const int* prev = &vertsa[((a+nvertsa-1) % nvertsa)*4];
+		for (int b = 0; b < nvertsb; ++b)
+		{
+			const int* cand = &vertsb[b*4];
+			// The candidate must be "in front" of the A vertex.
+			if (!ileft(prev,cur,cand) || !ileft(cur,next,cand))
+				continue;
+
+			const int dx = cand[0] - cur[0];
+			const int dz = cand[2] - cur[2];
+			const int distSqr = dx*dx + dz*dz;
+			if (distSqr >= best)
+				continue;
+			best = distSqr;
+			ia = a;
+			ib = b;
+		}
+	}
+}
+
+// Replaces contour A's vertices with A's loop starting (and ending) at vertex
+// ia followed by B's loop starting (and ending) at vertex ib, then empties
+// contour B. Returns false, leaving both contours untouched, when the new
+// vertex buffer cannot be allocated. Every vertex occupies four ints, all of
+// which are copied.
+static bool mergeContours(rcContour& ca, rcContour& cb, int ia, int ib)
+{
+	// Each loop is copied with its first vertex repeated at the end: +2.
+	const int maxVerts = ca.nverts + cb.nverts + 2;
+	int* merged = (int*)rcAlloc(sizeof(int)*maxVerts*4, RC_ALLOC_PERM);
+	if (!merged)
+		return false;
+
+	int copied = 0;
+
+	// Contour A: nverts+1 vertices, wrapping around from ia back to ia.
+	for (int k = 0; k <= ca.nverts; ++k, ++copied)
+	{
+		const int* in = &ca.verts[((ia+k)%ca.nverts)*4];
+		for (int c = 0; c < 4; ++c)
+			merged[copied*4+c] = in[c];
+	}
+
+	// Contour B: likewise, from ib back to ib.
+	for (int k = 0; k <= cb.nverts; ++k, ++copied)
+	{
+		const int* in = &cb.verts[((ib+k)%cb.nverts)*4];
+		for (int c = 0; c < 4; ++c)
+			merged[copied*4+c] = in[c];
+	}
+
+	// A takes over the merged buffer.
+	rcFree(ca.verts);
+	ca.verts = merged;
+	ca.nverts = copied;
+
+	// B no longer has any vertices.
+	rcFree(cb.verts);
+	cb.verts = 0;
+	cb.nverts = 0;
+
+	return true;
+}
+
+/// @par
+///
+/// The raw contours will match the region outlines exactly. The @p maxError and @p maxEdgeLen
+/// parameters control how closely the simplified contours will match the raw contours.
+///
+/// Simplified contours are generated such that the vertices for portals between areas match up. 
+/// (They are considered mandatory vertices.)
+///
+/// Setting @p maxEdgeLen to zero will disable the edge length feature.
+///
+/// Returns false when a required allocation fails; otherwise returns true.
+/// 
+/// See the #rcConfig documentation for more information on the configuration parameters.
+/// 
+/// @see rcAllocContourSet, rcCompactHeightfield, rcContourSet, rcConfig
+bool rcBuildContours(rcContext* ctx, rcCompactHeightfield& chf,
+					 const float maxError, const int maxEdgeLen,
+					 rcContourSet& cset, const int buildFlags)
+{
+	rcAssert(ctx);
+	
+	const int w = chf.width;
+	const int h = chf.height;
+	const int borderSize = chf.borderSize;
+	
+	ctx->startTimer(RC_TIMER_BUILD_CONTOURS);
+	
+	rcVcopy(cset.bmin, chf.bmin);
+	rcVcopy(cset.bmax, chf.bmax);
+	if (borderSize > 0)
+	{
+		// If the heightfield was built with borderSize, remove the offset.
+		const float pad = borderSize*chf.cs;
+		cset.bmin[0] += pad;
+		cset.bmin[2] += pad;
+		cset.bmax[0] -= pad;
+		cset.bmax[2] -= pad;
+	}
+	cset.cs = chf.cs;
+	cset.ch = chf.ch;
+	cset.width = chf.width - chf.borderSize*2;
+	cset.height = chf.height - chf.borderSize*2;
+	cset.borderSize = chf.borderSize;
+	
+	int maxContours = rcMax((int)chf.maxRegions, 8);
+	cset.conts = (rcContour*)rcAlloc(sizeof(rcContour)*maxContours, RC_ALLOC_PERM);
+	if (!cset.conts)
+		return false;
+	cset.nconts = 0;
+	
+	rcScopedDelete<unsigned char> flags = (unsigned char*)rcAlloc(sizeof(unsigned char)*chf.spanCount, RC_ALLOC_TEMP);
+	if (!flags)
+	{
+		ctx->log(RC_LOG_ERROR, "rcBuildContours: Out of memory 'flags' (%d).", chf.spanCount);
+		return false;
+	}
+	
+	ctx->startTimer(RC_TIMER_BUILD_CONTOURS_TRACE);
+	
+	// Mark boundaries.
+	for (int y = 0; y < h; ++y)
+	{
+		for (int x = 0; x < w; ++x)
+		{
+			const rcCompactCell& c = chf.cells[x+y*w];
+			for (int i = (int)c.index, ni = (int)(c.index+c.count); i < ni; ++i)
+			{
+				unsigned char res = 0;
+				const rcCompactSpan& s = chf.spans[i];
+				if (!chf.spans[i].reg || (chf.spans[i].reg & RC_BORDER_REG))
+				{
+					flags[i] = 0;
+					continue;
+				}
+				for (int dir = 0; dir < 4; ++dir)
+				{
+					unsigned short r = 0;
+					if (rcGetCon(s, dir) != RC_NOT_CONNECTED)
+					{
+						const int ax = x + rcGetDirOffsetX(dir);
+						const int ay = y + rcGetDirOffsetY(dir);
+						const int ai = (int)chf.cells[ax+ay*w].index + rcGetCon(s, dir);
+						r = chf.spans[ai].reg;
+					}
+					if (r == chf.spans[i].reg)
+						res |= (1 << dir);
+				}
+				flags[i] = res ^ 0xf; // Inverse, mark non connected edges.
+			}
+		}
+	}
+	
+	ctx->stopTimer(RC_TIMER_BUILD_CONTOURS_TRACE);
+	
+	rcIntArray verts(256);
+	rcIntArray simplified(64);
+	
+	for (int y = 0; y < h; ++y)
+	{
+		for (int x = 0; x < w; ++x)
+		{
+			const rcCompactCell& c = chf.cells[x+y*w];
+			for (int i = (int)c.index, ni = (int)(c.index+c.count); i < ni; ++i)
+			{
+				// Spans with no boundary edge left, or with all four, do not start a contour.
+				if (flags[i] == 0 || flags[i] == 0xf)
+				{
+					flags[i] = 0;
+					continue;
+				}
+				const unsigned short reg = chf.spans[i].reg;
+				if (!reg || (reg & RC_BORDER_REG))
+					continue;
+				const unsigned char area = chf.areas[i];
+				
+				verts.resize(0);
+				simplified.resize(0);
+
+				ctx->startTimer(RC_TIMER_BUILD_CONTOURS_TRACE);
+				walkContour(x, y, i, chf, flags, verts);
+				ctx->stopTimer(RC_TIMER_BUILD_CONTOURS_TRACE);
+
+				ctx->startTimer(RC_TIMER_BUILD_CONTOURS_SIMPLIFY);
+				simplifyContour(verts, simplified, maxError, maxEdgeLen, buildFlags);
+				removeDegenerateSegments(simplified);
+				ctx->stopTimer(RC_TIMER_BUILD_CONTOURS_SIMPLIFY);
+				
+
+				// Store region->contour remap info.
+				// Create contour.
+				if (simplified.size()/4 >= 3)
+				{
+					if (cset.nconts >= maxContours)
+					{
+						// Allocate more contours.
+						// This can happen when there are tiny holes in the heightfield.
+						const int oldMax = maxContours;
+						maxContours *= 2;
+						rcContour* newConts = (rcContour*)rcAlloc(sizeof(rcContour)*maxContours, RC_ALLOC_PERM);
+						if (!newConts)
+						{
+							ctx->log(RC_LOG_ERROR, "rcBuildContours: Out of memory 'newConts' (%d).", maxContours);
+							return false;
+						}
+						for (int j = 0; j < cset.nconts; ++j)
+						{
+							newConts[j] = cset.conts[j];
+							// Reset source pointers to prevent data deletion.
+							cset.conts[j].verts = 0;
+							cset.conts[j].rverts = 0;
+						}
+						rcFree(cset.conts);
+						cset.conts = newConts;
+					
+						ctx->log(RC_LOG_WARNING, "rcBuildContours: Expanding max contours from %d to %d.", oldMax, maxContours);
+					}
+						
+					rcContour* cont = &cset.conts[cset.nconts++];
+					
+					cont->nverts = simplified.size()/4;
+					cont->verts = (int*)rcAlloc(sizeof(int)*cont->nverts*4, RC_ALLOC_PERM);
+					if (!cont->verts)
+					{
+						ctx->log(RC_LOG_ERROR, "rcBuildContours: Out of memory 'verts' (%d).", cont->nverts);
+						return false;
+					}
+					memcpy(cont->verts, &simplified[0], sizeof(int)*cont->nverts*4);
+					if (borderSize > 0)
+					{
+						// If the heightfield was built with borderSize, remove the offset.
+						for (int j = 0; j < cont->nverts; ++j)
+						{
+							int* v = &cont->verts[j*4];
+							v[0] -= borderSize;
+							v[2] -= borderSize;
+						}
+					}
+					
+					cont->nrverts = verts.size()/4;
+					cont->rverts = (int*)rcAlloc(sizeof(int)*cont->nrverts*4, RC_ALLOC_PERM);
+					if (!cont->rverts)
+					{
+						ctx->log(RC_LOG_ERROR, "rcBuildContours: Out of memory 'rverts' (%d).", cont->nrverts);
+						return false;
+					}
+					memcpy(cont->rverts, &verts[0], sizeof(int)*cont->nrverts*4);
+					if (borderSize > 0)
+					{
+						// If the heightfield was built with borderSize, remove the offset.
+						for (int j = 0; j < cont->nrverts; ++j)
+						{
+							int* v = &cont->rverts[j*4];
+							v[0] -= borderSize;
+							v[2] -= borderSize;
+						}
+					}
+					
+					// Remember which region and area this contour outlines.
+					cont->reg = reg;
+					cont->area = area;
+				}
+			}
+		}
+	}
+	
+	// Check and merge droppings.
+	// Sometimes the previous algorithms can fail and create several contours
+	// per area. This pass will try to merge the holes into the main region.
+	for (int i = 0; i < cset.nconts; ++i)
+	{
+		rcContour& cont = cset.conts[i];
+		// Check if the contour is wound backwards.
+		if (calcAreaOfPolygon2D(cont.verts, cont.nverts) < 0)
+		{
+			// Find another contour which has the same region ID.
+			int mergeIdx = -1;
+			for (int j = 0; j < cset.nconts; ++j)
+			{
+				if (i == j) continue;
+				if (cset.conts[j].nverts && cset.conts[j].reg == cont.reg)
+				{
+					// Make sure the polygon is correctly oriented: only a contour
+					// with positive area may absorb the hole. A bare non-zero test
+					// would also accept another backwards-wound contour.
+					if (calcAreaOfPolygon2D(cset.conts[j].verts, cset.conts[j].nverts) > 0)
+					{
+						mergeIdx = j;
+						break;
+					}
+				}
+			}
+			if (mergeIdx == -1)
+			{
+				ctx->log(RC_LOG_WARNING, "rcBuildContours: Could not find merge target for bad contour %d.", i);
+			}
+			else
+			{
+				rcContour& mcont = cset.conts[mergeIdx];
+				// Merge by closest points.
+				int ia = 0, ib = 0;
+				getClosestIndices(mcont.verts, mcont.nverts, cont.verts, cont.nverts, ia, ib);
+				if (ia == -1 || ib == -1)
+				{
+					ctx->log(RC_LOG_WARNING, "rcBuildContours: Failed to find merge points for %d and %d.", i, mergeIdx);
+					continue;
+				}
+				if (!mergeContours(mcont, cont, ia, ib))
+				{
+					ctx->log(RC_LOG_WARNING, "rcBuildContours: Failed to merge contours %d and %d.", i, mergeIdx);
+					continue;
+				}
+			}
+		}
+	}
+	
+	ctx->stopTimer(RC_TIMER_BUILD_CONTOURS);
+	
+	return true;
+}
diff --git a/Engine/lib/recast/Recast/Source/RecastFilter.cpp b/Engine/lib/recast/Recast/Source/RecastFilter.cpp
new file mode 100644
index 000000000..bf985c362
--- /dev/null
+++ b/Engine/lib/recast/Recast/Source/RecastFilter.cpp
@@ -0,0 +1,207 @@
+//
+// Copyright (c) 2009-2010 Mikko Mononen memon@inside.org
+//
+// This software is provided 'as-is', without any express or implied
+// warranty.  In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source distribution.
+//
+
+#define _USE_MATH_DEFINES
+#include <math.h>
+#include <stdio.h>
+#include "Recast.h"
+#include "RecastAssert.h"
+
+/// @par
+///
+/// Lets walkable regions flow over low lying objects such as curbs, and up
+/// structures such as stairways.
+///
+/// An unwalkable span inherits the area of the walkable span directly below it when: <tt>rcAbs(currentSpan.smax - spanBelow.smax) <= walkableClimb</tt>
+///
+/// @warning Will override the effect of #rcFilterLedgeSpans.  So if both filters are used, call
+/// #rcFilterLedgeSpans after calling this filter.
+///
+/// @see rcHeightfield, rcConfig
+void rcFilterLowHangingWalkableObstacles(rcContext* ctx, const int walkableClimb, rcHeightfield& solid)
+{
+	rcAssert(ctx);
+
+	ctx->startTimer(RC_TIMER_FILTER_LOW_OBSTACLES);
+
+	for (int y = 0; y < solid.height; ++y)
+	{
+		for (int x = 0; x < solid.width; ++x)
+		{
+			// Walk the column's span list, remembering the previously visited span.
+			const rcSpan* below = 0;
+			bool belowWasWalkable = false;
+			unsigned char belowArea = RC_NULL_AREA;
+
+			rcSpan* span = solid.spans[x + y*solid.width];
+			while (span)
+			{
+				// Walkability is sampled before the span is possibly promoted, so the
+				// promotion cannot propagate past multiple non-walkable objects.
+				const bool walkable = span->area != RC_NULL_AREA;
+				if (!walkable && belowWasWalkable &&
+					rcAbs((int)span->smax - (int)below->smax) <= walkableClimb)
+				{
+					span->area = belowArea;
+				}
+
+				belowWasWalkable = walkable;
+				belowArea = span->area;
+				below = span;
+				span = span->next;
+			}
+		}
+	}
+
+	ctx->stopTimer(RC_TIMER_FILTER_LOW_OBSTACLES);
+}
+
+/// @par
+///
+/// A ledge is a span with one or more neighbors whose maximum is further away than @p walkableClimb
+/// from the current span's maximum.
+/// This method removes the impact of the overestimation of conservative voxelization 
+/// so the resulting mesh will not have regions hanging in the air over ledges.
+/// 
+/// A span is a ledge if: <tt>rcAbs(currentSpan.smax - neighborSpan.smax) > walkableClimb</tt>
+/// 
+/// @see rcHeightfield, rcConfig
+void rcFilterLedgeSpans(rcContext* ctx, const int walkableHeight, const int walkableClimb,
+						rcHeightfield& solid)
+{
+	rcAssert(ctx);
+	
+	ctx->startTimer(RC_TIMER_FILTER_BORDER);
+
+	const int w = solid.width;
+	const int h = solid.height;
+	const int MAX_HEIGHT = 0xffff;	// Ceiling used when there is no span above.
+	
+	// Mark border spans.
+	for (int y = 0; y < h; ++y)
+	{
+		for (int x = 0; x < w; ++x)
+		{
+			for (rcSpan* s = solid.spans[x + y*w]; s; s = s->next)
+			{
+				// Skip non walkable spans.
+				if (s->area == RC_NULL_AREA)
+					continue;
+				
+				const int bot = (int)(s->smax);	// Surface of this span.
+				const int top = s->next ? (int)(s->next->smin) : MAX_HEIGHT;	// Underside of the span above.
+				
+				// Find neighbours minimum height.
+				int minh = MAX_HEIGHT;
+
+				// Min and max height of accessible neighbours.
+				int asmin = s->smax;
+				int asmax = s->smax;
+
+				for (int dir = 0; dir < 4; ++dir)
+				{
+					int dx = x + rcGetDirOffsetX(dir);
+					int dy = y + rcGetDirOffsetY(dir);
+					// Skip neighbours which are out of bounds.
+					if (dx < 0 || dy < 0 || dx >= w || dy >= h)
+					{
+						minh = rcMin(minh, -walkableClimb - bot);	// Counted as a neighbour surface at -walkableClimb.
+						continue;
+					}
+
+					// From minus infinity to the first span.
+					rcSpan* ns = solid.spans[dx + dy*w];
+					int nbot = -walkableClimb;
+					int ntop = ns ? (int)ns->smin : MAX_HEIGHT;
+					// Skip neighbour if the gap between the spans is too small.
+					if (rcMin(top,ntop) - rcMax(bot,nbot) > walkableHeight)
+						minh = rcMin(minh, nbot - bot);
+					
+					// Rest of the spans.
+					for (ns = solid.spans[dx + dy*w]; ns; ns = ns->next)
+					{
+						nbot = (int)ns->smax;
+						ntop = ns->next ? (int)ns->next->smin : MAX_HEIGHT;
+						// Skip neighbour if the gap between the spans is too small.
+						if (rcMin(top,ntop) - rcMax(bot,nbot) > walkableHeight)
+						{
+							minh = rcMin(minh, nbot - bot);
+						
+							// Find min/max accessible neighbour height. 
+							if (rcAbs(nbot - bot) <= walkableClimb)
+							{
+								if (nbot < asmin) asmin = nbot;
+								if (nbot > asmax) asmax = nbot;
+							}
+							
+						}
+					}
+				}
+				
+				// The current span is close to a ledge if the drop to any
+				// neighbour span is greater than the walkableClimb.
+				if (minh < -walkableClimb)
+					s->area = RC_NULL_AREA;
+					
+				// If the difference between all neighbours is too large,
+				// we are at steep slope, mark the span as ledge.
+				if ((asmax - asmin) > walkableClimb)
+				{
+					s->area = RC_NULL_AREA;
+				}
+			}
+		}
+	}
+	
+	ctx->stopTimer(RC_TIMER_FILTER_BORDER);
+}	
+
+/// @par
+///
+/// Marks as unwalkable every span whose clearance is not greater than @p walkableHeight.
+/// Clearance is the distance from the span's maximum to the next higher span's minimum in the same column.
+/// 
+/// @see rcHeightfield, rcConfig
+void rcFilterWalkableLowHeightSpans(rcContext* ctx, int walkableHeight, rcHeightfield& solid)
+{
+	rcAssert(ctx);
+
+	ctx->startTimer(RC_TIMER_FILTER_WALKABLE);
+
+	// Ceiling used for a span that has no span above it.
+	const int OPEN_CEILING = 0xffff;
+
+	for (int y = 0; y < solid.height; ++y)
+	{
+		for (int x = 0; x < solid.width; ++x)
+		{
+			rcSpan* span = solid.spans[x + y*solid.width];
+			for (; span; span = span->next)
+			{
+				// Room between this span's top and the bottom of the span above.
+				const int ceiling = span->next ? (int)(span->next->smin) : OPEN_CEILING;
+				const int clearance = ceiling - (int)(span->smax);
+				// Not enough space above for the agent to stand there.
+				if (clearance <= walkableHeight)
+					span->area = RC_NULL_AREA;
+			}
+		}
+	}
+
+	ctx->stopTimer(RC_TIMER_FILTER_WALKABLE);
+}
diff --git a/Engine/lib/recast/Recast/Source/RecastLayers.cpp b/Engine/lib/recast/Recast/Source/RecastLayers.cpp
new file mode 100644
index 000000000..c6168107a
--- /dev/null
+++ b/Engine/lib/recast/Recast/Source/RecastLayers.cpp
@@ -0,0 +1,620 @@
+//
+// Copyright (c) 2009-2010 Mikko Mononen memon@inside.org
+//
+// This software is provided 'as-is', without any express or implied
+// warranty.  In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source distribution.
+//
+
+#include <float.h>
+#define _USE_MATH_DEFINES
+#include <math.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include "Recast.h"
+#include "RecastAlloc.h"
+#include "RecastAssert.h"
+
+
+static const int RC_MAX_LAYERS = RC_NOT_CONNECTED;	// Capacity of rcLayerRegion::layers.
+static const int RC_MAX_NEIS = 16;	// Capacity of rcLayerRegion::neis.
+// Per-region bookkeeping used while grouping regions into layers.
+struct rcLayerRegion
+{
+	unsigned char layers[RC_MAX_LAYERS];	// Ids of regions found in the same grid column as this one.
+	unsigned char neis[RC_MAX_NEIS];	// Ids of regions this one is connected to.
+	unsigned short ymin, ymax;	// Lowest / highest span y seen in the region.
+	unsigned char layerId;		// Layer ID (0xff while unassigned)
+	unsigned char nlayers;		// Number of valid entries in 'layers'
+	unsigned char nneis;		// Number of valid entries in 'neis'
+	unsigned char base;			// Flag indicating if the region is the base of merged regions.
+};
+
+
+static void addUnique(unsigned char* a, unsigned char& an, unsigned char v)
+{
+	// Append 'v' unless already listed; no capacity check, caller ensures room.
+	const unsigned char* const end = a + an;
+	for (const unsigned char* it = a; it != end; ++it)
+		if (*it == v)
+			return;
+	a[an++] = v;
+}
+
+static bool contains(const unsigned char* a, const unsigned char an, const unsigned char v)
+{
+	// Linear scan of the first 'an' entries of 'a' for the value 'v'.
+	int remaining = (int)an;
+	while (remaining > 0 && a[remaining-1] != v)
+		--remaining;
+	return remaining > 0;
+}
+
+inline bool overlapRange(const unsigned short amin, const unsigned short amax,
+						 const unsigned short bmin, const unsigned short bmax)
+{
+	return amin <= bmax && bmin <= amax;	// Closed intervals: touching ends overlap.
+}
+
+
+
+struct rcLayerSweepSpan	// One run of connected spans found while sweeping a grid row.
+{
+	unsigned short ns;	// number of samples: spans of the sweep connected to 'nei' in the -y direction
+	unsigned char id;	// region id assigned to the sweep once the row has been processed
+	unsigned char nei;	// neighbour region id in the -y direction; 0xff if none or more than one
+};
+
+/// @par
+/// Partitions the walkable spans of @p chf into layers stored in @p lset. Regions that
+/// overlap within a grid column never share a layer. Returns false on allocation failure
+/// or region id overflow. See the #rcConfig documentation for the configuration parameters.
+/// @see rcAllocHeightfieldLayerSet, rcCompactHeightfield, rcHeightfieldLayerSet, rcConfig
+bool rcBuildHeightfieldLayers(rcContext* ctx, rcCompactHeightfield& chf,
+							  const int borderSize, const int walkableHeight,
+							  rcHeightfieldLayerSet& lset)
+{
+	rcAssert(ctx);
+	
+	ctx->startTimer(RC_TIMER_BUILD_LAYERS);
+	
+	const int w = chf.width;
+	const int h = chf.height;
+	
+	rcScopedDelete<unsigned char> srcReg = (unsigned char*)rcAlloc(sizeof(unsigned char)*chf.spanCount, RC_ALLOC_TEMP);
+	if (!srcReg)
+	{
+		ctx->log(RC_LOG_ERROR, "rcBuildHeightfieldLayers: Out of memory 'srcReg' (%d).", chf.spanCount);
+		return false;
+	}
+	memset(srcReg,0xff,sizeof(unsigned char)*chf.spanCount);
+	
+	const int nsweeps = chf.width;
+	rcScopedDelete<rcLayerSweepSpan> sweeps = (rcLayerSweepSpan*)rcAlloc(sizeof(rcLayerSweepSpan)*nsweeps, RC_ALLOC_TEMP);
+	if (!sweeps)
+	{
+		ctx->log(RC_LOG_ERROR, "rcBuildHeightfieldLayers: Out of memory 'sweeps' (%d).", nsweeps);
+		return false;
+	}
+	
+	
+	// Partition walkable area into monotone regions.
+	int prevCount[256];
+	unsigned char regId = 0;
+
+	for (int y = borderSize; y < h-borderSize; ++y)
+	{
+		memset(prevCount,0,sizeof(int)*regId);
+		unsigned char sweepId = 0;
+		
+		for (int x = borderSize; x < w-borderSize; ++x)
+		{
+			const rcCompactCell& c = chf.cells[x+y*w];
+			
+			for (int i = (int)c.index, ni = (int)(c.index+c.count); i < ni; ++i)
+			{
+				const rcCompactSpan& s = chf.spans[i];
+				if (chf.areas[i] == RC_NULL_AREA) continue;
+
+				unsigned char sid = 0xff;
+
+				// -x
+				if (rcGetCon(s, 0) != RC_NOT_CONNECTED)
+				{
+					const int ax = x + rcGetDirOffsetX(0);
+					const int ay = y + rcGetDirOffsetY(0);
+					const int ai = (int)chf.cells[ax+ay*w].index + rcGetCon(s, 0);
+					if (chf.areas[ai] != RC_NULL_AREA && srcReg[ai] != 0xff)
+						sid = srcReg[ai];
+				}
+				
+				if (sid == 0xff)
+				{
+					sid = sweepId++;
+					sweeps[sid].nei = 0xff;
+					sweeps[sid].ns = 0;
+				}
+				
+				// -y
+				if (rcGetCon(s,3) != RC_NOT_CONNECTED)
+				{
+					const int ax = x + rcGetDirOffsetX(3);
+					const int ay = y + rcGetDirOffsetY(3);
+					const int ai = (int)chf.cells[ax+ay*w].index + rcGetCon(s, 3);
+					const unsigned char nr = srcReg[ai];
+					if (nr != 0xff)
+					{
+						// Set neighbour when first valid neighbour is encountered.
+						if (sweeps[sid].ns == 0)
+							sweeps[sid].nei = nr;
+						
+						if (sweeps[sid].nei == nr)
+						{
+							// Update existing neighbour
+							sweeps[sid].ns++;
+							prevCount[nr]++;
+						}
+						else
+						{
+							// This is hit if there is more than one neighbour.
+							// Invalidate the neighbour.
+							sweeps[sid].nei = 0xff;
+						}
+					}
+				}
+				
+				srcReg[i] = sid;
+			}
+		}
+		
+		// Create unique ID.
+		for (int i = 0; i < sweepId; ++i)
+		{
+			// If the neighbour is set and there is only one continuous connection to it,
+			// the sweep will be merged with the previous one, else new region is created.
+			if (sweeps[i].nei != 0xff && prevCount[sweeps[i].nei] == (int)sweeps[i].ns)
+			{
+				sweeps[i].id = sweeps[i].nei;
+			}
+			else
+			{
+				if (regId == 255)
+				{
+					ctx->log(RC_LOG_ERROR, "rcBuildHeightfieldLayers: Region ID overflow.");
+					return false;
+				}
+				sweeps[i].id = regId++;
+			}
+		}
+		
+		// Remap local sweep ids to region ids.
+		for (int x = borderSize; x < w-borderSize; ++x)
+		{
+			const rcCompactCell& c = chf.cells[x+y*w];
+			for (int i = (int)c.index, ni = (int)(c.index+c.count); i < ni; ++i)
+			{
+				if (srcReg[i] != 0xff)
+					srcReg[i] = sweeps[srcReg[i]].id;
+			}
+		}
+	}
+
+	// Allocate and init layer regions.
+	const int nregs = (int)regId;
+	rcScopedDelete<rcLayerRegion> regs = (rcLayerRegion*)rcAlloc(sizeof(rcLayerRegion)*nregs, RC_ALLOC_TEMP);
+	if (!regs)
+	{
+		ctx->log(RC_LOG_ERROR, "rcBuildHeightfieldLayers: Out of memory 'regs' (%d).", nregs);
+		return false;
+	}
+	memset(regs, 0, sizeof(rcLayerRegion)*nregs);
+	for (int i = 0; i < nregs; ++i)
+	{
+		regs[i].layerId = 0xff;
+		regs[i].ymin = 0xffff;
+		regs[i].ymax = 0;
+	}
+	
+	// Find region neighbours and overlapping regions.
+	for (int y = 0; y < h; ++y)
+	{
+		for (int x = 0; x < w; ++x)
+		{
+			const rcCompactCell& c = chf.cells[x+y*w];
+			
+			unsigned char lregs[RC_MAX_LAYERS];
+			int nlregs = 0;
+			
+			for (int i = (int)c.index, ni = (int)(c.index+c.count); i < ni; ++i)
+			{
+				const rcCompactSpan& s = chf.spans[i];
+				const unsigned char ri = srcReg[i];
+				if (ri == 0xff) continue;
+				
+				regs[ri].ymin = rcMin(regs[ri].ymin, s.y);
+				regs[ri].ymax = rcMax(regs[ri].ymax, s.y);
+				
+				// Collect all region layers.
+				if (nlregs < RC_MAX_LAYERS)
+					lregs[nlregs++] = ri;
+				
+				// Update neighbours
+				for (int dir = 0; dir < 4; ++dir)
+				{
+					if (rcGetCon(s, dir) != RC_NOT_CONNECTED)
+					{
+						const int ax = x + rcGetDirOffsetX(dir);
+						const int ay = y + rcGetDirOffsetY(dir);
+						const int ai = (int)chf.cells[ax+ay*w].index + rcGetCon(s, dir);
+						const unsigned char rai = srcReg[ai];
+						if (rai != 0xff && rai != ri)
+							addUnique(regs[ri].neis, regs[ri].nneis, rai);
+					}
+				}
+				
+			}
+			
+			// Update overlapping regions.
+			for (int i = 0; i < nlregs-1; ++i)
+			{
+				for (int j = i+1; j < nlregs; ++j)
+				{
+					if (lregs[i] != lregs[j])
+					{
+						rcLayerRegion& ri = regs[lregs[i]];
+						rcLayerRegion& rj = regs[lregs[j]];
+						addUnique(ri.layers, ri.nlayers, lregs[j]);
+						addUnique(rj.layers, rj.nlayers, lregs[i]);
+					}
+				}
+			}
+			
+		}
+	}
+	
+	// Create 2D layers from regions.
+	unsigned char layerId = 0;
+	
+	static const int MAX_STACK = 64;
+	unsigned char stack[MAX_STACK];
+	int nstack = 0;
+	
+	for (int i = 0; i < nregs; ++i)
+	{
+		rcLayerRegion& root = regs[i];
+		// Skip already visited.
+		if (root.layerId != 0xff)
+			continue;
+
+		// Start search.
+		root.layerId = layerId;
+		root.base = 1;
+		
+		nstack = 0;
+		stack[nstack++] = (unsigned char)i;
+		
+		while (nstack)
+		{
+			// Pop front
+			rcLayerRegion& reg = regs[stack[0]];
+			nstack--;
+			for (int j = 0; j < nstack; ++j)
+				stack[j] = stack[j+1];
+			
+			const int nneis = (int)reg.nneis;
+			for (int j = 0; j < nneis; ++j)
+			{
+				const unsigned char nei = reg.neis[j];
+				rcLayerRegion& regn = regs[nei];
+				// Skip already visited.
+				if (regn.layerId != 0xff)
+					continue;
+				// Skip if the neighbour is overlapping root region.
+				if (contains(root.layers, root.nlayers, nei))
+					continue;
+				// Skip if the merged height range would become too large (layer heights are stored in a byte).
+				const int ymin = rcMin(root.ymin, regn.ymin);
+				const int ymax = rcMax(root.ymax, regn.ymax);
+				if ((ymax - ymin) >= 255)
+					 continue;
+
+				if (nstack < MAX_STACK)
+				{
+					// Deepen
+					stack[nstack++] = (unsigned char)nei;
+					
+					// Mark layer id
+					regn.layerId = layerId;
+					// Merge current layers to root.
+					for (int k = 0; k < regn.nlayers; ++k)
+						addUnique(root.layers, root.nlayers, regn.layers[k]);
+					root.ymin = rcMin(root.ymin, regn.ymin);
+					root.ymax = rcMax(root.ymax, regn.ymax);
+				}
+			}
+		}
+		
+		layerId++;
+	}
+	
+	// Merge non-overlapping regions that are close in height.
+	const unsigned short mergeHeight = (unsigned short)walkableHeight * 4;
+	
+	for (int i = 0; i < nregs; ++i)
+	{
+		rcLayerRegion& ri = regs[i];
+		if (!ri.base) continue;
+		
+		unsigned char newId = ri.layerId;
+		
+		for (;;)
+		{
+			unsigned char oldId = 0xff;
+			
+			for (int j = 0; j < nregs; ++j)
+			{
+				if (i == j) continue;
+				rcLayerRegion& rj = regs[j];
+				if (!rj.base) continue;
+				
+				// Skip if the regions are not close to each other.
+				if (!overlapRange(ri.ymin,ri.ymax+mergeHeight, rj.ymin,rj.ymax+mergeHeight))
+					continue;
+				// Skip if the merged height range would become too large (layer heights are stored in a byte).
+				const int ymin = rcMin(ri.ymin, rj.ymin);
+				const int ymax = rcMax(ri.ymax, rj.ymax);
+				if ((ymax - ymin) >= 255)
+				  continue;
+						  
+				// Make sure that there is no overlap when merging 'ri' and 'rj'.
+				bool overlap = false;
+				// Iterate over all regions which have the same layerId as 'rj'
+				for (int k = 0; k < nregs; ++k)
+				{
+					if (regs[k].layerId != rj.layerId)
+						continue;
+					// Check if region 'k' is overlapping region 'ri'
+					// Index to 'regs' is the same as region id.
+					if (contains(ri.layers,ri.nlayers, (unsigned char)k))
+					{
+						overlap = true;
+						break;
+					}
+				}
+				// Cannot merge if regions overlap.
+				if (overlap)
+					continue;
+				
+				// Can merge i and j.
+				oldId = rj.layerId;
+				break;
+			}
+			
+			// Could not find anything to merge with, stop.
+			if (oldId == 0xff)
+				break;
+			
+			// Merge
+			for (int j = 0; j < nregs; ++j)
+			{
+				rcLayerRegion& rj = regs[j];
+				if (rj.layerId == oldId)
+				{
+					rj.base = 0;
+					// Remap layerIds.
+					rj.layerId = newId;
+					// Add overlaid layers from 'rj' to 'ri'.
+					for (int k = 0; k < rj.nlayers; ++k)
+						addUnique(ri.layers, ri.nlayers, rj.layers[k]);
+					// Update height bounds.
+					ri.ymin = rcMin(ri.ymin, rj.ymin);
+					ri.ymax = rcMax(ri.ymax, rj.ymax);
+				}
+			}
+		}
+	}
+	
+	// Compact layerIds
+	unsigned char remap[256];
+	memset(remap, 0, 256);
+
+	// Find number of unique layers.
+	layerId = 0;
+	for (int i = 0; i < nregs; ++i)
+		remap[regs[i].layerId] = 1;
+	for (int i = 0; i < 256; ++i)
+	{
+		if (remap[i])
+			remap[i] = layerId++;
+		else
+			remap[i] = 0xff;
+	}
+	// Remap ids.
+	for (int i = 0; i < nregs; ++i)
+		regs[i].layerId = remap[regs[i].layerId];
+	
+	// No layers, return empty.
+	if (layerId == 0)
+	{
+		ctx->stopTimer(RC_TIMER_BUILD_LAYERS);
+		return true;
+	}
+	
+	// Create layers.
+	rcAssert(lset.layers == 0);
+	
+	const int lw = w - borderSize*2;
+	const int lh = h - borderSize*2;
+
+	// Build contracted bbox for layers.
+	float bmin[3], bmax[3];
+	rcVcopy(bmin, chf.bmin);
+	rcVcopy(bmax, chf.bmax);
+	bmin[0] += borderSize*chf.cs;
+	bmin[2] += borderSize*chf.cs;
+	bmax[0] -= borderSize*chf.cs;
+	bmax[2] -= borderSize*chf.cs;
+	
+	lset.nlayers = (int)layerId;
+	
+	lset.layers = (rcHeightfieldLayer*)rcAlloc(sizeof(rcHeightfieldLayer)*lset.nlayers, RC_ALLOC_PERM);
+	if (!lset.layers)
+	{
+		ctx->log(RC_LOG_ERROR, "rcBuildHeightfieldLayers: Out of memory 'layers' (%d).", lset.nlayers);
+		return false;
+	}
+	memset(lset.layers, 0, sizeof(rcHeightfieldLayer)*lset.nlayers);
+
+	
+	// Store layers.
+	for (int i = 0; i < lset.nlayers; ++i)
+	{
+		unsigned char curId = (unsigned char)i;
+		
+		// Allocate memory for the current layer.
+		rcHeightfieldLayer* layer = &lset.layers[i];
+		memset(layer, 0, sizeof(rcHeightfieldLayer));
+
+		const int gridSize = sizeof(unsigned char)*lw*lh;
+
+		layer->heights = (unsigned char*)rcAlloc(gridSize, RC_ALLOC_PERM);
+		if (!layer->heights)
+		{
+			ctx->log(RC_LOG_ERROR, "rcBuildHeightfieldLayers: Out of memory 'heights' (%d).", gridSize);
+			return false;
+		}
+		memset(layer->heights, 0xff, gridSize);
+
+		layer->areas = (unsigned char*)rcAlloc(gridSize, RC_ALLOC_PERM);
+		if (!layer->areas)
+		{
+			ctx->log(RC_LOG_ERROR, "rcBuildHeightfieldLayers: Out of memory 'areas' (%d).", gridSize);
+			return false;
+		}
+		memset(layer->areas, 0, gridSize);
+
+		layer->cons = (unsigned char*)rcAlloc(gridSize, RC_ALLOC_PERM);
+		if (!layer->cons)
+		{
+			ctx->log(RC_LOG_ERROR, "rcBuildHeightfieldLayers: Out of memory 'cons' (%d).", gridSize);
+			return false;
+		}
+		memset(layer->cons, 0, gridSize);
+		
+		// Find layer height bounds.
+		int hmin = 0, hmax = 0;
+		for (int j = 0; j < nregs; ++j)
+		{
+			if (regs[j].base && regs[j].layerId == curId)
+			{
+				hmin = (int)regs[j].ymin;
+				hmax = (int)regs[j].ymax;
+			}
+		}
+
+		layer->width = lw;
+		layer->height = lh;
+		layer->cs = chf.cs;
+		layer->ch = chf.ch;
+		
+		// Adjust the bbox to fit the heightfield.
+		rcVcopy(layer->bmin, bmin);
+		rcVcopy(layer->bmax, bmax);
+		layer->bmin[1] = bmin[1] + hmin*chf.ch;
+		layer->bmax[1] = bmin[1] + hmax*chf.ch;
+		layer->hmin = hmin;
+		layer->hmax = hmax;
+
+		// Update usable data region.
+		layer->minx = layer->width;
+		layer->maxx = 0;
+		layer->miny = layer->height;
+		layer->maxy = 0;
+		
+		// Copy height and area from compact heightfield. 
+		for (int y = 0; y < lh; ++y)
+		{
+			for (int x = 0; x < lw; ++x)
+			{
+				const int cx = borderSize+x;
+				const int cy = borderSize+y;
+				const rcCompactCell& c = chf.cells[cx+cy*w];
+				for (int j = (int)c.index, nj = (int)(c.index+c.count); j < nj; ++j)
+				{
+					const rcCompactSpan& s = chf.spans[j];
+					// Skip unassigned regions.
+					if (srcReg[j] == 0xff)
+						continue;
+					// Skip if it does not belong to the current layer.
+					unsigned char lid = regs[srcReg[j]].layerId;
+					if (lid != curId)
+						continue;
+					
+					// Update data bounds.
+					layer->minx = rcMin(layer->minx, x);
+					layer->maxx = rcMax(layer->maxx, x);
+					layer->miny = rcMin(layer->miny, y);
+					layer->maxy = rcMax(layer->maxy, y);
+					
+					// Store height and area type.
+					const int idx = x+y*lw;
+					layer->heights[idx] = (unsigned char)(s.y - hmin);
+					layer->areas[idx] = chf.areas[j];
+					
+					// Check connection.
+					unsigned char portal = 0;
+					unsigned char con = 0;
+					for (int dir = 0; dir < 4; ++dir)
+					{
+						if (rcGetCon(s, dir) != RC_NOT_CONNECTED)
+						{
+							const int ax = cx + rcGetDirOffsetX(dir);
+							const int ay = cy + rcGetDirOffsetY(dir);
+							const int ai = (int)chf.cells[ax+ay*w].index + rcGetCon(s, dir);
+							unsigned char alid = srcReg[ai] != 0xff ? regs[srcReg[ai]].layerId : 0xff;
+							// Portal mask
+							if (chf.areas[ai] != RC_NULL_AREA && lid != alid)
+							{
+								portal |= (unsigned char)(1<<dir);
+								// Update height so that it matches on both sides of the portal.
+								const rcCompactSpan& as = chf.spans[ai];
+								if (as.y > hmin)
+									layer->heights[idx] = rcMax(layer->heights[idx], (unsigned char)(as.y - hmin));
+							}
+							// Valid connection mask
+							if (chf.areas[ai] != RC_NULL_AREA && lid == alid)
+							{
+								const int nx = ax - borderSize;
+								const int ny = ay - borderSize;
+								if (nx >= 0 && ny >= 0 && nx < lw && ny < lh)
+									con |= (unsigned char)(1<<dir);
+							}
+						}
+					}
+					
+					layer->cons[idx] = (portal << 4) | con;
+				}
+			}
+		}
+		
+		if (layer->minx > layer->maxx)
+			layer->minx = layer->maxx = 0;
+		if (layer->miny > layer->maxy)
+			layer->miny = layer->maxy = 0;
+	}
+	
+	ctx->stopTimer(RC_TIMER_BUILD_LAYERS);
+	
+	return true;
+}
diff --git a/Engine/lib/recast/Recast/Source/RecastMesh.cpp b/Engine/lib/recast/Recast/Source/RecastMesh.cpp
new file mode 100644
index 000000000..13aad2af0
--- /dev/null
+++ b/Engine/lib/recast/Recast/Source/RecastMesh.cpp
@@ -0,0 +1,1428 @@
+//
+// Copyright (c) 2009-2010 Mikko Mononen memon@inside.org
+//
+// This software is provided 'as-is', without any express or implied
+// warranty.  In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source distribution.
+//
+
+#define _USE_MATH_DEFINES
+#include <math.h>
+#include <string.h>
+#include <stdio.h>
+#include "Recast.h"
+#include "RecastAlloc.h"
+#include "RecastAssert.h"
+
+struct rcEdge	// Edge record used by buildMeshAdjacency().
+{
+	unsigned short vert[2];	// End vertex indices, stored so that vert[0] < vert[1].
+	unsigned short polyEdge[2];	// Index of the edge within poly[0] and within poly[1].
+	unsigned short poly[2];	// Polygons using the edge; poly[1] equals poly[0] until a second one is matched.
+};
+
+static bool buildMeshAdjacency(unsigned short* polys, const int npolys,
+							   const int nverts, const int vertsPerPoly)
+{
+	// Based on code by Eric Lengyel from:
+	// http://www.terathon.com/code/edges.php
+	// Each polygon occupies vertsPerPoly*2 entries: vertex ids, then neighbour slots.
+	const int edgeCapacity = npolys*vertsPerPoly;
+	unsigned short* headOfVert = (unsigned short*)rcAlloc(sizeof(unsigned short)*(nverts + edgeCapacity), RC_ALLOC_TEMP);
+	if (!headOfVert)
+		return false;
+	// The per-edge 'next' links share the allocation, right after the per-vertex heads.
+	unsigned short* linkOfEdge = headOfVert + nverts;
+	int numEdges = 0;
+	rcEdge* edgeList = (rcEdge*)rcAlloc(sizeof(rcEdge)*edgeCapacity, RC_ALLOC_TEMP);
+	if (!edgeList)
+	{
+		rcFree(headOfVert);
+		return false;
+	}
+	
+	for (int v = 0; v < nverts; ++v)
+		headOfVert[v] = RC_MESH_NULL_IDX;
+	
+	// Pass 1: collect the edges that run from a lower to a higher vertex index.
+	for (int pi = 0; pi < npolys; ++pi)
+	{
+		const unsigned short* poly = &polys[pi*vertsPerPoly*2];
+		for (int ei = 0; ei < vertsPerPoly && poly[ei] != RC_MESH_NULL_IDX; ++ei)
+		{
+			const bool wraps = (ei+1 >= vertsPerPoly) || (poly[ei+1] == RC_MESH_NULL_IDX);
+			const unsigned short from = poly[ei];
+			const unsigned short to = wraps ? poly[0] : poly[ei+1];
+			if (from >= to)
+				continue;
+			// New record; both sides point at this polygon until a partner is found.
+			rcEdge& rec = edgeList[numEdges];
+			rec.vert[0] = from;
+			rec.vert[1] = to;
+			rec.poly[0] = rec.poly[1] = (unsigned short)pi;
+			rec.polyEdge[0] = (unsigned short)ei;
+			rec.polyEdge[1] = 0;
+			// Push onto the front of the chain of edges starting at 'from'.
+			linkOfEdge[numEdges] = headOfVert[from];
+			headOfVert[from] = (unsigned short)numEdges;
+			numEdges++;
+		}
+	}
+	
+	for (int pi = 0; pi < npolys; ++pi)
+	{
+		const unsigned short* poly = &polys[pi*vertsPerPoly*2];
+		for (int ei = 0; ei < vertsPerPoly && poly[ei] != RC_MESH_NULL_IDX; ++ei)
+		{
+			const bool wraps = (ei+1 >= vertsPerPoly) || (poly[ei+1] == RC_MESH_NULL_IDX);
+			const unsigned short from = poly[ei];
+			const unsigned short to = wraps ? poly[0] : poly[ei+1];
+			if (from <= to)
+				continue;
+			// Pass 2: opposite winding; claim the still unmatched record (to, from).
+			for (unsigned short cur = headOfVert[to]; cur != RC_MESH_NULL_IDX; cur = linkOfEdge[cur])
+			{
+				rcEdge& rec = edgeList[cur];
+				if (rec.vert[1] == from && rec.poly[0] == rec.poly[1])
+				{
+					rec.poly[1] = (unsigned short)pi;
+					rec.polyEdge[1] = (unsigned short)ei;
+					break;
+				}
+			}
+		}
+	}
+	
+	// Store adjacency: every matched edge is written into the neighbour
+	// slots (second half of the polygon entry) of both polygons.
+	for (int k = 0; k < numEdges; ++k)
+	{
+		const rcEdge& rec = edgeList[k];
+		if (rec.poly[0] == rec.poly[1])
+			continue;	// Unmatched edge: nothing to record.
+		unsigned short* first = &polys[rec.poly[0]*vertsPerPoly*2];
+		unsigned short* second = &polys[rec.poly[1]*vertsPerPoly*2];
+		first[vertsPerPoly + rec.polyEdge[0]] = rec.poly[1];
+		second[vertsPerPoly + rec.polyEdge[1]] = rec.poly[0];
+	}
+	
+	rcFree(headOfVert);
+	rcFree(edgeList);
+	
+	return true;
+}
+
+
+static const int VERTEX_BUCKET_COUNT = (1<<12);
+// Maps integer coordinates to a bucket index in [0, VERTEX_BUCKET_COUNT).
+inline int computeVertexHash(int x, int y, int z)
+{
+	// Large multiplicative constants; here arbitrarily chosen primes.
+	const unsigned int mulX = 0x8da6b343, mulY = 0xd8163841, mulZ = 0xcb1ab31f;
+	const unsigned int mask = (unsigned int)(VERTEX_BUCKET_COUNT-1);
+	const unsigned int mixed = mulX * (unsigned int)x + mulY * (unsigned int)y + mulZ * (unsigned int)z;
+	return (int)(mixed & mask);
+}
+
+static unsigned short addVertex(unsigned short x, unsigned short y, unsigned short z,
+								unsigned short* verts, int* firstVert, int* nextVert, int& nv)
+{
+	// Buckets are keyed on x and z only (y is hashed as 0).
+	const int slot = computeVertexHash(x, 0, z);
+	
+	// Reuse an existing vertex with the same x/z whose y differs by at most 2.
+	for (int idx = firstVert[slot]; idx != -1; idx = nextVert[idx])
+	{
+		const unsigned short* cand = &verts[idx*3];
+		const bool sameColumn = (cand[0] == x) && (cand[2] == z);
+		if (sameColumn && rcAbs(cand[1] - y) <= 2)
+			return (unsigned short)idx;
+	}
+	
+	// Could not find, create new at index nv and push it on the bucket chain.
+	const int fresh = nv++;
+	verts[fresh*3+0] = x;
+	verts[fresh*3+1] = y;
+	verts[fresh*3+2] = z;
+	nextVert[fresh] = firstVert[slot];
+	firstVert[slot] = fresh;
+	
+	return (unsigned short)fresh;
+}
+
+inline int prev(int i, int n) { return i > 0 ? i-1 : n-1; }	// Cyclic predecessor of i among n entries.
+inline int next(int i, int n) { return i+1 >= n ? 0 : i+1; }	// Cyclic successor of i among n entries.
+
+inline int area2(const int* a, const int* b, const int* c)
+{
+	const int abx = b[0] - a[0], abz = b[2] - a[2];	// Only components 0 and 2 are used.
+	return abx * (c[2] - a[2]) - (c[0] - a[0]) * abz;
+}
+
+//	Exclusive or: true iff exactly one argument is true.
+//	For two bools this is simply inequality.
+//	(This idea is due to Michael Baldwin.)
+inline bool xorb(bool x, bool y)
+{
+	return x != y;
+}
+
+// Strict leftness test: true iff c is strictly to the left of the
+// directed line through a to b, i.e. area2(a, b, c) is negative.
+inline bool left(const int* a, const int* b, const int* c)
+{
+	return 0 > area2(a, b, c);
+}
+
+inline bool leftOn(const int* a, const int* b, const int* c)
+{
+	return 0 >= area2(a, b, c);	// As left(), but also true when a, b, c are collinear.
+}
+
+inline bool collinear(const int* a, const int* b, const int* c)
+{
+	return !area2(a, b, c);	// True iff area2(a, b, c) is exactly zero.
+}
+
+//	Proper intersection test for segments ab and cd: true iff they cross
+//	at a point interior to both. Any collinear triple is rejected first;
+//	after that, strict leftness decides.
+static bool intersectProp(const int* a, const int* b, const int* c, const int* d)
+{
+	const bool improper = collinear(a,b,c) || collinear(a,b,d) ||
+						  collinear(c,d,a) || collinear(c,d,b);
+	if (improper)
+		return false;
+	// c and d on opposite sides of ab, and a and b on opposite sides of cd.
+	return (left(a,b,c) != left(a,b,d)) && (left(c,d,a) != left(c,d,b));
+}
+
+// Returns true iff (a,b,c) are collinear and point c lies
+// on the closed segment ab.
+static bool between(const int* a, const int* b, const int* c)
+{
+	if (!collinear(a, b, c))
+		return false;
+	// Compare along component 0 unless a and b share it; then use component 2.
+	const int axis = (a[0] != b[0]) ? 0 : 2;
+	const int lo = a[axis] < b[axis] ? a[axis] : b[axis];
+	const int hi = a[axis] < b[axis] ? b[axis] : a[axis];
+	return lo <= c[axis] && c[axis] <= hi;
+}
+
+// Returns true iff segments ab and cd intersect, properly or improperly.
+static bool intersect(const int* a, const int* b, const int* c, const int* d)
+{
+	// A proper crossing settles it immediately.
+	if (intersectProp(a, b, c, d))
+		return true;
+	// Otherwise they intersect only if an endpoint of one segment
+	// lies on the other (closed) segment.
+	return between(a, b, c) || between(a, b, d) ||
+		   between(c, d, a) || between(c, d, b);
+}
+
+static bool vequal(const int* a, const int* b)
+{
+	return !(a[0] != b[0] || a[2] != b[2]);	// Only components 0 and 2 are compared.
+}
+
+// Returns true iff (v_i, v_j) is a proper internal *or* external
+// diagonal of P, *ignoring edges incident to v_i and v_j*.
+static bool diagonalie(int i, int j, int n, const int* verts, int* indices)
+{
+	// Vertices are 4 ints apart; the top bits of an index are masked off before use.
+	const int* segA = &verts[(indices[i] & 0x0fffffff) * 4];
+	const int* segB = &verts[(indices[j] & 0x0fffffff) * 4];
+	
+	// Test the candidate diagonal against every polygon edge (e0,e1).
+	for (int e0 = 0; e0 < n; e0++)
+	{
+		const int e1 = next(e0, n);
+		// Skip edges incident to i or j
+		if (e0 == i || e1 == i || e0 == j || e1 == j)
+			continue;
+		
+		const int* edgeA = &verts[(indices[e0] & 0x0fffffff) * 4];
+		const int* edgeB = &verts[(indices[e1] & 0x0fffffff) * 4];
+		// Edges that share a position with an end of the diagonal are skipped too.
+		if (vequal(segA, edgeA) || vequal(segB, edgeA) || vequal(segA, edgeB) || vequal(segB, edgeB))
+			continue;
+		if (intersect(segA, segB, edgeA, edgeB))
+			return false;
+	}
+	return true;
+}
+
+// Returns true iff the diagonal (i,j) is strictly internal to the
+// polygon P in the neighborhood of the i endpoint.
+static bool	inCone(int i, int j, int n, const int* verts, int* indices)
+{
+	const int* apex = &verts[(indices[i] & 0x0fffffff) * 4];
+	const int* target = &verts[(indices[j] & 0x0fffffff) * 4];
+	const int* after = &verts[(indices[next(i, n)] & 0x0fffffff) * 4];
+	const int* before = &verts[(indices[prev(i, n)] & 0x0fffffff) * 4];
+
+	// P[i] is reflex when i+1 is not left-or-on (i-1,i).
+	// Assume (i-1,i,i+1) not collinear.
+	if (!leftOn(before, apex, after))
+		return !leftOn(apex, target, after) || !leftOn(target, apex, before);
+	// Otherwise P[i] is a convex vertex [ i+1 left or on (i-1,i) ].
+	return left(apex, target, before) && left(target, apex, after);
+}
+
+// Returns true iff (v_i, v_j) is a proper internal diagonal of P:
+// it must pass the inCone() test at v_i and the diagonalie() edge test.
+static bool diagonal(int i, int j, int n, const int* verts, int* indices)
+{
+	return inCone(i, j, n, verts, indices) ? diagonalie(i, j, n, verts, indices) : false;
+}
+
+static int triangulate(int n, const int* verts, int* indices, int* tris)
+{
+	int ntris = 0;
+	int* dst = tris;
+	// Cuts the n-gon 'indices' into triangles, writing 3 vertex indices per triangle to 'tris'.
+	// The top bit (0x80000000) of indices[k] flags vertex k as removable: its two neighbours form a diagonal.
+	for (int i = 0; i < n; i++)
+	{
+		int i1 = next(i, n);
+		int i2 = next(i1, n);
+		if (diagonal(i, i2, n, verts, indices))
+			indices[i1] |= 0x80000000;
+	}
+	
+	while (n > 3)
+	{
+		int minLen = -1;
+		int mini = -1;
+		for (int i = 0; i < n; i++)
+		{
+			int i1 = next(i, n);
+			if (indices[i1] & 0x80000000)
+			{
+				const int* p0 = &verts[(indices[i] & 0x0fffffff) * 4];
+				const int* p2 = &verts[(indices[next(i1, n)] & 0x0fffffff) * 4];
+				
+				int dx = p2[0] - p0[0];
+				int dy = p2[2] - p0[2];
+				int len = dx*dx + dy*dy;
+				// Keep the removable vertex whose closing diagonal (i, i+2) is shortest.
+				if (minLen < 0 || len < minLen)
+				{
+					minLen = len;
+					mini = i;
+				}
+			}
+		}
+		
+		if (mini == -1)
+		{
+			// Should not happen: no removable vertex is left. The negated triangle count signals failure.
+/*			printf("mini == -1 ntris=%d n=%d\n", ntris, n);
+			for (int i = 0; i < n; i++)
+			{
+				printf("%d ", indices[i] & 0x0fffffff);
+			}
+			printf("\n");*/
+			return -ntris;
+		}
+		
+		int i = mini;
+		int i1 = next(i, n);
+		int i2 = next(i1, n);
+		// Emit the triangle (i, i+1, i+2) with the flag bits stripped.
+		*dst++ = indices[i] & 0x0fffffff;
+		*dst++ = indices[i1] & 0x0fffffff;
+		*dst++ = indices[i2] & 0x0fffffff;
+		ntris++;
+		
+		// Removes P[i1] by copying P[i+1]...P[n-1] left one index.
+		n--;
+		for (int k = i1; k < n; k++)
+			indices[k] = indices[k+1];
+		
+		if (i1 >= n) i1 = 0;
+		i = prev(i1,n);
+		// Update diagonal flags of the two vertices that were adjacent to the removed one.
+		if (diagonal(prev(i, n), i1, n, verts, indices))
+			indices[i] |= 0x80000000;
+		else
+			indices[i] &= 0x0fffffff;
+		
+		if (diagonal(i, next(i1, n), n, verts, indices))
+			indices[i1] |= 0x80000000;
+		else
+			indices[i1] &= 0x0fffffff;
+	}
+	
+	// Append the remaining triangle.
+	*dst++ = indices[0] & 0x0fffffff;
+	*dst++ = indices[1] & 0x0fffffff;
+	*dst++ = indices[2] & 0x0fffffff;
+	ntris++;
+	// Success: the (positive) number of triangles written.
+	return ntris;
+}
+
+static int countPolyVerts(const unsigned short* p, const int nvp)
+{
+	// Number of leading entries before the first RC_MESH_NULL_IDX; nvp if none.
+	int used = 0;
+	while (used < nvp && p[used] != RC_MESH_NULL_IDX) ++used;
+	return used < nvp ? used : nvp;
+}
+
+inline bool uleft(const unsigned short* a, const unsigned short* b, const unsigned short* c)
+{	// Strict-left test on components 0 and 2 of unsigned short vertices.
+	const int abx = (int)b[0] - (int)a[0], abz = (int)b[2] - (int)a[2];
+	return abx * ((int)c[2] - (int)a[2]) - ((int)c[0] - (int)a[0]) * abz < 0;
+}
+
+static int getPolyMergeValue(unsigned short* pa, unsigned short* pb,
+							 const unsigned short* verts, int& ea, int& eb,
+							 const int nvp)
+{
+	const int countA = countPolyVerts(pa, nvp);
+	const int countB = countPolyVerts(pb, nvp);
+	
+	// The merged polygon has countA+countB-2 vertices and must fit in nvp.
+	if (countA+countB-2 > nvp)
+		return -1;
+	
+	// Look for an edge of pa that is also an edge of pb (in either direction).
+	ea = -1;
+	eb = -1;
+	
+	for (int ia = 0; ia < countA; ++ia)
+	{
+		const unsigned short a0 = pa[ia];
+		const unsigned short a1 = pa[(ia+1) % countA];
+		const unsigned short aLo = a0 > a1 ? a1 : a0;
+		const unsigned short aHi = a0 > a1 ? a0 : a1;
+		for (int ib = 0; ib < countB; ++ib)
+		{
+			const unsigned short b0 = pb[ib];
+			const unsigned short b1 = pb[(ib+1) % countB];
+			const unsigned short bLo = b0 > b1 ? b1 : b0;
+			const unsigned short bHi = b0 > b1 ? b0 : b1;
+			if (aLo != bLo || aHi != bHi)
+				continue;
+			// Remember the match; only the scan of pb stops here.
+			ea = ia;
+			eb = ib;
+			break;
+		}
+	}
+	
+	// No common edge, cannot merge.
+	if (ea == -1 || eb == -1)
+		return -1;
+	
+	// Check to see if the merged polygon would be convex.
+	// First corner: vertex before pa[ea], pa[ea], and the vertex two steps after eb in pb.
+	const unsigned short* beforeA = &verts[pa[(ea+countA-1) % countA]*3];
+	const unsigned short* startA = &verts[pa[ea]*3];
+	const unsigned short* afterB = &verts[pb[(eb+2) % countB]*3];
+	if (!uleft(beforeA, startA, afterB))
+		return -1;
+	
+	// Second corner: the mirrored test at pb[eb] with the vertex two steps after ea in pa.
+	const unsigned short* beforeB = &verts[pb[(eb+countB-1) % countB]*3];
+	const unsigned short* startB = &verts[pb[eb]*3];
+	const unsigned short* afterA = &verts[pa[(ea+2) % countA]*3];
+	if (!uleft(beforeB, startB, afterA))
+		return -1;
+	
+	// The result is the squared length (components 0 and 2) of the shared edge,
+	// measured from pa[ea] to the next vertex of pa.
+	const unsigned short* endA = &verts[pa[(ea+1)%countA]*3];
+	const int dx = (int)startA[0] - (int)endA[0];
+	const int dy = (int)startA[2] - (int)endA[2];
+	
+	return dx*dx + dy*dy;
+}
+
+static void mergePolys(unsigned short* pa, unsigned short* pb, int ea, int eb,
+					   unsigned short* tmp, const int nvp)
+{
+	const int countA = countPolyVerts(pa, nvp);
+	const int countB = countPolyVerts(pb, nvp);
+	
+	// Build the result in 'tmp': every entry starts as 0xffff, then pa's vertices
+	// starting after edge 'ea' are followed by pb's starting after edge 'eb'.
+	memset(tmp, 0xff, sizeof(unsigned short)*nvp);
+	unsigned short* out = tmp;
+	for (int step = 1; step < countA; ++step)
+		*out++ = pa[(ea+step) % countA];
+	for (int step = 1; step < countB; ++step)
+		*out++ = pb[(eb+step) % countB];
+	
+	// The merged polygon overwrites pa; pb is only read.
+	memcpy(pa, tmp, sizeof(unsigned short)*nvp);
+}
+
+
+static void pushFront(int v, int* arr, int& an)
+{
+	for (int dst = an; dst > 0; --dst) arr[dst] = arr[dst-1];	// Shift entries up one slot.
+	arr[0] = v;
+	an++;	// No capacity check; 'arr' must have room for the extra entry.
+}
+
+static void pushBack(int v, int* arr, int& an)
+{
+	// Append 'v' after the current 'an' entries; no capacity check is made.
+	arr[an++] = v;
+}
+
+static bool canRemoveVertex(rcContext* ctx, rcPolyMesh& mesh, const unsigned short rem)
+{
+	// Decides whether vertex 'rem' may be deleted from 'mesh'. Returns false when too
+	// few edges would remain, when polygons that are not adjacent meet at the vertex,
+	// or when the temporary edge list cannot be allocated.
+	
+	const int nvp = mesh.nvp;
+	
+	// Tally the uses of 'rem' and the edges its polygons would keep.
+	int useCount = 0;
+	int edgesLeft = 0;
+	for (int pi = 0; pi < mesh.npolys; ++pi)
+	{
+		const unsigned short* poly = &mesh.polys[pi*nvp*2];
+		const int polySize = countPolyVerts(poly, nvp);
+		
+		// Occurrences of 'rem' among this polygon's vertices.
+		int hits = 0;
+		for (int vi = 0; vi < polySize; ++vi)
+		{
+			if (poly[vi] == rem)
+				hits++;
+		}
+		if (hits == 0)
+			continue;
+		
+		useCount += hits;
+		edgesLeft += polySize-(hits+1);
+	}
+	
+	// There would be too few edges remaining to create a polygon.
+	// This can happen for example when the tip of a triangle is marked
+	// for deletion, but no other polygon shares the vertex.
+	// In this case, the vertex should not be removed.
+	if (edgesLeft <= 2)
+		return false;
+	
+	// Edges incident to 'rem', 3 ints each: {rem, other end, share count}.
+	const int edgeCapacity = useCount*2;
+	int numEdges = 0;
+	rcScopedDelete<int> edges = (int*)rcAlloc(sizeof(int)*edgeCapacity*3, RC_ALLOC_TEMP);
+	if (!edges)
+	{
+		ctx->log(RC_LOG_WARNING, "canRemoveVertex: Out of memory 'edges' (%d).", edgeCapacity*3);
+		return false;
+	}
+	
+	// Collect the distinct edges around 'rem'.
+	for (int pi = 0; pi < mesh.npolys; ++pi)
+	{
+		const unsigned short* poly = &mesh.polys[pi*nvp*2];
+		const int polySize = countPolyVerts(poly, nvp);
+
+		// Walk the polygon's edges as (previous vertex, current vertex)
+		// pairs, starting with the edge that closes the polygon.
+		for (int cur = 0, prv = polySize-1; cur < polySize; prv = cur++)
+		{
+			// Only edges that touch 'rem' are of interest.
+			if (poly[cur] != rem && poly[prv] != rem)
+				continue;
+			
+			// The end of the edge that is not 'rem' identifies it
+			// (when both ends are 'rem', that is 'rem' itself).
+			const int other = (poly[prv] == rem) ? poly[cur] : poly[prv];
+			
+			// Bump the share count of every recorded edge leading to 'other'.
+			bool known = false;
+			for (int m = 0; m < numEdges; ++m)
+			{
+				int* e = &edges[m*3];
+				if (e[1] != other)
+					continue;
+				e[2]++;
+				known = true;
+			}
+			if (known)
+				continue;
+			
+			// First sighting: append the record {rem, other, 1}.
+			int* e = &edges[numEdges*3];
+			e[0] = rem;
+			e[1] = other;
+			e[2] = 1;
+			numEdges++;
+		}
+	}
+	
+	// An edge used by only one polygon is "open". There should be no more than
+	// 2 open edges: more means that two non-adjacent polygons share the
+	// removed vertex, and in that case the vertex must stay.
+	int openCount = 0;
+	for (int m = 0; m < numEdges; ++m)
+	{
+		const int shareCount = edges[m*3+2];
+		if (shareCount < 2)
+			openCount++;
+	}
+	
+	// Removal is allowed only with at most two open edges.
+	return openCount <= 2;
+}
+
+static bool removeVertex(rcContext* ctx, rcPolyMesh& mesh, const unsigned short rem, const int maxTris)
+{
+	const int nvp = mesh.nvp;
+	// Removes vertex 'rem': drops every polygon using it, then re-triangulates, merges and stores the hole.
+	// Count how many polygon corners reference 'rem'; the total sizes the scratch buffers below.
+	int numRemovedVerts = 0;
+	for (int i = 0; i < mesh.npolys; ++i)
+	{
+		unsigned short* p = &mesh.polys[i*nvp*2];
+		const int nv = countPolyVerts(p, nvp);
+		for (int j = 0; j < nv; ++j)
+		{
+			if (p[j] == rem)
+				numRemovedVerts++;
+		}
+	}
+	
+	int nedges = 0;
+	rcScopedDelete<int> edges = (int*)rcAlloc(sizeof(int)*numRemovedVerts*nvp*4, RC_ALLOC_TEMP);
+	if (!edges)
+	{
+		ctx->log(RC_LOG_WARNING, "removeVertex: Out of memory 'edges' (%d).", numRemovedVerts*nvp*4);
+		return false;
+	}
+
+	int nhole = 0;
+	rcScopedDelete<int> hole = (int*)rcAlloc(sizeof(int)*numRemovedVerts*nvp, RC_ALLOC_TEMP);
+	if (!hole)
+	{
+		ctx->log(RC_LOG_WARNING, "removeVertex: Out of memory 'hole' (%d).", numRemovedVerts*nvp);
+		return false;
+	}
+	
+	int nhreg = 0;
+	rcScopedDelete<int> hreg = (int*)rcAlloc(sizeof(int)*numRemovedVerts*nvp, RC_ALLOC_TEMP);
+	if (!hreg)
+	{
+		ctx->log(RC_LOG_WARNING, "removeVertex: Out of memory 'hreg' (%d).", numRemovedVerts*nvp);
+		return false;
+	}
+
+	int nharea = 0;
+	rcScopedDelete<int> harea = (int*)rcAlloc(sizeof(int)*numRemovedVerts*nvp, RC_ALLOC_TEMP);
+	if (!harea)
+	{
+		ctx->log(RC_LOG_WARNING, "removeVertex: Out of memory 'harea' (%d).", numRemovedVerts*nvp);
+		return false;
+	}
+	
+	for (int i = 0; i < mesh.npolys; ++i)
+	{
+		unsigned short* p = &mesh.polys[i*nvp*2];
+		const int nv = countPolyVerts(p, nvp);
+		bool hasRem = false;
+		for (int j = 0; j < nv; ++j)
+			if (p[j] == rem) hasRem = true;
+		if (hasRem)
+		{
+			// Collect edges which does not touch the removed vertex.
+			for (int j = 0, k = nv-1; j < nv; k = j++)
+			{
+				if (p[j] != rem && p[k] != rem)
+				{
+					int* e = &edges[nedges*4];	// record: {from, to, region, area}
+					e[0] = p[k];
+					e[1] = p[j];
+					e[2] = mesh.regs[i];
+					e[3] = mesh.areas[i];
+					nedges++;
+				}
+			}
+			// Remove the polygon by moving the last polygon into its slot.
+			unsigned short* p2 = &mesh.polys[(mesh.npolys-1)*nvp*2];
+			memcpy(p,p2,sizeof(unsigned short)*nvp);
+			memset(p+nvp,0xff,sizeof(unsigned short)*nvp);
+			mesh.regs[i] = mesh.regs[mesh.npolys-1];
+			mesh.areas[i] = mesh.areas[mesh.npolys-1];
+			mesh.npolys--;
+			--i;	// re-examine slot i, which now holds the moved polygon
+		}
+	}
+	
+	// Remove vertex: shift the tail down one slot without reading past the last stored vertex.
+	for (int i = (int)rem+1; i < mesh.nverts; ++i)
+	{
+		mesh.verts[(i-1)*3+0] = mesh.verts[i*3+0];
+		mesh.verts[(i-1)*3+1] = mesh.verts[i*3+1];
+		mesh.verts[(i-1)*3+2] = mesh.verts[i*3+2];
+	}
+	mesh.nverts--;
+
+	// Adjust indices to match the removed vertex layout.
+	for (int i = 0; i < mesh.npolys; ++i)
+	{
+		unsigned short* p = &mesh.polys[i*nvp*2];
+		const int nv = countPolyVerts(p, nvp);
+		for (int j = 0; j < nv; ++j)
+			if (p[j] > rem) p[j]--;
+	}
+	for (int i = 0; i < nedges; ++i)
+	{
+		if (edges[i*4+0] > rem) edges[i*4+0]--;
+		if (edges[i*4+1] > rem) edges[i*4+1]--;
+	}
+
+	if (nedges == 0)
+		return true;
+
+	// Start with one vertex, keep appending connected
+	// segments to the start and end of the hole.
+	pushBack(edges[0], hole, nhole);
+	pushBack(edges[2], hreg, nhreg);
+	pushBack(edges[3], harea, nharea);
+	
+	while (nedges)
+	{
+		bool match = false;
+		
+		for (int i = 0; i < nedges; ++i)
+		{
+			const int ea = edges[i*4+0];
+			const int eb = edges[i*4+1];
+			const int r = edges[i*4+2];
+			const int a = edges[i*4+3];
+			bool add = false;
+			if (hole[0] == eb)
+			{
+				// The segment matches the beginning of the hole boundary.
+				pushFront(ea, hole, nhole);
+				pushFront(r, hreg, nhreg);
+				pushFront(a, harea, nharea);
+				add = true;
+			}
+			else if (hole[nhole-1] == ea)
+			{
+				// The segment matches the end of the hole boundary.
+				pushBack(eb, hole, nhole);
+				pushBack(r, hreg, nhreg);
+				pushBack(a, harea, nharea);
+				add = true;
+			}
+			if (add)
+			{
+				// The edge segment was added, remove it.
+				edges[i*4+0] = edges[(nedges-1)*4+0];
+				edges[i*4+1] = edges[(nedges-1)*4+1];
+				edges[i*4+2] = edges[(nedges-1)*4+2];
+				edges[i*4+3] = edges[(nedges-1)*4+3];
+				--nedges;
+				match = true;
+				--i;
+			}
+		}
+		
+		if (!match)
+			break;
+	}
+
+	rcScopedDelete<int> tris = (int*)rcAlloc(sizeof(int)*nhole*3, RC_ALLOC_TEMP);
+	if (!tris)
+	{
+		ctx->log(RC_LOG_WARNING, "removeVertex: Out of memory 'tris' (%d).", nhole*3);
+		return false;
+	}
+
+	rcScopedDelete<int> tverts = (int*)rcAlloc(sizeof(int)*nhole*4, RC_ALLOC_TEMP);
+	if (!tverts)
+	{
+		ctx->log(RC_LOG_WARNING, "removeVertex: Out of memory 'tverts' (%d).", nhole*4);
+		return false;
+	}
+
+	rcScopedDelete<int> thole = (int*)rcAlloc(sizeof(int)*nhole, RC_ALLOC_TEMP);
+	if (!thole)	// check the buffer that was just allocated
+	{
+		ctx->log(RC_LOG_WARNING, "removeVertex: Out of memory 'thole' (%d).", nhole);
+		return false;
+	}
+
+	// Generate temp vertex array for triangulation.
+	for (int i = 0; i < nhole; ++i)
+	{
+		const int pi = hole[i];
+		tverts[i*4+0] = mesh.verts[pi*3+0];
+		tverts[i*4+1] = mesh.verts[pi*3+1];
+		tverts[i*4+2] = mesh.verts[pi*3+2];
+		tverts[i*4+3] = 0;
+		thole[i] = i;
+	}
+
+	// Triangulate the hole.
+	int ntris = triangulate(nhole, &tverts[0], &thole[0], tris);
+	if (ntris < 0)
+	{
+		ntris = -ntris;
+		ctx->log(RC_LOG_WARNING, "removeVertex: triangulate() returned bad results.");
+	}
+	
+	// Merge the hole triangles back to polygons.
+	rcScopedDelete<unsigned short> polys = (unsigned short*)rcAlloc(sizeof(unsigned short)*(ntris+1)*nvp, RC_ALLOC_TEMP);
+	if (!polys)
+	{
+		ctx->log(RC_LOG_ERROR, "removeVertex: Out of memory 'polys' (%d).", (ntris+1)*nvp);
+		return false;
+	}
+	rcScopedDelete<unsigned short> pregs = (unsigned short*)rcAlloc(sizeof(unsigned short)*ntris, RC_ALLOC_TEMP);
+	if (!pregs)
+	{
+		ctx->log(RC_LOG_ERROR, "removeVertex: Out of memory 'pregs' (%d).", ntris);
+		return false;
+	}
+	rcScopedDelete<unsigned char> pareas = (unsigned char*)rcAlloc(sizeof(unsigned char)*ntris, RC_ALLOC_TEMP);
+	if (!pareas)	// check the buffer that was just allocated
+	{
+		ctx->log(RC_LOG_ERROR, "removeVertex: Out of memory 'pareas' (%d).", ntris);
+		return false;
+	}
+	
+	unsigned short* tmpPoly = &polys[ntris*nvp];	// extra slot past the ntris polygons, used as merge scratch
+			
+	// Build initial polygons.
+	int npolys = 0;
+	memset(polys, 0xff, ntris*nvp*sizeof(unsigned short));
+	for (int j = 0; j < ntris; ++j)
+	{
+		int* t = &tris[j*3];
+		if (t[0] != t[1] && t[0] != t[2] && t[1] != t[2])
+		{
+			polys[npolys*nvp+0] = (unsigned short)hole[t[0]];
+			polys[npolys*nvp+1] = (unsigned short)hole[t[1]];
+			polys[npolys*nvp+2] = (unsigned short)hole[t[2]];
+			pregs[npolys] = (unsigned short)hreg[t[0]];
+			pareas[npolys] = (unsigned char)harea[t[0]];
+			npolys++;
+		}
+	}
+	if (!npolys)
+		return true;
+	
+	// Merge polygons.
+	if (nvp > 3)
+	{
+		for (;;)
+		{
+			// Find best polygons to merge.
+			int bestMergeVal = 0;
+			int bestPa = 0, bestPb = 0, bestEa = 0, bestEb = 0;
+			
+			for (int j = 0; j < npolys-1; ++j)
+			{
+				unsigned short* pj = &polys[j*nvp];
+				for (int k = j+1; k < npolys; ++k)
+				{
+					unsigned short* pk = &polys[k*nvp];
+					int ea, eb;
+					int v = getPolyMergeValue(pj, pk, mesh.verts, ea, eb, nvp);
+					if (v > bestMergeVal)
+					{
+						bestMergeVal = v;
+						bestPa = j;
+						bestPb = k;
+						bestEa = ea;
+						bestEb = eb;
+					}
+				}
+			}
+			
+			if (bestMergeVal > 0)
+			{
+				// Found best, merge.
+				unsigned short* pa = &polys[bestPa*nvp];
+				unsigned short* pb = &polys[bestPb*nvp];
+				mergePolys(pa, pb, bestEa, bestEb, tmpPoly, nvp);
+				memcpy(pb, &polys[(npolys-1)*nvp], sizeof(unsigned short)*nvp);
+				pregs[bestPb] = pregs[npolys-1];
+				pareas[bestPb] = pareas[npolys-1];
+				npolys--;
+			}
+			else
+			{
+				// Could not merge any polygons, stop.
+				break;
+			}
+		}
+	}
+	
+	// Store polygons.
+	for (int i = 0; i < npolys; ++i)
+	{
+		if (mesh.npolys >= maxTris) break;
+		unsigned short* p = &mesh.polys[mesh.npolys*nvp*2];
+		memset(p,0xff,sizeof(unsigned short)*nvp*2);
+		for (int j = 0; j < nvp; ++j)
+			p[j] = polys[i*nvp+j];
+		mesh.regs[mesh.npolys] = pregs[i];
+		mesh.areas[mesh.npolys] = pareas[i];
+		mesh.npolys++;
+		if (mesh.npolys > maxTris)
+		{
+			ctx->log(RC_LOG_ERROR, "removeVertex: Too many polygons %d (max:%d).", mesh.npolys, maxTris);
+			return false;
+		}
+	}
+	
+	return true;
+}
+
+/// @par
+///
+/// @note If the mesh data is to be used to construct a Detour navigation mesh, then the upper 
+/// limit must be restricted to <= #DT_VERTS_PER_POLYGON.
+///
+/// @see rcAllocPolyMesh, rcContourSet, rcPolyMesh, rcConfig
+bool rcBuildPolyMesh(rcContext* ctx, rcContourSet& cset, const int nvp, rcPolyMesh& mesh)
+{
+	rcAssert(ctx);
+	
+	ctx->startTimer(RC_TIMER_BUILD_POLYMESH);
+
+	rcVcopy(mesh.bmin, cset.bmin);
+	rcVcopy(mesh.bmax, cset.bmax);
+	mesh.cs = cset.cs;
+	mesh.ch = cset.ch;
+	mesh.borderSize = cset.borderSize;
+	
+	// Upper bounds derived from the contours: these size every buffer allocated below.
+	int maxVertices = 0;
+	int maxTris = 0;
+	int maxVertsPerCont = 0;
+	for (int i = 0; i < cset.nconts; ++i)
+	{
+		// Skip null contours.
+		if (cset.conts[i].nverts < 3) continue;
+		maxVertices += cset.conts[i].nverts;
+		maxTris += cset.conts[i].nverts - 2;
+		maxVertsPerCont = rcMax(maxVertsPerCont, cset.conts[i].nverts);
+	}
+	
+	if (maxVertices >= 0xfffe)
+	{
+		ctx->log(RC_LOG_ERROR, "rcBuildPolyMesh: Too many vertices %d.", maxVertices);
+		return false;
+	}
+		
+	rcScopedDelete<unsigned char> vflags = (unsigned char*)rcAlloc(sizeof(unsigned char)*maxVertices, RC_ALLOC_TEMP);
+	if (!vflags)
+	{
+		ctx->log(RC_LOG_ERROR, "rcBuildPolyMesh: Out of memory 'vflags' (%d).", maxVertices);
+		return false;
+	}
+	memset(vflags, 0, maxVertices);
+	
+	mesh.verts = (unsigned short*)rcAlloc(sizeof(unsigned short)*maxVertices*3, RC_ALLOC_PERM);
+	if (!mesh.verts)
+	{
+		ctx->log(RC_LOG_ERROR, "rcBuildPolyMesh: Out of memory 'mesh.verts' (%d).", maxVertices*3);
+		return false;
+	}
+	mesh.polys = (unsigned short*)rcAlloc(sizeof(unsigned short)*maxTris*nvp*2, RC_ALLOC_PERM);
+	if (!mesh.polys)
+	{
+		ctx->log(RC_LOG_ERROR, "rcBuildPolyMesh: Out of memory 'mesh.polys' (%d).", maxTris*nvp*2);
+		return false;
+	}
+	mesh.regs = (unsigned short*)rcAlloc(sizeof(unsigned short)*maxTris, RC_ALLOC_PERM);
+	if (!mesh.regs)
+	{
+		ctx->log(RC_LOG_ERROR, "rcBuildPolyMesh: Out of memory 'mesh.regs' (%d).", maxTris);
+		return false;
+	}
+	mesh.areas = (unsigned char*)rcAlloc(sizeof(unsigned char)*maxTris, RC_ALLOC_PERM);
+	if (!mesh.areas)
+	{
+		ctx->log(RC_LOG_ERROR, "rcBuildPolyMesh: Out of memory 'mesh.areas' (%d).", maxTris);
+		return false;
+	}
+	
+	mesh.nverts = 0;
+	mesh.npolys = 0;
+	mesh.nvp = nvp;
+	mesh.maxpolys = maxTris;
+	
+	memset(mesh.verts, 0, sizeof(unsigned short)*maxVertices*3);
+	memset(mesh.polys, 0xff, sizeof(unsigned short)*maxTris*nvp*2);
+	memset(mesh.regs, 0, sizeof(unsigned short)*maxTris);
+	memset(mesh.areas, 0, sizeof(unsigned char)*maxTris);
+	
+	rcScopedDelete<int> nextVert = (int*)rcAlloc(sizeof(int)*maxVertices, RC_ALLOC_TEMP);
+	if (!nextVert)
+	{
+		ctx->log(RC_LOG_ERROR, "rcBuildPolyMesh: Out of memory 'nextVert' (%d).", maxVertices);
+		return false;
+	}
+	memset(nextVert, 0, sizeof(int)*maxVertices);
+	
+	rcScopedDelete<int> firstVert = (int*)rcAlloc(sizeof(int)*VERTEX_BUCKET_COUNT, RC_ALLOC_TEMP);
+	if (!firstVert)
+	{
+		ctx->log(RC_LOG_ERROR, "rcBuildPolyMesh: Out of memory 'firstVert' (%d).", VERTEX_BUCKET_COUNT);
+		return false;
+	}
+	for (int i = 0; i < VERTEX_BUCKET_COUNT; ++i)
+		firstVert[i] = -1;
+	
+	rcScopedDelete<int> indices = (int*)rcAlloc(sizeof(int)*maxVertsPerCont, RC_ALLOC_TEMP);
+	if (!indices)
+	{
+		ctx->log(RC_LOG_ERROR, "rcBuildPolyMesh: Out of memory 'indices' (%d).", maxVertsPerCont);
+		return false;
+	}
+	rcScopedDelete<int> tris = (int*)rcAlloc(sizeof(int)*maxVertsPerCont*3, RC_ALLOC_TEMP);
+	if (!tris)
+	{
+		ctx->log(RC_LOG_ERROR, "rcBuildPolyMesh: Out of memory 'tris' (%d).", maxVertsPerCont*3);
+		return false;
+	}
+	rcScopedDelete<unsigned short> polys = (unsigned short*)rcAlloc(sizeof(unsigned short)*(maxVertsPerCont+1)*nvp, RC_ALLOC_TEMP);
+	if (!polys)
+	{
+		ctx->log(RC_LOG_ERROR, "rcBuildPolyMesh: Out of memory 'polys' (%d).", (maxVertsPerCont+1)*nvp);
+		return false;
+	}
+	unsigned short* tmpPoly = &polys[maxVertsPerCont*nvp];	// the extra (+1) polygon slot, used as merge scratch
+
+	for (int i = 0; i < cset.nconts; ++i)
+	{
+		rcContour& cont = cset.conts[i];
+		
+		// Skip null contours.
+		if (cont.nverts < 3)
+			continue;
+		
+		// Triangulate contour
+		for (int j = 0; j < cont.nverts; ++j)
+			indices[j] = j;
+			
+		int ntris = triangulate(cont.nverts, cont.verts, &indices[0], &tris[0]);
+		if (ntris <= 0)
+		{
+			// Bad triangulation, should not happen.
+/*			printf("\tconst float bmin[3] = {%ff,%ff,%ff};\n", cset.bmin[0], cset.bmin[1], cset.bmin[2]);
+			printf("\tconst float cs = %ff;\n", cset.cs);
+			printf("\tconst float ch = %ff;\n", cset.ch);
+			printf("\tconst int verts[] = {\n");
+			for (int k = 0; k < cont.nverts; ++k)
+			{
+				const int* v = &cont.verts[k*4];
+				printf("\t\t%d,%d,%d,%d,\n", v[0], v[1], v[2], v[3]);
+			}
+			printf("\t};\n\tconst int nverts = sizeof(verts)/(sizeof(int)*4);\n");*/
+			ctx->log(RC_LOG_WARNING, "rcBuildPolyMesh: Bad triangulation Contour %d.", i);
+			ntris = -ntris;
+		}
+				
+		// Add and merge vertices.
+		for (int j = 0; j < cont.nverts; ++j)
+		{
+			const int* v = &cont.verts[j*4];
+			indices[j] = addVertex((unsigned short)v[0], (unsigned short)v[1], (unsigned short)v[2],
+								   mesh.verts, firstVert, nextVert, mesh.nverts);
+			if (v[3] & RC_BORDER_VERTEX)
+			{
+				// This vertex should be removed.
+				vflags[indices[j]] = 1;
+			}
+		}
+
+		// Build initial polygons.
+		int npolys = 0;
+		memset(polys, 0xff, maxVertsPerCont*nvp*sizeof(unsigned short));
+		for (int j = 0; j < ntris; ++j)
+		{
+			int* t = &tris[j*3];
+			if (t[0] != t[1] && t[0] != t[2] && t[1] != t[2])
+			{
+				polys[npolys*nvp+0] = (unsigned short)indices[t[0]];
+				polys[npolys*nvp+1] = (unsigned short)indices[t[1]];
+				polys[npolys*nvp+2] = (unsigned short)indices[t[2]];
+				npolys++;
+			}
+		}
+		if (!npolys)
+			continue;
+		
+		// Merge polygons.
+		if (nvp > 3)
+		{
+			for(;;)
+			{
+				// Find best polygons to merge.
+				int bestMergeVal = 0;
+				int bestPa = 0, bestPb = 0, bestEa = 0, bestEb = 0;
+				
+				for (int j = 0; j < npolys-1; ++j)
+				{
+					unsigned short* pj = &polys[j*nvp];
+					for (int k = j+1; k < npolys; ++k)
+					{
+						unsigned short* pk = &polys[k*nvp];
+						int ea, eb;
+						int v = getPolyMergeValue(pj, pk, mesh.verts, ea, eb, nvp);
+						if (v > bestMergeVal)
+						{
+							bestMergeVal = v;
+							bestPa = j;
+							bestPb = k;
+							bestEa = ea;
+							bestEb = eb;
+						}
+					}
+				}
+				
+				if (bestMergeVal > 0)
+				{
+					// Found best, merge.
+					unsigned short* pa = &polys[bestPa*nvp];
+					unsigned short* pb = &polys[bestPb*nvp];
+					mergePolys(pa, pb, bestEa, bestEb, tmpPoly, nvp);
+					memcpy(pb, &polys[(npolys-1)*nvp], sizeof(unsigned short)*nvp);
+					npolys--;
+				}
+				else
+				{
+					// Could not merge any polygons, stop.
+					break;
+				}
+			}
+		}
+		
+		// Store polygons.
+		for (int j = 0; j < npolys; ++j)
+		{
+			unsigned short* p = &mesh.polys[mesh.npolys*nvp*2];
+			unsigned short* q = &polys[j*nvp];
+			for (int k = 0; k < nvp; ++k)
+				p[k] = q[k];
+			mesh.regs[mesh.npolys] = cont.reg;
+			mesh.areas[mesh.npolys] = cont.area;
+			mesh.npolys++;
+			if (mesh.npolys > maxTris)
+			{
+				ctx->log(RC_LOG_ERROR, "rcBuildPolyMesh: Too many polygons %d (max:%d).", mesh.npolys, maxTris);
+				return false;
+			}
+		}
+	}
+	
+	
+	// Remove edge vertices.
+	for (int i = 0; i < mesh.nverts; ++i)
+	{
+		if (vflags[i])
+		{
+			if (!canRemoveVertex(ctx, mesh, (unsigned short)i))
+				continue;
+			if (!removeVertex(ctx, mesh, (unsigned short)i, maxTris))
+			{
+				// Failed to remove vertex
+				ctx->log(RC_LOG_ERROR, "rcBuildPolyMesh: Failed to remove edge vertex %d.", i);
+				return false;
+			}
+			// Remove vertex
+			// Note: mesh.nverts is already decremented inside removeVertex()!
+			// Fixup vertex flags
+			for (int j = i; j < mesh.nverts; ++j)
+				vflags[j] = vflags[j+1];
+			--i;
+		}
+	}
+	
+	// Calculate adjacency.
+	if (!buildMeshAdjacency(mesh.polys, mesh.npolys, mesh.nverts, nvp))
+	{
+		ctx->log(RC_LOG_ERROR, "rcBuildPolyMesh: Adjacency failed.");
+		return false;
+	}
+	
+	// Find portal edges
+	if (mesh.borderSize > 0)
+	{
+		const int w = cset.width;
+		const int h = cset.height;
+		for (int i = 0; i < mesh.npolys; ++i)
+		{
+			unsigned short* p = &mesh.polys[i*2*nvp];
+			for (int j = 0; j < nvp; ++j)
+			{
+				if (p[j] == RC_MESH_NULL_IDX) break;
+				// Skip connected edges.
+				if (p[nvp+j] != RC_MESH_NULL_IDX)
+					continue;
+				int nj = j+1;
+				if (nj >= nvp || p[nj] == RC_MESH_NULL_IDX) nj = 0;
+				const unsigned short* va = &mesh.verts[p[j]*3];
+				const unsigned short* vb = &mesh.verts[p[nj]*3];
+
+				if ((int)va[0] == 0 && (int)vb[0] == 0)
+					p[nvp+j] = 0x8000 | 0;
+				else if ((int)va[2] == h && (int)vb[2] == h)
+					p[nvp+j] = 0x8000 | 1;
+				else if ((int)va[0] == w && (int)vb[0] == w)
+					p[nvp+j] = 0x8000 | 2;
+				else if ((int)va[2] == 0 && (int)vb[2] == 0)
+					p[nvp+j] = 0x8000 | 3;
+			}
+		}
+	}
+
+	// Just allocate the mesh flags array. The user is responsible to fill it.
+	mesh.flags = (unsigned short*)rcAlloc(sizeof(unsigned short)*mesh.npolys, RC_ALLOC_PERM);
+	if (!mesh.flags)
+	{
+		ctx->log(RC_LOG_ERROR, "rcBuildPolyMesh: Out of memory 'mesh.flags' (%d).", mesh.npolys);
+		return false;
+	}
+	memset(mesh.flags, 0, sizeof(unsigned short) * mesh.npolys);
+	
+	if (mesh.nverts > 0xffff)
+	{
+		ctx->log(RC_LOG_ERROR, "rcBuildPolyMesh: The resulting mesh has too many vertices %d (max %d). Data can be corrupted.", mesh.nverts, 0xffff);
+	}
+	if (mesh.npolys > 0xffff)
+	{
+		ctx->log(RC_LOG_ERROR, "rcBuildPolyMesh: The resulting mesh has too many polygons %d (max %d). Data can be corrupted.", mesh.npolys, 0xffff);
+	}
+	
+	ctx->stopTimer(RC_TIMER_BUILD_POLYMESH);
+	
+	return true;
+}
+
+/// @see rcAllocPolyMesh, rcPolyMesh
+bool rcMergePolyMeshes(rcContext* ctx, rcPolyMesh** meshes, const int nmeshes, rcPolyMesh& mesh)
+{
+	rcAssert(ctx);
+	// Nothing to merge: succeed and leave 'mesh' untouched.
+	if (!nmeshes || !meshes)
+		return true;
+
+	ctx->startTimer(RC_TIMER_MERGE_POLYMESH);
+
+	// nvp, cs and ch are taken from the first mesh; the bounds are grown over all meshes below.
+	mesh.nvp = meshes[0]->nvp;
+	mesh.cs = meshes[0]->cs;
+	mesh.ch = meshes[0]->ch;
+	rcVcopy(mesh.bmin, meshes[0]->bmin);
+	rcVcopy(mesh.bmax, meshes[0]->bmax);
+
+	int maxVerts = 0;
+	int maxPolys = 0;
+	int maxVertsPerMesh = 0;
+	for (int i = 0; i < nmeshes; ++i)
+	{
+		rcVmin(mesh.bmin, meshes[i]->bmin);
+		rcVmax(mesh.bmax, meshes[i]->bmax);
+		maxVertsPerMesh = rcMax(maxVertsPerMesh, meshes[i]->nverts);
+		maxVerts += meshes[i]->nverts;
+		maxPolys += meshes[i]->npolys;
+	}
+	
+	mesh.nverts = 0;
+	mesh.verts = (unsigned short*)rcAlloc(sizeof(unsigned short)*maxVerts*3, RC_ALLOC_PERM);
+	if (!mesh.verts)
+	{
+		ctx->log(RC_LOG_ERROR, "rcMergePolyMeshes: Out of memory 'mesh.verts' (%d).", maxVerts*3);
+		return false;
+	}
+
+	mesh.npolys = 0;
+	mesh.polys = (unsigned short*)rcAlloc(sizeof(unsigned short)*maxPolys*2*mesh.nvp, RC_ALLOC_PERM);
+	if (!mesh.polys)
+	{
+		ctx->log(RC_LOG_ERROR, "rcMergePolyMeshes: Out of memory 'mesh.polys' (%d).", maxPolys*2*mesh.nvp);
+		return false;
+	}
+	memset(mesh.polys, 0xff, sizeof(unsigned short)*maxPolys*2*mesh.nvp);
+
+	mesh.regs = (unsigned short*)rcAlloc(sizeof(unsigned short)*maxPolys, RC_ALLOC_PERM);
+	if (!mesh.regs)
+	{
+		ctx->log(RC_LOG_ERROR, "rcMergePolyMeshes: Out of memory 'mesh.regs' (%d).", maxPolys);
+		return false;
+	}
+	memset(mesh.regs, 0, sizeof(unsigned short)*maxPolys);
+
+	mesh.areas = (unsigned char*)rcAlloc(sizeof(unsigned char)*maxPolys, RC_ALLOC_PERM);
+	if (!mesh.areas)
+	{
+		ctx->log(RC_LOG_ERROR, "rcMergePolyMeshes: Out of memory 'mesh.areas' (%d).", maxPolys);
+		return false;
+	}
+	memset(mesh.areas, 0, sizeof(unsigned char)*maxPolys);
+
+	mesh.flags = (unsigned short*)rcAlloc(sizeof(unsigned short)*maxPolys, RC_ALLOC_PERM);
+	if (!mesh.flags)
+	{
+		ctx->log(RC_LOG_ERROR, "rcMergePolyMeshes: Out of memory 'mesh.flags' (%d).", maxPolys);
+		return false;
+	}
+	memset(mesh.flags, 0, sizeof(unsigned short)*maxPolys);
+	
+	rcScopedDelete<int> nextVert = (int*)rcAlloc(sizeof(int)*maxVerts, RC_ALLOC_TEMP);
+	if (!nextVert)
+	{
+		ctx->log(RC_LOG_ERROR, "rcMergePolyMeshes: Out of memory 'nextVert' (%d).", maxVerts);
+		return false;
+	}
+	memset(nextVert, 0, sizeof(int)*maxVerts);
+	
+	rcScopedDelete<int> firstVert = (int*)rcAlloc(sizeof(int)*VERTEX_BUCKET_COUNT, RC_ALLOC_TEMP);
+	if (!firstVert)
+	{
+		ctx->log(RC_LOG_ERROR, "rcMergePolyMeshes: Out of memory 'firstVert' (%d).", VERTEX_BUCKET_COUNT);
+		return false;
+	}
+	for (int i = 0; i < VERTEX_BUCKET_COUNT; ++i)
+		firstVert[i] = -1;
+
+	// Scratch table (source vertex index -> merged vertex index); freed on scope exit, so use the TEMP hint.
+	rcScopedDelete<unsigned short> vremap = (unsigned short*)rcAlloc(sizeof(unsigned short)*maxVertsPerMesh, RC_ALLOC_TEMP);
+	if (!vremap)
+	{
+		ctx->log(RC_LOG_ERROR, "rcMergePolyMeshes: Out of memory 'vremap' (%d).", maxVertsPerMesh);
+		return false;
+	}
+	memset(vremap, 0, sizeof(unsigned short)*maxVertsPerMesh);
+	
+	for (int i = 0; i < nmeshes; ++i)
+	{
+		const rcPolyMesh* pmesh = meshes[i];
+		// Offset of this mesh's origin from the merged origin, in cells of size mesh.cs (rounded).
+		const unsigned short ox = (unsigned short)floorf((pmesh->bmin[0]-mesh.bmin[0])/mesh.cs+0.5f);
+		const unsigned short oz = (unsigned short)floorf((pmesh->bmin[2]-mesh.bmin[2])/mesh.cs+0.5f);
+		
+		for (int j = 0; j < pmesh->nverts; ++j)
+		{
+			unsigned short* v = &pmesh->verts[j*3];
+			vremap[j] = addVertex(v[0]+ox, v[1], v[2]+oz,
+								  mesh.verts, firstVert, nextVert, mesh.nverts);
+		}
+		
+		for (int j = 0; j < pmesh->npolys; ++j)
+		{
+			unsigned short* tgt = &mesh.polys[mesh.npolys*2*mesh.nvp];
+			unsigned short* src = &pmesh->polys[j*2*mesh.nvp];	// indexed with mesh.nvp, i.e. the first mesh's nvp
+			mesh.regs[mesh.npolys] = pmesh->regs[j];
+			mesh.areas[mesh.npolys] = pmesh->areas[j];
+			mesh.flags[mesh.npolys] = pmesh->flags[j];
+			mesh.npolys++;
+			for (int k = 0; k < mesh.nvp; ++k)
+			{
+				if (src[k] == RC_MESH_NULL_IDX) break;
+				tgt[k] = vremap[src[k]];
+			}
+		}
+	}
+
+	// Calculate adjacency.
+	if (!buildMeshAdjacency(mesh.polys, mesh.npolys, mesh.nverts, mesh.nvp))
+	{
+		ctx->log(RC_LOG_ERROR, "rcMergePolyMeshes: Adjacency failed.");
+		return false;
+	}
+
+	if (mesh.nverts > 0xffff)
+	{
+		ctx->log(RC_LOG_ERROR, "rcMergePolyMeshes: The resulting mesh has too many vertices %d (max %d). Data can be corrupted.", mesh.nverts, 0xffff);
+	}
+	if (mesh.npolys > 0xffff)
+	{
+		ctx->log(RC_LOG_ERROR, "rcMergePolyMeshes: The resulting mesh has too many polygons %d (max %d). Data can be corrupted.", mesh.npolys, 0xffff);
+	}
+	
+	ctx->stopTimer(RC_TIMER_MERGE_POLYMESH);
+	
+	return true;
+}
+
+bool rcCopyPolyMesh(rcContext* ctx, const rcPolyMesh& src, rcPolyMesh& dst)
+{
+	rcAssert(ctx);
+	// Deep-copies src into dst; returns false (after logging) as soon as an allocation fails.
+	// Destination must be empty.
+	rcAssert(dst.verts == 0);
+	rcAssert(dst.polys == 0);
+	rcAssert(dst.regs == 0);
+	rcAssert(dst.areas == 0);
+	rcAssert(dst.flags == 0);
+	
+	dst.nverts = src.nverts;
+	dst.npolys = src.npolys;
+	dst.maxpolys = src.npolys;	// the copy is sized to the polygons in use, not to src.maxpolys
+	dst.nvp = src.nvp;
+	rcVcopy(dst.bmin, src.bmin);
+	rcVcopy(dst.bmax, src.bmax);
+	dst.cs = src.cs;
+	dst.ch = src.ch;
+	dst.borderSize = src.borderSize;
+	
+	dst.verts = (unsigned short*)rcAlloc(sizeof(unsigned short)*src.nverts*3, RC_ALLOC_PERM);
+	if (!dst.verts)
+	{
+		ctx->log(RC_LOG_ERROR, "rcCopyPolyMesh: Out of memory 'dst.verts' (%d).", src.nverts*3);
+		return false;
+	}
+	memcpy(dst.verts, src.verts, sizeof(unsigned short)*src.nverts*3);
+	
+	dst.polys = (unsigned short*)rcAlloc(sizeof(unsigned short)*src.npolys*2*src.nvp, RC_ALLOC_PERM);
+	if (!dst.polys)
+	{
+		ctx->log(RC_LOG_ERROR, "rcCopyPolyMesh: Out of memory 'dst.polys' (%d).", src.npolys*2*src.nvp);
+		return false;
+	}
+	memcpy(dst.polys, src.polys, sizeof(unsigned short)*src.npolys*2*src.nvp);
+	
+	dst.regs = (unsigned short*)rcAlloc(sizeof(unsigned short)*src.npolys, RC_ALLOC_PERM);
+	if (!dst.regs)
+	{
+		ctx->log(RC_LOG_ERROR, "rcCopyPolyMesh: Out of memory 'dst.regs' (%d).", src.npolys);
+		return false;
+	}
+	memcpy(dst.regs, src.regs, sizeof(unsigned short)*src.npolys);
+	
+	dst.areas = (unsigned char*)rcAlloc(sizeof(unsigned char)*src.npolys, RC_ALLOC_PERM);
+	if (!dst.areas)
+	{
+		ctx->log(RC_LOG_ERROR, "rcCopyPolyMesh: Out of memory 'dst.areas' (%d).", src.npolys);
+		return false;
+	}
+	memcpy(dst.areas, src.areas, sizeof(unsigned char)*src.npolys);
+	
+	dst.flags = (unsigned short*)rcAlloc(sizeof(unsigned short)*src.npolys, RC_ALLOC_PERM);
+	if (!dst.flags)
+	{
+		ctx->log(RC_LOG_ERROR, "rcCopyPolyMesh: Out of memory 'dst.flags' (%d).", src.npolys);
+		return false;
+	}
+	memcpy(dst.flags, src.flags, sizeof(unsigned short)*src.npolys);	// flags are unsigned short: copy the full array
+	
+	return true;
+}
diff --git a/Engine/lib/recast/Recast/Source/RecastMeshDetail.cpp b/Engine/lib/recast/Recast/Source/RecastMeshDetail.cpp
new file mode 100644
index 000000000..b38727546
--- /dev/null
+++ b/Engine/lib/recast/Recast/Source/RecastMeshDetail.cpp
@@ -0,0 +1,1245 @@
+//
+// Copyright (c) 2009-2010 Mikko Mononen memon@inside.org
+//
+// This software is provided 'as-is', without any express or implied
+// warranty.  In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source distribution.
+//
+
+#include <float.h>
+#define _USE_MATH_DEFINES
+#include <math.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include "Recast.h"
+#include "RecastAlloc.h"
+#include "RecastAssert.h"
+
+
+static const unsigned RC_UNSET_HEIGHT = 0xffff;	// getHeight() treats a patch cell equal to this as having no valid height
+
+struct rcHeightPatch
+{
+	inline rcHeightPatch() : data(0), xmin(0), ymin(0), width(0), height(0) {}	// starts empty (null data, zero extent)
+	inline ~rcHeightPatch() { rcFree(data); }	// the patch frees 'data' when destroyed
+	unsigned short* data;	// getHeight() indexes this as data[x + z*width]
+	int xmin, ymin, width, height;	// getHeight() subtracts xmin/ymin from cell coords before indexing
+};
+
+
+inline float vdot2(const float* a, const float* b)
+{
+	return a[2]*b[2] + a[0]*b[0];	// dot product over components 0 and 2 only; component 1 is ignored
+}
+
+inline float vdistSq2(const float* p, const float* q)
+{
+	const float deltaX = q[0] - p[0];	// difference in component 0
+	const float deltaZ = q[2] - p[2];	// difference in component 2; component 1 is ignored
+	return deltaX*deltaX + deltaZ*deltaZ;
+}
+
+inline float vdist2(const float* p, const float* q)
+{
+	return sqrtf((q[0]-p[0])*(q[0]-p[0]) + (q[2]-p[2])*(q[2]-p[2]));	// distance over components 0 and 2
+}
+
+inline float vcross2(const float* p1, const float* p2, const float* p3)
+{ 
+	const float ax = p2[0] - p1[0];	// vector p1->p2, components 0 and 2
+	const float az = p2[2] - p1[2];
+	const float bx = p3[0] - p1[0];	// vector p1->p3, components 0 and 2
+	const float bz = p3[2] - p1[2];
+	return ax * bz - az * bx;
+}
+
+static bool circumCircle(const float* p1, const float* p2, const float* p3,
+						 float* c, float& r)
+{
+	static const float EPS = 1e-6f;
+	// Only c[0], c[2] and r are written; every computation uses components 0 and 2.
+	const float area2 = vcross2(p1, p2, p3);
+	if (!(fabsf(area2) > EPS))
+	{
+		// Cross product within EPS of zero: fall back to p1 with zero radius.
+		c[0] = p1[0];
+		c[2] = p1[2];
+		r = 0;
+		return false;
+	}
+	const float sq1 = vdot2(p1,p1);
+	const float sq2 = vdot2(p2,p2);
+	const float sq3 = vdot2(p3,p3);
+	c[0] = (sq1*(p2[2]-p3[2]) + sq2*(p3[2]-p1[2]) + sq3*(p1[2]-p2[2])) / (2*area2);
+	c[2] = (sq1*(p3[0]-p2[0]) + sq2*(p1[0]-p3[0]) + sq3*(p2[0]-p1[0])) / (2*area2);
+	r = vdist2(c, p1);
+	return true;
+}
+
+static float distPtTri(const float* p, const float* a, const float* b, const float* c)
+{
+	float ac[3], ab[3], ap[3];
+	rcVsub(ac, c,a);
+	rcVsub(ab, b,a);
+	rcVsub(ap, p,a);
+
+	const float acac = vdot2(ac, ac);
+	const float acab = vdot2(ac, ab);
+	const float acap = vdot2(ac, ap);
+	const float abab = vdot2(ab, ab);
+	const float abap = vdot2(ab, ap);
+	
+	// Barycentric weights of p from the 2D dot products (u along ac, v along ab).
+	const float invDenom = 1.0f / (acac * abab - acab * acab);
+	const float u = (abab * acap - acab * abap) * invDenom;
+	const float v = (acac * abap - acab * acap) * invDenom;
+	
+	// Outside the triangle (beyond a small tolerance): report FLT_MAX.
+	static const float EPS = 1e-4f;
+	if (!(u >= -EPS && v >= -EPS && (u+v) <= 1+EPS))
+		return FLT_MAX;
+	// Inside: absolute difference between p's y and the y interpolated across the triangle.
+	const float y = a[1] + ac[1]*u + ab[1]*v;
+	return fabsf(y-p[1]);
+}
+
+static float distancePtSeg(const float* pt, const float* p, const float* q)
+{
+	// Returns the SQUARED 3D distance from pt to the segment p-q (no square root is taken).
+	const float seg[3] = { q[0] - p[0], q[1] - p[1], q[2] - p[2] };
+	const float rel[3] = { pt[0] - p[0], pt[1] - p[1], pt[2] - p[2] };
+	const float segLenSq = seg[0]*seg[0] + seg[1]*seg[1] + seg[2]*seg[2];
+	
+	// Projection parameter of pt along the segment;
+	// it is left undivided when the segment has zero length.
+	float t = seg[0]*rel[0] + seg[1]*rel[1] + seg[2]*rel[2];
+	if (segLenSq > 0)
+		t /= segLenSq;
+	
+	// Clamp to [0,1] so the closest point stays on the segment.
+	if (t < 0)
+		t = 0;
+	else if (t > 1)
+		t = 1;
+	
+	const float off[3] = { p[0] + t*seg[0] - pt[0], p[1] + t*seg[1] - pt[1], p[2] + t*seg[2] - pt[2] };
+	
+	return off[0]*off[0] + off[1]*off[1] + off[2]*off[2];
+}
+
+static float distancePtSeg2d(const float* pt, const float* p, const float* q)
+{
+	// Returns the SQUARED distance from pt to segment p-q, using only components 0 and 2.
+	const float segX = q[0] - p[0], segZ = q[2] - p[2];
+	const float relX = pt[0] - p[0], relZ = pt[2] - p[2];
+	const float segLenSq = segX*segX + segZ*segZ;
+	
+	// Projection parameter along the segment; left undivided when the segment has zero length.
+	float t = segX*relX + segZ*relZ;
+	if (segLenSq > 0)
+		t /= segLenSq;
+	// Clamp to [0,1] so the closest point stays on the segment.
+	if (t < 0)
+		t = 0;
+	else if (t > 1)
+		t = 1;
+	
+	const float offX = p[0] + t*segX - pt[0], offZ = p[2] + t*segZ - pt[2];
+	return offX*offX + offZ*offZ;
+}
+
+static float distToTriMesh(const float* p, const float* verts, const int /*nverts*/, const int* tris, const int ntris)
+{
+	// Smallest distPtTri() result over all triangles.
+	float best = FLT_MAX;
+	for (int tri = 0; tri < ntris; ++tri)
+	{
+		const int* idx = &tris[tri*4];	// 4 ints per triangle; the first three index 'verts'
+		const float d = distPtTri(p, &verts[idx[0]*3], &verts[idx[1]*3], &verts[idx[2]*3]);
+		if (d < best)
+			best = d;
+	}
+
+	// -1 signals that no triangle produced a value below FLT_MAX.
+	return (best == FLT_MAX) ? -1 : best;
+}
+
+static float distToPoly(int nvert, const float* verts, const float* p)
+{
+	// Magnitude: smallest distancePtSeg2d() over all edges. Negated when the crossing test puts p inside.
+	float nearest = FLT_MAX;
+	bool inside = false;
+	for (int cur = 0, prev = nvert-1; cur < nvert; prev = cur++)
+	{
+		const float* vc = &verts[cur*3];
+		const float* vp = &verts[prev*3];
+		const bool straddles = (vc[2] > p[2]) != (vp[2] > p[2]);
+		if (straddles && (p[0] < (vp[0]-vc[0]) * (p[2]-vc[2]) / (vp[2]-vc[2]) + vc[0]))
+			inside = !inside;
+		nearest = rcMin(nearest, distancePtSeg2d(p, vp, vc));
+	}
+	return inside ? -nearest : nearest;
+}
+
+
+static unsigned short getHeight(const float fx, const float fy, const float fz,
+								const float /*cs*/, const float ics, const float ch,
+								const rcHeightPatch& hp)
+{
+	int ix = (int)floorf(fx*ics + 0.01f);	// cell coordinates of (fx, fz); ics scales world units to cells
+	int iz = (int)floorf(fz*ics + 0.01f);
+	ix = rcClamp(ix-hp.xmin, 0, hp.width - 1);	// last valid column is width-1
+	iz = rcClamp(iz-hp.ymin, 0, hp.height - 1);	// last valid row is height-1
+	unsigned short h = hp.data[ix+iz*hp.width];
+	if (h == RC_UNSET_HEIGHT)
+	{
+		// Special case when data might be bad.
+		// Among the 8 neighbouring cells, pick the valid one whose height (nh*ch) is closest to fy.
+		const int off[8*2] = { -1,0, -1,-1, 0,-1, 1,-1, 1,0, 1,1, 0,1, -1,1};
+		float dmin = FLT_MAX;
+		for (int i = 0; i < 8; ++i)
+		{
+			const int nx = ix+off[i*2+0];
+			const int nz = iz+off[i*2+1];
+			if (nx < 0 || nz < 0 || nx >= hp.width || nz >= hp.height) continue;
+			const unsigned short nh = hp.data[nx+nz*hp.width];
+			if (nh == RC_UNSET_HEIGHT) continue;
+
+			const float d = fabsf(nh*ch - fy);
+			if (d < dmin)
+			{
+				h = nh;
+				dmin = d;
+			}
+			// Disabled alternative: choose by horizontal distance to the cell centre instead.
+/*			const float dx = (nx+0.5f)*cs - fx; 
+			const float dz = (nz+0.5f)*cs - fz;
+			const float d = dx*dx+dz*dz;
+			if (d < dmin)
+			{
+				h = nh;
+				dmin = d;
+			} */
+		}
+	}
+	return h;	// still RC_UNSET_HEIGHT if no neighbour had a valid height
+}
+
+
+enum EdgeValues
+{
+	UNDEF = -1,	// No value yet: an edge face slot that is still open, or "not found"/"not added" from findEdge/addEdge.
+	HULL = -2,	// Face slot on the outside of the hull: no triangle will be built on that side.
+};
+
+static int findEdge(const int* edges, int nedges, int s, int t)
+{
+	// Linear search for the edge joining s and t, in either direction; returns its index or UNDEF.
+	for (int idx = 0; idx < nedges; ++idx)
+	{
+		const int a = edges[idx*4+0], b = edges[idx*4+1];
+		if ((a == s && b == t) || (a == t && b == s)) return idx;
+	}
+	return UNDEF;
+}
+
+static int addEdge(rcContext* ctx, int* edges, int& nedges, const int maxEdges, int s, int t, int l, int r)
+{
+	// Appends edge (s,t) with left face l and right face r to the flat edge table
+	// (four ints per edge). Returns the index of the new edge, or UNDEF when the
+	// table is full or the edge is already present in either direction.
+	if (nedges >= maxEdges)
+	{
+		ctx->log(RC_LOG_ERROR, "addEdge: Too many edges (%d/%d).", nedges, maxEdges);
+		return UNDEF;
+	}
+	
+	// Already part of the triangulation: nothing to add.
+	if (findEdge(edges, nedges, s, t) != UNDEF)
+		return UNDEF;
+	
+	const int slot = nedges;
+	int* rec = &edges[slot*4];
+	rec[0] = s;
+	rec[1] = t;
+	rec[2] = l;
+	rec[3] = r;
+	++nedges;
+	return slot;
+}
+
+static void updateLeftFace(int* e, int s, int t, int f)
+{
+	// Record face f in the still-unset slot matching the direction s->t: e[2] for forward, e[3] for reversed.
+	const int side = (e[0] == s && e[1] == t && e[2] == UNDEF) ? 2 :
+					 (e[1] == s && e[0] == t && e[3] == UNDEF) ? 3 : -1;
+	if (side != -1) e[side] = f;
+}
+
+static int overlapSegSeg2d(const float* a, const float* b, const float* c, const float* d)
+{
+	// Returns 1 when the vcross2() sign tests show that segments a-b and c-d cross each other, else 0.
+	const float sideD = vcross2(a, b, d);
+	const float sideC = vcross2(a, b, c);
+	// c and d must lie strictly on opposite sides of a-b.
+	if (!(sideD*sideC < 0.0f))
+		return 0;
+	const float sideA = vcross2(c, d, a);
+	const float sideB = sideA + sideC - sideD;
+	// Likewise a and b relative to c-d; sideB is derived from the values already computed.
+	return (sideA * sideB < 0.0f) ? 1 : 0;
+}
+
+static bool overlapEdges(const float* pts, const int* edges, int nedges, int s1, int t1)
+{
+	// True if segment s1-t1 crosses any existing edge that does not share an endpoint with it.
+	const float* p1 = &pts[s1*3];
+	const float* q1 = &pts[t1*3];
+	for (int n = 0; n < nedges; ++n)
+	{
+		const int s0 = edges[n*4+0], t0 = edges[n*4+1];
+		const bool connected = (s0 == s1 || s0 == t1 || t0 == s1 || t0 == t1);
+		if (!connected && overlapSegSeg2d(&pts[s0*3],&pts[t0*3], p1,q1))
+			return true;
+	}
+	return false;
+}
+
+static void completeFacet(rcContext* ctx, const float* pts, int npts, int* edges, int& nedges, const int maxEdges, int& nfaces, int e)
+{
+	static const float EPS = 1e-5f;
+	// Builds the triangle on the still-open side of edge e, or marks that side as hull.
+	int* edge = &edges[e*4];
+	// Each edge is four ints: [0],[1] endpoints, [2],[3] adjacent faces (UNDEF while open).
+	// Cache s and t.
+	int s,t;
+	if (edge[2] == UNDEF)
+	{
+		s = edge[0];
+		t = edge[1];
+	}
+	else if (edge[3] == UNDEF)
+	{
+		s = edge[1];
+		t = edge[0];
+	}
+	else
+	{
+	    // Edge already completed. 
+	    return;
+	}
+    
+	// Find best point on left of edge. 
+	int pt = npts;	// npts doubles as the "no point found" marker.
+	float c[3] = {0,0,0};
+	float r = -1;	// Negative until the first candidate circle has been computed.
+	for (int u = 0; u < npts; ++u)
+	{
+		if (u == s || u == t) continue;
+		if (vcross2(&pts[s*3], &pts[t*3], &pts[u*3]) > EPS)
+		{
+			if (r < 0)
+			{
+				// The circle is not updated yet, do it now.
+				pt = u;
+				circumCircle(&pts[s*3], &pts[t*3], &pts[u*3], c, r);
+				continue;
+			}
+			const float d = vdist2(c, &pts[u*3]);
+			const float tol = 0.001f;
+			if (d > r*(1+tol))
+			{
+				// Outside current circumcircle, skip.
+				continue;
+			}
+			else if (d < r*(1-tol))
+			{
+				// Inside safe circumcircle, update circle.
+				pt = u;
+				circumCircle(&pts[s*3], &pts[t*3], &pts[u*3], c, r);
+			}
+			else
+			{
+				// Inside epsilon circum circle, do extra tests to make sure the edge is valid.
+				// s-u and t-u cannot overlap with s-pt nor t-pt if they exists.
+				if (overlapEdges(pts, edges, nedges, s,u))
+					continue;
+				if (overlapEdges(pts, edges, nedges, t,u))
+					continue;
+				// Edge is valid.
+				pt = u;
+				circumCircle(&pts[s*3], &pts[t*3], &pts[u*3], c, r);
+			}
+		}
+	}
+	
+	// Add new triangle or update edge info if s-t is on hull. 
+	if (pt < npts)
+	{
+		// Update face information of edge being completed. 
+		updateLeftFace(&edges[e*4], s, t, nfaces);
+		
+		// Add new edge or update face info of old edge. 
+		e = findEdge(edges, nedges, pt, s);	// Note: e is reused as a scratch index from here on.
+		if (e == UNDEF)
+		    addEdge(ctx, edges, nedges, maxEdges, pt, s, nfaces, UNDEF);
+		else
+		    updateLeftFace(&edges[e*4], pt, s, nfaces);
+		
+		// Add new edge or update face info of old edge. 
+		e = findEdge(edges, nedges, t, pt);
+		if (e == UNDEF)
+		    addEdge(ctx, edges, nedges, maxEdges, t, pt, nfaces, UNDEF);
+		else
+		    updateLeftFace(&edges[e*4], t, pt, nfaces);
+		
+		nfaces++;
+	}
+	else
+	{
+		updateLeftFace(&edges[e*4], s, t, HULL);	// No point lies left of s->t, so that side is outside the hull.
+	}
+}
+
+static void delaunayHull(rcContext* ctx, const int npts, const float* pts,
+						 const int nhull, const int* hull,
+						 rcIntArray& tris, rcIntArray& edges)
+{
+	int nfaces = 0;
+	int nedges = 0;
+	const int maxEdges = npts*10;
+	edges.resize(maxEdges*4);
+	// Seed the edge table with the hull outline: left face HULL, right face still open.
+	for (int i = 0, j = nhull-1; i < nhull; j=i++)
+		addEdge(ctx, &edges[0], nedges, maxEdges, hull[j],hull[i], HULL, UNDEF);
+	// Complete both sides of every edge; completeFacet may append edges, so nedges grows while iterating.
+	int currentEdge = 0;
+	while (currentEdge < nedges)
+	{
+		if (edges[currentEdge*4+2] == UNDEF)
+			completeFacet(ctx, pts, npts, &edges[0], nedges, maxEdges, nfaces, currentEdge);
+		if (edges[currentEdge*4+3] == UNDEF)
+			completeFacet(ctx, pts, npts, &edges[0], nedges, maxEdges, nfaces, currentEdge);
+		currentEdge++;
+	}
+
+	// Create tris
+	tris.resize(nfaces*4);
+	for (int i = 0; i < nfaces*4; ++i)
+		tris[i] = -1;
+	// Rebuild each triangle's three vertices from the face ids stored on the edges.
+	for (int i = 0; i < nedges; ++i)
+	{
+		const int* e = &edges[i*4];
+		if (e[3] >= 0)
+		{
+			// Left face
+			// NOTE(review): addEdge stores l in [2] and r in [3], so the Left/Right labels here look swapped - confirm.
+			int* t = &tris[e[3]*4];
+			if (t[0] == -1)
+			{
+				t[0] = e[0];
+				t[1] = e[1];
+			}
+			else if (t[0] == e[1])
+				t[2] = e[0];
+			else if (t[1] == e[0])
+				t[2] = e[1];
+		}
+		if (e[2] >= 0)
+		{
+			// Right
+			int* t = &tris[e[2]*4];
+			if (t[0] == -1)
+			{
+				t[0] = e[1];
+				t[1] = e[0];
+			}
+			else if (t[0] == e[0])
+				t[2] = e[1];
+			else if (t[1] == e[1])
+				t[2] = e[0];
+		}
+	}
+	// Drop triangles that did not receive all three vertices by moving the last triangle into their slot.
+	for (int i = 0; i < tris.size()/4; ++i)
+	{
+		int* t = &tris[i*4];
+		if (t[0] == -1 || t[1] == -1 || t[2] == -1)
+		{
+			ctx->log(RC_LOG_WARNING, "delaunayHull: Removing dangling face %d [%d,%d,%d].", i, t[0],t[1],t[2]);
+			t[0] = tris[tris.size()-4];
+			t[1] = tris[tris.size()-3];
+			t[2] = tris[tris.size()-2];
+			t[3] = tris[tris.size()-1];
+			tris.resize(tris.size()-4);
+			--i;	// Re-check slot i, which now holds the triangle that was moved in.
+		}
+	}
+}
+
+
+inline float getJitterX(const int i)
+{
+	return ((((unsigned int)i * 0x8da6b343u) & 0xffffu) / 65535.0f * 2.0f) - 1.0f;	// Low 16 bits of a hash of i, mapped onto [-1,1].
+}
+
+inline float getJitterY(const int i)
+{
+	return ((((unsigned int)i * 0xd8163841u) & 0xffffu) / 65535.0f * 2.0f) - 1.0f;	// Low 16 bits of a hash of i, mapped onto [-1,1].
+}
+
+static bool buildPolyDetail(rcContext* ctx, const float* in, const int nin,
+							const float sampleDist, const float sampleMaxError,
+							const rcCompactHeightfield& chf, const rcHeightPatch& hp,
+							float* verts, int& nverts, rcIntArray& tris,
+							rcIntArray& edges, rcIntArray& samples)
+{
+	static const int MAX_VERTS = 127;
+	static const int MAX_TRIS = 255;	// Max tris for delaunay is 2n-2-k (n=num verts, k=num hull verts).
+	static const int MAX_VERTS_PER_EDGE = 32;
+	float edge[(MAX_VERTS_PER_EDGE+1)*3];
+	int hull[MAX_VERTS];
+	int nhull = 0;
+	// Builds the detail triangulation of one polygon. The input vertices are copied to
+	// verts first; height samples along the outline and inside the polygon are appended
+	// after them. Triangles (four ints each) end up in tris. Always returns true.
+	for (int i = 0; i < nin; ++i)
+		rcVcopy(&verts[i*3], &in[i*3]);
+	nverts = nin;
+	
+	const float cs = chf.cs;
+	const float ics = 1.0f/cs;
+	
+	// Tessellate outlines.
+	// This is done in separate pass in order to ensure
+	// seamless height values across the poly boundaries.
+	if (sampleDist > 0)
+	{
+		for (int i = 0, j = nin-1; i < nin; j=i++)
+		{
+			const float* vj = &in[j*3];
+			const float* vi = &in[i*3];
+			bool swapped = false;
+			// Make sure the segments are always handled in same order
+			// using lexicographical sort or else there will be seams.
+			if (fabsf(vj[0]-vi[0]) < 1e-6f)
+			{
+				if (vj[2] > vi[2])
+				{
+					rcSwap(vj,vi);
+					swapped = true;
+				}
+			}
+			else
+			{
+				if (vj[0] > vi[0])
+				{
+					rcSwap(vj,vi);
+					swapped = true;
+				}
+			}
+			// Create samples along the edge.
+			float dx = vi[0] - vj[0];
+			float dy = vi[1] - vj[1];
+			float dz = vi[2] - vj[2];
+			float d = sqrtf(dx*dx + dz*dz);
+			int nn = 1 + (int)floorf(d/sampleDist);
+			if (nn >= MAX_VERTS_PER_EDGE) nn = MAX_VERTS_PER_EDGE-1;
+			if (nverts+nn >= MAX_VERTS)
+				nn = rcMax(1, MAX_VERTS-1-nverts);	// Keep nn >= 1: it is the divisor below, and nn == 1 adds no new vertices.
+			
+			for (int k = 0; k <= nn; ++k)
+			{
+				float u = (float)k/(float)nn;
+				float* pos = &edge[k*3];
+				pos[0] = vj[0] + dx*u;
+				pos[1] = vj[1] + dy*u;
+				pos[2] = vj[2] + dz*u;
+				pos[1] = getHeight(pos[0],pos[1],pos[2], cs, ics, chf.ch, hp)*chf.ch;
+			}
+			// Simplify samples.
+			int idx[MAX_VERTS_PER_EDGE] = {0,nn};
+			int nidx = 2;
+			for (int k = 0; k < nidx-1; )
+			{
+				const int a = idx[k];
+				const int b = idx[k+1];
+				const float* va = &edge[a*3];
+				const float* vb = &edge[b*3];
+				// Find maximum deviation along the segment.
+				float maxd = 0;
+				int maxi = -1;
+				for (int m = a+1; m < b; ++m)
+				{
+					float dev = distancePtSeg(&edge[m*3],va,vb);
+					if (dev > maxd)
+					{
+						maxd = dev;
+						maxi = m;
+					}
+				}
+				// If the max deviation is larger than accepted error,
+				// add new point, else continue to next segment.
+				if (maxi != -1 && maxd > rcSqr(sampleMaxError))
+				{
+					for (int m = nidx; m > k; --m)
+						idx[m] = idx[m-1];
+					idx[k+1] = maxi;
+					nidx++;
+				}
+				else
+				{
+					++k;
+				}
+			}
+			
+			hull[nhull++] = j;
+			// Add new vertices.
+			if (swapped)
+			{
+				for (int k = nidx-2; k > 0; --k)
+				{
+					rcVcopy(&verts[nverts*3], &edge[idx[k]*3]);
+					hull[nhull++] = nverts;
+					nverts++;
+				}
+			}
+			else
+			{
+				for (int k = 1; k < nidx-1; ++k)
+				{
+					rcVcopy(&verts[nverts*3], &edge[idx[k]*3]);
+					hull[nhull++] = nverts;
+					nverts++;
+				}
+			}
+		}
+	}
+	
+
+	// Tessellate the base mesh.
+	edges.resize(0);
+	tris.resize(0);
+
+	delaunayHull(ctx, nverts, verts, nhull, hull, tris, edges);
+	
+	if (tris.size() == 0)
+	{
+		// Could not triangulate the poly, make sure there is some valid data there.
+		ctx->log(RC_LOG_WARNING, "buildPolyDetail: Could not triangulate polygon, adding default data.");
+		for (int i = 2; i < nverts; ++i)
+		{
+			tris.push(0);
+			tris.push(i-1);
+			tris.push(i);
+			tris.push(0);
+		}
+		return true;
+	}
+
+	if (sampleDist > 0)
+	{
+		// Create sample locations in a grid.
+		float bmin[3], bmax[3];
+		rcVcopy(bmin, in);
+		rcVcopy(bmax, in);
+		for (int i = 1; i < nin; ++i)
+		{
+			rcVmin(bmin, &in[i*3]);
+			rcVmax(bmax, &in[i*3]);
+		}
+		int x0 = (int)floorf(bmin[0]/sampleDist);
+		int x1 = (int)ceilf(bmax[0]/sampleDist);
+		int z0 = (int)floorf(bmin[2]/sampleDist);
+		int z1 = (int)ceilf(bmax[2]/sampleDist);
+		samples.resize(0);
+		for (int z = z0; z < z1; ++z)
+		{
+			for (int x = x0; x < x1; ++x)
+			{
+				float pt[3];
+				pt[0] = x*sampleDist;
+				pt[1] = (bmax[1]+bmin[1])*0.5f;
+				pt[2] = z*sampleDist;
+				// Make sure the samples are not too close to the edges.
+				if (distToPoly(nin,in,pt) > -sampleDist/2) continue;
+				samples.push(x);
+				samples.push(getHeight(pt[0], pt[1], pt[2], cs, ics, chf.ch, hp));
+				samples.push(z);
+				samples.push(0); // Not added
+			}
+		}
+				
+		// Add the samples starting from the one that has the most
+		// error. The procedure stops when all samples are added
+		// or when the max error is within threshold.
+		const int nsamples = samples.size()/4;
+		for (int iter = 0; iter < nsamples; ++iter)
+		{
+			if (nverts >= MAX_VERTS)
+				break;
+
+			// Find sample with most error.
+			float bestpt[3] = {0,0,0};
+			float bestd = 0;
+			int besti = -1;
+			for (int i = 0; i < nsamples; ++i)
+			{
+				const int* s = &samples[i*4];
+				if (s[3]) continue; // skip added.
+				float pt[3];
+				// The sample location is jittered to get rid of some bad triangulations
+				// which are caused by symmetrical data from the grid structure.
+				pt[0] = s[0]*sampleDist + getJitterX(i)*cs*0.1f;
+				pt[1] = s[1]*chf.ch;
+				pt[2] = s[2]*sampleDist + getJitterY(i)*cs*0.1f;
+				float d = distToTriMesh(pt, verts, nverts, &tris[0], tris.size()/4);
+				if (d < 0) continue; // did not hit the mesh.
+				if (d > bestd)
+				{
+					bestd = d;
+					besti = i;
+					rcVcopy(bestpt,pt);
+				}
+			}
+			// If the max error is within accepted threshold, stop tessellating.
+			if (bestd <= sampleMaxError || besti == -1)
+				break;
+			// Mark sample as added.
+			samples[besti*4+3] = 1;
+			// Add the new sample point.
+			rcVcopy(&verts[nverts*3],bestpt);
+			nverts++;
+			
+			// Create new triangulation.
+			// TODO: Incremental add instead of full rebuild.
+			edges.resize(0);
+			tris.resize(0);
+			delaunayHull(ctx, nverts, verts, nhull, hull, tris, edges);
+		}		
+	}
+
+	const int ntris = tris.size()/4;
+	if (ntris > MAX_TRIS)
+	{
+		tris.resize(MAX_TRIS*4);
+		ctx->log(RC_LOG_ERROR, "rcBuildPolyMeshDetail: Shrinking triangle count from %d to max %d.", ntris, MAX_TRIS);
+	}
+
+	return true;
+}
+
+static void getHeightData(const rcCompactHeightfield& chf,
+						  const unsigned short* poly, const int npoly,
+						  const unsigned short* verts, const int bs,
+						  rcHeightPatch& hp, rcIntArray& stack)
+{
+	// Floodfill the heightfield to get 2D height data,
+	// starting at vertex locations as seeds.
+	
+	// Note: Reads to the compact heightfield are offset by border size (bs)
+	// since border size offset is already removed from the polymesh vertices.
+	
+	memset(hp.data, 0, sizeof(unsigned short)*hp.width*hp.height);
+	// During the first flood fill hp.data is only a visited map: 0 = not visited, 1 = visited.
+	stack.resize(0);
+	// The vertex's own cell first, then its eight neighbours, as (dx, dz) pairs.
+	static const int offset[9*2] =
+	{
+		0,0, -1,-1, 0,-1, 1,-1, 1,0, 1,1, 0,1, -1,1, -1,0,
+	};
+	
+	// Use poly vertices as seed points for the flood fill.
+	for (int j = 0; j < npoly; ++j)
+	{
+		int cx = 0, cz = 0, ci =-1;
+		int dmin = RC_UNSET_HEIGHT;
+		for (int k = 0; k < 9; ++k)
+		{
+			const int ax = (int)verts[poly[j]*3+0] + offset[k*2+0];
+			const int ay = (int)verts[poly[j]*3+1];
+			const int az = (int)verts[poly[j]*3+2] + offset[k*2+1];
+			if (ax < hp.xmin || ax >= hp.xmin+hp.width ||
+				az < hp.ymin || az >= hp.ymin+hp.height)
+				continue;
+			// Among all spans of the nine cells, keep the one whose y is closest to the vertex y.
+			const rcCompactCell& c = chf.cells[(ax+bs)+(az+bs)*chf.width];
+			for (int i = (int)c.index, ni = (int)(c.index+c.count); i < ni; ++i)
+			{
+				const rcCompactSpan& s = chf.spans[i];
+				int d = rcAbs(ay - (int)s.y);
+				if (d < dmin)
+				{
+					cx = ax;
+					cz = az;
+					ci = i;
+					dmin = d;
+				}
+			}
+		}
+		if (ci != -1)
+		{
+			stack.push(cx);
+			stack.push(cz);
+			stack.push(ci);
+		}
+	}
+	
+	// Compute the polygon centre as the average of its vertices; the flood fill below searches for it.
+	int pcx = 0, pcz = 0;
+	for (int j = 0; j < npoly; ++j)
+	{
+		pcx += (int)verts[poly[j]*3+0];
+		pcz += (int)verts[poly[j]*3+2];
+	}
+	pcx /= npoly;
+	pcz /= npoly;
+	// Mark every seed cell as visited. Stack entries are (x, z, span index) triples.
+	for (int i = 0; i < stack.size(); i += 3)
+	{
+		int cx = stack[i+0];
+		int cy = stack[i+1];
+		int idx = cx-hp.xmin+(cy-hp.ymin)*hp.width;
+		hp.data[idx] = 1;
+	}
+	// Depth-first walk over connected spans until a cell next to the centre is reached.
+	while (stack.size() > 0)
+	{
+		int ci = stack.pop();
+		int cy = stack.pop();
+		int cx = stack.pop();
+		
+		// Check if close to center of the polygon.
+		if (rcAbs(cx-pcx) <= 1 && rcAbs(cy-pcz) <= 1)
+		{
+			stack.resize(0);
+			stack.push(cx);
+			stack.push(cy);
+			stack.push(ci);
+			break;
+		}
+		
+		const rcCompactSpan& cs = chf.spans[ci];
+		
+		for (int dir = 0; dir < 4; ++dir)
+		{
+			if (rcGetCon(cs, dir) == RC_NOT_CONNECTED) continue;
+			
+			const int ax = cx + rcGetDirOffsetX(dir);
+			const int ay = cy + rcGetDirOffsetY(dir);
+			
+			if (ax < hp.xmin || ax >= (hp.xmin+hp.width) ||
+				ay < hp.ymin || ay >= (hp.ymin+hp.height))
+				continue;
+			
+			if (hp.data[ax-hp.xmin+(ay-hp.ymin)*hp.width] != 0)
+				continue;
+			
+			const int ai = (int)chf.cells[(ax+bs)+(ay+bs)*chf.width].index + rcGetCon(cs, dir);
+
+			int idx = ax-hp.xmin+(ay-hp.ymin)*hp.width;
+			hp.data[idx] = 1;
+			
+			stack.push(ax);
+			stack.push(ay);
+			stack.push(ai);
+		}
+	}
+	// Second pass: reset every entry to 0xffff and fill in real heights outwards from the start cell.
+	memset(hp.data, 0xff, sizeof(unsigned short)*hp.width*hp.height);
+
+	// Mark start locations.
+	for (int i = 0; i < stack.size(); i += 3)
+	{
+		int cx = stack[i+0];
+		int cy = stack[i+1];
+		int ci = stack[i+2];
+		int idx = cx-hp.xmin+(cy-hp.ymin)*hp.width;
+		const rcCompactSpan& cs = chf.spans[ci];
+		hp.data[idx] = cs.y;
+	}
+	// The stack is now read front to back as a queue; head is the next entry to process.
+	static const int RETRACT_SIZE = 256;
+	int head = 0;
+	
+	while (head*3 < stack.size())
+	{
+		int cx = stack[head*3+0];
+		int cy = stack[head*3+1];
+		int ci = stack[head*3+2];
+		head++;
+		if (head >= RETRACT_SIZE)
+		{
+			// Drop the RETRACT_SIZE entries already consumed so the queue does not keep growing.
+			head = 0;
+			if (stack.size() > RETRACT_SIZE*3)
+				memmove(&stack[0], &stack[RETRACT_SIZE*3], sizeof(int)*(stack.size()-RETRACT_SIZE*3));
+			stack.resize(stack.size()-RETRACT_SIZE*3);
+		}
+
+		const rcCompactSpan& cs = chf.spans[ci];
+		for (int dir = 0; dir < 4; ++dir)
+		{
+			if (rcGetCon(cs, dir) == RC_NOT_CONNECTED) continue;
+			
+			const int ax = cx + rcGetDirOffsetX(dir);
+			const int ay = cy + rcGetDirOffsetY(dir);
+			
+			if (ax < hp.xmin || ax >= (hp.xmin+hp.width) ||
+				ay < hp.ymin || ay >= (hp.ymin+hp.height))
+				continue;
+			
+			if (hp.data[ax-hp.xmin+(ay-hp.ymin)*hp.width] != RC_UNSET_HEIGHT)
+				continue;
+			
+			const int ai = (int)chf.cells[(ax+bs)+(ay+bs)*chf.width].index + rcGetCon(cs, dir);
+			
+			const rcCompactSpan& as = chf.spans[ai];
+			int idx = ax-hp.xmin+(ay-hp.ymin)*hp.width;
+			hp.data[idx] = as.y;
+
+			stack.push(ax);
+			stack.push(ay);
+			stack.push(ai);
+		}
+	}
+	
+}
+
+static unsigned char getEdgeFlags(const float* va, const float* vb,
+								  const float* vpoly, const int npoly)
+{
+	// Returns 1 if va and vb both lie within the threshold of one and the same polygon edge, else 0.
+	static const float maxDistSqr = rcSqr(0.001f);
+	for (int cur = 0, prev = npoly-1; cur < npoly; prev = cur++)
+	{
+		const float* p = &vpoly[prev*3];
+		const float* q = &vpoly[cur*3];
+		if (distancePtSeg2d(va, p, q) < maxDistSqr && distancePtSeg2d(vb, p, q) < maxDistSqr) return 1;
+	}
+	return 0;
+}
+
+static unsigned char getTriFlags(const float* va, const float* vb, const float* vc,
+								 const float* vpoly, const int npoly)
+{
+	// Packs one getEdgeFlags() result per triangle edge, two bits apart: va-vb at bit 0, vb-vc at bit 2, vc-va at bit 4.
+	const unsigned char ab = getEdgeFlags(va,vb,vpoly,npoly);
+	const unsigned char bc = getEdgeFlags(vb,vc,vpoly,npoly);
+	const unsigned char ca = getEdgeFlags(vc,va,vpoly,npoly);
+	return (unsigned char)((ab << 0) | (bc << 2) | (ca << 4));
+}
+
+/// @par
+///
+/// See the #rcConfig documentation for more information on the configuration parameters.
+///
+/// @see rcAllocPolyMeshDetail, rcPolyMesh, rcCompactHeightfield, rcPolyMeshDetail, rcConfig
+bool rcBuildPolyMeshDetail(rcContext* ctx, const rcPolyMesh& mesh, const rcCompactHeightfield& chf,
+						   const float sampleDist, const float sampleMaxError,
+						   rcPolyMeshDetail& dmesh)
+{
+	rcAssert(ctx);
+	
+	// Nothing to build: return before the timer is started so it is never left running.
+	if (mesh.nverts == 0 || mesh.npolys == 0)
+		return true;
+	
+	ctx->startTimer(RC_TIMER_BUILD_POLYMESHDETAIL);
+	const int nvp = mesh.nvp;
+	const float cs = mesh.cs;
+	const float ch = mesh.ch;
+	const float* orig = mesh.bmin;
+	const int borderSize = mesh.borderSize;
+	
+	rcIntArray edges(64);
+	rcIntArray tris(512);
+	rcIntArray stack(512);
+	rcIntArray samples(512);
+	float verts[256*3];
+	rcHeightPatch hp;
+	int nPolyVerts = 0;
+	int maxhw = 0, maxhh = 0;
+	
+	rcScopedDelete<int> bounds = (int*)rcAlloc(sizeof(int)*mesh.npolys*4, RC_ALLOC_TEMP);
+	if (!bounds)
+	{
+		ctx->log(RC_LOG_ERROR, "rcBuildPolyMeshDetail: Out of memory 'bounds' (%d).", mesh.npolys*4);
+		return false;
+	}
+	rcScopedDelete<float> poly = (float*)rcAlloc(sizeof(float)*nvp*3, RC_ALLOC_TEMP);
+	if (!poly)
+	{
+		ctx->log(RC_LOG_ERROR, "rcBuildPolyMeshDetail: Out of memory 'poly' (%d).", nvp*3);
+		return false;
+	}
+	
+	// Find max size for a polygon area.
+	for (int i = 0; i < mesh.npolys; ++i)
+	{
+		const unsigned short* p = &mesh.polys[i*nvp*2];
+		int& xmin = bounds[i*4+0];
+		int& xmax = bounds[i*4+1];
+		int& ymin = bounds[i*4+2];
+		int& ymax = bounds[i*4+3];
+		xmin = chf.width;
+		xmax = 0;
+		ymin = chf.height;
+		ymax = 0;
+		for (int j = 0; j < nvp; ++j)
+		{
+			if(p[j] == RC_MESH_NULL_IDX) break;
+			const unsigned short* v = &mesh.verts[p[j]*3];
+			xmin = rcMin(xmin, (int)v[0]);
+			xmax = rcMax(xmax, (int)v[0]);
+			ymin = rcMin(ymin, (int)v[2]);
+			ymax = rcMax(ymax, (int)v[2]);
+			nPolyVerts++;
+		}
+		// Grow the bounds by one cell on each side, clamped to the heightfield.
+		xmin = rcMax(0,xmin-1);
+		xmax = rcMin(chf.width,xmax+1);
+		ymin = rcMax(0,ymin-1);
+		ymax = rcMin(chf.height,ymax+1);
+		if (xmin >= xmax || ymin >= ymax) continue;
+		maxhw = rcMax(maxhw, xmax-xmin);
+		maxhh = rcMax(maxhh, ymax-ymin);
+	}
+	// One height patch, sized for the largest polygon, is reused for every polygon.
+	hp.data = (unsigned short*)rcAlloc(sizeof(unsigned short)*maxhw*maxhh, RC_ALLOC_TEMP);
+	if (!hp.data)
+	{
+		ctx->log(RC_LOG_ERROR, "rcBuildPolyMeshDetail: Out of memory 'hp.data' (%d).", maxhw*maxhh);
+		return false;
+	}
+	
+	dmesh.nmeshes = mesh.npolys;
+	dmesh.nverts = 0;
+	dmesh.ntris = 0;
+	dmesh.meshes = (unsigned int*)rcAlloc(sizeof(unsigned int)*dmesh.nmeshes*4, RC_ALLOC_PERM);
+	if (!dmesh.meshes)
+	{
+		ctx->log(RC_LOG_ERROR, "rcBuildPolyMeshDetail: Out of memory 'dmesh.meshes' (%d).", dmesh.nmeshes*4);
+		return false;
+	}
+	// Initial capacity estimates; both buffers are grown in steps of 256 further down.
+	int vcap = nPolyVerts+nPolyVerts/2;
+	int tcap = vcap*2;
+
+	dmesh.nverts = 0;
+	dmesh.verts = (float*)rcAlloc(sizeof(float)*vcap*3, RC_ALLOC_PERM);
+	if (!dmesh.verts)
+	{
+		ctx->log(RC_LOG_ERROR, "rcBuildPolyMeshDetail: Out of memory 'dmesh.verts' (%d).", vcap*3);
+		return false;
+	}
+	dmesh.ntris = 0;
+	dmesh.tris = (unsigned char*)rcAlloc(sizeof(unsigned char)*tcap*4, RC_ALLOC_PERM);	// Element size, not pointer size.
+	if (!dmesh.tris)
+	{
+		ctx->log(RC_LOG_ERROR, "rcBuildPolyMeshDetail: Out of memory 'dmesh.tris' (%d).", tcap*4);
+		return false;
+	}
+	
+	for (int i = 0; i < mesh.npolys; ++i)
+	{
+		const unsigned short* p = &mesh.polys[i*nvp*2];
+		
+		// Store polygon vertices for processing.
+		int npoly = 0;
+		for (int j = 0; j < nvp; ++j)
+		{
+			if(p[j] == RC_MESH_NULL_IDX) break;
+			const unsigned short* v = &mesh.verts[p[j]*3];
+			poly[j*3+0] = v[0]*cs;
+			poly[j*3+1] = v[1]*ch;
+			poly[j*3+2] = v[2]*cs;
+			npoly++;
+		}
+		
+		// Get the height data from the area of the polygon.
+		hp.xmin = bounds[i*4+0];
+		hp.ymin = bounds[i*4+2];
+		hp.width = bounds[i*4+1]-bounds[i*4+0];
+		hp.height = bounds[i*4+3]-bounds[i*4+2];
+		getHeightData(chf, p, npoly, mesh.verts, borderSize, hp, stack);
+		
+		// Build detail mesh.
+		int nverts = 0;
+		if (!buildPolyDetail(ctx, poly, npoly,
+							 sampleDist, sampleMaxError,
+							 chf, hp, verts, nverts, tris,
+							 edges, samples))
+		{
+			return false;
+		}
+
+		// Move detail verts to world space.
+		for (int j = 0; j < nverts; ++j)
+		{
+			verts[j*3+0] += orig[0];
+			verts[j*3+1] += orig[1] + chf.ch; // Is this offset necessary?
+			verts[j*3+2] += orig[2];
+		}
+		// Offset poly too, will be used to flag checking.
+		for (int j = 0; j < npoly; ++j)
+		{
+			poly[j*3+0] += orig[0];
+			poly[j*3+1] += orig[1];
+			poly[j*3+2] += orig[2];
+		}
+	
+		// Store detail submesh.
+		const int ntris = tris.size()/4;
+		// Per-polygon record: first vertex, vertex count, first triangle, triangle count.
+		dmesh.meshes[i*4+0] = (unsigned int)dmesh.nverts;
+		dmesh.meshes[i*4+1] = (unsigned int)nverts;
+		dmesh.meshes[i*4+2] = (unsigned int)dmesh.ntris;
+		dmesh.meshes[i*4+3] = (unsigned int)ntris;
+		
+		// Store vertices, allocate more memory if necessary.
+		if (dmesh.nverts+nverts > vcap)
+		{
+			while (dmesh.nverts+nverts > vcap)
+				vcap += 256;
+				
+			float* newv = (float*)rcAlloc(sizeof(float)*vcap*3, RC_ALLOC_PERM);
+			if (!newv)
+			{
+				ctx->log(RC_LOG_ERROR, "rcBuildPolyMeshDetail: Out of memory 'newv' (%d).", vcap*3);
+				return false;
+			}
+			if (dmesh.nverts)
+				memcpy(newv, dmesh.verts, sizeof(float)*3*dmesh.nverts);
+			rcFree(dmesh.verts);
+			dmesh.verts = newv;
+		}
+		for (int j = 0; j < nverts; ++j)
+		{
+			dmesh.verts[dmesh.nverts*3+0] = verts[j*3+0];
+			dmesh.verts[dmesh.nverts*3+1] = verts[j*3+1];
+			dmesh.verts[dmesh.nverts*3+2] = verts[j*3+2];
+			dmesh.nverts++;
+		}
+		
+		// Store triangles, allocate more memory if necessary.
+		if (dmesh.ntris+ntris > tcap)
+		{
+			while (dmesh.ntris+ntris > tcap)
+				tcap += 256;
+			unsigned char* newt = (unsigned char*)rcAlloc(sizeof(unsigned char)*tcap*4, RC_ALLOC_PERM);
+			if (!newt)
+			{
+				ctx->log(RC_LOG_ERROR, "rcBuildPolyMeshDetail: Out of memory 'newt' (%d).", tcap*4);
+				return false;
+			}
+			if (dmesh.ntris)
+				memcpy(newt, dmesh.tris, sizeof(unsigned char)*4*dmesh.ntris);
+			rcFree(dmesh.tris);
+			dmesh.tris = newt;
+		}
+		for (int j = 0; j < ntris; ++j)
+		{
+			const int* t = &tris[j*4];
+			dmesh.tris[dmesh.ntris*4+0] = (unsigned char)t[0];
+			dmesh.tris[dmesh.ntris*4+1] = (unsigned char)t[1];
+			dmesh.tris[dmesh.ntris*4+2] = (unsigned char)t[2];
+			dmesh.tris[dmesh.ntris*4+3] = getTriFlags(&verts[t[0]*3], &verts[t[1]*3], &verts[t[2]*3], poly, npoly);
+			dmesh.ntris++;
+		}
+	}
+		
+	ctx->stopTimer(RC_TIMER_BUILD_POLYMESHDETAIL);
+
+	return true;
+}
+
+/// @see rcAllocPolyMeshDetail, rcPolyMeshDetail
+bool rcMergePolyMeshDetails(rcContext* ctx, rcPolyMeshDetail** meshes, const int nmeshes, rcPolyMeshDetail& mesh)
+{
+	rcAssert(ctx);
+	
+	ctx->startTimer(RC_TIMER_MERGE_POLYMESHDETAIL);
+	// Concatenates all non-null input detail meshes into mesh; returns false if an allocation fails.
+	int maxVerts = 0;
+	int maxTris = 0;
+	int maxMeshes = 0;
+	// Sum the sizes of the inputs so each output buffer is allocated once.
+	for (int i = 0; i < nmeshes; ++i)
+	{
+		if (!meshes[i]) continue;
+		maxVerts += meshes[i]->nverts;
+		maxTris += meshes[i]->ntris;
+		maxMeshes += meshes[i]->nmeshes;
+	}
+
+	mesh.nmeshes = 0;
+	mesh.meshes = (unsigned int*)rcAlloc(sizeof(unsigned int)*maxMeshes*4, RC_ALLOC_PERM);
+	if (!mesh.meshes)
+	{
+		ctx->log(RC_LOG_ERROR, "rcMergePolyMeshDetails: Out of memory 'mesh.meshes' (%d).", maxMeshes*4);
+		return false;
+	}
+
+	mesh.ntris = 0;
+	mesh.tris = (unsigned char*)rcAlloc(sizeof(unsigned char)*maxTris*4, RC_ALLOC_PERM);
+	if (!mesh.tris)
+	{
+		ctx->log(RC_LOG_ERROR, "rcMergePolyMeshDetails: Out of memory 'mesh.tris' (%d).", maxTris*4);
+		return false;
+	}
+
+	mesh.nverts = 0;
+	mesh.verts = (float*)rcAlloc(sizeof(float)*maxVerts*3, RC_ALLOC_PERM);
+	if (!mesh.verts)
+	{
+		ctx->log(RC_LOG_ERROR, "rcMergePolyMeshDetails: Out of memory 'mesh.verts' (%d).", maxVerts*3);
+		return false;
+	}
+	
+	// Merge datas.
+	for (int i = 0; i < nmeshes; ++i)
+	{
+		rcPolyMeshDetail* dm = meshes[i];
+		if (!dm) continue;
+		for (int j = 0; j < dm->nmeshes; ++j)
+		{
+			unsigned int* dst = &mesh.meshes[mesh.nmeshes*4];
+			unsigned int* src = &dm->meshes[j*4];
+			// Rebase the first-vertex and first-triangle offsets onto the merged buffers; counts are copied as-is.
+			dst[0] = (unsigned int)mesh.nverts+src[0];
+			dst[1] = src[1];
+			dst[2] = (unsigned int)mesh.ntris+src[2];
+			dst[3] = src[3];
+			mesh.nmeshes++;
+		}
+		for (int k = 0; k < dm->nverts; ++k)
+		{
+			rcVcopy(&mesh.verts[mesh.nverts*3], &dm->verts[k*3]);
+			mesh.nverts++;
+		}
+		for (int k = 0; k < dm->ntris; ++k)
+		{
+			mesh.tris[mesh.ntris*4+0] = dm->tris[k*4+0];
+			mesh.tris[mesh.ntris*4+1] = dm->tris[k*4+1];
+			mesh.tris[mesh.ntris*4+2] = dm->tris[k*4+2];
+			mesh.tris[mesh.ntris*4+3] = dm->tris[k*4+3];
+			mesh.ntris++;
+		}
+	}
+
+	ctx->stopTimer(RC_TIMER_MERGE_POLYMESHDETAIL);
+	
+	return true;
+}
+
diff --git a/Engine/lib/recast/Recast/Source/RecastRasterization.cpp b/Engine/lib/recast/Recast/Source/RecastRasterization.cpp
new file mode 100644
index 000000000..d2bb7c98f
--- /dev/null
+++ b/Engine/lib/recast/Recast/Source/RecastRasterization.cpp
@@ -0,0 +1,387 @@
+//
+// Copyright (c) 2009-2010 Mikko Mononen memon@inside.org
+//
+// This software is provided 'as-is', without any express or implied
+// warranty.  In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source distribution.
+//
+
+#define _USE_MATH_DEFINES
+#include <math.h>
+#include <stdio.h>
+#include "Recast.h"
+#include "RecastAlloc.h"
+#include "RecastAssert.h"
+
+inline bool overlapBounds(const float* amin, const float* amax, const float* bmin, const float* bmax)
+{
+	// Two boxes overlap unless they are separated along at least one of the three axes; touching counts as overlap.
+	for (int axis = 0; axis < 3; ++axis)
+		if (amin[axis] > bmax[axis] || amax[axis] < bmin[axis])
+			return false;
+	return true;
+}
+
+inline bool overlapInterval(unsigned short amin, unsigned short amax,
+							unsigned short bmin, unsigned short bmax)
+{
+	// Closed intervals [amin,amax] and [bmin,bmax] overlap when neither lies entirely
+	// below the other; sharing a single endpoint counts as overlapping.
+	return !(amax < bmin || amin > bmax);
+}
+
+
+static rcSpan* allocSpan(rcHeightfield& hf)
+{
+	// Hands out one rcSpan from the heightfield's free list, growing the list by a
+	// whole pool first when it is empty or down to its last entry.
+	// Returns 0 if a new pool was needed and rcAlloc failed.
+	const bool needPage = !hf.freelist || !hf.freelist->next;
+	if (needPage)
+	{
+		rcSpanPool* page = (rcSpanPool*)rcAlloc(sizeof(rcSpanPool), RC_ALLOC_PERM);
+		if (!page)
+			return 0;
+		
+		// Link the page in front of the heightfield's pool list.
+		page->next = hf.pools;
+		hf.pools = page;
+		
+		// Thread the page's items onto the free list, walking from the last
+		// item to the first so that items[0] ends up as the new list head.
+		rcSpan* chain = hf.freelist;
+		for (int k = RC_SPANS_PER_POOL-1; k >= 0; --k)
+		{
+			page->items[k].next = chain;
+			chain = &page->items[k];
+		}
+		hf.freelist = chain;
+	}
+	
+	// Detach and return the head of the free list.
+	rcSpan* span = hf.freelist;
+	hf.freelist = span->next;
+	return span;
+}
+
+static void freeSpan(rcHeightfield& hf, rcSpan* ptr)
+{
+	if (ptr == 0)	// Nothing to recycle.
+		return;
+	ptr->next = hf.freelist;	// Push onto the front of the free list.
+	hf.freelist = ptr;
+}
+
+static void addSpan(rcHeightfield& hf, const int x, const int y,
+					const unsigned short smin, const unsigned short smax,
+					const unsigned char area, const int flagMergeThr)
+{
+	// Inserts the span [smin,smax] into column (x,y), merging it with every span it overlaps or touches.
+	int idx = x + y*hf.width;
+	
+	rcSpan* s = allocSpan(hf);
+	if (!s) return;	// allocSpan() returns 0 when out of memory; drop the span rather than dereference null.
+	s->smin = smin;
+	s->smax = smax;
+	s->area = area;
+	s->next = 0;
+	// Empty cell, add the first span.
+	if (!hf.spans[idx])
+	{
+		hf.spans[idx] = s;
+		return;
+	}
+	rcSpan* prev = 0;
+	rcSpan* cur = hf.spans[idx];
+	
+	// Insert and merge spans.
+	while (cur)
+	{
+		if (cur->smin > s->smax)
+		{
+			// Current span is further than the new span, break.
+			break;
+		}
+		else if (cur->smax < s->smin)
+		{
+			// Current span is before the new span advance.
+			prev = cur;
+			cur = cur->next;
+		}
+		else
+		{
+			// Merge spans.
+			if (cur->smin < s->smin)
+				s->smin = cur->smin;
+			if (cur->smax > s->smax)
+				s->smax = cur->smax;
+			
+			// Merge flags.
+			if (rcAbs((int)s->smax - (int)cur->smax) <= flagMergeThr)
+				s->area = rcMax(s->area, cur->area);
+			
+			// Remove current span.
+			rcSpan* next = cur->next;
+			freeSpan(hf, cur);
+			if (prev)
+				prev->next = next;
+			else
+				hf.spans[idx] = next;
+			cur = next;
+		}
+	}
+	
+	// Insert new span.
+	if (prev)
+	{
+		s->next = prev->next;
+		prev->next = s;
+	}
+	else
+	{
+		s->next = hf.spans[idx];
+		hf.spans[idx] = s;
+	}
+}
+
+/// @par
+///
+/// The span addition can be set to favor flags. If the span is merged to
+/// another span and the new @p smax is within @p flagMergeThr units
+/// from the existing span, the span flags are merged.
+///
+/// @see rcHeightfield, rcSpan.
+void rcAddSpan(rcContext* /*ctx*/, rcHeightfield& hf, const int x, const int y,
+			   const unsigned short smin, const unsigned short smax,
+			   const unsigned char area, const int flagMergeThr)
+{
+//	rcAssert(ctx);	(the context parameter is unused here, so the check is disabled)
+	addSpan(hf, x,y, smin, smax, area, flagMergeThr);	// Public entry point: forwards unchanged to the file-local addSpan().
+}
+
+static int clipPoly(const float* in, int n, float* out, float pnx, float pnz, float pd)
+{
+	// Clips the polygon `in` (n xyz-vertices, at most 12) against the half-plane
+	// pnx*x + pnz*z + pd >= 0, writes the kept part to `out` and returns its vertex count.
+	float dist[12];
+	for (int v = 0; v < n; ++v)
+		dist[v] = pnx*in[v*3+0] + pnz*in[v*3+2] + pd;
+	
+	float* dst = out;
+	for (int cur = 0, prev = n-1; cur < n; prev = cur++)
+	{
+		const float* a = &in[prev*3];
+		const float* b = &in[cur*3];
+		const bool curInside = dist[cur] >= 0;
+		if ((dist[prev] >= 0) != curInside)
+		{
+			// The edge a->b crosses the plane: emit the crossing point.
+			const float s = dist[prev] / (dist[prev] - dist[cur]);
+			for (int c = 0; c < 3; ++c)
+				*dst++ = a[c] + (b[c] - a[c])*s;
+		}
+		if (curInside)
+		{
+			for (int c = 0; c < 3; ++c)
+				*dst++ = b[c];
+		}
+	}
+	return (int)((dst - out) / 3);
+}
+
+static void rasterizeTri(const float* v0, const float* v1, const float* v2,	// Rasterizes one triangle (corners v0..v2, 3 floats each) into hf.
+						 const unsigned char area, rcHeightfield& hf,	// area: id handed to addSpan() for every span produced
+						 const float* bmin, const float* bmax,	// bounds used for culling and as the grid origin
+						 const float cs, const float ics, const float ich,	// cs: cell size; ics / ich: factors turning xz / y distances into cell units
+						 const int flagMergeThr)	// forwarded unchanged to addSpan()
+{
+	const int w = hf.width;
+	const int h = hf.height;
+	float tmin[3], tmax[3];
+	const float by = bmax[1] - bmin[1];	// height of the bounds; span heights are clamped to [0, by]
+	
+	// Calculate the bounding box of the triangle.
+	rcVcopy(tmin, v0);
+	rcVcopy(tmax, v0);
+	rcVmin(tmin, v1);
+	rcVmin(tmin, v2);
+	rcVmax(tmax, v1);
+	rcVmax(tmax, v2);
+	
+	// If the triangle does not touch the bbox of the heightfield, skip the triangle.
+	if (!overlapBounds(bmin, bmax, tmin, tmax))
+		return;
+	
+	// Calculate the footprint of the triangle on the grid (cell range, clamped to the grid).
+	int x0 = (int)((tmin[0] - bmin[0])*ics);
+	int y0 = (int)((tmin[2] - bmin[2])*ics);
+	int x1 = (int)((tmax[0] - bmin[0])*ics);
+	int y1 = (int)((tmax[2] - bmin[2])*ics);
+	x0 = rcClamp(x0, 0, w-1);
+	y0 = rcClamp(y0, 0, h-1);
+	x1 = rcClamp(x1, 0, w-1);
+	y1 = rcClamp(y1, 0, h-1);
+	
+	// Clip the triangle into all grid cells it touches.
+	float in[7*3], out[7*3], inrow[7*3];	// scratch polygons with room for 7 vertices each
+	
+	for (int y = y0; y <= y1; ++y)
+	{
+		// Clip polygon to row. 'in' is reloaded every row because the column loop below overwrites it.
+		rcVcopy(&in[0], v0);
+		rcVcopy(&in[1*3], v1);
+		rcVcopy(&in[2*3], v2);
+		int nvrow = 3;
+		const float cz = bmin[2] + y*cs;
+		nvrow = clipPoly(in, nvrow, out, 0, 1, -cz);	// keep z >= cz
+		if (nvrow < 3) continue;
+		nvrow = clipPoly(out, nvrow, inrow, 0, -1, cz+cs);	// keep z <= cz+cs; row polygon ends up in 'inrow'
+		if (nvrow < 3) continue;
+		
+		for (int x = x0; x <= x1; ++x)
+		{
+			// Clip polygon to column.
+			int nv = nvrow;
+			const float cx = bmin[0] + x*cs;
+			nv = clipPoly(inrow, nv, out, 1, 0, -cx);	// keep x >= cx
+			if (nv < 3) continue;
+			nv = clipPoly(out, nv, in, -1, 0, cx+cs);	// keep x <= cx+cs; cell polygon ends up in 'in'
+			if (nv < 3) continue;
+			
+			// Calculate min and max of the span (y range of the cell polygon).
+			float smin = in[1], smax = in[1];	// start from the y of vertex 0
+			for (int i = 1; i < nv; ++i)
+			{
+				smin = rcMin(smin, in[i*3+1]);
+				smax = rcMax(smax, in[i*3+1]);
+			}
+			smin -= bmin[1];	// make heights relative to the bottom of the bounds
+			smax -= bmin[1];
+			// Skip the span if it is outside the heightfield bbox
+			if (smax < 0.0f) continue;
+			if (smin > by) continue;
+			// Clamp the span to the heightfield bbox.
+			if (smin < 0.0f) smin = 0;
+			if (smax > by) smax = by;
+			
+			// Snap the span to the heightfield height grid; the lower clamp bound ismin+1 keeps the span at least one unit tall.
+			unsigned short ismin = (unsigned short)rcClamp((int)floorf(smin * ich), 0, RC_SPAN_MAX_HEIGHT);
+			unsigned short ismax = (unsigned short)rcClamp((int)ceilf(smax * ich), (int)ismin+1, RC_SPAN_MAX_HEIGHT);
+			
+			addSpan(hf, x, y, ismin, ismax, area, flagMergeThr);
+		}
+	}
+}
+
+/// @par
+///
+/// A triangle that does not overlap the heightfield grid adds no spans.
+///
+/// @see rcHeightfield
+void rcRasterizeTriangle(rcContext* ctx, const float* v0, const float* v1, const float* v2,
+						 const unsigned char area, rcHeightfield& solid,
+						 const int flagMergeThr)
+{
+	rcAssert(ctx);
+
+	ctx->startTimer(RC_TIMER_RASTERIZE_TRIANGLES);
+
+	// rasterizeTri() expects the reciprocals of the cell size and cell height.
+	rasterizeTri(v0, v1, v2, area, solid, solid.bmin, solid.bmax,
+				 solid.cs, 1.0f/solid.cs, 1.0f/solid.ch, flagMergeThr);
+
+	ctx->stopTimer(RC_TIMER_RASTERIZE_TRIANGLES);
+}
+
+/// @par
+///
+/// Triangles that do not overlap the heightfield grid contribute no spans.
+///
+/// @see rcHeightfield
+void rcRasterizeTriangles(rcContext* ctx, const float* verts, const int /*nv*/,
+						  const int* tris, const unsigned char* areas, const int nt,
+						  rcHeightfield& solid, const int flagMergeThr)
+{
+	rcAssert(ctx);
+
+	ctx->startTimer(RC_TIMER_RASTERIZE_TRIANGLES);
+
+	// Reciprocals of the cell dimensions, computed once for the whole batch.
+	const float invCellSize = 1.0f/solid.cs;
+	const float invCellHeight = 1.0f/solid.ch;
+	for (int t = 0; t < nt; ++t)
+	{
+		// Each triangle is three vertex indices; each vertex is three floats.
+		const int* tri = &tris[t*3];
+		rasterizeTri(&verts[tri[0]*3], &verts[tri[1]*3], &verts[tri[2]*3],
+					 areas[t], solid, solid.bmin, solid.bmax,
+					 solid.cs, invCellSize, invCellHeight, flagMergeThr);
+	}
+
+	ctx->stopTimer(RC_TIMER_RASTERIZE_TRIANGLES);
+}
+
+/// @par
+///
+/// Triangles that do not overlap the heightfield grid contribute no spans.
+///
+/// @see rcHeightfield
+void rcRasterizeTriangles(rcContext* ctx, const float* verts, const int /*nv*/,
+						  const unsigned short* tris, const unsigned char* areas, const int nt,
+						  rcHeightfield& solid, const int flagMergeThr)
+{
+	rcAssert(ctx);
+
+	ctx->startTimer(RC_TIMER_RASTERIZE_TRIANGLES);
+
+	// Reciprocals of the cell dimensions, computed once for the whole batch.
+	const float invCellSize = 1.0f/solid.cs;
+	const float invCellHeight = 1.0f/solid.ch;
+	for (int t = 0; t < nt; ++t)
+	{
+		// Each triangle is three 16-bit vertex indices; each vertex is three floats.
+		const unsigned short* tri = &tris[t*3];
+		rasterizeTri(&verts[tri[0]*3], &verts[tri[1]*3], &verts[tri[2]*3],
+					 areas[t], solid, solid.bmin, solid.bmax,
+					 solid.cs, invCellSize, invCellHeight, flagMergeThr);
+	}
+
+	ctx->stopTimer(RC_TIMER_RASTERIZE_TRIANGLES);
+}
+
+/// @par
+///
+/// Triangles that do not overlap the heightfield grid contribute no spans.
+///
+/// @see rcHeightfield
+void rcRasterizeTriangles(rcContext* ctx, const float* verts, const unsigned char* areas, const int nt,
+						  rcHeightfield& solid, const int flagMergeThr)
+{
+	rcAssert(ctx);
+	
+	ctx->startTimer(RC_TIMER_RASTERIZE_TRIANGLES);
+	
+	// Reciprocals of the cell dimensions, computed once for the whole batch.
+	const float invCellSize = 1.0f/solid.cs;
+	const float invCellHeight = 1.0f/solid.ch;
+	for (int t = 0; t < nt; ++t)
+	{
+		// Unindexed input: triangle t occupies nine consecutive floats.
+		const float* tri = &verts[t*9];
+		rasterizeTri(tri, tri+3, tri+6,
+					 areas[t], solid, solid.bmin, solid.bmax,
+					 solid.cs, invCellSize, invCellHeight, flagMergeThr);
+	}
+	
+	ctx->stopTimer(RC_TIMER_RASTERIZE_TRIANGLES);
+}
diff --git a/Engine/lib/recast/Recast/Source/RecastRegion.cpp b/Engine/lib/recast/Recast/Source/RecastRegion.cpp
new file mode 100644
index 000000000..76e631cc5
--- /dev/null
+++ b/Engine/lib/recast/Recast/Source/RecastRegion.cpp
@@ -0,0 +1,1337 @@
+//
+// Copyright (c) 2009-2010 Mikko Mononen memon@inside.org
+//
+// This software is provided 'as-is', without any express or implied
+// warranty.  In no event will the authors be held liable for any damages
+// arising from the use of this software.
+// Permission is granted to anyone to use this software for any purpose,
+// including commercial applications, and to alter it and redistribute it
+// freely, subject to the following restrictions:
+// 1. The origin of this software must not be misrepresented; you must not
+//    claim that you wrote the original software. If you use this software
+//    in a product, an acknowledgment in the product documentation would be
+//    appreciated but is not required.
+// 2. Altered source versions must be plainly marked as such, and must not be
+//    misrepresented as being the original software.
+// 3. This notice may not be removed or altered from any source distribution.
+//
+
+#include <float.h>
+#define _USE_MATH_DEFINES
+#include <math.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include "Recast.h"
+#include "RecastAlloc.h"
+#include "RecastAssert.h"
+#include <new>
+
+
+static void calculateDistanceField(rcCompactHeightfield& chf, unsigned short* src, unsigned short& maxDist)	// Fills src[0..spanCount) with each span's distance to the nearest boundary span; maxDist receives the largest value.
+{
+	const int w = chf.width;
+	const int h = chf.height;
+	
+	// Init every distance to the maximum value, i.e. "not reached yet".
+	for (int i = 0; i < chf.spanCount; ++i)
+		src[i] = 0xffff;
+	
+	// Mark boundary cells: a span gets distance 0 unless all 4 neighbours exist and share its area.
+	for (int y = 0; y < h; ++y)
+	{
+		for (int x = 0; x < w; ++x)
+		{
+			const rcCompactCell& c = chf.cells[x+y*w];
+			for (int i = (int)c.index, ni = (int)(c.index+c.count); i < ni; ++i)
+			{
+				const rcCompactSpan& s = chf.spans[i];
+				const unsigned char area = chf.areas[i];
+				
+				int nc = 0;	// number of connected neighbours with the same area
+				for (int dir = 0; dir < 4; ++dir)
+				{
+					if (rcGetCon(s, dir) != RC_NOT_CONNECTED)
+					{
+						const int ax = x + rcGetDirOffsetX(dir);
+						const int ay = y + rcGetDirOffsetY(dir);
+						const int ai = (int)chf.cells[ax+ay*w].index + rcGetCon(s, dir);
+						if (area == chf.areas[ai])
+							nc++;
+					}
+				}
+				if (nc != 4)
+					src[i] = 0;
+			}
+		}
+	}
+	
+			
+	// Pass 1: forward sweep (increasing y, then x). Pulls distances from the (-1,0), (-1,-1), (0,-1) and (1,-1) neighbours; a straight step costs 2, a diagonal step 3.
+	for (int y = 0; y < h; ++y)
+	{
+		for (int x = 0; x < w; ++x)
+		{
+			const rcCompactCell& c = chf.cells[x+y*w];
+			for (int i = (int)c.index, ni = (int)(c.index+c.count); i < ni; ++i)
+			{
+				const rcCompactSpan& s = chf.spans[i];
+				
+				if (rcGetCon(s, 0) != RC_NOT_CONNECTED)
+				{
+					// (-1,0)
+					const int ax = x + rcGetDirOffsetX(0);
+					const int ay = y + rcGetDirOffsetY(0);
+					const int ai = (int)chf.cells[ax+ay*w].index + rcGetCon(s, 0);
+					const rcCompactSpan& as = chf.spans[ai];
+					if (src[ai]+2 < src[i])
+						src[i] = src[ai]+2;
+					
+					// (-1,-1), reached through the (-1,0) neighbour
+					if (rcGetCon(as, 3) != RC_NOT_CONNECTED)
+					{
+						const int aax = ax + rcGetDirOffsetX(3);
+						const int aay = ay + rcGetDirOffsetY(3);
+						const int aai = (int)chf.cells[aax+aay*w].index + rcGetCon(as, 3);
+						if (src[aai]+3 < src[i])
+							src[i] = src[aai]+3;
+					}
+				}
+				if (rcGetCon(s, 3) != RC_NOT_CONNECTED)
+				{
+					// (0,-1)
+					const int ax = x + rcGetDirOffsetX(3);
+					const int ay = y + rcGetDirOffsetY(3);
+					const int ai = (int)chf.cells[ax+ay*w].index + rcGetCon(s, 3);
+					const rcCompactSpan& as = chf.spans[ai];
+					if (src[ai]+2 < src[i])
+						src[i] = src[ai]+2;
+					
+					// (1,-1), reached through the (0,-1) neighbour
+					if (rcGetCon(as, 2) != RC_NOT_CONNECTED)
+					{
+						const int aax = ax + rcGetDirOffsetX(2);
+						const int aay = ay + rcGetDirOffsetY(2);
+						const int aai = (int)chf.cells[aax+aay*w].index + rcGetCon(as, 2);
+						if (src[aai]+3 < src[i])
+							src[i] = src[aai]+3;
+					}
+				}
+			}
+		}
+	}
+	
+	// Pass 2: backward sweep (decreasing y, then x). Mirror of pass 1 using the (1,0), (1,1), (0,1) and (-1,1) neighbours.
+	for (int y = h-1; y >= 0; --y)
+	{
+		for (int x = w-1; x >= 0; --x)
+		{
+			const rcCompactCell& c = chf.cells[x+y*w];
+			for (int i = (int)c.index, ni = (int)(c.index+c.count); i < ni; ++i)
+			{
+				const rcCompactSpan& s = chf.spans[i];
+				
+				if (rcGetCon(s, 2) != RC_NOT_CONNECTED)
+				{
+					// (1,0)
+					const int ax = x + rcGetDirOffsetX(2);
+					const int ay = y + rcGetDirOffsetY(2);
+					const int ai = (int)chf.cells[ax+ay*w].index + rcGetCon(s, 2);
+					const rcCompactSpan& as = chf.spans[ai];
+					if (src[ai]+2 < src[i])
+						src[i] = src[ai]+2;
+					
+					// (1,1), reached through the (1,0) neighbour
+					if (rcGetCon(as, 1) != RC_NOT_CONNECTED)
+					{
+						const int aax = ax + rcGetDirOffsetX(1);
+						const int aay = ay + rcGetDirOffsetY(1);
+						const int aai = (int)chf.cells[aax+aay*w].index + rcGetCon(as, 1);
+						if (src[aai]+3 < src[i])
+							src[i] = src[aai]+3;
+					}
+				}
+				if (rcGetCon(s, 1) != RC_NOT_CONNECTED)
+				{
+					// (0,1)
+					const int ax = x + rcGetDirOffsetX(1);
+					const int ay = y + rcGetDirOffsetY(1);
+					const int ai = (int)chf.cells[ax+ay*w].index + rcGetCon(s, 1);
+					const rcCompactSpan& as = chf.spans[ai];
+					if (src[ai]+2 < src[i])
+						src[i] = src[ai]+2;
+					
+					// (-1,1), reached through the (0,1) neighbour
+					if (rcGetCon(as, 0) != RC_NOT_CONNECTED)
+					{
+						const int aax = ax + rcGetDirOffsetX(0);
+						const int aay = ay + rcGetDirOffsetY(0);
+						const int aai = (int)chf.cells[aax+aay*w].index + rcGetCon(as, 0);
+						if (src[aai]+3 < src[i])
+							src[i] = src[aai]+3;
+					}
+				}
+			}
+		}
+	}	
+	
+	maxDist = 0;	// report the largest distance found over all spans
+	for (int i = 0; i < chf.spanCount; ++i)
+		maxDist = rcMax(src[i], maxDist);
+	
+}
+
+static unsigned short* boxBlur(rcCompactHeightfield& chf, int thr,
+							   unsigned short* src, unsigned short* dst)
+{
+	// Writes a 3x3 box-filtered copy of src into dst and returns dst.
+	// Values at or below 2*thr are copied through unchanged.
+	const int width = chf.width;
+	const int height = chf.height;
+	const int limit = thr*2;
+
+	for (int y = 0; y < height; ++y)
+	{
+		for (int x = 0; x < width; ++x)
+		{
+			const rcCompactCell& cell = chf.cells[x+y*width];
+			const int end = (int)(cell.index+cell.count);
+			for (int i = (int)cell.index; i < end; ++i)
+			{
+				const unsigned short centre = src[i];
+				if (centre <= limit)
+				{
+					dst[i] = centre;
+					continue;
+				}
+				// Sum of the span and its 8 neighbours; a missing neighbour
+				// contributes the centre value instead.
+				const rcCompactSpan& span = chf.spans[i];
+				int sum = (int)centre;
+				for (int dir = 0; dir < 4; ++dir)
+				{
+					if (rcGetCon(span, dir) == RC_NOT_CONNECTED)
+					{
+						// Neither the side nor its diagonal can be reached.
+						sum += centre*2;
+						continue;
+					}
+					const int sx = x + rcGetDirOffsetX(dir);
+					const int sy = y + rcGetDirOffsetY(dir);
+					const int si = (int)chf.cells[sx+sy*width].index + rcGetCon(span, dir);
+					sum += (int)src[si];
+					// Diagonal neighbour, reached through the side neighbour.
+					const rcCompactSpan& side = chf.spans[si];
+					const int diagDir = (dir+1) & 0x3;
+					int diag = (int)centre;
+					if (rcGetCon(side, diagDir) != RC_NOT_CONNECTED)
+					{
+						const int dx = sx + rcGetDirOffsetX(diagDir);
+						const int dy = sy + rcGetDirOffsetY(diagDir);
+						diag = (int)src[(int)chf.cells[dx+dy*width].index + rcGetCon(side, diagDir)];
+					}
+					sum += diag;
+				}
+				// Mean of the nine samples (integer division with a +5 bias).
+				dst[i] = (unsigned short)((sum+5)/9);
+			}
+		}
+	}
+	return dst;
+}
+
+
+static bool floodRegion(int x, int y, int i,	// Flood-fills region id r starting at span i of cell (x,y); returns true if at least one span kept the id.
+						unsigned short level, unsigned short r,
+						rcCompactHeightfield& chf,
+						unsigned short* srcReg, unsigned short* srcDist,
+						rcIntArray& stack)
+{
+	const int w = chf.width;
+	
+	const unsigned char area = chf.areas[i];	// the fill never crosses into a different area id
+	
+	// Flood fill mark region. The stack holds (x, y, span index) triples.
+	stack.resize(0);
+	stack.push((int)x);
+	stack.push((int)y);
+	stack.push((int)i);
+	srcReg[i] = r;
+	srcDist[i] = 0;
+	
+	unsigned short lev = level >= 2 ? level-2 : 0;	// spans with chf.dist >= lev may be claimed (two below 'level', floored at 0)
+	int count = 0;	// spans that kept region r
+	
+	while (stack.size() > 0)
+	{
+		int ci = stack.pop();	// popped in reverse push order: index, then y, then x
+		int cy = stack.pop();
+		int cx = stack.pop();
+		
+		const rcCompactSpan& cs = chf.spans[ci];
+		
+		// Check if any of the neighbours already have a valid region set.
+		unsigned short ar = 0;	// a foreign region id seen next to this span, 0 if none
+		for (int dir = 0; dir < 4; ++dir)
+		{
+			// 8 connected
+			if (rcGetCon(cs, dir) != RC_NOT_CONNECTED)
+			{
+				const int ax = cx + rcGetDirOffsetX(dir);
+				const int ay = cy + rcGetDirOffsetY(dir);
+				const int ai = (int)chf.cells[ax+ay*w].index + rcGetCon(cs, dir);
+				if (chf.areas[ai] != area)
+					continue;
+				unsigned short nr = srcReg[ai];
+				if (nr & RC_BORDER_REG) // Do not take borders into account.
+					continue;
+				if (nr != 0 && nr != r)
+					ar = nr;
+				
+				const rcCompactSpan& as = chf.spans[ai];
+				
+				const int dir2 = (dir+1) & 0x3;	// next direction, used to reach the diagonal neighbour
+				if (rcGetCon(as, dir2) != RC_NOT_CONNECTED)
+				{
+					const int ax2 = ax + rcGetDirOffsetX(dir2);
+					const int ay2 = ay + rcGetDirOffsetY(dir2);
+					const int ai2 = (int)chf.cells[ax2+ay2*w].index + rcGetCon(as, dir2);
+					if (chf.areas[ai2] != area)
+						continue;
+					unsigned short nr2 = srcReg[ai2];
+					if (nr2 != 0 && nr2 != r)
+						ar = nr2;
+				}				
+			}
+		}
+		if (ar != 0)
+		{
+			srcReg[ci] = 0;	// touches another region: give the span back and do not expand from it
+			continue;
+		}
+		count++;
+		
+		// Expand neighbours: claim unassigned same-area spans whose distance is at least lev.
+		for (int dir = 0; dir < 4; ++dir)
+		{
+			if (rcGetCon(cs, dir) != RC_NOT_CONNECTED)
+			{
+				const int ax = cx + rcGetDirOffsetX(dir);
+				const int ay = cy + rcGetDirOffsetY(dir);
+				const int ai = (int)chf.cells[ax+ay*w].index + rcGetCon(cs, dir);
+				if (chf.areas[ai] != area)
+					continue;
+				if (chf.dist[ai] >= lev && srcReg[ai] == 0)
+				{
+					srcReg[ai] = r;
+					srcDist[ai] = 0;
+					stack.push(ax);
+					stack.push(ay);
+					stack.push(ai);
+				}
+			}
+		}
+	}
+	
+	return count > 0;
+}
+
+static unsigned short* expandRegions(int maxIter, unsigned short level,	// Grows existing regions into unassigned spans with chf.dist >= level; returns the buffer holding the latest region ids.
+									 rcCompactHeightfield& chf,
+									 unsigned short* srcReg, unsigned short* srcDist,
+									 unsigned short* dstReg, unsigned short* dstDist, 
+									 rcIntArray& stack)
+{
+	const int w = chf.width;
+	const int h = chf.height;
+
+	// Find cells revealed by the raised level: unassigned, non-null-area spans at or above 'level'.
+	stack.resize(0);
+	for (int y = 0; y < h; ++y)
+	{
+		for (int x = 0; x < w; ++x)
+		{
+			const rcCompactCell& c = chf.cells[x+y*w];
+			for (int i = (int)c.index, ni = (int)(c.index+c.count); i < ni; ++i)
+			{
+				if (chf.dist[i] >= level && srcReg[i] == 0 && chf.areas[i] != RC_NULL_AREA)
+				{
+					stack.push(x);
+					stack.push(y);
+					stack.push(i);
+				}
+			}
+		}
+	}
+	
+	int iter = 0;
+	while (stack.size() > 0)
+	{
+		int failed = 0;	// entries that did not receive a region during this iteration
+		
+		memcpy(dstReg, srcReg, sizeof(unsigned short)*chf.spanCount);	// results are written to dst while src is read, so one iteration sees a consistent state
+		memcpy(dstDist, srcDist, sizeof(unsigned short)*chf.spanCount);
+		
+		for (int j = 0; j < stack.size(); j += 3)
+		{
+			int x = stack[j+0];
+			int y = stack[j+1];
+			int i = stack[j+2];
+			if (i < 0)	// already assigned in an earlier iteration (marked with -1 below)
+			{
+				failed++;
+				continue;
+			}
+			
+			unsigned short r = srcReg[i];
+			unsigned short d2 = 0xffff;
+			const unsigned char area = chf.areas[i];
+			const rcCompactSpan& s = chf.spans[i];
+			for (int dir = 0; dir < 4; ++dir)	// pick the same-area, non-border neighbour region with the smallest srcDist
+			{
+				if (rcGetCon(s, dir) == RC_NOT_CONNECTED) continue;
+				const int ax = x + rcGetDirOffsetX(dir);
+				const int ay = y + rcGetDirOffsetY(dir);
+				const int ai = (int)chf.cells[ax+ay*w].index + rcGetCon(s, dir);
+				if (chf.areas[ai] != area) continue;
+				if (srcReg[ai] > 0 && (srcReg[ai] & RC_BORDER_REG) == 0)
+				{
+					if ((int)srcDist[ai]+2 < (int)d2)
+					{
+						r = srcReg[ai];
+						d2 = srcDist[ai]+2;
+					}
+				}
+			}
+			if (r)
+			{
+				stack[j+2] = -1; // mark as used
+				dstReg[i] = r;
+				dstDist[i] = d2;
+			}
+			else
+			{
+				failed++;
+			}
+		}
+		
+		// rcSwap source and dest (local pointers only), so srcReg/srcDist always name the newest data.
+		rcSwap(srcReg, dstReg);
+		rcSwap(srcDist, dstDist);
+		
+		if (failed*3 == stack.size())	// every entry was either used earlier or failed now: nothing can change any more
+			break;
+		
+		if (level > 0)	// the iteration cap only applies above level 0
+		{
+			++iter;
+			if (iter >= maxIter)
+				break;
+		}
+	}
+	
+	return srcReg;
+}
+
+
+struct rcRegion
+{
+	int spanCount;				// How many spans carry this region id.
+	unsigned short id;			// Region id; rewritten when the region is removed or merged.
+	unsigned char areaType;		// Area id taken from the region's spans.
+	bool remap;					// Scratch flag used while compacting region ids.
+	bool visited;				// Scratch flag used while summing connected region sizes.
+	rcIntArray connections;		// Neighbour region ids collected along the region's contour.
+	rcIntArray floors;			// Ids of other regions found in the same columns.
+	// Creates an empty, unvisited region that carries id 'i'.
+	inline rcRegion(unsigned short i)
+		: spanCount(0)
+		, id(i)
+		, areaType(0)
+		, remap(false)
+		, visited(false)
+	{}
+};
+
+static void removeAdjacentNeighbours(rcRegion& reg)
+{
+	// Collapse runs of equal ids; the list is cyclic, so last and first also count as adjacent.
+	rcIntArray& con = reg.connections;
+	for (int cur = 0; cur < con.size() && con.size() > 1; )
+	{
+		const int next = (cur+1) % con.size();
+		if (con[cur] != con[next])
+		{
+			++cur;
+			continue;
+		}
+		for (int k = cur+1; k < con.size(); ++k)
+			con[k-1] = con[k];
+		con.pop();
+	}
+}
+
+static void replaceNeighbour(rcRegion& reg, unsigned short oldId, unsigned short newId)
+{
+	// Rewrite references to oldId in the contour neighbour list.
+	int renamed = 0;
+	for (int i = 0, n = reg.connections.size(); i < n; ++i)
+	{
+		if (reg.connections[i] != oldId)
+			continue;
+		reg.connections[i] = newId;
+		++renamed;
+	}
+	// Same for the list of regions sharing a column with this one.
+	for (int i = 0, n = reg.floors.size(); i < n; ++i)
+		if (reg.floors[i] == oldId)
+			reg.floors[i] = newId;
+	// Renaming may have put equal ids next to each other; collapse them.
+	if (renamed > 0)
+		removeAdjacentNeighbours(reg);
+}
+
+static bool canMergeWithRegion(const rcRegion& rega, const rcRegion& regb)
+{
+	// Regions of different area types never merge.
+	if (rega.areaType != regb.areaType)
+		return false;
+
+	// regb may appear at most once on rega's contour ...
+	int shared = 0;
+	for (int i = 0; i < rega.connections.size(); ++i)
+		shared += (rega.connections[i] == regb.id) ? 1 : 0;
+	if (shared > 1)
+		return false;
+
+	// ... and must not be listed among rega's floors.
+	bool listed = false;
+	for (int i = 0; i < rega.floors.size() && !listed; ++i)
+		listed = (rega.floors[i] == regb.id);
+	return !listed;
+}
+
+static void addUniqueFloorRegion(rcRegion& reg, int n)
+{
+	// Append n to reg.floors unless the list already contains it.
+	int i = 0;
+	while (i < reg.floors.size() && reg.floors[i] != n) ++i;
+	if (i == reg.floors.size()) reg.floors.push(n);
+}
+
+static bool mergeRegions(rcRegion& rega, rcRegion& regb)
+{
+	// Merges region B into region A. Returns false, leaving both untouched,
+	// when the two regions do not list each other as contour neighbours.
+
+	const unsigned short aid = rega.id;
+	const unsigned short bid = regb.id;
+	rcIntArray& bcon = regb.connections;
+
+	// Locate B on A's contour ...
+	int insa = 0;
+	while (insa < rega.connections.size() && rega.connections[insa] != bid)
+		++insa;
+	if (insa == rega.connections.size())
+		return false;
+
+	// ... and A on B's contour.
+	int insb = 0;
+	while (insb < bcon.size() && bcon[insb] != aid)
+		++insb;
+	if (insb == bcon.size())
+		return false;
+
+	// Snapshot A's neighbours, since the list is rebuilt in place below.
+	rcIntArray acon;
+	acon.resize(rega.connections.size());
+	for (int i = 0; i < acon.size(); ++i)
+		acon[i] = rega.connections[i];
+
+	// New contour: A's entries after B (cyclically, skipping B itself),
+	// followed by B's entries after A (cyclically, skipping A itself).
+	rega.connections.resize(0);
+	const int na = acon.size();
+	for (int i = 1; i < na; ++i)
+	{
+		rega.connections.push(acon[(insa+i) % na]);
+	}
+
+	const int nb = bcon.size();
+	for (int i = 1; i < nb; ++i)
+	{
+		rega.connections.push(bcon[(insb+i) % nb]);
+	}
+
+	// The splice can put equal ids next to each other.
+	removeAdjacentNeighbours(rega);
+
+	// A inherits B's floor list and span count; B is emptied.
+	for (int j = 0; j < regb.floors.size(); ++j)
+		addUniqueFloorRegion(rega, regb.floors[j]);
+	rega.spanCount += regb.spanCount;
+	regb.spanCount = 0;
+	regb.connections.resize(0);
+
+	return true;
+}
+
+static bool isRegionConnectedToBorder(const rcRegion& reg)
+{
+	// The region counts as connected to the border when its contour
+	// lists the null id (0) as one of the neighbours.
+	int i = 0;
+	const int n = reg.connections.size();
+	while (i < n && reg.connections[i] != 0)
+		++i;
+	// Stopping before the end means a zero entry was found.
+	return i < n;
+}
+
+static bool isSolidEdge(rcCompactHeightfield& chf, unsigned short* srcReg,
+						int x, int y, int i, int dir)
+{
+	// Region on the far side of the edge; stays 0 when there is no neighbour.
+	unsigned short neiReg = 0;
+	const rcCompactSpan& span = chf.spans[i];
+	const int con = rcGetCon(span, dir);
+	if (con != RC_NOT_CONNECTED)
+	{
+		const int nx = x + rcGetDirOffsetX(dir);
+		const int ny = y + rcGetDirOffsetY(dir);
+		neiReg = srcReg[(int)chf.cells[nx+ny*chf.width].index + con];
+	}
+	// The edge is solid exactly when the two sides carry different regions.
+	return neiReg != srcReg[i];
+}
+
+static void walkContour(int x, int y, int i, int dir,	// Walks the outline of the region containing span i, starting on edge 'dir', appending each neighbouring region id to 'cont'.
+						rcCompactHeightfield& chf,
+						unsigned short* srcReg,
+						rcIntArray& cont)
+{
+	int startDir = dir;	// the walk ends when it is back at this span and direction
+	int starti = i;
+
+	const rcCompactSpan& ss = chf.spans[i];
+	unsigned short curReg = 0;	// region across the current edge; 0 when there is no neighbour
+	if (rcGetCon(ss, dir) != RC_NOT_CONNECTED)
+	{
+		const int ax = x + rcGetDirOffsetX(dir);
+		const int ay = y + rcGetDirOffsetY(dir);
+		const int ai = (int)chf.cells[ax+ay*chf.width].index + rcGetCon(ss, dir);
+		curReg = srcReg[ai];
+	}
+	cont.push(curReg);
+			
+	int iter = 0;
+	while (++iter < 40000)	// hard cap on the number of steps so the walk always terminates
+	{
+		const rcCompactSpan& s = chf.spans[i];
+		
+		if (isSolidEdge(chf, srcReg, x, y, i, dir))
+		{
+			// Choose the edge corner: look up the region on the other side of this edge.
+			unsigned short r = 0;
+			if (rcGetCon(s, dir) != RC_NOT_CONNECTED)
+			{
+				const int ax = x + rcGetDirOffsetX(dir);
+				const int ay = y + rcGetDirOffsetY(dir);
+				const int ai = (int)chf.cells[ax+ay*chf.width].index + rcGetCon(s, dir);
+				r = srcReg[ai];
+			}
+			if (r != curReg)	// record a neighbour only when it changes
+			{
+				curReg = r;
+				cont.push(curReg);
+			}
+			
+			dir = (dir+1) & 0x3;  // Rotate CW
+		}
+		else
+		{
+			int ni = -1;	// span index of the neighbour to step into
+			const int nx = x + rcGetDirOffsetX(dir);
+			const int ny = y + rcGetDirOffsetY(dir);
+			if (rcGetCon(s, dir) != RC_NOT_CONNECTED)
+			{
+				const rcCompactCell& nc = chf.cells[nx+ny*chf.width];
+				ni = (int)nc.index + rcGetCon(s, dir);
+			}
+			if (ni == -1)
+			{
+				// Should not happen. Note that this return skips the duplicate removal below.
+				return;
+			}
+			x = nx;
+			y = ny;
+			i = ni;
+			dir = (dir+3) & 0x3;	// Rotate CCW
+		}
+		
+		if (starti == i && startDir == dir)
+		{
+			break;
+		}
+	}
+
+	// Remove adjacent duplicates; the list is treated as cyclic, so last and first are compared too.
+	if (cont.size() > 1)
+	{
+		for (int j = 0; j < cont.size(); )
+		{
+			int nj = (j+1) % cont.size();
+			if (cont[j] == cont[nj])
+			{
+				for (int k = j; k < cont.size()-1; ++k)
+					cont[k] = cont[k+1];
+				cont.pop();
+			}
+			else
+				++j;
+		}
+	}
+}
+
+static bool filterSmallRegions(rcContext* ctx, int minRegionArea, int mergeRegionSize,	// Removes too-small region groups, merges small regions into neighbours and renumbers the ids in srcReg.
+							   unsigned short& maxRegionId,	// in: highest id in use; out: highest id after compaction
+							   rcCompactHeightfield& chf,
+							   unsigned short* srcReg)	// per-span region ids, rewritten in place; returns false only when the allocation fails
+{
+	const int w = chf.width;
+	const int h = chf.height;
+	
+	const int nreg = maxRegionId+1;
+	rcRegion* regions = (rcRegion*)rcAlloc(sizeof(rcRegion)*nreg, RC_ALLOC_TEMP);
+	if (!regions)
+	{
+		ctx->log(RC_LOG_ERROR, "filterSmallRegions: Out of memory 'regions' (%d).", nreg);
+		return false;
+	}
+
+	// Construct regions in the raw buffer (placement new); they are destroyed by hand before rcFree below.
+	for (int i = 0; i < nreg; ++i)
+		new(&regions[i]) rcRegion((unsigned short)i);
+	
+	// Find edge of a region and find connections around the contour.
+	for (int y = 0; y < h; ++y)
+	{
+		for (int x = 0; x < w; ++x)
+		{
+			const rcCompactCell& c = chf.cells[x+y*w];
+			for (int i = (int)c.index, ni = (int)(c.index+c.count); i < ni; ++i)
+			{
+				unsigned short r = srcReg[i];
+				if (r == 0 || r >= nreg)	// skip unassigned spans and ids outside the table
+					continue;
+				
+				rcRegion& reg = regions[r];
+				reg.spanCount++;
+				
+				
+				// Update floors: record every other region present in this same cell.
+				for (int j = (int)c.index; j < ni; ++j)
+				{
+					if (i == j) continue;
+					unsigned short floorId = srcReg[j];
+					if (floorId == 0 || floorId >= nreg)
+						continue;
+					addUniqueFloorRegion(reg, floorId);
+				}
+				
+				// Have found contour already for this region; it is walked only once.
+				if (reg.connections.size() > 0)
+					continue;
+				
+				reg.areaType = chf.areas[i];
+				
+				// Check if this cell is next to a border.
+				int ndir = -1;	// first direction with a solid edge, -1 if none
+				for (int dir = 0; dir < 4; ++dir)
+				{
+					if (isSolidEdge(chf, srcReg, x, y, i, dir))
+					{
+						ndir = dir;
+						break;
+					}
+				}
+				
+				if (ndir != -1)
+				{
+					// The cell is at border.
+					// Walk around the contour to find all the neighbours.
+					walkContour(x, y, i, ndir, chf, srcReg, reg.connections);
+				}
+			}
+		}
+	}
+
+	// Remove too small regions.
+	rcIntArray stack(32);
+	rcIntArray trace(32);	// every region visited from the current seed
+	for (int i = 0; i < nreg; ++i)
+	{
+		rcRegion& reg = regions[i];
+		if (reg.id == 0 || (reg.id & RC_BORDER_REG))
+			continue;                       
+		if (reg.spanCount == 0)
+			continue;
+		if (reg.visited)
+			continue;
+		
+		// Count the total size of all the connected regions.
+		// Also keep track of the regions connects to a tile border.
+		bool connectsToBorder = false;
+		int spanCount = 0;
+		stack.resize(0);
+		trace.resize(0);
+
+		reg.visited = true;
+		stack.push(i);
+		
+		while (stack.size())
+		{
+			// Pop
+			int ri = stack.pop();
+			
+			rcRegion& creg = regions[ri];
+
+			spanCount += creg.spanCount;
+			trace.push(ri);
+
+			for (int j = 0; j < creg.connections.size(); ++j)
+			{
+				if (creg.connections[j] & RC_BORDER_REG)
+				{
+					connectsToBorder = true;
+					continue;
+				}
+				rcRegion& neireg = regions[creg.connections[j]];
+				if (neireg.visited)
+					continue;
+				if (neireg.id == 0 || (neireg.id & RC_BORDER_REG))
+					continue;
+				// Visit
+				stack.push(neireg.id);
+				neireg.visited = true;
+			}
+		}
+		
+		// If the accumulated regions size is too small, remove it.
+		// Do not remove areas which connect to tile borders
+		// as their size cannot be estimated correctly and removing them
+		// can potentially remove necessary areas.
+		if (spanCount < minRegionArea && !connectsToBorder)
+		{
+			// Kill all visited regions.
+			for (int j = 0; j < trace.size(); ++j)
+			{
+				regions[trace[j]].spanCount = 0;
+				regions[trace[j]].id = 0;
+			}
+		}
+	}
+		
+	// Merge too small regions to neighbour regions. Repeats until a full pass performs no merge.
+	int mergeCount = 0 ;
+	do
+	{
+		mergeCount = 0;
+		for (int i = 0; i < nreg; ++i)
+		{
+			rcRegion& reg = regions[i];
+			if (reg.id == 0 || (reg.id & RC_BORDER_REG))
+				continue;                       
+			if (reg.spanCount == 0)
+				continue;
+			
+			// Check to see if the region should be merged: a region above mergeRegionSize that has a null-region neighbour is left alone.
+			if (reg.spanCount > mergeRegionSize && isRegionConnectedToBorder(reg))
+				continue;
+			
+			// Small region with more than 1 connection.
+			// Or region which is not connected to a border at all.
+			// Find smallest neighbour region that connects to this one.
+			int smallest = 0xfffffff;
+			unsigned short mergeId = reg.id;	// stays equal to reg.id when no suitable neighbour is found
+			for (int j = 0; j < reg.connections.size(); ++j)
+			{
+				if (reg.connections[j] & RC_BORDER_REG) continue;
+				rcRegion& mreg = regions[reg.connections[j]];
+				if (mreg.id == 0 || (mreg.id & RC_BORDER_REG)) continue;
+				if (mreg.spanCount < smallest &&
+					canMergeWithRegion(reg, mreg) &&
+					canMergeWithRegion(mreg, reg))
+				{
+					smallest = mreg.spanCount;
+					mergeId = mreg.id;
+				}
+			}
+			// Found new id.
+			if (mergeId != reg.id)
+			{
+				unsigned short oldId = reg.id;
+				rcRegion& target = regions[mergeId];
+				
+				// Merge neighbours.
+				if (mergeRegions(target, reg))
+				{
+					// Fixup regions pointing to current region.
+					for (int j = 0; j < nreg; ++j)
+					{
+						if (regions[j].id == 0 || (regions[j].id & RC_BORDER_REG)) continue;
+						// If another region was already merged into current region
+						// change the nid of the previous region too.
+						if (regions[j].id == oldId)
+							regions[j].id = mergeId;
+						// Replace the current region with the new one if the
+						// current regions is neighbour.
+						replaceNeighbour(regions[j], oldId, mergeId);
+					}
+					mergeCount++;
+				}
+			}
+		}
+	}
+	while (mergeCount > 0);
+	
+	// Compress region Ids: first flag every surviving, non-border region for renumbering.
+	for (int i = 0; i < nreg; ++i)
+	{
+		regions[i].remap = false;
+		if (regions[i].id == 0) continue;       // Skip nil regions.
+		if (regions[i].id & RC_BORDER_REG) continue;    // Skip external regions.
+		regions[i].remap = true;
+	}
+	
+	unsigned short regIdGen = 0;	// last id handed out; new ids run 1, 2, 3, ...
+	for (int i = 0; i < nreg; ++i)
+	{
+		if (!regions[i].remap)
+			continue;
+		unsigned short oldId = regions[i].id;
+		unsigned short newId = ++regIdGen;
+		for (int j = i; j < nreg; ++j)	// every entry that shares oldId gets the same new id
+		{
+			if (regions[j].id == oldId)
+			{
+				regions[j].id = newId;
+				regions[j].remap = false;
+			}
+		}
+	}
+	maxRegionId = regIdGen;
+	
+	// Remap regions: translate each non-border span id through the table.
+	for (int i = 0; i < chf.spanCount; ++i)
+	{
+		if ((srcReg[i] & RC_BORDER_REG) == 0)
+			srcReg[i] = regions[srcReg[i]].id;
+	}
+	
+	for (int i = 0; i < nreg; ++i)	// run the destructors by hand, matching the placement new above
+		regions[i].~rcRegion();
+	rcFree(regions);
+	
+	return true;
+}
+
+/// @par
+/// 
+/// This is usually the second to the last step in creating a fully built
+/// compact heightfield.  This step is required before regions are built
+/// using #rcBuildRegions or #rcBuildRegionsMonotone.
+/// 
+/// After this step, the distance data is available via the rcCompactHeightfield::maxDistance
+/// and rcCompactHeightfield::dist fields. Returns false, with dist left null, when a
+/// working buffer cannot be allocated.
+/// @see rcCompactHeightfield, rcBuildRegions, rcBuildRegionsMonotone
+bool rcBuildDistanceField(rcContext* ctx, rcCompactHeightfield& chf)
+{
+	rcAssert(ctx);
+	
+	ctx->startTimer(RC_TIMER_BUILD_DISTANCEFIELD);
+	
+	// Drop any previous distance data; dist stays null if this call fails.
+	if (chf.dist)
+	{
+		rcFree(chf.dist);
+		chf.dist = 0;
+	}
+	
+	unsigned short* src = (unsigned short*)rcAlloc(sizeof(unsigned short)*chf.spanCount, RC_ALLOC_TEMP);
+	if (!src)
+	{
+		ctx->log(RC_LOG_ERROR, "rcBuildDistanceField: Out of memory 'src' (%d).", chf.spanCount);
+		ctx->stopTimer(RC_TIMER_BUILD_DISTANCEFIELD);	// keep start/stop balanced on the error path
+		return false;
+	}
+	unsigned short* dst = (unsigned short*)rcAlloc(sizeof(unsigned short)*chf.spanCount, RC_ALLOC_TEMP);
+	if (!dst)
+	{
+		ctx->log(RC_LOG_ERROR, "rcBuildDistanceField: Out of memory 'dst' (%d).", chf.spanCount);
+		rcFree(src);
+		ctx->stopTimer(RC_TIMER_BUILD_DISTANCEFIELD);	// keep start/stop balanced on the error path
+		return false;
+	}
+	
+	unsigned short maxDist = 0;
+
+	ctx->startTimer(RC_TIMER_BUILD_DISTANCEFIELD_DIST);
+	calculateDistanceField(chf, src, maxDist);
+	chf.maxDistance = maxDist;	// taken from the unblurred field
+	ctx->stopTimer(RC_TIMER_BUILD_DISTANCEFIELD_DIST);
+	
+	ctx->startTimer(RC_TIMER_BUILD_DISTANCEFIELD_BLUR);
+	// Blur. When the result is not in 'src', swap so that 'src' names the blurred data.
+	if (boxBlur(chf, 1, src, dst) != src)
+		rcSwap(src, dst);
+
+	// Store distance; the heightfield now owns this buffer.
+	chf.dist = src;
+	ctx->stopTimer(RC_TIMER_BUILD_DISTANCEFIELD_BLUR);
+
+	ctx->stopTimer(RC_TIMER_BUILD_DISTANCEFIELD);
+	
+	// Release the buffer that was not handed to the heightfield.
+	rcFree(dst);
+	
+	return true;
+}
+
+static void paintRectRegion(int minx, int maxx, int miny, int maxy, unsigned short regId,
+							rcCompactHeightfield& chf, unsigned short* srcReg)
+{
+	// Assigns regId to every non-null-area span in the cells [minx,maxx) x [miny,maxy).
+	for (int y = miny; y < maxy; ++y)
+	{
+		for (int x = minx; x < maxx; ++x)
+		{
+			const rcCompactCell& cell = chf.cells[x + y*chf.width];
+			int s = (int)cell.index;
+			const int end = (int)(cell.index+cell.count);
+			for (; s < end; ++s)
+				if (chf.areas[s] != RC_NULL_AREA)
+					srcReg[s] = regId;
+		}
+	}
+}
+
+
+static const unsigned short RC_NULL_NEI = 0xffff;	// NOTE(review): presumably the rcSweepSpan::nei value for "no usable neighbour"; its users are outside this excerpt - confirm.
+
+struct rcSweepSpan	// Scratch record for rcBuildRegionsMonotone, which allocates max(width, height) of these.
+{
+	unsigned short rid;	// row id
+	unsigned short id;	// region id
+	unsigned short ns;	// number samples
+	unsigned short nei;	// neighbour id
+};
+
+/// @par
+/// 
+/// Non-null regions will consist of connected, non-overlapping walkable spans that form a single contour.
+/// Contours will form simple polygons.
+/// 
+/// If multiple regions form an area that is smaller than @p minRegionArea, then all spans will be
+/// re-assigned to the zero (null) region.
+/// 
+/// Partitioning can result in smaller than necessary regions. @p mergeRegionArea helps 
+/// reduce unnecessarily small regions.
+/// 
+/// See the #rcConfig documentation for more information on the configuration parameters.
+/// 
+/// The region data will be available via the rcCompactHeightfield::maxRegions
+/// and rcCompactSpan::reg fields.
+/// 
+/// @warning The distance field must be created using #rcBuildDistanceField before attempting to build regions.
+/// 
+/// @see rcCompactHeightfield, rcCompactSpan, rcBuildDistanceField, rcBuildRegions, rcConfig
+bool rcBuildRegionsMonotone(rcContext* ctx, rcCompactHeightfield& chf,
+							const int borderSize, const int minRegionArea, const int mergeRegionArea)
+{
+	rcAssert(ctx);
+	
+	ctx->startTimer(RC_TIMER_BUILD_REGIONS);
+	
+	const int w = chf.width;
+	const int h = chf.height;
+	unsigned short id = 1;	// next unused region id; 0 is the null region
+	// Region id per span (one entry per span); copied into chf.spans[].reg at the end.
+	rcScopedDelete<unsigned short> srcReg = (unsigned short*)rcAlloc(sizeof(unsigned short)*chf.spanCount, RC_ALLOC_TEMP);
+	if (!srcReg)
+	{
+		ctx->log(RC_LOG_ERROR, "rcBuildRegionsMonotone: Out of memory 'src' (%d).", chf.spanCount);
+		return false;	// NOTE(review): returns with RC_TIMER_BUILD_REGIONS still started.
+	}
+	memset(srcReg,0,sizeof(unsigned short)*chf.spanCount);
+	// Per-row sweep table, indexed by row-local id. NOTE(review): sized max(w,h); rid is never checked against it.
+	const int nsweeps = rcMax(chf.width,chf.height);
+	rcScopedDelete<rcSweepSpan> sweeps = (rcSweepSpan*)rcAlloc(sizeof(rcSweepSpan)*nsweeps, RC_ALLOC_TEMP);
+	if (!sweeps)
+	{
+		ctx->log(RC_LOG_ERROR, "rcBuildRegionsMonotone: Out of memory 'sweeps' (%d).", nsweeps);
+		return false;
+	}
+	
+	
+	// Mark border regions: four rectangles, each with its own id tagged with RC_BORDER_REG.
+	if (borderSize > 0)
+	{
+		// Make sure border will not overflow.
+		const int bw = rcMin(w, borderSize);
+		const int bh = rcMin(h, borderSize);
+		// Paint regions
+		paintRectRegion(0, bw, 0, h, id|RC_BORDER_REG, chf, srcReg); id++;
+		paintRectRegion(w-bw, w, 0, h, id|RC_BORDER_REG, chf, srcReg); id++;
+		paintRectRegion(0, w, 0, bh, id|RC_BORDER_REG, chf, srcReg); id++;
+		paintRectRegion(0, w, h-bh, h, id|RC_BORDER_REG, chf, srcReg); id++;
+		
+		chf.borderSize = borderSize;
+	}
+	// prev[r]: for the row being swept, number of spans counted against region r of the previous row.
+	rcIntArray prev(256);
+
+	// Sweep one line at a time.
+	for (int y = borderSize; y < h-borderSize; ++y)
+	{
+		// Collect spans from this row.
+		prev.resize(id+1);
+		memset(&prev[0],0,sizeof(int)*id);
+		unsigned short rid = 1;	// next row-local sweep id; 0 means "no sweep"
+		
+		for (int x = borderSize; x < w-borderSize; ++x)
+		{
+			const rcCompactCell& c = chf.cells[x+y*w];
+			
+			for (int i = (int)c.index, ni = (int)(c.index+c.count); i < ni; ++i)
+			{
+				const rcCompactSpan& s = chf.spans[i];
+				if (chf.areas[i] == RC_NULL_AREA) continue;
+				
+				// -x: reuse the id stored for the direction-0 neighbour if it is non-border and has the same area.
+				unsigned short previd = 0;
+				if (rcGetCon(s, 0) != RC_NOT_CONNECTED)
+				{
+					const int ax = x + rcGetDirOffsetX(0);
+					const int ay = y + rcGetDirOffsetY(0);
+					const int ai = (int)chf.cells[ax+ay*w].index + rcGetCon(s, 0);
+					if ((srcReg[ai] & RC_BORDER_REG) == 0 && chf.areas[i] == chf.areas[ai])
+						previd = srcReg[ai];	// still a row-local id: this row is remapped only after the x loop
+				}
+				// Nothing to continue: open a new sweep.
+				if (!previd)
+				{
+					previd = rid++;
+					sweeps[previd].rid = previd;
+					sweeps[previd].ns = 0;
+					sweeps[previd].nei = 0;
+				}
+
+				// -y: look at the direction-3 neighbour and record which region this sweep touches.
+				if (rcGetCon(s,3) != RC_NOT_CONNECTED)
+				{
+					const int ax = x + rcGetDirOffsetX(3);
+					const int ay = y + rcGetDirOffsetY(3);
+					const int ai = (int)chf.cells[ax+ay*w].index + rcGetCon(s, 3);
+					if (srcReg[ai] && (srcReg[ai] & RC_BORDER_REG) == 0 && chf.areas[i] == chf.areas[ai])
+					{
+						unsigned short nr = srcReg[ai];
+						if (!sweeps[previd].nei || sweeps[previd].nei == nr)
+						{
+							sweeps[previd].nei = nr;
+							sweeps[previd].ns++;
+							prev[nr]++;
+						}
+						else
+						{
+							sweeps[previd].nei = RC_NULL_NEI;	// a second, different region: this sweep must not merge
+						}
+					}
+				}
+
+				srcReg[i] = previd;
+			}
+		}
+		
+		// Create unique ID: a sweep takes over its neighbour's region id only when prev[nei] equals its own count.
+		for (int i = 1; i < rid; ++i)
+		{
+			if (sweeps[i].nei != RC_NULL_NEI && sweeps[i].nei != 0 &&
+				prev[sweeps[i].nei] == (int)sweeps[i].ns)
+			{
+				sweeps[i].id = sweeps[i].nei;
+			}
+			else
+			{
+				sweeps[i].id = id++;
+			}
+		}
+		
+		// Remap IDs: replace the row-local sweep ids written above with the ids chosen in the previous loop.
+		for (int x = borderSize; x < w-borderSize; ++x)
+		{
+			const rcCompactCell& c = chf.cells[x+y*w];
+			
+			for (int i = (int)c.index, ni = (int)(c.index+c.count); i < ni; ++i)
+			{
+				if (srcReg[i] > 0 && srcReg[i] < rid)
+					srcReg[i] = sweeps[srcReg[i]].id;
+			}
+		}
+	}
+
+	ctx->startTimer(RC_TIMER_BUILD_REGIONS_FILTER);
+
+	// Filter out small regions.
+	chf.maxRegions = id;
+	if (!filterSmallRegions(ctx, minRegionArea, mergeRegionArea, chf.maxRegions, chf, srcReg))
+		return false;	// NOTE(review): both the filter timer and the outer timer are left started here.
+
+	ctx->stopTimer(RC_TIMER_BUILD_REGIONS_FILTER);
+	
+	// Store the result out.
+	for (int i = 0; i < chf.spanCount; ++i)
+		chf.spans[i].reg = srcReg[i];
+	
+	ctx->stopTimer(RC_TIMER_BUILD_REGIONS);
+
+	return true;
+}
+
+/// @par
+/// 
+/// Non-null regions will consist of connected, non-overlapping walkable spans that form a single contour.
+/// Contours will form simple polygons.
+/// 
+/// If multiple regions form an area that is smaller than @p minRegionArea, then all spans will be
+/// re-assigned to the zero (null) region.
+/// 
+/// Watershed partitioning can result in smaller than necessary regions, especially in diagonal corridors. 
+/// @p mergeRegionArea helps reduce unnecessarily small regions.
+/// 
+/// See the #rcConfig documentation for more information on the configuration parameters.
+/// 
+/// The region data will be available via the rcCompactHeightfield::maxRegions
+/// and rcCompactSpan::reg fields.
+/// 
+/// @warning The distance field must be created using #rcBuildDistanceField before attempting to build regions.
+/// 
+/// @see rcCompactHeightfield, rcCompactSpan, rcBuildDistanceField, rcBuildRegionsMonotone, rcConfig
+bool rcBuildRegions(rcContext* ctx, rcCompactHeightfield& chf,
+					const int borderSize, const int minRegionArea, const int mergeRegionArea)
+{
+	rcAssert(ctx);
+	
+	ctx->startTimer(RC_TIMER_BUILD_REGIONS);
+	
+	const int w = chf.width;
+	const int h = chf.height;
+	// One allocation of 4*spanCount values; it is split into four spanCount-sized arrays below.
+	rcScopedDelete<unsigned short> buf = (unsigned short*)rcAlloc(sizeof(unsigned short)*chf.spanCount*4, RC_ALLOC_TEMP);
+	if (!buf)
+	{
+		ctx->log(RC_LOG_ERROR, "rcBuildRegions: Out of memory 'tmp' (%d).", chf.spanCount*4);
+		return false;	// NOTE(review): returns with RC_TIMER_BUILD_REGIONS still started.
+	}
+	
+	ctx->startTimer(RC_TIMER_BUILD_REGIONS_WATERSHED);
+	
+	rcIntArray stack(1024);
+	rcIntArray visited(1024);	// NOTE(review): not referenced again anywhere in this function.
+	// Source/destination pairs for region ids and distances, carved out of 'buf'.
+	unsigned short* srcReg = buf;
+	unsigned short* srcDist = buf+chf.spanCount;
+	unsigned short* dstReg = buf+chf.spanCount*2;
+	unsigned short* dstDist = buf+chf.spanCount*3;
+	
+	memset(srcReg, 0, sizeof(unsigned short)*chf.spanCount);
+	memset(srcDist, 0, sizeof(unsigned short)*chf.spanCount);
+	// Only the src arrays are cleared here; the dst arrays are not initialised by this function.
+	unsigned short regionId = 1;
+	unsigned short level = (chf.maxDistance+1) & ~1;	// maxDistance rounded up to an even number
+
+	// TODO: Figure better formula, expandIters defines how much the 
+	// watershed "overflows" and simplifies the regions. Tying it to
+	// agent radius was usually good indication how greedy it could be.
+//	const int expandIters = 4 + walkableRadius * 2;
+	const int expandIters = 8;
+
+	if (borderSize > 0)
+	{
+		// Make sure border will not overflow.
+		const int bw = rcMin(w, borderSize);
+		const int bh = rcMin(h, borderSize);
+		// Paint regions
+		paintRectRegion(0, bw, 0, h, regionId|RC_BORDER_REG, chf, srcReg); regionId++;
+		paintRectRegion(w-bw, w, 0, h, regionId|RC_BORDER_REG, chf, srcReg); regionId++;
+		paintRectRegion(0, w, 0, bh, regionId|RC_BORDER_REG, chf, srcReg); regionId++;
+		paintRectRegion(0, w, h-bh, h, regionId|RC_BORDER_REG, chf, srcReg); regionId++;
+
+		chf.borderSize = borderSize;
+	}
+	// Step the level down by two per pass (clamped at zero); each pass expands, then floods new regions.
+	while (level > 0)
+	{
+		level = level >= 2 ? level-2 : 0;
+		
+		ctx->startTimer(RC_TIMER_BUILD_REGIONS_EXPAND);
+		// If expandRegions returns a pointer other than srcReg, swap both pairs so src* name the other buffers.
+		// Expand current regions until no empty connected cells found.
+		if (expandRegions(expandIters, level, chf, srcReg, srcDist, dstReg, dstDist, stack) != srcReg)
+		{
+			rcSwap(srcReg, dstReg);
+			rcSwap(srcDist, dstDist);
+		}
+		
+		ctx->stopTimer(RC_TIMER_BUILD_REGIONS_EXPAND);
+		
+		ctx->startTimer(RC_TIMER_BUILD_REGIONS_FLOOD);
+		
+		// Mark new regions with IDs.
+		for (int y = 0; y < h; ++y)
+		{
+			for (int x = 0; x < w; ++x)
+			{
+				const rcCompactCell& c = chf.cells[x+y*w];
+				for (int i = (int)c.index, ni = (int)(c.index+c.count); i < ni; ++i)
+				{
+					if (chf.dist[i] < level || srcReg[i] != 0 || chf.areas[i] == RC_NULL_AREA)
+						continue;	// below this level, already assigned a region, or null area
+					if (floodRegion(x, y, i, level, regionId, chf, srcReg, srcDist, stack))
+						regionId++;	// the id is consumed only when floodRegion reports success
+				}
+			}
+		}
+		
+		ctx->stopTimer(RC_TIMER_BUILD_REGIONS_FLOOD);
+	}
+	
+	// Expand current regions until no empty connected cells found (final pass: level 0, expandIters*8).
+	if (expandRegions(expandIters*8, 0, chf, srcReg, srcDist, dstReg, dstDist, stack) != srcReg)
+	{
+		rcSwap(srcReg, dstReg);
+		rcSwap(srcDist, dstDist);
+	}
+	
+	ctx->stopTimer(RC_TIMER_BUILD_REGIONS_WATERSHED);
+	
+	ctx->startTimer(RC_TIMER_BUILD_REGIONS_FILTER);
+	
+	// Filter out small regions.
+	chf.maxRegions = regionId;
+	if (!filterSmallRegions(ctx, minRegionArea, mergeRegionArea, chf.maxRegions, chf, srcReg))
+		return false;	// NOTE(review): both the filter timer and the outer timer are left started here.
+	
+	ctx->stopTimer(RC_TIMER_BUILD_REGIONS_FILTER);
+		
+	// Write the result out.
+	for (int i = 0; i < chf.spanCount; ++i)
+		chf.spans[i].reg = srcReg[i];
+	
+	ctx->stopTimer(RC_TIMER_BUILD_REGIONS);
+	
+	return true;
+}
+
+
diff --git a/Engine/lib/recast/TODO.txt b/Engine/lib/recast/TODO.txt
new file mode 100644
index 000000000..b911c0e47
--- /dev/null
+++ b/Engine/lib/recast/TODO.txt
@@ -0,0 +1,20 @@
+TODO/Roadmap
+
+Summer/Autumn 2009
+
+- Off mesh links (jump links)
+- Area annotations
+- Embed extra data per polygon
+- Height conforming navmesh
+
+
+Autumn/Winter 2009/2010
+
+- Detour path following
+- More dynamic example with tile navmesh
+- Faster small tile process
+
+
+More info at http://digestingduck.blogspot.com/2009/07/recast-and-detour-roadmap.html
+
+-
diff --git a/Engine/source/T3D/camera.cpp b/Engine/source/T3D/camera.cpp
index 0ba19ea29..f7e5322d7 100644
--- a/Engine/source/T3D/camera.cpp
+++ b/Engine/source/T3D/camera.cpp
@@ -903,7 +903,7 @@ void Camera::_setPosition(const Point3F& pos, const Point3F& rot)
    
    MatrixF temp;
 
-   if(mDataBlock->cameraCanBank)
+   if(mDataBlock && mDataBlock->cameraCanBank)
    {
       // Take rot.y into account to bank the camera
       MatrixF imat;
@@ -932,7 +932,7 @@ void Camera::setRotation(const Point3F& rot)
 
    MatrixF temp;
 
-   if(mDataBlock->cameraCanBank)
+   if(mDataBlock && mDataBlock->cameraCanBank)
    {
       // Take rot.y into account to bank the camera
       MatrixF imat;
@@ -993,7 +993,7 @@ void Camera::writePacketData(GameConnection *connection, BitStream *bstream)
    bstream->setCompressionPoint(pos);
    mathWrite(*bstream, pos);
    bstream->write(mRot.x);
-   if(bstream->writeFlag(mDataBlock->cameraCanBank))
+   if(bstream->writeFlag(mDataBlock && mDataBlock->cameraCanBank)) // always write the flag bit so the packet layout does not depend on mDataBlock
    {
       // Include mRot.y to allow for camera banking
       bstream->write(mRot.y);
diff --git a/Engine/source/T3D/cameraSpline.cpp b/Engine/source/T3D/cameraSpline.cpp
index ba4bec72e..94d0055f5 100644
--- a/Engine/source/T3D/cameraSpline.cpp
+++ b/Engine/source/T3D/cameraSpline.cpp
@@ -190,7 +190,8 @@ void CameraSpline::renderTimeMap()
    // Build vertex buffer
    GFXVertexBufferHandle<GFXVertexPC> vb;
    vb.set(GFX, mTimeMap.size(), GFXBufferTypeVolatile);
-   vb.lock();
+   void *ptr = vb.lock();
+   if(!ptr) return;
 
    MRandomLCG random(1376312589 * (U32)this);
    int index = 0;
diff --git a/Engine/source/T3D/convexShape.cpp b/Engine/source/T3D/convexShape.cpp
index 8f9cecaae..5719100ad 100644
--- a/Engine/source/T3D/convexShape.cpp
+++ b/Engine/source/T3D/convexShape.cpp
@@ -653,6 +653,29 @@ bool ConvexShape::buildPolyList( PolyListContext context, AbstractPolyList *plis
 
    const Vector< ConvexShape::Face > faceList = mGeometry.faces;
 
+   if(context == PLC_Navigation)
+   {
+      for(S32 i = 0; i < faceList.size(); i++)
+      {
+         const ConvexShape::Face &face = faceList[i];
+
+         S32 s = face.triangles.size();
+         for(S32 j = 0; j < s; j++)
+         {
+            plist->begin(0, s*i + j);
+
+            plist->plane(PlaneF(face.centroid, face.normal));
+
+            plist->vertex(base + face.points[face.triangles[j].p0]);
+            plist->vertex(base + face.points[face.triangles[j].p1]);
+            plist->vertex(base + face.points[face.triangles[j].p2]);
+
+            plist->end();
+         }
+      }
+      return true;
+   }
+
    for ( S32 i = 0; i < faceList.size(); i++ )
    {
       const ConvexShape::Face &face = faceList[i];		
diff --git a/Engine/source/T3D/decal/decalManager.cpp b/Engine/source/T3D/decal/decalManager.cpp
index 153209933..a39e4bf8d 100644
--- a/Engine/source/T3D/decal/decalManager.cpp
+++ b/Engine/source/T3D/decal/decalManager.cpp
@@ -1004,7 +1004,7 @@ void DecalManager::prepRenderImage( SceneRenderState* state )
 
    PROFILE_START( DecalManager_RenderDecals_SphereTreeCull );
 
-   const Frustum& rootFrustum = state->getFrustum();
+   const Frustum& rootFrustum = state->getCameraFrustum();
 
    // Populate vector of decal instances to be rendered with all
    // decals from visible decal spheres.
@@ -1448,7 +1448,7 @@ void DecalManager::_renderDecalSpheres( ObjectRenderInst* ri, SceneRenderState*
       DecalSphere *decalSphere = grid[i];
       const SphereF &worldSphere = decalSphere->mWorldSphere;
 
-      if( state->getFrustum().isCulled( worldSphere ) )
+      if( state->getCullingFrustum().isCulled( worldSphere ) )
          continue;
 
       drawUtil->drawSphere( desc, worldSphere.radius, worldSphere.center, sphereColor );
diff --git a/Engine/source/T3D/examples/renderMeshExample.cpp b/Engine/source/T3D/examples/renderMeshExample.cpp
index ec6831382..a725fc2eb 100644
--- a/Engine/source/T3D/examples/renderMeshExample.cpp
+++ b/Engine/source/T3D/examples/renderMeshExample.cpp
@@ -284,6 +284,13 @@ void RenderMeshExample::prepRenderImage( SceneRenderState *state )
    // Set our RenderInst as a standard mesh render
    ri->type = RenderPassManager::RIT_Mesh;
 
+   //If our material has transparency set on this will redirect it to proper render bin
+   if ( matInst->getMaterial()->isTranslucent() )
+   {
+      ri->type = RenderPassManager::RIT_Translucent;
+      ri->translucentSort = true;
+   }
+
    // Calculate our sorting point
    if ( state )
    {
diff --git a/Engine/source/T3D/fps/guiShapeNameHud.cpp b/Engine/source/T3D/fps/guiShapeNameHud.cpp
index d63cb2eee..9ec1afac4 100644
--- a/Engine/source/T3D/fps/guiShapeNameHud.cpp
+++ b/Engine/source/T3D/fps/guiShapeNameHud.cpp
@@ -50,11 +50,17 @@ class GuiShapeNameHud : public GuiControl {
    ColorF   mFillColor;
    ColorF   mFrameColor;
    ColorF   mTextColor;
+   ColorF   mLabelFillColor;
+   ColorF   mLabelFrameColor;
 
    F32      mVerticalOffset;
    F32      mDistanceFade;
    bool     mShowFrame;
    bool     mShowFill;
+   bool     mShowLabelFrame;
+   bool     mShowLabelFill;
+
+   Point2I  mLabelPadding;
 
 protected:
    void drawName( Point2I offset, const char *buf, F32 opacity);
@@ -92,6 +98,10 @@ ConsoleDocClass( GuiShapeNameHud,
 		"	textColor = \"1.0 1.0 1.0 1.0\"; // Solid white text Color\n"
 		"	showFill = \"true\";\n"
 		"	showFrame = \"true\";\n"
+		"	labelFillColor = \"0.0 1.0 0.0 1.0\"; // Fills with a solid green color\n"
+		"	labelFrameColor = \"1.0 1.0 1.0 1.0\"; // Solid white frame color\n"
+		"	showLabelFill = \"true\";\n"
+		"	showLabelFrame = \"true\";\n"
 		"	verticalOffset = \"0.15\";\n"
 		"	distanceFade = \"15.0\";\n"
 		"};\n"
@@ -111,6 +121,7 @@ GuiShapeNameHud::GuiShapeNameHud()
    mShowFrame = mShowFill = true;
    mVerticalOffset = 0.5f;
    mDistanceFade = 0.1f;
+   mShowLabelFrame = mShowLabelFill = false; mLabelPadding.set(0, 0); // new label flags must not stay uninitialised; drawName reads them
 }
 
 void GuiShapeNameHud::initPersistFields()
@@ -119,11 +130,16 @@ void GuiShapeNameHud::initPersistFields()
    addField( "fillColor",  TypeColorF, Offset( mFillColor, GuiShapeNameHud ), "Standard color for the background of the control." );
    addField( "frameColor", TypeColorF, Offset( mFrameColor, GuiShapeNameHud ), "Color for the control's frame."  );
    addField( "textColor",  TypeColorF, Offset( mTextColor, GuiShapeNameHud ), "Color for the text on this control." );
+   addField( "labelFillColor",  TypeColorF, Offset( mLabelFillColor, GuiShapeNameHud ), "Color for the background of each shape name label." );
+   addField( "labelFrameColor", TypeColorF, Offset( mLabelFrameColor, GuiShapeNameHud ), "Color for the frames around each shape name label."  );
    endGroup("Colors");     
 
    addGroup("Misc");       
    addField( "showFill",   TypeBool, Offset( mShowFill, GuiShapeNameHud ), "If true, we draw the background color of the control." );
    addField( "showFrame",  TypeBool, Offset( mShowFrame, GuiShapeNameHud ), "If true, we draw the frame of the control."  );
+   addField( "showLabelFill",  TypeBool, Offset( mShowLabelFill, GuiShapeNameHud ), "If true, we draw a background for each shape name label." );
+   addField( "showLabelFrame", TypeBool, Offset( mShowLabelFrame, GuiShapeNameHud ), "If true, we draw a frame around each shape name label."  );
+   addField( "labelPadding", TypePoint2I, Offset( mLabelPadding, GuiShapeNameHud ), "The padding (in pixels) between the label text and the frame." );
    addField( "verticalOffset", TypeF32, Offset( mVerticalOffset, GuiShapeNameHud ), "Amount to vertically offset the control in relation to the ShapeBase object in focus." );
    addField( "distanceFade", TypeF32, Offset( mDistanceFade, GuiShapeNameHud ), "Visibility distance (how far the player must be from the ShapeBase object in focus) for this control to render." );
    endGroup("Misc");
@@ -274,14 +290,26 @@ void GuiShapeNameHud::onRender( Point2I, const RectI &updateRect)
 /// @param   opacity Opacity of name (a fraction).
 void GuiShapeNameHud::drawName(Point2I offset, const char *name, F32 opacity)
 {
+   F32 width = mProfile->mFont->getStrWidth((const UTF8 *)name) + mLabelPadding.x * 2;
+   F32 height = mProfile->mFont->getHeight() + mLabelPadding.y * 2;
+   Point2I extent = Point2I(width, height);
+
    // Center the name
-   offset.x -= mProfile->mFont->getStrWidth((const UTF8 *)name) / 2;
-   offset.y -= mProfile->mFont->getHeight();
+   offset.x -= width / 2;
+   offset.y -= height / 2;
+
+   // Background fill first
+   if (mShowLabelFill)
+      GFX->getDrawUtil()->drawRectFill(RectI(offset, extent), mLabelFillColor);
 
    // Deal with opacity and draw.
    mTextColor.alpha = opacity;
    GFX->getDrawUtil()->setBitmapModulation(mTextColor);
-   GFX->getDrawUtil()->drawText(mProfile->mFont, offset, name);
+   GFX->getDrawUtil()->drawText(mProfile->mFont, offset + mLabelPadding, name);
    GFX->getDrawUtil()->clearBitmapModulation();
+
+   // Border last
+   if (mShowLabelFrame)
+      GFX->getDrawUtil()->drawRect(RectI(offset, extent), mLabelFrameColor);
 }
 
diff --git a/Engine/source/T3D/fx/fxFoliageReplicator.cpp b/Engine/source/T3D/fx/fxFoliageReplicator.cpp
index 04b6c8976..7e88b174f 100644
--- a/Engine/source/T3D/fx/fxFoliageReplicator.cpp
+++ b/Engine/source/T3D/fx/fxFoliageReplicator.cpp
@@ -146,7 +146,7 @@ void fxFoliageRenderList::SetupClipPlanes( SceneRenderState* state, const F32 fa
    const F32 nearPlane = state->getNearPlane();
    const F32 farPlane = farClipPlane;
 
-   const Frustum& frustum = state->getFrustum();
+   const Frustum& frustum = state->getCullingFrustum();
 
    // [rene, 23-Feb-11] Why isn't this preserving the ortho state of the original frustum?
 
diff --git a/Engine/source/T3D/fx/groundCover.cpp b/Engine/source/T3D/fx/groundCover.cpp
index 53c910c3c..bfc2f7737 100644
--- a/Engine/source/T3D/fx/groundCover.cpp
+++ b/Engine/source/T3D/fx/groundCover.cpp
@@ -1225,14 +1225,17 @@ GroundCoverCell* GroundCover::_generateCell( const Point2I& index,
          flipBB *= -1.0f;
 
          PROFILE_START( GroundCover_TerrainRayCast );
-         hit = terrainBlock->getNormalHeightMaterial( Point2F( cp.x - pos.x, cp.y - pos.y ), 
+         // Transform billboard point into terrain's frame of reference.
+         Point3F pp = Point3F(cp.x, cp.y, 0);
+         terrainBlock->getWorldTransform().mulP(pp);
+         hit = terrainBlock->getNormalHeightMaterial( Point2F ( pp.x, pp.y ),
                                                       &normal, &h, matName );
+         PROFILE_END(); // GroundCover_TerrainRayCast
          
          // TODO: When did we loose the world space elevation when
          // getting the terrain height?
          h += pos.z + mZOffset;
 
-         PROFILE_END(); // GroundCover_TerrainRayCast
          if ( !hit || h > typeMaxElevation || h < typeMinElevation || 
               ( typeLayer[0] && !typeInvertLayer && matName != typeLayer ) ||
               ( typeLayer[0] && typeInvertLayer && matName == typeLayer ) )
@@ -1536,7 +1539,7 @@ void GroundCover::prepRenderImage( SceneRenderState *state )
 
    // Setup the frustum culler.
    if ( ( mCuller.getPosition().isZero() || !mDebugLockFrustum ) && !state->isShadowPass() )
-      mCuller = state->getFrustum();
+      mCuller = state->getCullingFrustum();
 
    // Update the cells, but only during the diffuse pass. 
    // We don't want cell generation to thrash when the reflection camera 
@@ -1569,12 +1572,20 @@ void GroundCover::prepRenderImage( SceneRenderState *state )
 
       mShaderConstData.gustInfo.set( mWindGustLength, mWindGustFrequency * simTime, mWindGustStrength );
       mShaderConstData.turbInfo.set( mWindTurbulenceFrequency * simTime, mWindTurbulenceStrength );      
-            
+
+      // Use the camera's forward vector to calculate the camera's right
+      // and up vectors.  This removes any camera banking from affecting
+      // the ground cover.
       const MatrixF &camMat = state->getDiffuseCameraTransform();
       Point3F camDir, camUp, camRight;
       camMat.getColumn( 1, &camDir );
-      camMat.getColumn( 2, &camUp );
-      camMat.getColumn( 0, &camRight );
+      mCross( camDir, Point3F::UnitZ, &camRight );
+      if ( camRight.magnitudeSafe() == 0.0f )
+      {
+         camRight.set( 0.0f, -1.0f, 0.0f );
+      }
+      camRight.normalizeSafe();
+      mCross( camRight, camDir, &camUp );
 
       // Limit the camera up vector to keep the billboards 
       // from leaning too far down into the terrain.
diff --git a/Engine/source/T3D/fx/particleEmitter.cpp b/Engine/source/T3D/fx/particleEmitter.cpp
index 04429d8fa..12a930684 100644
--- a/Engine/source/T3D/fx/particleEmitter.cpp
+++ b/Engine/source/T3D/fx/particleEmitter.cpp
@@ -118,6 +118,7 @@ ParticleEmitterData::ParticleEmitterData()
    ejectionVelocity = 2.0f;   // From 1.0 - 3.0 meters per sec
    velocityVariance = 1.0f;
    ejectionOffset   = sgDefaultEjectionOffset;   // ejection from the emitter point
+   ejectionOffsetVariance = 0.0f;
 
    thetaMin         = 0.0f;   // All heights
    thetaMax         = 90.0f;
@@ -199,6 +200,9 @@ void ParticleEmitterData::initPersistFields()
 
       addFieldV( "ejectionOffset", TYPEID< F32 >(), Offset(ejectionOffset, ParticleEmitterData), &ejectionFValidator,
          "Distance along ejection Z axis from which to eject particles." );
+		 
+      addFieldV( "ejectionOffsetVariance", TYPEID< F32 >(), Offset(ejectionOffsetVariance, ParticleEmitterData), &ejectionFValidator,
+         "Distance Padding along ejection Z axis from which to eject particles." );
 
       addFieldV( "thetaMin", TYPEID< F32 >(), Offset(thetaMin, ParticleEmitterData), &thetaFValidator,
          "Minimum angle, from the horizontal plane, to eject from." );
@@ -316,6 +320,8 @@ void ParticleEmitterData::packData(BitStream* stream)
    stream->writeInt((S32)(velocityVariance * 100), 14);
    if( stream->writeFlag( ejectionOffset != sgDefaultEjectionOffset ) )
       stream->writeInt((S32)(ejectionOffset * 100), 16);
+   if( stream->writeFlag( ejectionOffsetVariance != 0.0f ) )
+      stream->writeInt((S32)(ejectionOffsetVariance * 100), 16);
    stream->writeRangedU32((U32)thetaMin, 0, 180);
    stream->writeRangedU32((U32)thetaMax, 0, 180);
    if( stream->writeFlag( phiReferenceVel != sgDefaultPhiReferenceVel ) )
@@ -368,7 +374,10 @@ void ParticleEmitterData::unpackData(BitStream* stream)
       ejectionOffset = stream->readInt(16) / 100.0f;
    else
       ejectionOffset = sgDefaultEjectionOffset;
-
+   if( stream->readFlag() )
+      ejectionOffsetVariance = stream->readInt(16) / 100.0f;
+   else
+      ejectionOffsetVariance = 0.0f;
    thetaMin = (F32)stream->readRangedU32(0, 180);
    thetaMax = (F32)stream->readRangedU32(0, 180);
    if( stream->readFlag() )
@@ -757,11 +766,6 @@ bool ParticleEmitter::onAdd()
    {
       cleanup->addObject( this );
    }
-   else
-   {
-      AssertFatal( false, "Error, could not find ClientMissionCleanup group" );
-      return false;
-   }
 
    removeFromProcessList();
 
@@ -1287,7 +1291,7 @@ void ParticleEmitter::addParticle(const Point3F& pos,
    F32 initialVel = mDataBlock->ejectionVelocity;
    initialVel    += (mDataBlock->velocityVariance * 2.0f * gRandGen.randF()) - mDataBlock->velocityVariance;
 
-   pNew->pos = pos + (ejectionAxis * mDataBlock->ejectionOffset);
+   pNew->pos = pos + (ejectionAxis * (mDataBlock->ejectionOffset + mDataBlock->ejectionOffsetVariance* gRandGen.randF()) );
    pNew->vel = ejectionAxis * initialVel;
    pNew->orientDir = ejectionAxis;
    pNew->acc.set(0, 0, 0);
diff --git a/Engine/source/T3D/fx/particleEmitter.h b/Engine/source/T3D/fx/particleEmitter.h
index 02aab525a..36d486f47 100644
--- a/Engine/source/T3D/fx/particleEmitter.h
+++ b/Engine/source/T3D/fx/particleEmitter.h
@@ -73,7 +73,7 @@ class ParticleEmitterData : public GameBaseData
    F32   ejectionVelocity;                   ///< Ejection velocity
    F32   velocityVariance;                   ///< Variance for velocity between 0 and n
    F32   ejectionOffset;                     ///< Z offset from emitter point to eject from
-
+   F32   ejectionOffsetVariance;             ///< Z offset Variance from emitter point to eject 
    F32   thetaMin;                           ///< Minimum angle, from the horizontal plane, to eject from
    F32   thetaMax;                           ///< Maximum angle, from the horizontal plane, to eject from
 
diff --git a/Engine/source/T3D/fx/precipitation.cpp b/Engine/source/T3D/fx/precipitation.cpp
index 639d0c747..9205238e0 100644
--- a/Engine/source/T3D/fx/precipitation.cpp
+++ b/Engine/source/T3D/fx/precipitation.cpp
@@ -1727,6 +1727,7 @@ void Precipitation::renderObject(ObjectRenderInst *ri, SceneRenderState *state,
       // Do we need to relock the buffer?
       if ( !vertPtr )
          vertPtr = mRainVB.lock();
+      if(!vertPtr) return;
 
       // Set the proper texture coords... (it's fun!)
       tc = &mTexCoords[4*curr->texCoordIndex];
@@ -1817,6 +1818,7 @@ void Precipitation::renderObject(ObjectRenderInst *ri, SceneRenderState *state,
       // Do we need to relock the buffer?
       if ( !vertPtr )
          vertPtr = mRainVB.lock();
+      if(!vertPtr) return;
 
       vertPtr->point = pos + leftUp;
       vertPtr->texCoord = *tc;
diff --git a/Engine/source/T3D/gameBase/gameBase.h b/Engine/source/T3D/gameBase/gameBase.h
index 68354cdf0..4cb069c62 100644
--- a/Engine/source/T3D/gameBase/gameBase.h
+++ b/Engine/source/T3D/gameBase/gameBase.h
@@ -409,8 +409,8 @@ public:
    virtual bool isValidCameraFov( F32 fov ) { return true; }
    virtual bool useObjsEyePoint() const { return false; }
    virtual bool onlyFirstPerson() const { return false; }
-   virtual F32 getDamageFlash() const { return 1.0f; }
-   virtual F32 getWhiteOut() const { return 1.0f; }
+   virtual F32 getDamageFlash() const { return 0.0f; }
+   virtual F32 getWhiteOut() const { return 0.0f; }
    
    // Not implemented here, but should return the Camera to world transformation matrix
    virtual void getCameraTransform (F32 *pos, MatrixF *mat ) { *mat = MatrixF::Identity; }
diff --git a/Engine/source/T3D/groundPlane.cpp b/Engine/source/T3D/groundPlane.cpp
index a923502cf..b188a7d39 100644
--- a/Engine/source/T3D/groundPlane.cpp
+++ b/Engine/source/T3D/groundPlane.cpp
@@ -288,11 +288,45 @@ void GroundPlane::buildConvex( const Box3F& box, Convex* convex )
    }
 }
 
-bool GroundPlane::buildPolyList( PolyListContext context, AbstractPolyList* polyList, const Box3F&, const SphereF& )
+bool GroundPlane::buildPolyList( PolyListContext context, AbstractPolyList* polyList, const Box3F& box, const SphereF& )
 {
    polyList->setObject( this );
    polyList->setTransform( &MatrixF::Identity, Point3F( 1.0f, 1.0f, 1.0f ) );
 
+   if(context == PLC_Navigation)
+   {
+      F32 z = getPosition().z;
+      Point3F
+         p0(box.minExtents.x, box.maxExtents.y, z),
+         p1(box.maxExtents.x, box.maxExtents.y, z),
+         p2(box.maxExtents.x, box.minExtents.y, z),
+         p3(box.minExtents.x, box.minExtents.y, z);
+
+      // Add vertices to poly list, keeping the index returned for each one.
+      const U32 v0 = polyList->addPoint(p0);
+      const U32 v1 = polyList->addPoint(p1);
+      const U32 v2 = polyList->addPoint(p2);
+      const U32 v3 = polyList->addPoint(p3);
+
+      // First triangle: p0, p1, p2.
+      polyList->begin(0, 0);
+      polyList->vertex(v0);
+      polyList->vertex(v1);
+      polyList->vertex(v2);
+      polyList->plane(v0, v1, v2);
+      polyList->end();
+
+      // Second triangle: p2, p3, p0.
+      polyList->begin(0, 1);
+      polyList->vertex(v2);
+      polyList->vertex(v3);
+      polyList->vertex(v0);
+      polyList->plane(v2, v3, v0);
+      polyList->end();
+
+      return true;
+   }
+
    Box3F planeBox = getPlaneBox();
    polyList->addBox( planeBox, mMaterial );
 
@@ -318,7 +352,7 @@ void GroundPlane::prepRenderImage( SceneRenderState* state )
    PROFILE_SCOPE( GroundPlane_prepRender );
 
    // Update the geometry.
-   createGeometry( state->getFrustum() );
+   createGeometry( state->getCullingFrustum() );
    if( mVertexBuffer.isNull() )
       return;
 
diff --git a/Engine/source/T3D/guiObjectView.cpp b/Engine/source/T3D/guiObjectView.cpp
index 392eee503..00e5de0df 100644
--- a/Engine/source/T3D/guiObjectView.cpp
+++ b/Engine/source/T3D/guiObjectView.cpp
@@ -103,6 +103,7 @@ GuiObjectView::GuiObjectView()
       mMountNodeName( "mount0" ),
       mMountNode( -1 ),
       mCameraSpeed( 0.01f ),
+	  mCameraRotation( 0.0f, 0.0f, 0.0f ),
       mLightColor( 1.0f, 1.0f, 1.0f ),
       mLightAmbient( 0.5f, 0.5f, 0.5f ),
       mLightDirection( 0.f, 0.707f, -0.707f )
diff --git a/Engine/source/T3D/lightFlareData.cpp b/Engine/source/T3D/lightFlareData.cpp
index 5a3e0e322..8cbad24fc 100644
--- a/Engine/source/T3D/lightFlareData.cpp
+++ b/Engine/source/T3D/lightFlareData.cpp
@@ -279,7 +279,7 @@ bool LightFlareData::_testVisibility(const SceneRenderState *state, LightFlareSt
    const Point3F &lightPos = flareState->lightMat.getPosition();  
    const RectI &viewport = GFX->getViewport();
    MatrixF projMatrix;
-   state->getFrustum().getProjectionMatrix(&projMatrix);
+   state->getCameraFrustum().getProjectionMatrix(&projMatrix);
    if( state->isReflectPass() )
       projMatrix = state->getSceneManager()->getNonClipProjection();
    bool onScreen = MathUtils::mProjectWorldToScreen( lightPos, outLightPosSS, viewport, GFX->getWorldMatrix(), projMatrix );
@@ -465,7 +465,7 @@ void LightFlareData::prepRender( SceneRenderState *state, LightFlareState *flare
 
    // Take any projection offset into account so that the point where the flare's
    // elements converge is at the 'eye' point rather than the center of the viewport.
-   const Point2F& projOffset = state->getFrustum().getProjectionOffset();
+   const Point2F& projOffset = state->getCameraFrustum().getProjectionOffset();
    Point3F flareVec( -lightPosSS + Point3F(projOffset.x, projOffset.y, 0.0f) );
    const F32 flareLength = flareVec.len();
    if ( flareLength > 0.0f )
diff --git a/Engine/source/T3D/physics/bullet/btWorld.cpp b/Engine/source/T3D/physics/bullet/btWorld.cpp
index 9ef5d8a50..231b1e3e1 100644
--- a/Engine/source/T3D/physics/bullet/btWorld.cpp
+++ b/Engine/source/T3D/physics/bullet/btWorld.cpp
@@ -286,7 +286,7 @@ void BtWorld::explosion( const Point3F &pos, F32 radius, F32 forceMagnitude )
 
 void BtWorld::onDebugDraw( const SceneRenderState *state )
 {
-   mDebugDraw.setCuller( &state->getFrustum() );
+   mDebugDraw.setCuller( &state->getCullingFrustum() );
 
    mDynamicsWorld->setDebugDrawer( &mDebugDraw );
    mDynamicsWorld->debugDrawWorld();
diff --git a/Engine/source/T3D/physics/physicsShape.cpp b/Engine/source/T3D/physics/physicsShape.cpp
index 1d4e6d84a..660db0791 100644
--- a/Engine/source/T3D/physics/physicsShape.cpp
+++ b/Engine/source/T3D/physics/physicsShape.cpp
@@ -41,6 +41,7 @@
 #include "lighting/lightQuery.h"
 #include "console/engineAPI.h"
 
+using namespace Torque;
 
 bool PhysicsShape::smNoCorrections = false;
 bool PhysicsShape::smNoSmoothing = false;
@@ -240,7 +241,7 @@ void PhysicsShapeData::onRemove()
 
 void PhysicsShapeData::_onResourceChanged( const Torque::Path &path )
 {
-   if ( path != Path( shapeName ) )
+	if ( path != Path( shapeName ) )
       return;
 
    // Reload the changed shape.
diff --git a/Engine/source/T3D/physics/physx/pxMultiActor.cpp b/Engine/source/T3D/physics/physx/pxMultiActor.cpp
index 903405713..8692afcec 100644
--- a/Engine/source/T3D/physics/physx/pxMultiActor.cpp
+++ b/Engine/source/T3D/physics/physx/pxMultiActor.cpp
@@ -570,7 +570,7 @@ bool PxMultiActorData::preload( bool server, String &errorBuffer )
 
    // Register for file change notification to reload the collection
    if ( server )
-      FS::AddChangeNotification( physXStream, this, &PxMultiActorData::_onFileChanged );
+      Torque::FS::AddChangeNotification( physXStream, this, &PxMultiActorData::_onFileChanged );
 
    return true;
 }
@@ -2648,4 +2648,4 @@ ConsoleMethod( PxMultiActorData, reload, void, 2, 2, ""
               "If the reload sucessfully completes, all PxMultiActor's will be notified.\n\n")
 {
    object->reload();
-}
\ No newline at end of file
+}
diff --git a/Engine/source/T3D/player.cpp b/Engine/source/T3D/player.cpp
index af5c38d9d..da29b63e0 100644
--- a/Engine/source/T3D/player.cpp
+++ b/Engine/source/T3D/player.cpp
@@ -128,14 +128,6 @@ enum PlayerConstants {
 //----------------------------------------------------------------------------
 // Player shape animation sequences:
 
-// look     Used to control the upper body arm motion.  Must animate
-//          vertically +-80 deg.
-Player::Range Player::mArmRange(mDegToRad(-80.0f),mDegToRad(+80.0f));
-
-// head     Used to control the direction the head is looking.  Must
-//          animated vertically +-80 deg .
-Player::Range Player::mHeadVRange(mDegToRad(-80.0f),mDegToRad(+80.0f));
-
 // Action Animations:
 PlayerData::ActionAnimationDef PlayerData::ActionAnimationList[NumTableActionAnims] =
 {
@@ -1763,6 +1755,12 @@ void Player::onRemove()
    setControlObject(0);
    scriptOnRemove();
    removeFromScene();
+   
+   if ( isGhost() )
+   {
+      SFX_DELETE( mMoveBubbleSound );
+      SFX_DELETE( mWaterBreathSound );
+   }
 
    U32 i;
    for( i=0; i<PlayerData::NUM_SPLASH_EMITTERS; i++ )
@@ -2864,7 +2862,7 @@ void Player::updateMove(const Move* move)
       if (pvl)
          pv *= moveSpeed / pvl;
 
-      VectorF runAcc = pv - acc;
+      VectorF runAcc = pv - (mVelocity + acc);
       runAcc.z = 0;
       runAcc.x = runAcc.x * mDataBlock->airControl;
       runAcc.y = runAcc.y * mDataBlock->airControl;
@@ -3100,6 +3098,8 @@ void Player::updateMove(const Move* move)
    }
 
    // Container buoyancy & drag
+/* Commented out until the buoyancy calculation can be reworked so that a container and
+** player with the same density will result in neutral buoyancy.
    if (mBuoyancy != 0)
    {     
       // Applying buoyancy when standing still causing some jitters-
@@ -3116,9 +3116,10 @@ void Player::updateMove(const Move* move)
          if ( currHeight + mVelocity.z * TickSec * C > mLiquidHeight )
             buoyancyForce *= M;
                   
-         //mVelocity.z -= buoyancyForce;
+         mVelocity.z -= buoyancyForce;
       }
    }
+*/
 
    // Apply drag
    if ( mSwimming )
@@ -3425,31 +3426,38 @@ void Player::updateDamageState()
 
 //----------------------------------------------------------------------------
 
-void Player::updateLookAnimation(F32 dT)
+void Player::updateLookAnimation(F32 dt)
 {
    // Calculate our interpolated head position.
-   Point3F renderHead = delta.head + delta.headVec * dT;
+   Point3F renderHead = delta.head + delta.headVec * dt;
 
    // Adjust look pos.  This assumes that the animations match
    // the min and max look angles provided in the datablock.
    if (mArmAnimation.thread) 
    {
-      // TG: Adjust arm position to avoid collision.
-      F32 tp = mControlObject? 0.5:
-         (renderHead.x - mArmRange.min) / mArmRange.delta;
-      mShapeInstance->setPos(mArmAnimation.thread,mClampF(tp,0,1));
+      if(mControlObject)
+      {
+         mShapeInstance->setPos(mArmAnimation.thread,0.5f);
+      }
+      else
+      {
+         F32 d = mDataBlock->maxLookAngle - mDataBlock->minLookAngle;
+         F32 tp = (renderHead.x - mDataBlock->minLookAngle) / d;
+         mShapeInstance->setPos(mArmAnimation.thread,mClampF(tp,0,1));
+      }
    }
    
    if (mHeadVThread) 
    {
-      F32 tp = (renderHead.x - mHeadVRange.min) / mHeadVRange.delta;
+      F32 d = mDataBlock->maxLookAngle - mDataBlock->minLookAngle;
+      F32 tp = (renderHead.x - mDataBlock->minLookAngle) / d;
       mShapeInstance->setPos(mHeadVThread,mClampF(tp,0,1));
    }
    
    if (mHeadHThread) 
    {
-      F32 dt = 2 * mDataBlock->maxFreelookAngle;
-      F32 tp = (renderHead.z + mDataBlock->maxFreelookAngle) / dt;
+      F32 d = 2 * mDataBlock->maxFreelookAngle;
+      F32 tp = (renderHead.z + mDataBlock->maxFreelookAngle) / d;
       mShapeInstance->setPos(mHeadHThread,mClampF(tp,0,1));
    }
 }
diff --git a/Engine/source/T3D/player.h b/Engine/source/T3D/player.h
index bdb5a2c0e..17a05445a 100644
--- a/Engine/source/T3D/player.h
+++ b/Engine/source/T3D/player.h
@@ -398,16 +398,6 @@ protected:
       NextFreeMask = Parent::NextFreeMask << 3
    };
 
-   struct Range {
-      Range(F32 _min,F32 _max) {
-         min = _min;
-         max = _max;
-         delta = _max - _min;
-      };
-      F32 min,max;
-      F32 delta;
-   };
-
    SimObjectPtr<ParticleEmitter> mSplashEmitter[PlayerData::NUM_SPLASH_EMITTERS];
    F32 mBubbleEmitterTime;
 
@@ -508,9 +498,6 @@ protected:
    TSThread* mHeadHThread;
    TSThread* mRecoilThread;
    TSThread* mImageStateThread;
-   static Range mArmRange;
-   static Range mHeadVRange;
-   static Range mHeadHRange;
    /// @}
 
    bool mInMissionArea;       ///< Are we in the mission area?
diff --git a/Engine/source/T3D/projectile.cpp b/Engine/source/T3D/projectile.cpp
index 47474eb24..ddff494c7 100644
--- a/Engine/source/T3D/projectile.cpp
+++ b/Engine/source/T3D/projectile.cpp
@@ -1106,68 +1106,61 @@ void Projectile::simulate( F32 dt )
       if ( isServerObject() && ( rInfo.object->getTypeMask() & csmStaticCollisionMask ) == 0 )
          setMaskBits( BounceMask );
 
+      MatrixF xform( true );
+      xform.setColumn( 3, rInfo.point );
+      setTransform( xform );
+      mCurrPosition    = rInfo.point;
+      if ( mCurrTick > mDataBlock->armingDelay || mDataBlock->armingDelay == 0 ) mCurrVelocity = Point3F::Zero; // stop only when armed (we explode below); an unarmed hit must keep its velocity so the bounce code can reflect it
+
+      // Get the object type before the onCollision call, in case
+      // the object is destroyed.
+      U32 objectType = rInfo.object->getTypeMask();
+
+      // re-enable the collision response on the source object since
+      // we need to process the onCollision and explode calls
+      if ( disableSourceObjCollision )
+         mSourceObject->enableCollision();
+
+      // Ok, here is how this works:
+      // onCollision is called to notify the server scripts that a collision has occurred, then
+      // a call to explode is made to start the explosion process. The call to explode is made
+      // twice, once on the server and once on the client.
+      // The server process is responsible for two things:
+      //    1) setting the ExplosionMask network bit to guarantee that the client calls explode
+      //    2) initiate the explosion process on the server scripts
+      // The client process is responsible for only one thing:
+      //    1) drawing the appropriate explosion
+
+      // It is possible that during the processTick the server may have decided that a hit
+      // has occurred while the client prediction has decided that a hit has not occurred.
+      // In this particular scenario the client will have failed to call onCollision and
+      // explode during the processTick. However, the explode function will be called
+      // during the next packet update, due to the ExplosionMask network bit being set.
+      // onCollision will remain uncalled on the client however, therefore no client
+      // specific code should be placed inside the function!
+      onCollision( rInfo.point, rInfo.normal, rInfo.object );
       // Next order of business: do we explode on this hit?
       if ( mCurrTick > mDataBlock->armingDelay || mDataBlock->armingDelay == 0 )
-      {
-         MatrixF xform( true );
-         xform.setColumn( 3, rInfo.point );
-         setTransform( xform );
-         mCurrPosition    = rInfo.point;
-         mCurrVelocity    = Point3F::Zero;
-
-         // Get the object type before the onCollision call, in case
-         // the object is destroyed.
-         U32 objectType = rInfo.object->getTypeMask();
-
-         // re-enable the collision response on the source object since
-         // we need to process the onCollision and explode calls
-         if ( disableSourceObjCollision )
-            mSourceObject->enableCollision();
-
-         // Ok, here is how this works:
-         // onCollision is called to notify the server scripts that a collision has occurred, then
-         // a call to explode is made to start the explosion process. The call to explode is made
-         // twice, once on the server and once on the client.
-         // The server process is responsible for two things:
-         //    1) setting the ExplosionMask network bit to guarantee that the client calls explode
-         //    2) initiate the explosion process on the server scripts
-         // The client process is responsible for only one thing:
-         //    1) drawing the appropriate explosion
-
-         // It is possible that during the processTick the server may have decided that a hit
-         // has occurred while the client prediction has decided that a hit has not occurred.
-         // In this particular scenario the client will have failed to call onCollision and
-         // explode during the processTick. However, the explode function will be called
-         // during the next packet update, due to the ExplosionMask network bit being set.
-         // onCollision will remain uncalled on the client however, therefore no client
-         // specific code should be placed inside the function!
-         onCollision( rInfo.point, rInfo.normal, rInfo.object );
          explode( rInfo.point, rInfo.normal, objectType );
 
-         // break out of the collision check, since we've exploded
-         // we don't want to mess with the position and velocity
-      }
-      else
+      if ( mDataBlock->isBallistic )
       {
-         if ( mDataBlock->isBallistic )
-         {
-            // Otherwise, this represents a bounce.  First, reflect our velocity
-            //  around the normal...
-            Point3F bounceVel = mCurrVelocity - rInfo.normal * (mDot( mCurrVelocity, rInfo.normal ) * 2.0);
-            mCurrVelocity = bounceVel;
+         // Otherwise, this represents a bounce.  First, reflect our velocity
+         //  around the normal...
+         Point3F bounceVel = mCurrVelocity - rInfo.normal * (mDot( mCurrVelocity, rInfo.normal ) * 2.0);
+         mCurrVelocity = bounceVel;
 
-            // Add in surface friction...
-            Point3F tangent = bounceVel - rInfo.normal * mDot(bounceVel, rInfo.normal);
-            mCurrVelocity  -= tangent * mDataBlock->bounceFriction;
+         // Add in surface friction...
+         Point3F tangent = bounceVel - rInfo.normal * mDot(bounceVel, rInfo.normal);
+         mCurrVelocity  -= tangent * mDataBlock->bounceFriction;
 
-            // Now, take elasticity into account for modulating the speed of the grenade
-            mCurrVelocity *= mDataBlock->bounceElasticity;
+         // Now, take elasticity into account for modulating the speed of the grenade
+         mCurrVelocity *= mDataBlock->bounceElasticity;
 
-            // Set the new position to the impact and the bounce
-            // will apply on the next frame.
-            //F32 timeLeft = 1.0f - rInfo.t;
-            newPosition = oldPosition = rInfo.point + rInfo.normal * 0.05f;
-         }
+         // Set the new position to the impact and the bounce
+         // will apply on the next frame.
+         //F32 timeLeft = 1.0f - rInfo.t;
+         newPosition = oldPosition = rInfo.point + rInfo.normal * 0.05f;
       }
    }
 
@@ -1444,4 +1437,4 @@ DefineEngineMethod(Projectile, presimulate, void, (F32 seconds), (1.0f),
                                        "@note This function is not called if the SimObject::hidden is true.")
 {
 	object->simulate( seconds );
-}
+}
\ No newline at end of file
diff --git a/Engine/source/T3D/rigidShape.cpp b/Engine/source/T3D/rigidShape.cpp
index abd04937b..5d21d97c8 100644
--- a/Engine/source/T3D/rigidShape.cpp
+++ b/Engine/source/T3D/rigidShape.cpp
@@ -774,7 +774,8 @@ void RigidShape::processTick(const Move* move)
 
       // Update the physics based on the integration rate
       S32 count = mDataBlock->integration;
-      updateWorkingCollisionSet(getCollisionMask());
+      if (!mDisableMove)
+         updateWorkingCollisionSet(getCollisionMask());
       for (U32 i = 0; i < count; i++)
          updatePos(TickSec / count);
 
diff --git a/Engine/source/T3D/shapeBase.cpp b/Engine/source/T3D/shapeBase.cpp
index a81df8ece..0ecde5097 100644
--- a/Engine/source/T3D/shapeBase.cpp
+++ b/Engine/source/T3D/shapeBase.cpp
@@ -4613,6 +4613,11 @@ DefineEngineMethod( ShapeBase, isEnabled, bool, (),,
    return object->getDamageState() == ShapeBase::Enabled;
 }
 
+DefineEngineMethod(ShapeBase, blowUp, void, (),, "@brief Explodes an object into pieces.")
+{
+	object->blowUp(); // script entry point: forwards straight to the virtual ShapeBase::blowUp()
+}
+
 DefineEngineMethod( ShapeBase, applyDamage, void, ( F32 amount ),,
    "@brief Increment the current damage level by the specified amount.\n\n"
 
diff --git a/Engine/source/T3D/shapeBase.h b/Engine/source/T3D/shapeBase.h
index 3ad3cc291..2987163df 100644
--- a/Engine/source/T3D/shapeBase.h
+++ b/Engine/source/T3D/shapeBase.h
@@ -1121,7 +1121,6 @@ protected:
    virtual void ejectShellCasing( U32 imageSlot );
    virtual void updateDamageLevel();
    virtual void updateDamageState();
-   virtual void blowUp();
    virtual void onImpact(SceneObject* obj, VectorF vec);
    virtual void onImpact(VectorF vec);
    /// @}
@@ -1307,6 +1306,9 @@ public:
    /// Returns the recharge rate
    F32  getRechargeRate() { return mRechargeRate; }
 
+   /// Makes the shape explode.
+   virtual void blowUp();
+
    /// @}
 
    /// @name Script sounds
diff --git a/Engine/source/T3D/shapeImage.cpp b/Engine/source/T3D/shapeImage.cpp
index ea0ba3eba..554eeb55b 100644
--- a/Engine/source/T3D/shapeImage.cpp
+++ b/Engine/source/T3D/shapeImage.cpp
@@ -686,10 +686,6 @@ void ShapeBaseImageData::initPersistFields()
       "@see eyeOffset\n\n"
       "@see animateOnServer\n\n");
 
-   addField( "correctMuzzleVector", TypeBool,  Offset(correctMuzzleVector, ShapeBaseImageData),
-      "@brief Flag to adjust the aiming vector to the eye's LOS point.\n\n"
-      "@see ShapeBase::getMuzzleVector()" );
-
    addField( "correctMuzzleVector", TypeBool,  Offset(correctMuzzleVector, ShapeBaseImageData),
       "@brief Flag to adjust the aiming vector to the eye's LOS point when in 1st person view.\n\n"
       "@see ShapeBase::getMuzzleVector()" );
@@ -3092,7 +3088,10 @@ TICKAGAIN:
    }
 
    if ( image.rDT > 0.0f && image.delayTime > 0.0f && imageData.useRemainderDT && dt != 0.0f )
+   {
+      dt = image.rDT;
       goto TICKAGAIN;
+   }
 }
 
 
@@ -3259,7 +3258,7 @@ void ShapeBase::submitLights( LightManager *lm, bool staticLighting )
             {
             S32 elapsed = Sim::getCurrentTime() - image.lightStart;
             if ( elapsed > imageData->lightDuration )
-               return;
+               continue;
             intensity = ( 1.0 - (F32)elapsed / (F32)imageData->lightDuration ) * imageData->lightBrightness;
             break;
             }
diff --git a/Engine/source/T3D/trigger.cpp b/Engine/source/T3D/trigger.cpp
index b47bcabbc..093738c45 100644
--- a/Engine/source/T3D/trigger.cpp
+++ b/Engine/source/T3D/trigger.cpp
@@ -263,8 +263,9 @@ ConsoleGetType( TypeTriggerPolyhedron )
    dSprintf(retBuf, 1023, "%7.7f %7.7f %7.7f %7.7f %7.7f %7.7f %7.7f %7.7f %7.7f %7.7f %7.7f %7.7f",
             origin.x, origin.y, origin.z,
             vecs[0].x, vecs[0].y, vecs[0].z,
-            vecs[2].x, vecs[2].y, vecs[2].z,
-            vecs[1].x, vecs[1].y, vecs[1].z);
+            vecs[1].x, vecs[1].y, vecs[1].z,
+			vecs[2].x, vecs[2].y, vecs[2].z);
+            
 
    return retBuf;
 }
diff --git a/Engine/source/T3D/tsStatic.cpp b/Engine/source/T3D/tsStatic.cpp
index 1910ad707..f2e666c4f 100644
--- a/Engine/source/T3D/tsStatic.cpp
+++ b/Engine/source/T3D/tsStatic.cpp
@@ -49,6 +49,8 @@
 #include "materials/materialFeatureTypes.h"
 #include "console/engineAPI.h"
 
+using namespace Torque;
+
 extern bool gEditingMission;
 
 IMPLEMENT_CO_NETOBJECT_V1(TSStatic);
@@ -523,7 +525,7 @@ void TSStatic::prepRenderImage( SceneRenderState* state )
    Frustum culler;
    if ( mMeshCulling )
    {
-      culler = state->getFrustum();
+      culler = state->getCullingFrustum();
       MatrixF xfm( true );
       xfm.scale( Point3F::One / getScale() );
       xfm.mul( getRenderWorldTransform() );
diff --git a/Engine/source/app/net/net.cpp b/Engine/source/app/net/net.cpp
index 7c5f47a0d..922a6b808 100644
--- a/Engine/source/app/net/net.cpp
+++ b/Engine/source/app/net/net.cpp
@@ -31,27 +31,14 @@
 #include "sim/netObject.h"
 #include "app/net/serverQuery.h"
 #include "console/engineAPI.h"
-
+#include <vector>
+#include "net.h"
 //----------------------------------------------------------------
 // remote procedure call console functions
 //----------------------------------------------------------------
 
-class RemoteCommandEvent : public NetEvent
-{
-public:
-   typedef NetEvent Parent;
-   enum {
-      MaxRemoteCommandArgs = 20,
-      CommandArgsBits = 5
-   };
 
-private:
-   S32 mArgc;
-   char *mArgv[MaxRemoteCommandArgs + 1];
-   NetStringHandle mTagv[MaxRemoteCommandArgs + 1];
-   static char mBuf[1024];
-public:
-   RemoteCommandEvent(S32 argc=0, const char **argv=NULL, NetConnection *conn = NULL)
+   RemoteCommandEvent::RemoteCommandEvent(S32 argc, const char **argv, NetConnection *conn)
    {
       mArgc = argc;
       for(S32 i = 0; i < argc; i++)
@@ -73,7 +60,7 @@ public:
    }
 
 #ifdef TORQUE_DEBUG_NET
-   const char *getDebugName()
+   const char *RemoteCommandEvent::getDebugName()
    {
       static char buffer[256];
       dSprintf(buffer, sizeof(buffer), "%s [%s]", getClassName(), mTagv[1].isValidString() ? mTagv[1].getString() : "--unknown--" );
@@ -81,13 +68,13 @@ public:
    }
 #endif
 
-   ~RemoteCommandEvent()
+   RemoteCommandEvent::~RemoteCommandEvent()
    {
       for(S32 i = 0; i < mArgc; i++)
          dFree(mArgv[i+1]);
    }
 
-   virtual void pack(NetConnection* conn, BitStream *bstream)
+   void RemoteCommandEvent::pack(NetConnection* conn, BitStream *bstream)
    {
       bstream->writeInt(mArgc, CommandArgsBits);
       // write it out reversed... why?
@@ -98,12 +85,12 @@ public:
          conn->packString(bstream, mArgv[i+1]);
    }
 
-   virtual void write(NetConnection* conn, BitStream *bstream)
+   void RemoteCommandEvent::write(NetConnection* conn, BitStream *bstream)
    {
       pack(conn, bstream);
    }
 
-   virtual void unpack(NetConnection* conn, BitStream *bstream)
+   void RemoteCommandEvent::unpack(NetConnection* conn, BitStream *bstream)
    {
 
       mArgc = bstream->readInt(CommandArgsBits);
@@ -115,7 +102,7 @@ public:
       }
    }
 
-   virtual void process(NetConnection *conn)
+   void RemoteCommandEvent::process(NetConnection *conn)
    {
       static char idBuf[10];
 
@@ -165,8 +152,52 @@ public:
       }
    }
 
-   DECLARE_CONOBJECT(RemoteCommandEvent);
-};
+   /// Post a remote command on @a conn after dropping trailing empty arguments.
+   void RemoteCommandEvent::sendRemoteCommand(NetConnection *conn, S32 argc, const char **argv)
+   {
+      // The command name (argv[0]) has to be a tagged string.
+      if( U8( argv[0][0] ) != StringTagPrefixByte )
+      {
+         Con::errorf(ConsoleLogEntry::Script, "Remote Command Error - command must be a tag.");
+         return;
+      }
+
+      // Trim empty strings off the end of the argument list.
+      while( argc > 0 && argv[argc - 1][0] == 0 )
+         --argc;
+
+      for( S32 arg = 0; arg < argc; ++arg )
+         conn->validateSendString( argv[arg] );
+
+      conn->postNetEvent( new RemoteCommandEvent( argc, argv, conn ) );
+   }
+
+   const char* RemoteCommandEvent::getTaggedString(const char* tag)
+   {
+      // Accept the tag with or without its leading prefix byte.
+      const char *digits = ( *tag == StringTagPrefixByte ) ? tag + 1 : tag;
+      // What remains is parsed as the numeric index handed to the net string table.
+      return gNetStringTable->lookupString( dAtoi( digits ) );
+   }
+
+   void RemoteCommandEvent::removeTaggedString(S32 tag)
+	{
+	if (tag > 0) // 0 means "no tag"; negative values (the script binding defaults to -1) must never be used as a table index
+		gNetStringTable->removeString(tag, true);
+	}
+
+   const char* RemoteCommandEvent::addTaggedString(const char* str)
+	   {
+	   NetStringHandle s(str);
+	   gNetStringTable->incStringRefScript(s.getIndex());
+	   // Return the tag as text: the prefix byte followed by the decimal table index.
+	   char *ret = Con::getReturnBuffer(10);
+	   ret[0] = StringTagPrefixByte;
+	   dSprintf(ret + 1, 9, "%d", s.getIndex()); // 9 of the 10 bytes remain after the prefix byte
+	   return ret;
+	   }
+
+
 char RemoteCommandEvent::mBuf[1024];
 
 IMPLEMENT_CO_NETEVENT_V1(RemoteCommandEvent);
@@ -176,30 +207,13 @@ ConsoleDocClass( RemoteCommandEvent,
 				"Not intended for game development, for exposing ConsoleFunctions (such as commandToClient) only.\n\n"
 				"@internal");
 
-static void sendRemoteCommand(NetConnection *conn, S32 argc, const char **argv)
-{
-   if(U8(argv[0][0]) != StringTagPrefixByte)
-   {
-      Con::errorf(ConsoleLogEntry::Script, "Remote Command Error - command must be a tag.");
-      return;
-   }
-   S32 i;
-   for(i = argc - 1; i >= 0; i--)
-   {
-      if(argv[i][0] != 0)
-         break;
-      argc = i;
-   }
-   for(i = 0; i < argc; i++)
-      conn->validateSendString(argv[i]);
-   RemoteCommandEvent *cevt = new RemoteCommandEvent(argc, argv, conn);
-   conn->postNetEvent(cevt);
-}
+
 
 ConsoleFunctionGroupBegin( Net, "Functions for use with the network; tagged strings and remote commands.");
 
+
 ConsoleFunction( commandToServer, void, 2, RemoteCommandEvent::MaxRemoteCommandArgs + 1, "(string func, ...)"
-   "@brief Send a command to the server.\n\n"
+	"@brief Send a command to the server.\n\n"
 
    "@param func Name of the server command being called\n"
    "@param ... Various parameters being passed to server command\n\n"
@@ -237,7 +251,7 @@ ConsoleFunction( commandToServer, void, 2, RemoteCommandEvent::MaxRemoteCommandA
    NetConnection *conn = NetConnection::getConnectionToServer();
    if(!conn)
       return;
-   sendRemoteCommand(conn, argc - 1, argv + 1);
+   RemoteCommandEvent::sendRemoteCommand(conn, argc - 1, argv + 1);
 }
 
 ConsoleFunction( commandToClient, void, 3, RemoteCommandEvent::MaxRemoteCommandArgs + 2, "(NetConnection client, string func, ...)"
@@ -274,11 +288,14 @@ ConsoleFunction( commandToClient, void, 3, RemoteCommandEvent::MaxRemoteCommandA
    NetConnection *conn;
    if(!Sim::findObject(argv[1], conn))
       return;
-   sendRemoteCommand(conn, argc - 2, argv + 2);
+   RemoteCommandEvent::sendRemoteCommand(conn, argc - 2, argv + 2);
 }
 
 
-ConsoleFunction(removeTaggedString, void, 2, 2, "(int tag)"
+
+
+
+DefineEngineFunction(removeTaggedString, void, (S32 tag), (-1),
    "@brief Remove a tagged string from the Net String Table\n\n"
 
    "@param tag The tag associated with the string\n\n"
@@ -287,11 +304,11 @@ ConsoleFunction(removeTaggedString, void, 2, 2, "(int tag)"
    "@see addTaggedString()\n"
    "@see getTaggedString()\n"
    "@ingroup Networking\n")
-{
-   gNetStringTable->removeString(dAtoi(argv[1]+1), true);
-}
+	{
+	RemoteCommandEvent::removeTaggedString(tag);
+	}
 
-ConsoleFunction( addTaggedString, const char*, 2, 2, "(string str)"
+DefineEngineFunction(addTaggedString, const char* , (const char* str), (""),
    "@brief Use the addTaggedString function to tag a new string and add it to the NetStringTable\n\n"
 
    "@param str The string to be tagged and placed in the NetStringTable. Tagging ignores case, "
@@ -303,17 +320,13 @@ ConsoleFunction( addTaggedString, const char*, 2, 2, "(string str)"
    "@see removeTaggedString()\n"
    "@see getTaggedString()\n"
    "@ingroup Networking\n")
-{
-   NetStringHandle s(argv[1]);
-   gNetStringTable->incStringRefScript(s.getIndex());
+	{
+	return RemoteCommandEvent::addTaggedString(str);
+	}
 
-   char *ret = Con::getReturnBuffer(10);
-   ret[0] = StringTagPrefixByte;
-   dSprintf(ret + 1, 9, "%d", s.getIndex());
-   return ret;
-}
 
-ConsoleFunction( getTaggedString, const char*, 2, 2, "(int tag)"
+
+DefineEngineFunction(getTaggedString, const char* , (const char *tag), (""),
    "@brief Use the getTaggedString function to convert a tag to a string.\n\n"
 
    "This is not the same as detag() which can only be used within the context "
@@ -328,12 +341,11 @@ ConsoleFunction( getTaggedString, const char*, 2, 2, "(int tag)"
    "@see addTaggedString()\n"
    "@see removeTaggedString()\n"
    "@ingroup Networking\n")
-{
-   const char *indexPtr = argv[1];
-   if (*indexPtr == StringTagPrefixByte)
-      indexPtr++;
-   return gNetStringTable->lookupString(dAtoi(indexPtr));
-}
+	{
+	return RemoteCommandEvent::getTaggedString(tag);
+	}
+
+
 
 ConsoleFunction( buildTaggedString, const char*, 2, 11, "(string format, ...)"
    "@brief Build a string using the specified tagged string format.\n\n"
diff --git a/Engine/source/app/net/net.h b/Engine/source/app/net/net.h
new file mode 100644
index 000000000..c0aee9335
--- /dev/null
+++ b/Engine/source/app/net/net.h
@@ -0,0 +1,59 @@
+#ifndef _NET_H_
+#define _NET_H_
+
+#include "platform/platform.h"
+#include "core/dnet.h"
+#include "core/idGenerator.h"
+#include "core/stream/bitStream.h"
+#include "console/simBase.h"
+#include "console/console.h"
+#include "console/consoleTypes.h"
+#include "sim/netConnection.h"
+#include "sim/netObject.h"
+#include "app/net/serverQuery.h"
+#include "console/engineAPI.h"
+
+class RemoteCommandEvent : public NetEvent // net event carrying a script remote procedure call (commandToServer / commandToClient)
+{
+public:
+   typedef NetEvent Parent;
+   enum {
+      MaxRemoteCommandArgs = 20,   // upper bound used for the commandToServer/commandToClient argument counts
+      CommandArgsBits = 5          // bit width used when the argument count is written to / read from the stream
+   };
+
+private:
+   S32 mArgc;                                        // number of arguments held by this event
+   char *mArgv[MaxRemoteCommandArgs + 1];            // slots 1..mArgc are released with dFree() in the destructor
+   NetStringHandle mTagv[MaxRemoteCommandArgs + 1];
+   static char mBuf[1024];
+
+public:
+   RemoteCommandEvent(S32 argc=0, const char **argv=NULL, NetConnection *conn = NULL);
+
+#ifdef TORQUE_DEBUG_NET
+   const char *getDebugName();
+#endif
+
+   ~RemoteCommandEvent();
+   /// Writes the argument count (CommandArgsBits bits) followed by the arguments.
+   virtual void pack(NetConnection* conn, BitStream *bstream);
+   /// Forwards to pack().
+   virtual void write(NetConnection* conn, BitStream *bstream);
+   /// Reads the argument count (CommandArgsBits bits) and the arguments back from the stream.
+   virtual void unpack(NetConnection* conn, BitStream *bstream);
+
+   virtual void process(NetConnection *conn);
+   /// Posts a new RemoteCommandEvent on conn; argv[0] must be a tagged string, trailing empty arguments are dropped.
+   static void sendRemoteCommand(NetConnection *conn, S32 argc, const char **argv);
+   /// Removes the given tag from gNetStringTable; a tag of 0 is ignored.
+   static void removeTaggedString(S32);
+   /// Adds a script reference for str in gNetStringTable and returns its tag text (prefix byte + decimal index).
+   static const char* addTaggedString(const char* str);
+   /// Looks up the string for a tag; the leading prefix byte is optional.
+   static const char* getTaggedString(const char* tag);
+
+   DECLARE_CONOBJECT(RemoteCommandEvent);
+};
+
+#endif
\ No newline at end of file
diff --git a/Engine/source/app/version.h b/Engine/source/app/version.h
index 02f9a1238..9a6bdff8d 100644
--- a/Engine/source/app/version.h
+++ b/Engine/source/app/version.h
@@ -41,10 +41,10 @@
 /// code version, the game name, and which type of game it is (TGB, TGE, TGEA, etc.).
 ///
 /// Version number is major * 1000 + minor * 100 + revision * 10.
-#define TORQUE_GAME_ENGINE          3000
+#define TORQUE_GAME_ENGINE          3500
 
 /// Human readable engine version string.
-#define TORQUE_GAME_ENGINE_VERSION_STRING  "3.0"
+#define TORQUE_GAME_ENGINE_VERSION_STRING  "3.5"
 
 /// Gets the engine version number.  The version number is specified as a global in version.cc
 U32 getVersionNumber();
diff --git a/Engine/source/console/arrayObject.cpp b/Engine/source/console/arrayObject.cpp
index 4d9a565f6..28f38868c 100644
--- a/Engine/source/console/arrayObject.cpp
+++ b/Engine/source/console/arrayObject.cpp
@@ -76,7 +76,7 @@ S32 QSORT_CALLBACK ArrayObject::_valueNumCompare( const void* a, const void* b )
    F32 bCol = dAtof(eb->value);
    F32 result = aCol - bCol;
    S32 res = result < 0 ? -1 : (result > 0 ? 1 : 0);
-   return ( smDecreasing ? res : -res );
+   return ( smDecreasing ? -res : res );
 }
 
 S32 QSORT_CALLBACK ArrayObject::_keyCompare( const void* a, const void* b )
@@ -95,7 +95,7 @@ S32 QSORT_CALLBACK ArrayObject::_keyNumCompare( const void* a, const void* b )
    const char* bCol = eb->key;
    F32 result = dAtof(aCol) - dAtof(bCol);
    S32 res = result < 0 ? -1 : (result > 0 ? 1 : 0);
-   return ( smDecreasing ? res : -res );
+   return ( smDecreasing ? -res : res );
 }
 
 S32 QSORT_CALLBACK ArrayObject::_keyFunctionCompare( const void* a, const void* b )
@@ -110,7 +110,7 @@ S32 QSORT_CALLBACK ArrayObject::_keyFunctionCompare( const void* a, const void*
    
    S32 result = dAtoi( Con::execute( 3, argv ) );
    S32 res = result < 0 ? -1 : ( result > 0 ? 1 : 0 );
-   return ( smDecreasing ? res : -res );
+   return ( smDecreasing ? -res : res );
 }
 
 S32 QSORT_CALLBACK ArrayObject::_valueFunctionCompare( const void* a, const void* b )
@@ -125,7 +125,7 @@ S32 QSORT_CALLBACK ArrayObject::_valueFunctionCompare( const void* a, const void
    
    S32 result = dAtoi( Con::execute( 3, argv ) );
    S32 res = result < 0 ? -1 : ( result > 0 ? 1 : 0 );
-   return ( smDecreasing ? res : -res );
+   return ( smDecreasing ? -res : res );
 }
 
 
diff --git a/Engine/source/console/compiledEval.cpp b/Engine/source/console/compiledEval.cpp
index e2cfcf280..37f621997 100644
--- a/Engine/source/console/compiledEval.cpp
+++ b/Engine/source/console/compiledEval.cpp
@@ -650,6 +650,15 @@ breakContinue:
                // IF we aren't looking at a local/internal object, then check if 
                // this object already exists in the global space
 
+               AbstractClassRep* rep = AbstractClassRep::findClassRep( objectName );
+               if (rep != NULL) {
+                  Con::errorf(ConsoleLogEntry::General, "%s: Cannot name object [%s] the same name as a script class.",
+                     getFileLine(ip), objectName);
+                  ip = failJump;
+                  STR.popFrame();
+                  break;
+               }
+
                SimObject *obj = Sim::findObject( objectName );
                if (obj /*&& !obj->isLocalName()*/)
                {
diff --git a/Engine/source/core/bitVector.h b/Engine/source/core/bitVector.h
index 3cd60108a..99e02222e 100644
--- a/Engine/source/core/bitVector.h
+++ b/Engine/source/core/bitVector.h
@@ -158,6 +158,9 @@ inline BitVector::BitVector( U32 sizeInBits )
 
 inline BitVector::BitVector( const BitVector &r )
 {
+   mBits     = NULL;   // start from an empty vector before copy() runs
+   mByteSize = 0;      // NOTE(review): presumably copy() reads these members before overwriting them — confirm
+   mSize = 0;
    copy(r);
 }
 
diff --git a/Engine/source/core/iTickable.cpp b/Engine/source/core/iTickable.cpp
index 532a67a42..cb98e7cb9 100644
--- a/Engine/source/core/iTickable.cpp
+++ b/Engine/source/core/iTickable.cpp
@@ -86,15 +86,34 @@ bool ITickable::advanceTime( U32 timeDelta )
 
    // Advance objects
    if( tickCount )
+   {
       for( ; smLastTick != targetTick; smLastTick += smTickMs )
-         for( ProcessListIterator i = getProcessList().begin(); i != getProcessList().end(); i++ )
-            if( (*i)->isProcessingTicks() )
-               (*i)->processTick();
+      {
+         for( U32 i=0; i < getProcessList().size(); )
+         {
+            ITickable* iTick = getProcessList()[i];
+            if( iTick->isProcessingTicks() )
+            {
+               iTick->processTick();
+
+               // Only advance counter if the tickable hasn't deleted itself
+               if( i < getProcessList().size() && iTick == getProcessList()[i] )
+                  ++i;
+            }
+            else
+            {
+               // Move onto the next tickable
+               ++i;
+            }
+         }
+      }
+   }
 
    smLastDelta = ( smTickMs - ( targetTime & smTickMask ) ) & smTickMask;
    F32 dt = smLastDelta / F32( smTickMs );
 
-   // Now interpolate objects that want ticks
+   // Now interpolate objects that want ticks.  Note that an object should never delete
+   // itself during an interpolateTick().
    for( ProcessListIterator i = getProcessList().begin(); i != getProcessList().end(); i++ )
       if( (*i)->isProcessingTicks() )
          (*i)->interpolateTick( dt );
@@ -102,8 +121,15 @@ bool ITickable::advanceTime( U32 timeDelta )
 
    // Inform ALL objects that time was advanced
    dt = F32( timeDelta ) / 1000.f;
-   for( ProcessListIterator i = getProcessList().begin(); i != getProcessList().end(); i++ )
-      (*i)->advanceTime( dt );
+   for( U32 i=0; i < getProcessList().size(); )
+   {
+      ITickable* iTick = getProcessList()[i];
+      iTick->advanceTime( dt );
+
+      // Only advance counter if the tickable hasn't deleted itself
+      if( i < getProcessList().size() && iTick == getProcessList()[i] )
+         ++i;
+   }
 
    smLastTime = targetTime;
 
diff --git a/Engine/source/core/resourceManager.cpp b/Engine/source/core/resourceManager.cpp
index 336d59243..c52891bf9 100644
--- a/Engine/source/core/resourceManager.cpp
+++ b/Engine/source/core/resourceManager.cpp
@@ -29,6 +29,8 @@
 
 #include "console/engineAPI.h"
 
+using namespace Torque;
+
 static AutoPtr< ResourceManager > smInstance;
 
 ResourceManager::ResourceManager()
diff --git a/Engine/source/core/resourceManager.h b/Engine/source/core/resourceManager.h
index ea9aa1dc2..0d6a4686b 100644
--- a/Engine/source/core/resourceManager.h
+++ b/Engine/source/core/resourceManager.h
@@ -31,8 +31,6 @@
 #include "core/util/tDictionary.h"
 #endif
 
-using namespace Torque;
-
 class ResourceManager
 {
 public:
diff --git a/Engine/source/environment/cloudLayer.cpp b/Engine/source/environment/cloudLayer.cpp
index bf99c9312..3901df5ac 100644
--- a/Engine/source/environment/cloudLayer.cpp
+++ b/Engine/source/environment/cloudLayer.cpp
@@ -400,6 +400,7 @@ void CloudLayer::_initBuffers()
    
    mVB.set( GFX, smVertCount, GFXBufferTypeStatic );   
    GFXCloudVertex *pVert = mVB.lock(); 
+   if(!pVert) return;
 
    for ( U32 y = 0; y < smVertStride; y++ )
    {
diff --git a/Engine/source/environment/decalRoad.cpp b/Engine/source/environment/decalRoad.cpp
index 822d13ef2..7bd240095 100644
--- a/Engine/source/environment/decalRoad.cpp
+++ b/Engine/source/environment/decalRoad.cpp
@@ -721,7 +721,7 @@ void DecalRoad::prepRenderImage( SceneRenderState* state )
    if ( !smShowRoad && smEditorOpen )
       return;
 
-   const Frustum &frustum = state->getFrustum();
+   const Frustum &frustum = state->getCameraFrustum();
 
    MeshRenderInst coreRI;
    coreRI.clear();
@@ -1452,7 +1452,8 @@ void DecalRoad::_captureVerts()
    mPB.set( GFX, mTriangleCount * 3, 0, GFXBufferTypeStatic );
 
    // Lock the VertexBuffer
-   GFXVertexPNTBT *vertPtr = mVB.lock();   
+   GFXVertexPNTBT *vertPtr = mVB.lock();
+   if(!vertPtr) return;
    U32 vertIdx = 0;
 
    //
diff --git a/Engine/source/environment/scatterSky.cpp b/Engine/source/environment/scatterSky.cpp
index b47258db4..7fdf25296 100644
--- a/Engine/source/environment/scatterSky.cpp
+++ b/Engine/source/environment/scatterSky.cpp
@@ -748,6 +748,7 @@ void ScatterSky::_initVBIB()
 
    mVB.set( GFX, mVertCount, GFXBufferTypeStatic );
    ScatterSkyVertex *pVert = mVB.lock();
+   if(!pVert) return;
 
    for ( U32 y = 0; y < vertStride; y++ )
    {
@@ -929,7 +930,7 @@ void ScatterSky::_render( ObjectRenderInst *ri, SceneRenderState *state, BaseMat
    Point3F camPos( 0, 0, smViewerHeight );
    Point4F miscParams( camPos.z, camPos.z * camPos.z, mScale, mScale / mRayleighScaleDepth );
 
-   Frustum frust = state->getFrustum();
+   Frustum frust = state->getCameraFrustum();
    frust.setFarDist( smEarthRadius + smAtmosphereRadius );
    MatrixF proj( true );
    frust.getProjectionMatrix( &proj );
@@ -1072,6 +1073,7 @@ void ScatterSky::_renderMoon( ObjectRenderInst *ri, SceneRenderState *state, Bas
    GFXVertexBufferHandle< GFXVertexPCT > vb;
    vb.set( GFX, 4, GFXBufferTypeVolatile );
    GFXVertexPCT *pVert = vb.lock();
+   if(!pVert) return;
 
    for ( S32 i = 0; i < 4; i++ )
    {
diff --git a/Engine/source/environment/skyBox.cpp b/Engine/source/environment/skyBox.cpp
index f2154b395..a41b0a361 100644
--- a/Engine/source/environment/skyBox.cpp
+++ b/Engine/source/environment/skyBox.cpp
@@ -400,6 +400,7 @@ void SkyBox::_initRender()
    }
 
    GFXVertexPNTT *vertPtr = mVB.lock();
+   if(!vertPtr) return;
 
    dMemcpy( vertPtr, tmpVerts, sizeof ( GFXVertexPNTT ) * vertCount );
 
@@ -412,6 +413,7 @@ void SkyBox::_initRender()
       mFogBandVB.set( GFX, 48, GFXBufferTypeStatic );
 
    GFXVertexPC *bandVertPtr = mFogBandVB.lock();
+   if(!bandVertPtr) return;
 
    // Grab the fog color.
    ColorI fogColor( mLastFogColor.red * 255, mLastFogColor.green * 255, mLastFogColor.blue * 255 );
diff --git a/Engine/source/environment/sun.cpp b/Engine/source/environment/sun.cpp
index 00b6a0337..ae42f2bbc 100644
--- a/Engine/source/environment/sun.cpp
+++ b/Engine/source/environment/sun.cpp
@@ -487,6 +487,7 @@ void Sun::_renderCorona( ObjectRenderInst *ri, SceneRenderState *state, BaseMatI
    GFXVertexBufferHandle< GFXVertexPCT > vb;
    vb.set( GFX, 4, GFXBufferTypeVolatile );
    GFXVertexPCT *pVert = vb.lock();
+   if(!pVert) return;
 
    for ( S32 i = 0; i < 4; i++ )
    {
diff --git a/Engine/source/environment/waterPlane.cpp b/Engine/source/environment/waterPlane.cpp
index c0158a61f..eab66a466 100644
--- a/Engine/source/environment/waterPlane.cpp
+++ b/Engine/source/environment/waterPlane.cpp
@@ -173,7 +173,7 @@ void WaterPlane::unpackUpdate(NetConnection* con, BitStream* stream)
 
 void WaterPlane::setupVBIB( SceneRenderState *state )
 {
-   const Frustum &frustum = state->getFrustum();
+   const Frustum &frustum = state->getCullingFrustum();
    
    // Water base-color, assigned as color for all verts.
    const GFXVertexColor vertCol(mWaterFogData.color);
@@ -708,7 +708,7 @@ void WaterPlane::prepRenderImage( SceneRenderState *state )
 
    mMatrixSet->setSceneView(GFX->getWorldMatrix());
    
-   const Frustum &frustum = state->getFrustum();
+   const Frustum &frustum = state->getCameraFrustum();
 
    if ( mPrimBuff.isNull() || 
         mGenerateVB ||         
diff --git a/Engine/source/forest/forestRender.cpp b/Engine/source/forest/forestRender.cpp
index 0c7a3fb91..3fa2b3047 100644
--- a/Engine/source/forest/forestRender.cpp
+++ b/Engine/source/forest/forestRender.cpp
@@ -110,7 +110,7 @@ void Forest::prepRenderImage( SceneRenderState *state )
    // the forest, so pass down a LightQuery for it.
    LightQuery lightQuery;
    rdata.setLightQuery( &lightQuery );
-   Frustum culler = state->getFrustum();
+   Frustum culler = state->getCullingFrustum();
 
    // Adjust the far distance if the cull scale has changed.
    if ( !mIsEqual( cullScale, 1.0f ) )
diff --git a/Engine/source/forest/ts/tsForestCellBatch.cpp b/Engine/source/forest/ts/tsForestCellBatch.cpp
index 9d176a914..7667ce997 100644
--- a/Engine/source/forest/ts/tsForestCellBatch.cpp
+++ b/Engine/source/forest/ts/tsForestCellBatch.cpp
@@ -76,6 +76,8 @@ void TSForestCellBatch::_rebuildBatch()
 
    // Fill this puppy!
    ImposterState *vertPtr = mVB.lock();
+   if(!vertPtr) return;
+
    Vector<ForestItem>::const_iterator item = mItems.begin();
 
    const F32 radius = mDetail->getRadius();
diff --git a/Engine/source/forest/ts/tsForestItemData.cpp b/Engine/source/forest/ts/tsForestItemData.cpp
index 7a1bebadf..d5574587d 100644
--- a/Engine/source/forest/ts/tsForestItemData.cpp
+++ b/Engine/source/forest/ts/tsForestItemData.cpp
@@ -31,6 +31,7 @@
 #include "materials/materialManager.h"
 #include "forest/windDeformation.h"
 
+using namespace Torque;
 
 IMPLEMENT_CO_DATABLOCK_V1(TSForestItemData);
 
diff --git a/Engine/source/gfx/D3D9/gfxD3D9Shader.cpp b/Engine/source/gfx/D3D9/gfxD3D9Shader.cpp
index 87db8fe75..34b9c47d2 100644
--- a/Engine/source/gfx/D3D9/gfxD3D9Shader.cpp
+++ b/Engine/source/gfx/D3D9/gfxD3D9Shader.cpp
@@ -36,6 +36,8 @@
 #include "core/util/safeDelete.h"
 #include "console/console.h"
 
+using namespace Torque;
+
 extern bool gDisassembleAllShaders;
 
 /// D3DXInclude plugin
diff --git a/Engine/source/gfx/bitmap/gBitmap.cpp b/Engine/source/gfx/bitmap/gBitmap.cpp
index a2a64cf5f..f0e0ad11e 100644
--- a/Engine/source/gfx/bitmap/gBitmap.cpp
+++ b/Engine/source/gfx/bitmap/gBitmap.cpp
@@ -33,6 +33,7 @@
 #include "platform/profiler.h"
 #include "console/engineAPI.h"
 
+using namespace Torque;
 
 const U32 GBitmap::csFileVersion   = 3;
 
diff --git a/Engine/source/gfx/gfxCardProfile.cpp b/Engine/source/gfx/gfxCardProfile.cpp
index cdfdefc0b..7fdbc3eae 100644
--- a/Engine/source/gfx/gfxCardProfile.cpp
+++ b/Engine/source/gfx/gfxCardProfile.cpp
@@ -106,6 +106,7 @@ void GFXCardProfiler::init()
    Con::printf("   o Chipset : '%s'", getChipString().c_str());
    Con::printf("   o Card    : '%s'", getCardString().c_str());
    Con::printf("   o Version : '%s'", getVersionString().c_str());
+   Con::printf("   o VRAM    : %d MB", getVideoMemoryInMB());
 
    // Do card-specific setup...
    Con::printf("   - Scanning card capabilities...");
diff --git a/Engine/source/gfx/gfxDevice.cpp b/Engine/source/gfx/gfxDevice.cpp
index 9e57bcc81..fbf1f3c4b 100644
--- a/Engine/source/gfx/gfxDevice.cpp
+++ b/Engine/source/gfx/gfxDevice.cpp
@@ -794,6 +794,8 @@ void GFXDevice::setCubeTexture( U32 stage, GFXCubemap *texture )
    mCurrentTexture[stage] = NULL;
 }
 
+//------------------------------------------------------------------------------
+
 inline bool GFXDevice::beginScene()
 {
    AssertFatal( mCanCurrentlyRender == false, "GFXDevice::beginScene() - The scene has already begun!" );
@@ -806,8 +808,6 @@ inline bool GFXDevice::beginScene()
    return beginSceneInternal();
 }
 
-//------------------------------------------------------------------------------
-
 inline void GFXDevice::endScene()
 {
    AssertFatal( mCanCurrentlyRender == true, "GFXDevice::endScene() - The scene has already ended!" );
@@ -819,6 +819,22 @@ inline void GFXDevice::endScene()
    mDeviceStatistics.exportToConsole();
 }
 
+inline void GFXDevice::beginField()
+{
+   AssertFatal( mCanCurrentlyRender == true, "GFXDevice::beginField() - The scene has not yet begun!" );
+
+   // Send the start of field signal.
+   getDeviceEventSignal().trigger( GFXDevice::deStartOfField );
+}
+
+inline void GFXDevice::endField()
+{
+   AssertFatal( mCanCurrentlyRender == true, "GFXDevice::endField() - The scene has not yet begun!" );
+
+   // Send the end of field signal.
+   getDeviceEventSignal().trigger( GFXDevice::deEndOfField );
+}
+
 void GFXDevice::setViewport( const RectI &inRect ) 
 {
    // Clip the rect against the renderable size.
@@ -849,7 +865,7 @@ void GFXDevice::popActiveRenderTarget()
    mRTStack.pop_back();
 }
 
-void GFXDevice::setActiveRenderTarget( GFXTarget *target )
+void GFXDevice::setActiveRenderTarget( GFXTarget *target, bool updateViewport )
 {
    AssertFatal( target, 
       "GFXDevice::setActiveRenderTarget - must specify a render target!" );
@@ -878,7 +894,10 @@ void GFXDevice::setActiveRenderTarget( GFXTarget *target )
    // We should consider removing this and making it the
    // responsibility of the caller to set a proper viewport
    // when the target is changed.   
-   setViewport( RectI( Point2I::Zero, mCurrentRT->getSize() ) );
+   if ( updateViewport )
+   {
+      setViewport( RectI( Point2I::Zero, mCurrentRT->getSize() ) );
+   }
 }
 
 /// Helper class for GFXDevice::describeResources.
diff --git a/Engine/source/gfx/gfxDevice.h b/Engine/source/gfx/gfxDevice.h
index e153d02c5..44a889f66 100644
--- a/Engine/source/gfx/gfxDevice.h
+++ b/Engine/source/gfx/gfxDevice.h
@@ -209,6 +209,12 @@ public:
       
       /// The device is about to finish rendering a frame
       deEndOfFrame,
+
+      /// The device has started rendering a frame's field (such as for side-by-side rendering)
+      deStartOfField,
+
+      /// The device is about to finish rendering a frame's field
+      deEndOfField,
    };
 
    typedef Signal <bool (GFXDeviceEventType)> DeviceEventSignal;
@@ -695,7 +701,7 @@ public:
    void popActiveRenderTarget();
 
    /// Assign a new active render target.
-   void setActiveRenderTarget( GFXTarget *target );
+   void setActiveRenderTarget( GFXTarget *target, bool updateViewport=true );
 
    /// Returns the current active render target.
    inline GFXTarget* getActiveRenderTarget() { return mCurrentRT; }
@@ -735,6 +741,8 @@ public:
    virtual void clear( U32 flags, ColorI color, F32 z, U32 stencil ) = 0;
    virtual bool beginScene();
    virtual void endScene();
+   virtual void beginField();
+   virtual void endField();
 
    virtual GFXTexHandle & getFrontBuffer(){ return mFrontBuffer[mCurrentFrontBufferIdx]; }
 
diff --git a/Engine/source/gfx/gfxTextureManager.cpp b/Engine/source/gfx/gfxTextureManager.cpp
index 3f755b5e6..7978eb6da 100644
--- a/Engine/source/gfx/gfxTextureManager.cpp
+++ b/Engine/source/gfx/gfxTextureManager.cpp
@@ -35,6 +35,7 @@
 #include "console/consoleTypes.h"
 #include "console/engineAPI.h"
 
+using namespace Torque;
 
 //#define DEBUG_SPEW
 
diff --git a/Engine/source/gui/3d/guiTSControl.cpp b/Engine/source/gui/3d/guiTSControl.cpp
index 95dc960c3..4e42dbf84 100644
--- a/Engine/source/gui/3d/guiTSControl.cpp
+++ b/Engine/source/gui/3d/guiTSControl.cpp
@@ -316,6 +316,21 @@ void GuiTSCtrl::onRender(Point2I offset, const RectI &updateRect)
       return;
    }
 
+   // Set up the appropriate render style
+   U32 prevRenderStyle = GFX->getCurrentRenderStyle();
+   Point2F prevProjectionOffset = GFX->getCurrentProjectionOffset();
+   Point3F prevEyeOffset = GFX->getStereoEyeOffset();
+   if(mRenderStyle == RenderStyleStereoSideBySide)
+   {
+      GFX->setCurrentRenderStyle(GFXDevice::RS_StereoSideBySide);
+      GFX->setCurrentProjectionOffset(mLastCameraQuery.projectionOffset);
+      GFX->setStereoEyeOffset(mLastCameraQuery.eyeOffset);
+   }
+   else
+   {
+      GFX->setCurrentRenderStyle(GFXDevice::RS_Standard);
+   }
+
    if ( mReflectPriority > 0 )
    {
       // Get the total reflection priority.
@@ -338,21 +353,6 @@ void GuiTSCtrl::onRender(Point2I offset, const RectI &updateRect)
       mLastCameraQuery.cameraMatrix.mul(rotMat);
    }
 
-   // Set up the appropriate render style
-   U32 prevRenderStyle = GFX->getCurrentRenderStyle();
-   Point2F prevProjectionOffset = GFX->getCurrentProjectionOffset();
-   Point3F prevEyeOffset = GFX->getStereoEyeOffset();
-   if(mRenderStyle == RenderStyleStereoSideBySide)
-   {
-      GFX->setCurrentRenderStyle(GFXDevice::RS_StereoSideBySide);
-      GFX->setCurrentProjectionOffset(mLastCameraQuery.projectionOffset);
-      GFX->setStereoEyeOffset(mLastCameraQuery.eyeOffset);
-   }
-   else
-   {
-      GFX->setCurrentRenderStyle(GFXDevice::RS_Standard);
-   }
-
    // set up the camera and viewport stuff:
    F32 wwidth;
    F32 wheight;
diff --git a/Engine/source/gui/controls/guiMLTextCtrl.cpp b/Engine/source/gui/controls/guiMLTextCtrl.cpp
index d026bfd92..63f9c7cdd 100644
--- a/Engine/source/gui/controls/guiMLTextCtrl.cpp
+++ b/Engine/source/gui/controls/guiMLTextCtrl.cpp
@@ -121,10 +121,10 @@ DefineEngineMethod( GuiMLTextCtrl, getText, const char*, (),,
    return( object->getTextContent() );
 }
 
-DefineEngineMethod( GuiMLTextCtrl, addText, void, ( const char* text, bool reformat),,
+DefineEngineMethod( GuiMLTextCtrl, addText, void, ( const char* text, bool reformat), (true),
    "@brief Appends the text in the control with additional text. Also .\n\n"
    "@param text New text to append to the existing text.\n"
-   "@param reformat If true, the control will also be visually reset.\n"
+   "@param reformat If true, the control will also be visually reset (defaults to true).\n"
    "@tsexample\n"
    "// Define new text to add\n"
    "%text = \"New Text to Add\";\n\n"
diff --git a/Engine/source/gui/worldEditor/terrainEditor.cpp b/Engine/source/gui/worldEditor/terrainEditor.cpp
index 70c012b39..3aa17e1be 100644
--- a/Engine/source/gui/worldEditor/terrainEditor.cpp
+++ b/Engine/source/gui/worldEditor/terrainEditor.cpp
@@ -670,7 +670,7 @@ TerrainEditor::TerrainEditor() :
    mUndoSel(0),
    mGridUpdateMin( S32_MAX, S32_MAX ),
    mGridUpdateMax( 0, 0 ),
-   mMaxBrushSize(48,48),
+   mMaxBrushSize(256,256),
    mNeedsGridUpdate( false ),
    mNeedsMaterialUpdate( false ),
    mMouseDown( false )
diff --git a/Engine/source/gui/worldEditor/terrainEditor.h b/Engine/source/gui/worldEditor/terrainEditor.h
index 1a361f591..f0d03daf3 100644
--- a/Engine/source/gui/worldEditor/terrainEditor.h
+++ b/Engine/source/gui/worldEditor/terrainEditor.h
@@ -112,7 +112,7 @@ protected:
 
 public:
 
-   enum { MaxBrushDim = 40 };
+   enum { MaxBrushDim = 256 };
 
    Brush(TerrainEditor * editor);
    virtual ~Brush(){};
diff --git a/Engine/source/gui/worldEditor/worldEditor.cpp b/Engine/source/gui/worldEditor/worldEditor.cpp
index 1500e2030..338bb3c63 100644
--- a/Engine/source/gui/worldEditor/worldEditor.cpp
+++ b/Engine/source/gui/worldEditor/worldEditor.cpp
@@ -1505,7 +1505,8 @@ void WorldEditor::renderSplinePath(SimPath::Path *path)
 
    GFXVertexBufferHandle<GFXVertexPC> vb;
    vb.set(GFX, 3*batchSize, GFXBufferTypeVolatile);
-   vb.lock();
+   void *lockPtr = vb.lock();
+   if(!lockPtr) return;
 
    U32 vIdx=0;
 
@@ -1542,7 +1543,8 @@ void WorldEditor::renderSplinePath(SimPath::Path *path)
 
          // Reset for next pass...
          vIdx = 0;
-         vb.lock();
+         void *lockPtr = vb.lock();
+         if(!lockPtr) return;
       }
    }
 
diff --git a/Engine/source/lighting/advanced/advancedLightBinManager.cpp b/Engine/source/lighting/advanced/advancedLightBinManager.cpp
index 069f0e762..d8620e856 100644
--- a/Engine/source/lighting/advanced/advancedLightBinManager.cpp
+++ b/Engine/source/lighting/advanced/advancedLightBinManager.cpp
@@ -444,7 +444,7 @@ void AdvancedLightBinManager::_deleteLightMaterials()
 void AdvancedLightBinManager::_setupPerFrameParameters( const SceneRenderState *state )
 {
    PROFILE_SCOPE( AdvancedLightBinManager_SetupPerFrameParameters );
-   const Frustum &frustum = state->getFrustum();
+   const Frustum &frustum = state->getCameraFrustum();
 
    MatrixF invCam( frustum.getTransform() );
    invCam.inverse();
diff --git a/Engine/source/lighting/advanced/advancedLightBufferConditioner.cpp b/Engine/source/lighting/advanced/advancedLightBufferConditioner.cpp
index 02919e26a..5dff21fb4 100644
--- a/Engine/source/lighting/advanced/advancedLightBufferConditioner.cpp
+++ b/Engine/source/lighting/advanced/advancedLightBufferConditioner.cpp
@@ -51,7 +51,7 @@ Var *AdvancedLightBufferConditioner::_conditionOutput( Var *unconditionedOutput,
       if(getBufferFormat() == GFXFormatR16G16B16A16)
          meta->addStatement( new GenOp( "   @ = max(4.0, (float4(lightColor, specular) * NL_att + float4(bufferSample.rgb, 0.0)) / 4.0);\r\n", outputDecl ) );
       else
-         meta->addStatement( new GenOp( "   @ = float4(lightColor, specular) * NL_att + float4(bufferSample.rgb, 0.0);\r\n", outputDecl ) );
+         meta->addStatement( new GenOp( "   @ = float4(lightColor, 0) * NL_att + float4(bufferSample.rgb, specular);\r\n", outputDecl ) );
    }
    else
    {
@@ -80,7 +80,7 @@ Var *AdvancedLightBufferConditioner::_unconditionInput( Var *conditionedInput, M
       meta->addStatement( new GenOp( "   NL_att = @.b;\r\n", conditionedInput ) );
       meta->addStatement( new GenOp( "   lightColor = DecodeLuv(float3(saturate(NL_att), @.rg * 0.62));\r\n", conditionedInput ) );
    }
-   meta->addStatement( new GenOp( "   specular = max(@.a / NL_att, 0.00001f);\r\n", conditionedInput ) );
+   meta->addStatement( new GenOp( "   specular = @.a;\r\n", conditionedInput ) );
 
    return NULL;
 }
diff --git a/Engine/source/lighting/advanced/glsl/advancedLightingFeaturesGLSL.cpp b/Engine/source/lighting/advanced/glsl/advancedLightingFeaturesGLSL.cpp
index b46f46cb6..1da348f03 100644
--- a/Engine/source/lighting/advanced/glsl/advancedLightingFeaturesGLSL.cpp
+++ b/Engine/source/lighting/advanced/glsl/advancedLightingFeaturesGLSL.cpp
@@ -517,6 +517,12 @@ void DeferredPixelSpecularGLSL::processPix(  Vector<ShaderComponent*> &component
       specPow->constSortPos = cspPotentialPrimitive;
    }
 
+   Var *specStrength = new Var;
+   specStrength->setType( "float" );
+   specStrength->setName( "specularStrength" );
+   specStrength->uniform = true;
+   specStrength->constSortPos = cspPotentialPrimitive;
+
    Var *constSpecPow = new Var;
    constSpecPow->setType( "float" );
    constSpecPow->setName( "constantSpecularPower" );
@@ -527,7 +533,7 @@ void DeferredPixelSpecularGLSL::processPix(  Vector<ShaderComponent*> &component
    AssertFatal( lightInfoSamp, "Something hosed the deferred features! Can't find lightInfoSample" );
 
    // (a^m)^n = a^(m*n)
-   meta->addStatement( new GenOp( "   @ = pow(d_specular, ceil(@ / @)) * d_NL_Att;\r\n", specDecl, specPow, constSpecPow ) );
+   meta->addStatement( new GenOp( "   @ = pow(d_specular, ceil(@ / @)) * @;\r\n", specDecl, specPow, constSpecPow, specStrength ) );
 
    LangElement *specMul = new GenOp( "@ * @", specCol, specular );
    LangElement *final = specMul;
@@ -539,8 +545,7 @@ void DeferredPixelSpecularGLSL::processPix(  Vector<ShaderComponent*> &component
       final = new GenOp( "@ * @.a", final, bumpSample );
    }
 
-   // add to color
-   meta->addStatement( new GenOp( "   @;\r\n", assignColor( final, Material::Add ) ) );
+   meta->addStatement( new GenOp( "   @;\r\n", assignColor( final, Material::Add ) ) ); // add to color
 
    output = meta;
 }
diff --git a/Engine/source/lighting/advanced/hlsl/advancedLightingFeaturesHLSL.cpp b/Engine/source/lighting/advanced/hlsl/advancedLightingFeaturesHLSL.cpp
index d9893c8cd..c2bf42834 100644
--- a/Engine/source/lighting/advanced/hlsl/advancedLightingFeaturesHLSL.cpp
+++ b/Engine/source/lighting/advanced/hlsl/advancedLightingFeaturesHLSL.cpp
@@ -465,6 +465,12 @@ void DeferredPixelSpecularHLSL::processPix(  Vector<ShaderComponent*> &component
       specPow->constSortPos = cspPotentialPrimitive;
    }
 
+   Var *specStrength = new Var;
+   specStrength->setType( "float" );
+   specStrength->setName( "specularStrength" );
+   specStrength->uniform = true;
+   specStrength->constSortPos = cspPotentialPrimitive;
+
    Var *lightInfoSamp = (Var *)LangElement::find( "lightInfoSample" );
    Var *d_specular = (Var*)LangElement::find( "d_specular" );
    Var *d_NL_Att = (Var*)LangElement::find( "d_NL_Att" );
@@ -474,7 +480,7 @@ void DeferredPixelSpecularHLSL::processPix(  Vector<ShaderComponent*> &component
 
    // (a^m)^n = a^(m*n)
    meta->addStatement( new GenOp( "   @ = pow( @, ceil(@ / AL_ConstantSpecularPower)) * @;\r\n", 
-      specDecl, d_specular, specPow, d_NL_Att ) );
+      specDecl, d_specular, specPow, specStrength ) );
 
    LangElement *specMul = new GenOp( "float4( @.rgb, 0 ) * @", specCol, specular );
    LangElement *final = specMul;
diff --git a/Engine/source/lighting/common/projectedShadow.cpp b/Engine/source/lighting/common/projectedShadow.cpp
index de3a31f8f..ed4a7b903 100644
--- a/Engine/source/lighting/common/projectedShadow.cpp
+++ b/Engine/source/lighting/common/projectedShadow.cpp
@@ -324,7 +324,7 @@ bool ProjectedShadow::_updateDecal( const SceneRenderState *state )
    bool shouldClip = lightDirChanged || hasMoved || hasScaled;
 
    // Now, check and see if the object is visible.
-   const Frustum &frust = state->getFrustum();
+   const Frustum &frust = state->getCullingFrustum();
    if ( frust.isCulled( SphereF( mDecalInstance->mPosition, mDecalInstance->mSize * mDecalInstance->mSize ) ) && !shouldClip )
       return false;
 
diff --git a/Engine/source/lighting/shadowMap/pssmLightShadowMap.cpp b/Engine/source/lighting/shadowMap/pssmLightShadowMap.cpp
index e8bf8df31..71a4fe585 100644
--- a/Engine/source/lighting/shadowMap/pssmLightShadowMap.cpp
+++ b/Engine/source/lighting/shadowMap/pssmLightShadowMap.cpp
@@ -207,7 +207,7 @@ void PSSMLightShadowMap::_render(   RenderPassManager* renderPass,
       _setNumSplits( params->numSplits, texSize );
    mLogWeight = params->logWeight;
 
-   Frustum fullFrustum( diffuseState->getFrustum() );
+   Frustum fullFrustum( diffuseState->getCameraFrustum() );
    fullFrustum.cropNearFar(fullFrustum.getNearDist(), params->shadowDistance);
 
    GFXFrustumSaver frustSaver;
@@ -223,7 +223,7 @@ void PSSMLightShadowMap::_render(   RenderPassManager* renderPass,
 
    // Calculate our standard light matrices
    MatrixF lightMatrix;
-   calcLightMatrices( lightMatrix, diffuseState->getFrustum() );
+   calcLightMatrices( lightMatrix, diffuseState->getCameraFrustum() );
    lightMatrix.inverse();
    MatrixF lightViewProj = GFX->getProjectionMatrix() * lightMatrix;
 
diff --git a/Engine/source/lighting/shadowMap/singleLightShadowMap.cpp b/Engine/source/lighting/shadowMap/singleLightShadowMap.cpp
index 3d13f245c..e8dc240a2 100644
--- a/Engine/source/lighting/shadowMap/singleLightShadowMap.cpp
+++ b/Engine/source/lighting/shadowMap/singleLightShadowMap.cpp
@@ -67,7 +67,7 @@ void SingleLightShadowMap::_render( RenderPassManager* renderPass,
    GFXTransformSaver saver;
 
    MatrixF lightMatrix;
-   calcLightMatrices( lightMatrix, diffuseState->getFrustum() );
+   calcLightMatrices( lightMatrix, diffuseState->getCameraFrustum() );
    lightMatrix.inverse();
    GFX->setWorldMatrix(lightMatrix);
 
diff --git a/Engine/source/materials/matInstance.cpp b/Engine/source/materials/matInstance.cpp
index b7be80f3e..a61b75bf7 100644
--- a/Engine/source/materials/matInstance.cpp
+++ b/Engine/source/materials/matInstance.cpp
@@ -242,7 +242,7 @@ void MatInstance::construct()
 {
    mUserObject = NULL;
    mCurPass = -1;
-   mProcessedMaterial = false;
+   mProcessedMaterial = NULL;
    mVertexFormat = NULL;
    mMaxStages = 1;
    mActiveParameters = NULL;
diff --git a/Engine/source/materials/materialDefinition.cpp b/Engine/source/materials/materialDefinition.cpp
index 718ca7d08..e654a3700 100644
--- a/Engine/source/materials/materialDefinition.cpp
+++ b/Engine/source/materials/materialDefinition.cpp
@@ -116,6 +116,7 @@ Material::Material()
       mSpecular[i].set( 1.0f, 1.0f, 1.0f, 1.0f );
 
       mSpecularPower[i] = 8.0f;
+      mSpecularStrength[i] = 1.0f;
       mPixelSpecular[i] = false;
 
       mParallaxScale[i] = 0.0f;
@@ -239,7 +240,10 @@ void Material::initPersistFields()
          "The color of the specular highlight when not using a specularMap." );
 
       addField("specularPower", TypeF32, Offset(mSpecularPower, Material), MAX_STAGES,
-         "The intensity of the specular highlight when not using a specularMap." );
+         "The hardness of the specular highlight when not using a specularMap." );
+
+      addField("specularStrength", TypeF32, Offset(mSpecularStrength, Material), MAX_STAGES,
+         "The strength of the specular highlight when not using a specularMap." );
 
       addField("pixelSpecular", TypeBool, Offset(mPixelSpecular, Material), MAX_STAGES, 
          "This enables per-pixel specular highlights controlled by the alpha channel of the "
diff --git a/Engine/source/materials/materialDefinition.h b/Engine/source/materials/materialDefinition.h
index cf867bdba..b526bcb06 100644
--- a/Engine/source/materials/materialDefinition.h
+++ b/Engine/source/materials/materialDefinition.h
@@ -223,6 +223,7 @@ public:
    ColorF mSpecular[MAX_STAGES];
 
    F32 mSpecularPower[MAX_STAGES];
+   F32 mSpecularStrength[MAX_STAGES];
    bool mPixelSpecular[MAX_STAGES];
 
    bool mVertLit[MAX_STAGES];
diff --git a/Engine/source/materials/processedMaterial.cpp b/Engine/source/materials/processedMaterial.cpp
index 1a18ef225..34ce285a0 100644
--- a/Engine/source/materials/processedMaterial.cpp
+++ b/Engine/source/materials/processedMaterial.cpp
@@ -217,13 +217,13 @@ void ProcessedMaterial::_initPassStateBlock( RenderPassData *rpd, GFXStateBlockD
       _setBlendState( rpd->mBlendOp, result );
    }
 
-   if (mMaterial->isDoubleSided())
+   if (mMaterial && mMaterial->isDoubleSided())
    {
       result.cullDefined = true;
       result.cullMode = GFXCullNone;         
    }
 
-   if(mMaterial->mAlphaTest)
+   if(mMaterial && mMaterial->mAlphaTest)
    {
       result.alphaDefined = true;
       result.alphaTestEnable = mMaterial->mAlphaTest;
@@ -235,7 +235,7 @@ void ProcessedMaterial::_initPassStateBlock( RenderPassData *rpd, GFXStateBlockD
    NamedTexTarget *texTarget;
 
    U32 maxAnisotropy = 1;
-   if ( mMaterial->mUseAnisotropic[ rpd->mStageNum ] )
+   if (mMaterial &&  mMaterial->mUseAnisotropic[ rpd->mStageNum ] )
       maxAnisotropy = MATMGR->getDefaultAnisotropy();
 
    for( U32 i=0; i < rpd->mNumTex; i++ )
diff --git a/Engine/source/materials/processedShaderMaterial.cpp b/Engine/source/materials/processedShaderMaterial.cpp
index db6e70803..01741f496 100644
--- a/Engine/source/materials/processedShaderMaterial.cpp
+++ b/Engine/source/materials/processedShaderMaterial.cpp
@@ -54,6 +54,7 @@ void ShaderConstHandles::init( GFXShader *shader, CustomMaterial* mat /*=NULL*/
    mToneMapTexSC = shader->getShaderConstHandle(ShaderGenVars::toneMap);
    mSpecularColorSC = shader->getShaderConstHandle(ShaderGenVars::specularColor);
    mSpecularPowerSC = shader->getShaderConstHandle(ShaderGenVars::specularPower);
+   mSpecularStrengthSC = shader->getShaderConstHandle(ShaderGenVars::specularStrength);
    mParallaxInfoSC = shader->getShaderConstHandle("$parallaxInfo");
    mFogDataSC = shader->getShaderConstHandle(ShaderGenVars::fogData);
    mFogColorSC = shader->getShaderConstHandle(ShaderGenVars::fogColor);
@@ -993,6 +994,7 @@ void ProcessedShaderMaterial::_setShaderConstants(SceneRenderState * state, cons
 
    shaderConsts->setSafe(handles->mSpecularColorSC, mMaterial->mSpecular[stageNum]);   
    shaderConsts->setSafe(handles->mSpecularPowerSC, mMaterial->mSpecularPower[stageNum]);
+   shaderConsts->setSafe(handles->mSpecularStrengthSC, mMaterial->mSpecularStrength[stageNum]);
 
    shaderConsts->setSafe(handles->mParallaxInfoSC, mMaterial->mParallaxScale[stageNum]);   
    shaderConsts->setSafe(handles->mMinnaertConstantSC, mMaterial->mMinnaertConstant[stageNum]);
@@ -1198,6 +1200,7 @@ void ProcessedShaderMaterial::setBuffers( GFXVertexBufferHandleBase *vertBuffer,
    GFXVertexBufferDataHandle instVB;
    instVB.set( GFX, instFormat->getSizeInBytes(), instFormat, instCount, GFXBufferTypeVolatile );
    U8 *dest = instVB.lock();
+   if(!dest) return;
    dMemcpy( dest, mInstancingState->getBuffer(), instFormat->getSizeInBytes() * instCount );
    instVB.unlock();
 
@@ -1256,7 +1259,7 @@ MaterialParameterHandle* ProcessedShaderMaterial::getMaterialParameterHandle(con
 /// This is here to deal with the differences between ProcessedCustomMaterials and ProcessedShaderMaterials.
 GFXShaderConstBuffer* ProcessedShaderMaterial::_getShaderConstBuffer( const U32 pass )
 {   
-   if (pass < mPasses.size())
+   if (mCurrentParams && pass < mPasses.size())
    {
       return static_cast<ShaderMaterialParameters*>(mCurrentParams)->getBuffer(pass);
    }
diff --git a/Engine/source/materials/processedShaderMaterial.h b/Engine/source/materials/processedShaderMaterial.h
index 3858204bb..a39e8df75 100644
--- a/Engine/source/materials/processedShaderMaterial.h
+++ b/Engine/source/materials/processedShaderMaterial.h
@@ -46,6 +46,7 @@ public:
    GFXShaderConstHandle* mTexMatSC;
    GFXShaderConstHandle* mSpecularColorSC;
    GFXShaderConstHandle* mSpecularPowerSC;
+   GFXShaderConstHandle* mSpecularStrengthSC;
    GFXShaderConstHandle* mParallaxInfoSC;
    GFXShaderConstHandle* mFogDataSC;
    GFXShaderConstHandle* mFogColorSC;   
diff --git a/Engine/source/math/mEase.h b/Engine/source/math/mEase.h
index 140a468d9..46ded135b 100644
--- a/Engine/source/math/mEase.h
+++ b/Engine/source/math/mEase.h
@@ -278,7 +278,7 @@ inline F32 mEaseOutElastic(F32 t, F32 b, F32 c, F32 d, F32 a, F32 p) {
 	F32 s;
 	if (a < mFabs(c)) { a=c; s=p/4; }
 	else s = p/(2*M_PI_F) * mAsin (c/a);
-	return a*mPow(2,-10*t) * mAsin( (t*d-s)*(2*M_PI_F)/p ) + c + b;
+	return a*mPow(2,-10*t) * mSin( (t*d-s)*(2*M_PI_F)/p ) + c + b;
 };
 
 inline F32 mEaseInOutElastic(F32 t, F32 b, F32 c, F32 d, F32 a, F32 p) {
diff --git a/Engine/source/math/mRect.h b/Engine/source/math/mRect.h
index 8b4fb6249..61985343b 100644
--- a/Engine/source/math/mRect.h
+++ b/Engine/source/math/mRect.h
@@ -405,7 +405,7 @@ inline bool RectF::intersectTriangle(const Point2F &a, const Point2F &b, const P
    // 3 point plus 12 edge tests.
 
    // Check each triangle point to see if it's in us.
-   if(contains(a) || contains(b) || contains(b))
+   if(contains(a) || contains(b) || contains(c))
       return true;
 
    // Check a-b against the rect.
diff --git a/Engine/source/math/util/frustum.cpp b/Engine/source/math/util/frustum.cpp
index 7cff16e4c..f95814b3f 100644
--- a/Engine/source/math/util/frustum.cpp
+++ b/Engine/source/math/util/frustum.cpp
@@ -228,6 +228,100 @@ void Frustum::cropNearFar(F32 newNearDist, F32 newFarDist)
 
 //-----------------------------------------------------------------------------
 
+bool Frustum::bakeProjectionOffset()
+{
+   // Bakes mProjectionOffset into the near-plane extents; nothing to bake if ortho
+   if( mIsOrtho )
+      return false;
+
+   // Nothing to bake if no offset
+   if( mProjectionOffset.isZero() )
+      return false;
+
+   // Near plane points in camera space (x = left/right, y = distance, z = top/bottom)
+   Point3F np[4];
+   np[0].set( mNearLeft, mNearDist, mNearTop );       // NearTopLeft
+   np[1].set( mNearRight, mNearDist, mNearTop );      // NearTopRight
+   np[2].set( mNearLeft, mNearDist, mNearBottom );    // NearBottomLeft
+   np[3].set( mNearRight, mNearDist, mNearBottom );   // NearBottomRight
+
+   // Generate the near plane
+   PlaneF nearPlane( np[0], np[1], np[3] );
+
+   // Far plane points in camera space
+   const F32 farOverNear = mFarDist / mNearDist;
+   Point3F fp0( mNearLeft * farOverNear, mFarDist, mNearTop * farOverNear );     // FarTopLeft
+   Point3F fp1( mNearRight * farOverNear, mFarDist, mNearTop * farOverNear );    // FarTopRight
+   Point3F fp2( mNearLeft * farOverNear, mFarDist, mNearBottom * farOverNear );  // FarBottomLeft
+   Point3F fp3( mNearRight * farOverNear, mFarDist, mNearBottom * farOverNear ); // FarBottomRight
+
+   // Generate the far plane
+   PlaneF farPlane( fp0, fp1, fp3 );
+
+   // The offset camera point (offset.y is a vertical shift, so it goes on z)
+   Point3F offsetCamera( mProjectionOffset.x, 0.0f, mProjectionOffset.y );
+
+   // The near plane point we'll be using for our calculations below
+   U32 nIndex = 0;
+   if( mProjectionOffset.x < 0.0 )
+   {
+      // Offset to the left so we'll need to use the near plane point on the right
+      nIndex = 1;
+   }
+   if( mProjectionOffset.y > 0.0 )
+   {
+      // Offset to the top so we'll need to use the near plane point at the bottom
+      nIndex += 2;
+   }
+
+   // Begin by calculating the offset point on the far plane as it goes
+   // from the offset camera to the edge of the near plane.
+   Point3F farPoint;
+   Point3F fdir = np[nIndex] - offsetCamera;
+   fdir.normalize();
+   if( farPlane.intersect(offsetCamera, fdir, &farPoint) )
+   {
+      // Calculate the new near plane edge from the non-offset camera position
+      // to the far plane point from above.
+      Point3F nearPoint;
+      Point3F ndir = farPoint;
+      ndir.normalize();
+      if( nearPlane.intersect( Point3F::Zero, ndir, &nearPoint) )
+      {
+         // Handle a x offset
+         if( mProjectionOffset.x < 0.0 )
+         {
+            // The new near plane right side
+            mNearRight = nearPoint.x;
+         }
+         else if( mProjectionOffset.x > 0.0 )
+         {
+            // The new near plane left side
+            mNearLeft = nearPoint.x;
+         }
+
+         // Handle a y offset; top/bottom live on z here, nearPoint.y is just the near distance
+         if( mProjectionOffset.y < 0.0 )
+         {
+            // The new near plane top side
+            mNearTop = nearPoint.z;
+         }
+         else if( mProjectionOffset.y > 0.0 )
+         {
+            // The new near plane bottom side
+            mNearBottom = nearPoint.z;
+         }
+      }
+   }
+
+   mDirty = true;
+
+   // Report that an offset was processed (returned even if an intersection above failed)
+   return true;
+}
+
+//-----------------------------------------------------------------------------
+
 void FrustumData::_update() const
 {
    if( !mDirty )
diff --git a/Engine/source/math/util/frustum.h b/Engine/source/math/util/frustum.h
index 869abcf47..0f71063d9 100644
--- a/Engine/source/math/util/frustum.h
+++ b/Engine/source/math/util/frustum.h
@@ -284,7 +284,10 @@ class Frustum : public PolyhedronImpl< FrustumData >
             ( mNearTop == frustum.mNearTop ) &&
             ( mNearBottom == frustum.mNearBottom ) &&
             ( mNearDist == frustum.mNearDist ) &&
-            ( mFarDist == frustum.mFarDist ) );
+            ( mFarDist == frustum.mFarDist ) &&
+            ( mProjectionOffset.x == frustum.mProjectionOffset.x ) &&
+            ( mProjectionOffset.y == frustum.mProjectionOffset.y ) );
+
       }
       bool operator!=( const Frustum& frustum ) const { return !( *this == frustum ); }
 
@@ -409,6 +412,9 @@ class Frustum : public PolyhedronImpl< FrustumData >
       /// points typically used for early rejection.
       const Box3F& getBounds() const { _update(); return mBounds; }
 
+      // Does the frustum have a projection offset?
+      bool hasProjectionOffset() const { return !mProjectionOffset.isZero(); }
+
       /// Get the offset used when calculating the projection matrix
       const Point2F& getProjectionOffset() const { return mProjectionOffset; }
 
@@ -421,6 +427,10 @@ class Frustum : public PolyhedronImpl< FrustumData >
       /// Clear any offset used when calculating the projection matrix
       void clearProjectionOffset() { mProjectionOffset.zero(); mProjectionOffsetMatrix.identity(); }
 
+      /// Enlarges the frustum to contain the planes generated by a projection offset, if any.
+      /// Used by scene culling to ensure that all objects are contained within the asymmetrical frustum.
+      bool bakeProjectionOffset();
+
       /// Generates a projection matrix from the frustum.
       void getProjectionMatrix( MatrixF *proj, bool gfxRotate=true ) const;
 
diff --git a/Engine/source/navigation/duDebugDrawTorque.cpp b/Engine/source/navigation/duDebugDrawTorque.cpp
new file mode 100644
index 000000000..3692e50ee
--- /dev/null
+++ b/Engine/source/navigation/duDebugDrawTorque.cpp
@@ -0,0 +1,244 @@
+//-----------------------------------------------------------------------------
+// Copyright (c) 2013 GarageGames, LLC
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to
+// deal in the Software without restriction, including without limitation the
+// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+// sell copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+// IN THE SOFTWARE.
+//-----------------------------------------------------------------------------
+
+#include "torqueRecast.h"
+#include "duDebugDrawTorque.h"
+
+#include "gfx/gfxDevice.h"
+#include "gfx/primBuilder.h"
+#include "gfx/gfxStateBlock.h"
+
+/// @class duDebugDrawTorque
+/// This class uses the primitive builder (gfx/primBuilder.h) to render navmeshes
+/// and other Recast data. To facilitate the primbuilder's requirement to know
+/// the number of vertices to render beforehand, this class stores all vertices
+/// in a buffer of its own, then passes on that known-size buffer.
+/// This means that you only need to call the duDebugDraw functions when your
+/// data changes. At other times, you can cache the duDebugDrawTorque object
+/// and call its render() method, which actually renders its buffered data.
+
+duDebugDrawTorque::duDebugDrawTorque()
+{
+   // Give every scalar member a defined value before the first begin() call.
+   mOverrideColor = 0; mOverride = false; mGroup = 0;
+   mPrimType = 0; mQuadsMode = false; mVertCount = 0; mCurrColor = 0;
+}
+
+duDebugDrawTorque::~duDebugDrawTorque()
+{
+   clear();   // empties and removes every buffered primitive batch
+}
+
+void duDebugDrawTorque::depthMask(bool state)
+{
+   mDesc.setZReadWrite(state, state);   // the one flag is passed for both arguments (Z read and Z write)
+}
+
+void duDebugDrawTorque::texture(bool state)
+{   // Intentionally empty: this drawer ignores the texturing toggle.
+}
+
+/// Start a new primitive batch; subsequent vertex() calls append to it.
+/// @param prim [in] primitive type to draw, one of duDebugDrawPrimitives.
+/// @param size [in] point size / line width requested by the caller; not used here.
+void duDebugDrawTorque::begin(duDebugDrawPrimitives prim, float size)
+{
+   // Reset the per-batch state; quads are recorded as triangle lists.
+   mVertCount = 0;
+   mCurrColor = -1;
+   mQuadsMode = (prim == DU_DRAW_QUADS);
+   if(prim == DU_DRAW_POINTS)
+      mPrimType = GFXPointList;
+   else if(prim == DU_DRAW_LINES)
+      mPrimType = GFXLineList;
+   else if(prim == DU_DRAW_TRIS || prim == DU_DRAW_QUADS)
+      mPrimType = GFXTriangleList;
+   else
+      mPrimType = 0;
+   mDesc.setCullMode(GFXCullNone);
+   mDesc.setBlend(true);
+   mBuffers.push_back(Buffer(mPrimType));
+   mBuffers.last().group = mGroup;
+}
+
+void duDebugDrawTorque::beginGroup(U32 group)
+{
+   mGroup = group;   // buffers created by later begin() calls are tagged with this group
+}
+
+/// Submit a vertex; forwards to the x/y/z overload.
+/// @param pos [in] position of the verts; elements 0..2 are read as x, y, z.
+/// @param color [in] color of the verts.
+void duDebugDrawTorque::vertex(const float* pos, unsigned int color)
+{
+   vertex(pos[0], pos[1], pos[2], color);
+}
+
+/// Submit a vertex. Every position is recorded as (x, -z, y).
+/// @param x,y,z [in] position of the verts.
+/// @param color [in] color of the verts; in quads mode the 4th vertex's color is used for the whole quad.
+void duDebugDrawTorque::vertex(const float x, const float y, const float z, unsigned int color)
+{
+   if(mQuadsMode)
+   {
+      if(mVertCount == 3)   // three corners are stored, this call supplies the fourth
+      {
+         _vertex(x, -z, y, color);                                     // triangle 1: v3
+         _vertex(mStore[0][0], mStore[0][1], mStore[0][2], color);     //             v0
+         _vertex(mStore[1][0], mStore[1][1], mStore[1][2], color);     //             v1
+         _vertex(mStore[1][0], mStore[1][1], mStore[1][2], color);     // triangle 2: v1
+         _vertex(mStore[2][0], mStore[2][1], mStore[2][2], color);     //             v2
+         _vertex(x, -z, y, color);                                     //             v3
+         mVertCount = 0;   // start collecting the next quad
+      }
+      else
+      {
+         mStore[mVertCount][0] = x;    // stash the corner (already axis-swapped)
+         mStore[mVertCount][1] = -z;
+         mStore[mVertCount][2] = y;
+         mVertCount++;
+      }
+   }
+   else 
+   {
+      _vertex(x, -z, y, color);   // points, lines and triangles are recorded immediately
+   }
+}
+
+/// Submit a vertex; the uv argument is ignored.
+/// @param pos [in] position of the verts; elements 0..2 are read as x, y, z.
+/// @param color [in] color of the verts.
+void duDebugDrawTorque::vertex(const float* pos, unsigned int color, const float* uv)
+{
+   vertex(pos[0], pos[1], pos[2], color);
+}
+
+/// Submit a vertex; the u and v arguments are ignored.
+/// @param x,y,z [in] position of the verts.
+/// @param color [in] color of the verts.
+void duDebugDrawTorque::vertex(const float x, const float y, const float z, unsigned int color, const float u, const float v)
+{
+   vertex(x, y, z, color);
+}
+
+/// Push a vertex onto the most recently begun buffer (mBuffers.last()).
+void duDebugDrawTorque::_vertex(const float x, const float y, const float z, unsigned int color)
+{
+   // The override colour is not applied here: the submitted colour is always
+   // recorded, and renderBuffer() skips colour instructions while overriding.
+   // A colour instruction is recorded only on a colour change or for the first vertex.
+   if(mCurrColor != color || !mBuffers.last().buffer.size())
+   {
+      U8 r, g, b, a;
+      // Convert color integer to components.
+      rcCol(color, r, g, b, a);
+      mBuffers.last().buffer.push_back(Instruction(r, g, b, a));
+      mCurrColor = color;
+   }
+   // Construct vertex data.
+   mBuffers.last().buffer.push_back(Instruction(x, y, z));
+}
+
+/// End drawing primitives. Nothing to do here: drawing happens in render()/renderGroup().
+void duDebugDrawTorque::end()
+{
+}
+
+void duDebugDrawTorque::overrideColor(unsigned int col)
+{
+   mOverride = true;        // render()/renderGroup() will draw everything in one colour
+   mOverrideColor = col;    // packed colour, unpacked with rcCol() at render time
+}
+
+void duDebugDrawTorque::cancelOverride()
+{
+   mOverride = false;   // recorded per-vertex colours are used again; mOverrideColor is left as is
+}
+
+void duDebugDrawTorque::renderBuffer(Buffer &b)
+{
+   // Replay the recorded instructions through the primitive builder. The
+   // instruction count (colour entries included) is handed to begin().
+   Vector<Instruction> &instructions = b.buffer;
+   PrimBuild::begin(b.primType, instructions.size());
+   for(U32 idx = 0; idx < instructions.size(); idx++)
+   {
+      const Instruction &inst = instructions[idx];
+      if(inst.type == Instruction::POINT)
+      {
+         PrimBuild::vertex3f(inst.data.point.x,
+                             inst.data.point.y,
+                             inst.data.point.z);
+      }
+      else if(inst.type == Instruction::COLOR && !mOverride)
+      {
+         // Recorded colours are skipped while an override colour is active.
+         PrimBuild::color4i(inst.data.color.r,
+                            inst.data.color.g,
+                            inst.data.color.b,
+                            inst.data.color.a);
+      }
+   }
+   PrimBuild::end();
+}
+
+void duDebugDrawTorque::render()
+{
+   // Apply the render state accumulated in mDesc.
+   GFXStateBlockRef stateBlock = GFX->createStateBlock(mDesc);
+   GFX->setStateBlock(stateBlock);
+   if(mOverride)
+   {
+      // One colour for everything: set it once up front.
+      U8 red, green, blue, alpha;
+      rcCol(mOverrideColor, red, green, blue, alpha);
+      PrimBuild::color4i(red, green, blue, alpha);
+   }
+   // Replay every buffer regardless of its group.
+   for(U32 idx = 0; idx < mBuffers.size(); idx++)
+      renderBuffer(mBuffers[idx]);
+}
+
+void duDebugDrawTorque::renderGroup(U32 group)
+{
+   // Apply the render state accumulated in mDesc.
+   GFXStateBlockRef stateBlock = GFX->createStateBlock(mDesc);
+   GFX->setStateBlock(stateBlock);
+   if(mOverride)
+   {
+      // One colour for everything: set it once up front.
+      U8 red, green, blue, alpha;
+      rcCol(mOverrideColor, red, green, blue, alpha);
+      PrimBuild::color4i(red, green, blue, alpha);
+   }
+   // Replay only the buffers tagged with the requested group.
+   for(U32 idx = 0; idx < mBuffers.size(); idx++)
+      if(mBuffers[idx].group == group)
+         renderBuffer(mBuffers[idx]);
+}
+
+void duDebugDrawTorque::clear()
+{
+   for(U32 b = 0; b < mBuffers.size(); b++)
+      mBuffers[b].buffer.clear();   // empty each buffer's instruction list first
+   mBuffers.clear();                // then drop the buffers themselves
+}
diff --git a/Engine/source/navigation/duDebugDrawTorque.h b/Engine/source/navigation/duDebugDrawTorque.h
new file mode 100644
index 000000000..f6a1a1e3e
--- /dev/null
+++ b/Engine/source/navigation/duDebugDrawTorque.h
@@ -0,0 +1,157 @@
+//-----------------------------------------------------------------------------
+// Copyright (c) 2013 GarageGames, LLC
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to
+// deal in the Software without restriction, including without limitation the
+// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+// sell copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+// IN THE SOFTWARE.
+//-----------------------------------------------------------------------------
+
+#ifndef _DU_DEBUG_DRAW_TORQUE_H_
+#define _DU_DEBUG_DRAW_TORQUE_H_
+
+#include "core/util/tVector.h"
+#include <DebugDraw.h>
+#include "gfx/gfxStateBlock.h"
+
+/// @brief Implements the duDebugDraw interface in Torque.
+class duDebugDrawTorque: public duDebugDraw {
+public:
+   duDebugDrawTorque();
+   ~duDebugDrawTorque();
+
+   /// Enable/disable Z read and write (one flag drives both).
+   void depthMask(bool state);
+
+   /// Enable/disable texturing. Not used.
+   void texture(bool state);
+
+   /// Draw everything in a single colour instead of the recorded per-vertex colours.
+   void overrideColor(unsigned int col);
+
+   /// Stop the colour override.
+   void cancelOverride();
+
+   /// Begin drawing primitives.
+   /// @param prim [in] primitive type to draw, one of duDebugDrawPrimitives.
+   /// @param size [in] size of a primitive, applies to point size and line width only.
+   void begin(duDebugDrawPrimitives prim, float size = 1.0f);
+
+   /// All new buffers go into this group.
+   void beginGroup(U32 group);
+
+   /// Submit a vertex
+   /// @param pos [in] position of the verts.
+   /// @param color [in] color of the verts.
+   void vertex(const float* pos, unsigned int color);
+
+   /// Submit a vertex
+   /// @param x,y,z [in] position of the verts.
+   /// @param color [in] color of the verts.
+   void vertex(const float x, const float y, const float z, unsigned int color);
+
+   /// Submit a vertex (texture coordinates are accepted but not used)
+   /// @param pos [in] position of the verts.
+   /// @param color [in] color of the verts.
+   void vertex(const float* pos, unsigned int color, const float* uv);
+
+   /// Submit a vertex (texture coordinates are accepted but not used)
+   /// @param x,y,z [in] position of the verts.
+   /// @param color [in] color of the verts.
+   void vertex(const float x, const float y, const float z, unsigned int color, const float u, const float v);
+
+   /// End drawing primitives.
+   void end();
+
+   /// Render all buffered primitives.
+   void render();
+
+   /// Render buffered primitives in a group.
+   void renderGroup(U32 group);
+
+   /// Delete all buffered primitives.
+   void clear();
+      
+private:
+   GFXStateBlockDesc mDesc;   // render state applied by render()/renderGroup()
+
+   U32 mPrimType;             // GFX primitive type chosen by the last begin()
+   bool mQuadsMode;           // true while the current batch was begun as quads
+
+   U32 mVertCount;            // number of quad corners currently held in mStore
+   F32 mStore[3][3];          // first three corners of the quad being assembled
+
+   U32 mGroup;                // group assigned to buffers created by begin()
+
+   struct Instruction {
+      // Contain either a point or a color command.
+      union {
+         struct {
+            U8 r, g, b, a;
+         } color;
+         struct {
+            float x, y, z;
+         } point;
+         U32 primType;
+      } data;
+      // Which type of data do we store?
+      enum {
+         COLOR,
+         POINT,
+         PRIMTYPE,
+      } type;
+      // Construct as color instruction.
+      Instruction(U8 r, U8 g, U8 b, U8 a) {
+         type = COLOR;
+         data.color.r = r;
+         data.color.g = g;
+         data.color.b = b;
+         data.color.a = a;
+      }
+      // Construct as point.
+      Instruction(float x, float y, float z) {
+         type = POINT;
+         data.point.x = x;
+         data.point.y = y;
+         data.point.z = z;
+      }
+      Instruction(U32 t = 0) {   // primitive-type instruction; also serves as the default constructor
+         type = PRIMTYPE;
+         data.primType = t;
+      }
+   };
+
+   struct Buffer {   // one begin() batch: its instructions, primitive type and group
+      U32 group;
+      Vector<Instruction> buffer;
+      GFXPrimitiveType primType;
+      Buffer(U32 type = 0) {
+         primType = (GFXPrimitiveType)type;
+         group = 0;
+      }
+   };
+   Vector<Buffer> mBuffers;
+
+   U32 mCurrColor;       // last colour recorded into the current buffer
+   U32 mOverrideColor;   // colour used for everything while mOverride is set
+   bool mOverride;
+
+   void _vertex(const float x, const float y, const float z, unsigned int color);   // records one vertex
+
+   void renderBuffer(Buffer &b);   // replays one buffer through the primitive builder
+};
+
+#endif
diff --git a/Engine/source/navigation/navMesh.cpp b/Engine/source/navigation/navMesh.cpp
new file mode 100644
index 000000000..bdbde4f7e
--- /dev/null
+++ b/Engine/source/navigation/navMesh.cpp
@@ -0,0 +1,939 @@
+//-----------------------------------------------------------------------------
+// Copyright (c) 2013 GarageGames, LLC
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to
+// deal in the Software without restriction, including without limitation the
+// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+// sell copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+// IN THE SOFTWARE.
+//-----------------------------------------------------------------------------
+
+#include <stdio.h>
+
+#include "navMesh.h"
+#include <DetourDebugDraw.h>
+#include <RecastDebugDraw.h>
+
+#include "math/mathUtils.h"
+#include "math/mRandom.h"
+#include "console/consoleTypes.h"
+#include "console/engineAPI.h"
+#include "console/typeValidators.h"
+
+#include "scene/sceneRenderState.h"
+#include "gfx/gfxDrawUtil.h"
+#include "renderInstance/renderPassManager.h"
+#include "gfx/primBuilder.h"
+
+#include "core/stream/bitStream.h"
+#include "math/mathIO.h"
+
+extern bool gEditingMission;
+
+IMPLEMENT_CO_NETOBJECT_V1(NavMesh);
+
+const U32 NavMesh::mMaxVertsPerPoly = 3;   // copied into cfg.maxVertsPerPoly by updateConfig()
+
+NavMesh::NavMesh()
+{
+   mTypeMask |= StaticShapeObjectType | MarkerObjectType;
+   mFileName = StringTable->insert("");
+   mNetFlags.clear(Ghostable);   // onAdd() and the alwaysRender setter may set this again
+
+   nm = NULL;   // no Detour navmesh until build() allocates one
+
+   dMemset(&cfg, 0, sizeof(cfg));
+   mCellSize = mCellHeight = 0.2f;
+   mWalkableHeight = 2.0f;
+   mWalkableClimb = 0.3f;
+   mWalkableRadius = 0.5f;
+   mWalkableSlope = 40.0f;
+   mBorderSize = 1;   // NOTE(review): updateConfig() computes cfg.borderSize from the walkable radius and does not read this
+   mDetailSampleDist = 6.0f;
+   mDetailSampleMaxError = 1.0f;
+   mMaxEdgeLen = 12;
+   mMaxSimplificationError = 1.3f;
+   mMinRegionArea = 8;
+   mMergeRegionArea = 20;
+   mTileSize = 10.0f;
+   mMaxPolysPerTile = 128;
+
+   mAlwaysRender = false;
+
+   mBuilding = false;   // no tile build in progress
+}
+
+NavMesh::~NavMesh()
+{
+   dtFreeNavMesh(nm);   // release the Detour navmesh, if any
+   nm = NULL;
+}
+
+bool NavMesh::setProtectedDetailSampleDist(void *obj, const char *index, const char *data)
+{  // Validating setter for detailSampleDist: only 0 or values of at least 0.9 pass.
+   F32 dist = dAtof(data);
+   if(dist == 0.0f || dist >= 0.9f)
+      return true;   // value is valid
+   Con::errorf("NavMesh::detailSampleDist must be 0 or at least 0.9!");
+   return false;     // value is invalid; an error has been logged
+}
+
+bool NavMesh::setProtectedAlwaysRender(void *obj, const char *index, const char *data)
+{
+   // Protected setter for "alwaysRender": stores the flag on the mesh and,
+   // unless the mission editor is active, makes the Ghostable net flag follow it.
+   NavMesh *navMesh = static_cast<NavMesh*>(obj);
+   const bool enable = dAtob(data);
+
+   if(!gEditingMission)
+   {
+      if(enable)
+         navMesh->mNetFlags.set(Ghostable);
+      else
+         navMesh->mNetFlags.clear(Ghostable);
+   }
+   navMesh->mAlwaysRender = enable;
+   navMesh->setMaskBits(LoadFlag);
+   return true;
+}
+
+FRangeValidator ValidCellSize(0.01f, 10.0f);   // used by the cellSize and cellHeight fields
+FRangeValidator ValidSlopeAngle(0.0f, 89.9f);  // used by the walkableSlope field
+IRangeValidator PositiveInt(0, S32_MAX);       // note: the accepted range includes 0
+IRangeValidator NaturalNumber(1, S32_MAX);     // used by the maxPolysPerTile field
+FRangeValidator CornerAngle(0.0f, 90.0f);      // not referenced by initPersistFields() below
+
+void NavMesh::initPersistFields()
+{  // Registers the NavMesh console fields in three groups, then the parent's fields.
+   addGroup("NavMesh Options");
+
+   addField("fileName", TypeString, Offset(mFileName, NavMesh),
+      "Name of the data file to store this navmesh in (relative to engine executable).");
+
+   addFieldV("cellSize", TypeF32, Offset(mCellSize, NavMesh), &ValidCellSize,
+      "Length/width of a voxel.");
+   addFieldV("cellHeight", TypeF32, Offset(mCellHeight, NavMesh), &ValidCellSize,
+      "Height of a voxel.");
+   addFieldV("tileSize", TypeF32, Offset(mTileSize, NavMesh), &CommonValidators::PositiveNonZeroFloat,
+      "The horizontal size of tiles.");
+
+   addFieldV("actorHeight", TypeF32, Offset(mWalkableHeight, NavMesh), &CommonValidators::PositiveFloat,
+      "Height of an actor.");
+   addFieldV("actorClimb", TypeF32, Offset(mWalkableClimb, NavMesh), &CommonValidators::PositiveFloat,
+      "Maximum climbing height of an actor.");
+   addFieldV("actorRadius", TypeF32, Offset(mWalkableRadius, NavMesh), &CommonValidators::PositiveFloat,
+      "Radius of an actor.");
+   addFieldV("walkableSlope", TypeF32, Offset(mWalkableSlope, NavMesh), &ValidSlopeAngle,
+      "Maximum walkable slope in degrees.");
+
+   endGroup("NavMesh Options");
+
+   addGroup("NavMesh Rendering");
+
+   addProtectedField("alwaysRender", TypeBool, Offset(mAlwaysRender, NavMesh),
+      &setProtectedAlwaysRender, &defaultProtectedGetFn,
+      "Display this NavMesh even outside the editor.");
+
+   endGroup("NavMesh Rendering");
+
+   addGroup("NavMesh Advanced Options");
+
+   addFieldV("borderSize", TypeS32, Offset(mBorderSize, NavMesh), &PositiveInt,
+      "Size of the non-walkable border around the navigation mesh (in voxels).");
+   addProtectedField("detailSampleDist", TypeF32, Offset(mDetailSampleDist, NavMesh),
+      &setProtectedDetailSampleDist, &defaultProtectedGetFn,
+      "Sets the sampling distance to use when generating the detail mesh.");
+   addFieldV("detailSampleError", TypeF32, Offset(mDetailSampleMaxError, NavMesh), &CommonValidators::PositiveFloat,
+      "The maximum distance the detail mesh surface should deviate from heightfield data.");
+   addFieldV("maxEdgeLen", TypeS32, Offset(mMaxEdgeLen, NavMesh), &PositiveInt,
+      "The maximum allowed length for contour edges along the border of the mesh.");
+   addFieldV("simplificationError", TypeF32, Offset(mMaxSimplificationError, NavMesh), &CommonValidators::PositiveFloat,
+      "The maximum distance a simplfied contour's border edges should deviate from the original raw contour.");
+   addFieldV("minRegionArea", TypeS32, Offset(mMinRegionArea, NavMesh), &PositiveInt,
+      "The minimum number of cells allowed to form isolated island areas.");
+   addFieldV("mergeRegionArea", TypeS32, Offset(mMergeRegionArea, NavMesh), &PositiveInt,
+      "Any regions with a span count smaller than this value will, if possible, be merged with larger regions.");
+   addFieldV("maxPolysPerTile", TypeS32, Offset(mMaxPolysPerTile, NavMesh), &NaturalNumber,
+      "The maximum number of polygons allowed in a tile.");
+
+   endGroup("NavMesh Advanced Options");
+
+   Parent::initPersistFields();
+}
+
+bool NavMesh::onAdd()
+{
+   if(!Parent::onAdd())
+      return false;
+
+   mObjBox.set(Point3F(-10.0f, -10.0f, -1.0f),
+      Point3F( 10.0f,  10.0f,  1.0f));   // fixed 20 x 20 x 2 object-space box
+   resetWorldBox();
+
+   addToScene();
+
+   if(gEditingMission || mAlwaysRender)   // only then is the mesh ghosted and drawn
+   {
+      mNetFlags.set(Ghostable);
+      if(isClientObject())
+         renderToDrawer();
+   }
+
+   if(isServerObject())
+   {
+      setProcessTick(true);   // processTick() builds one queued tile per tick
+   }
+
+   load();   // NOTE(review): runs after renderToDrawer() above - confirm that ordering is intended
+
+   return true;
+}
+
+void NavMesh::onRemove()
+{
+   removeFromScene();   // undo the addToScene() done in onAdd()
+
+   Parent::onRemove();
+}
+
+void NavMesh::setTransform(const MatrixF &mat)
+{
+   Parent::setTransform(mat);   // pure pass-through; no NavMesh-specific handling
+}
+
+void NavMesh::setScale(const VectorF &scale)
+{
+   Parent::setScale(scale);   // pure pass-through; no NavMesh-specific handling
+}
+
+bool NavMesh::build(bool background, bool saveIntermediates)
+{
+   // Rebuilds the Detour navmesh. Returns false if it could not be allocated or
+   // initialised; saveIntermediates is not used by this function.
+   if(mBuilding)
+      cancelBuild();
+   mBuilding = true;
+
+   // Replace any previous navmesh with a freshly allocated one.
+   dtFreeNavMesh(nm);
+   nm = dtAllocNavMesh();
+   if(!nm)
+   {
+      Con::errorf("Could not allocate dtNavMesh for NavMesh %s", getIdString());
+      mBuilding = false;   // nothing is in progress after a failure
+      return false;
+   }
+
+   updateConfig();
+
+   // Build navmesh parameters from console members.
+   dtNavMeshParams params;
+   rcVcopy(params.orig, cfg.bmin);
+   params.tileWidth = cfg.tileSize * mCellSize;
+   params.tileHeight = cfg.tileSize * mCellSize;
+   params.maxTiles = mCeil(getWorldBox().len_x() / params.tileWidth) * mCeil(getWorldBox().len_y() / params.tileHeight);
+   params.maxPolys = mMaxPolysPerTile;
+
+   // Initialise our navmesh.
+   if(dtStatusFailed(nm->init(&params)))
+   {
+      Con::errorf("Could not init dtNavMesh for NavMesh %s", getIdString());
+      mBuilding = false;   // nothing is in progress after a failure
+      return false;
+   }
+
+   // Queue every tile; a foreground build drains the queue before returning.
+   updateTiles(true);
+   if(!background)
+      while(mDirtyTiles.size())
+         buildNextTile();
+   return true;
+}
+
+DefineEngineMethod(NavMesh, build, bool, (bool background, bool save), (true, false),
+   "@brief Create a Recast nav mesh.")
+{
+   return object->build(background, save);   // defaults: background = true, save = false
+}
+
+void NavMesh::cancelBuild()
+{
+   while(mDirtyTiles.size()) mDirtyTiles.pop();   // discard every tile still waiting to be built
+   mBuilding = false;                             // tiles already added to the navmesh are kept
+}
+
+DefineEngineMethod(NavMesh, cancelBuild, void, (),,
+   "@brief Cancel the current NavMesh build.")
+{
+   object->cancelBuild();   // script-side wrapper around NavMesh::cancelBuild()
+}
+
+void NavMesh::inspectPostApply()
+{  // NOTE(review): Parent::inspectPostApply() is not called here - confirm that is intended.
+   if(mBuilding)
+      cancelBuild();   // abandon a running build when this is invoked
+}
+
+void NavMesh::updateConfig()
+{
+   // Build rcConfig object from our console members.
+   dMemset(&cfg, 0, sizeof(cfg));
+   cfg.cs = mCellSize;
+   cfg.ch = mCellHeight;
+   Box3F box = DTStoRC(getWorldBox());   // world box converted with DTStoRC before use as Recast bounds
+   rcVcopy(cfg.bmin, box.minExtents);
+   rcVcopy(cfg.bmax, box.maxExtents);
+   rcCalcGridSize(cfg.bmin, cfg.bmax, cfg.cs, &cfg.width, &cfg.height);
+
+   cfg.walkableHeight = mCeil(mWalkableHeight / mCellHeight);   // world units -> cells, rounded up
+   cfg.walkableClimb = mCeil(mWalkableClimb / mCellHeight);
+   cfg.walkableRadius = mCeil(mWalkableRadius / mCellSize);
+   cfg.walkableSlopeAngle = mWalkableSlope;
+   cfg.borderSize = cfg.walkableRadius + 3;   // derived value; the mBorderSize member is not read here
+
+   cfg.detailSampleDist = mDetailSampleDist;
+   cfg.detailSampleMaxError = mDetailSampleMaxError;
+   cfg.maxEdgeLen = mMaxEdgeLen;
+   cfg.maxSimplificationError = mMaxSimplificationError;
+   cfg.maxVertsPerPoly = mMaxVertsPerPoly;
+   cfg.minRegionArea = mMinRegionArea;
+   cfg.mergeRegionArea = mMergeRegionArea;
+   cfg.tileSize = mTileSize / cfg.cs;   // tile edge in cells; NOTE(review): 0 when mTileSize < cell size
+}
+
+S32 NavMesh::getTile(Point3F pos)
+{
+   // Index of the first tile whose box contains pos; -1 if none or while building.
+   S32 found = -1;
+   for(U32 idx = 0; !mBuilding && found < 0 && idx < mTiles.size(); idx++)
+   {
+      if(mTiles[idx].box.isContained(pos))
+         found = idx;
+   }
+   return found;
+}
+
+Box3F NavMesh::getTileBox(U32 id)
+{
+   // Box of tile id; Box3F::Invalid while building or for an out-of-range id.
+   const bool usable = !mBuilding && id < mTiles.size();
+   return usable ? mTiles[id].box : Box3F::Invalid;
+}
+
+void NavMesh::updateTiles(bool dirty)
+{
+   // Rebuilds the tile list; when dirty is set every tile is also queued for building.
+   if(!isProperlyAdded())
+      return;
+
+   mTiles.clear();
+   while(mDirtyTiles.size()) mDirtyTiles.pop();
+
+   const Box3F &box = DTStoRC(getWorldBox());
+   if(box.isEmpty())
+      return;
+
+   updateConfig();
+   // cfg.tileSize (whole cells) truncates to 0 when tileSize < cellSize; avoid dividing by it.
+   if(cfg.tileSize < 1)
+   {
+      Con::errorf("NavMesh %s: tileSize must be at least as large as cellSize", getIdString());
+      return;
+   }
+
+   // Calculate tile dimensions.
+   const U32 ts = cfg.tileSize;
+   const U32 tw = (cfg.width  + ts-1) / ts;
+   const U32 th = (cfg.height + ts-1) / ts;
+   const F32 tcs = cfg.tileSize * cfg.cs;
+
+   // Iterate over tiles.
+   F32 tileBmin[3], tileBmax[3];
+   for(U32 y = 0; y < th; ++y)
+   {
+      for(U32 x = 0; x < tw; ++x)
+      {
+         tileBmin[0] = cfg.bmin[0] + x*tcs;
+         tileBmin[1] = cfg.bmin[1];
+         tileBmin[2] = cfg.bmin[2] + y*tcs;
+         tileBmax[0] = cfg.bmin[0] + (x+1)*tcs;
+         tileBmax[1] = cfg.bmax[1];
+         tileBmax[2] = cfg.bmin[2] + (y+1)*tcs;
+         mTiles.push_back(Tile(RCtoDTS(tileBmin, tileBmax), x, y, tileBmin, tileBmax));
+         if(dirty)
+            mDirtyTiles.push(mTiles.size() - 1);
+      }
+   }
+}
+
+/// Per-tick hook: builds at most one queued tile by calling buildNextTile().
+/// @param move Unused.
+void NavMesh::processTick(const Move *move)
+{
+   buildNextTile();
+}
+
+/// Build the next tile in the dirty queue, if there is one.
+/// Pops a single index from mDirtyTiles, generates its navmesh data and swaps
+/// it into the dtNavMesh. Clears mBuilding once the queue is drained and
+/// raises BuildFlag after every processed entry.
+void NavMesh::buildNextTile()
+{
+   if(mDirtyTiles.empty())
+      return;
+
+   // Pop a single dirty tile and process it.
+   const U32 i = mDirtyTiles.front();
+   mDirtyTiles.pop();
+
+   // buildTile() can queue work before a dtNavMesh exists, so only build when
+   // there is a mesh to receive the tile and the queued index is in range.
+   if(nm && i < mTiles.size())
+   {
+      const Tile &tile = mTiles[i];
+      // Intermediate data for tile build; released when tempdata goes out of scope.
+      TileData tempdata;
+      // Generate navmesh for this tile.
+      U32 dataSize = 0;
+      unsigned char* data = buildTileData(tile, tempdata, dataSize);
+      if(data)
+      {
+         // Remove any previous data.
+         nm->removeTile(nm->getTileRefAt(tile.x, tile.y, 0), 0, 0);
+         // Add new data (navmesh owns and deletes the data).
+         dtStatus status = nm->addTile(data, dataSize, DT_TILE_FREE_DATA, 0, 0);
+         // On failure the navmesh did not take the buffer, so free it here.
+         if(dtStatusFailed(status))
+            dtFree(data);
+      }
+   }
+
+   // Did we just build the last tile?
+   if(mDirtyTiles.empty())
+      mBuilding = false;
+
+   setMaskBits(BuildFlag);
+}
+
+/// Container search callback: asks each found object to append its geometry
+/// to the poly list described by the CallbackInfo passed through key.
+static void buildCallback(SceneObject* object,void *key)
+{
+   SceneContainer::CallbackInfo &cbInfo = *reinterpret_cast<SceneContainer::CallbackInfo*>(key);
+   object->buildPolyList(cbInfo.context, cbInfo.polyList, cbInfo.boundingBox, cbInfo.boundingSphere);
+}
+
+/// Generate Detour navmesh data for a single tile.
+/// Collects static scene geometry overlapping the border-expanded tile and
+/// runs it through the Recast stages in order: heightfield, compact
+/// heightfield, regions, contours, poly mesh, detail mesh. The result is
+/// packed with dtCreateNavMeshData.
+/// @param tile     Tile to build; its Recast bounds and grid position are used.
+/// @param data     Receives the intermediate Recast objects. They are not
+///                 freed here on any return path; the TileData keeps them.
+/// @param dataSize Set to the byte size of the returned buffer on success;
+///                 left untouched on failure.
+/// @return Buffer produced by dtCreateNavMeshData, or NULL when the tile has
+///         no geometry or any stage fails.
+unsigned char *NavMesh::buildTileData(const Tile &tile, TileData &data, U32 &dataSize)
+{
+   // Push out tile boundaries a bit.
+   F32 tileBmin[3], tileBmax[3];
+   rcVcopy(tileBmin, tile.bmin);
+   rcVcopy(tileBmax, tile.bmax);
+   tileBmin[0] -= cfg.borderSize * cfg.cs;
+   tileBmin[2] -= cfg.borderSize * cfg.cs;
+   tileBmax[0] += cfg.borderSize * cfg.cs;
+   tileBmax[2] += cfg.borderSize * cfg.cs;
+
+   // Parse objects from level into RC-compatible format.
+   // NOTE(review): info.boundingSphere is never assigned here although
+   // buildCallback forwards it to buildPolyList - confirm that is intended.
+   Box3F box = RCtoDTS(tileBmin, tileBmax);
+   SceneContainer::CallbackInfo info;
+   info.context = PLC_Navigation;
+   info.boundingBox = box;
+   info.polyList = &data.geom;
+   getContainer()->findObjects(box, StaticObjectType, buildCallback, &info);
+
+   // Check for no geometry.
+   if(!data.geom.getVertCount())
+      return NULL;
+
+   // Figure out voxel dimensions of this tile.
+   U32 width = 0, height = 0;
+   width = cfg.tileSize + cfg.borderSize * 2;
+   height = cfg.tileSize + cfg.borderSize * 2;
+
+   // Create a dummy context.
+   rcContext ctx(false);
+
+   // Create a heightfield to voxelise our input geometry.
+   data.hf = rcAllocHeightfield();
+   if(!data.hf)
+   {
+      Con::errorf("Out of memory (rcHeightField) for NavMesh %s", getIdString());
+      return NULL;
+   }
+   if(!rcCreateHeightfield(&ctx, *data.hf, width, height, tileBmin, tileBmax, cfg.cs, cfg.ch))
+   {
+      Con::errorf("Could not generate rcHeightField for NavMesh %s", getIdString());
+      return NULL;
+   }
+
+   // One area id per input triangle.
+   // NOTE(review): a plain new[] normally throws instead of returning NULL,
+   // so this check presumably only matters if the engine replaces operator
+   // new - confirm.
+   unsigned char *areas = new unsigned char[data.geom.getTriCount()];
+   if(!areas)
+   {
+      Con::errorf("Out of memory (area flags) for NavMesh %s", getIdString());
+      return NULL;
+   }
+   dMemset(areas, 0, data.geom.getTriCount() * sizeof(unsigned char));
+
+   // Filter triangles by angle and rasterize.
+   rcMarkWalkableTriangles(&ctx, cfg.walkableSlopeAngle,
+      data.geom.getVerts(), data.geom.getVertCount(),
+      data.geom.getTris(), data.geom.getTriCount(), areas);
+   rcRasterizeTriangles(&ctx, data.geom.getVerts(), data.geom.getVertCount(),
+      data.geom.getTris(), areas, data.geom.getTriCount(),
+      *data.hf, cfg.walkableClimb);
+
+   // The area ids are only needed for rasterization; release them before any
+   // later early return.
+   delete[] areas;
+
+   // Filter out areas with low ceilings and other stuff.
+   rcFilterLowHangingWalkableObstacles(&ctx, cfg.walkableClimb, *data.hf);
+   rcFilterLedgeSpans(&ctx, cfg.walkableHeight, cfg.walkableClimb, *data.hf);
+   rcFilterWalkableLowHeightSpans(&ctx, cfg.walkableHeight, *data.hf);
+
+   data.chf = rcAllocCompactHeightfield();
+   if(!data.chf)
+   {
+      Con::errorf("Out of memory (rcCompactHeightField) for NavMesh %s", getIdString());
+      return NULL;
+   }
+   if(!rcBuildCompactHeightfield(&ctx, cfg.walkableHeight, cfg.walkableClimb, *data.hf, *data.chf))
+   {
+      Con::errorf("Could not generate rcCompactHeightField for NavMesh %s", getIdString());
+      return NULL;
+   }
+   if(!rcErodeWalkableArea(&ctx, cfg.walkableRadius, *data.chf))
+   {
+      Con::errorf("Could not erode walkable area for NavMesh %s", getIdString());
+      return NULL;
+   }
+
+   //--------------------------
+   // Todo: mark areas here.
+   //const ConvexVolume* vols = m_geom->getConvexVolumes();
+   //for (int i  = 0; i < m_geom->getConvexVolumeCount(); ++i)
+      //rcMarkConvexPolyArea(m_NULL, vols[i].verts, vols[i].nverts, vols[i].hmin, vols[i].hmax, (unsigned char)vols[i].area, *m_chf);
+   //--------------------------
+
+   // Hard-coded switch: the monotone region builder is disabled, so the
+   // distance-field path in the else branch is always taken.
+   if(false)
+   {
+      if(!rcBuildRegionsMonotone(&ctx, *data.chf, cfg.borderSize, cfg.minRegionArea, cfg.mergeRegionArea))
+      {
+         Con::errorf("Could not build regions for NavMesh %s", getIdString());
+         return NULL;
+      }
+   }
+   else
+   {
+      if(!rcBuildDistanceField(&ctx, *data.chf))
+      {
+         Con::errorf("Could not build distance field for NavMesh %s", getIdString());
+         return NULL;
+      }
+      if(!rcBuildRegions(&ctx, *data.chf, cfg.borderSize, cfg.minRegionArea, cfg.mergeRegionArea))
+      {
+         Con::errorf("Could not build regions for NavMesh %s", getIdString());
+         return NULL;
+      }
+   }
+
+   data.cs = rcAllocContourSet();
+   if(!data.cs)
+   {
+      Con::errorf("Out of memory (rcContourSet) for NavMesh %s", getIdString());
+      return NULL;
+   }
+   if(!rcBuildContours(&ctx, *data.chf, cfg.maxSimplificationError, cfg.maxEdgeLen, *data.cs))
+   {
+      Con::errorf("Could not construct rcContourSet for NavMesh %s", getIdString());
+      return NULL;
+   }
+   if(data.cs->nconts <= 0)
+   {
+      Con::errorf("No contours in rcContourSet for NavMesh %s", getIdString());
+      return NULL;
+   }
+
+   data.pm = rcAllocPolyMesh();
+   if(!data.pm)
+   {
+      Con::errorf("Out of memory (rcPolyMesh) for NavMesh %s", getIdString());
+      return NULL;
+   }
+   if(!rcBuildPolyMesh(&ctx, *data.cs, cfg.maxVertsPerPoly, *data.pm))
+   {
+      Con::errorf("Could not construct rcPolyMesh for NavMesh %s", getIdString());
+      return NULL;
+   }
+
+   data.pmd = rcAllocPolyMeshDetail();
+   if(!data.pmd)
+   {
+      Con::errorf("Out of memory (rcPolyMeshDetail) for NavMesh %s", getIdString());
+      return NULL;
+   }
+   if(!rcBuildPolyMeshDetail(&ctx, *data.pm, *data.chf, cfg.detailSampleDist, cfg.detailSampleMaxError, *data.pmd))
+   {
+      Con::errorf("Could not construct rcPolyMeshDetail for NavMesh %s", getIdString());
+      return NULL;
+   }
+
+   if(data.pm->nverts >= 0xffff)
+   {
+      Con::errorf("Too many vertices in rcPolyMesh for NavMesh %s", getIdString());
+      return NULL;
+   }
+
+   // Translate Recast area ids into this module's area ids and poly flags.
+   for(S32 i = 0; i < data.pm->npolys; i++)
+   {
+      if(data.pm->areas[i] == RC_WALKABLE_AREA)
+         data.pm->areas[i] = GroundArea;
+
+      if(data.pm->areas[i] == GroundArea)
+         data.pm->flags[i] |= WalkFlag;
+      if(data.pm->areas[i] == WaterArea)
+         data.pm->flags[i] |= SwimFlag;
+   }
+
+   unsigned char* navData = 0;
+   int navDataSize = 0;
+
+   dtNavMeshCreateParams params;
+   dMemset(&params, 0, sizeof(params));
+
+   // Polygon mesh.
+   params.verts = data.pm->verts;
+   params.vertCount = data.pm->nverts;
+   params.polys = data.pm->polys;
+   params.polyAreas = data.pm->areas;
+   params.polyFlags = data.pm->flags;
+   params.polyCount = data.pm->npolys;
+   params.nvp = data.pm->nvp;
+
+   // Detail mesh.
+   params.detailMeshes = data.pmd->meshes;
+   params.detailVerts = data.pmd->verts;
+   params.detailVertsCount = data.pmd->nverts;
+   params.detailTris = data.pmd->tris;
+   params.detailTriCount = data.pmd->ntris;
+
+   // Agent and tile parameters. These use the unscaled console members,
+   // not the cell-unit values stored in cfg.
+   params.walkableHeight = mWalkableHeight;
+   params.walkableRadius = mWalkableRadius;
+   params.walkableClimb = mWalkableClimb;
+   params.tileX = tile.x;
+   params.tileY = tile.y;
+   params.tileLayer = 0;
+   rcVcopy(params.bmin, data.pm->bmin);
+   rcVcopy(params.bmax, data.pm->bmax);
+   params.cs = cfg.cs;
+   params.ch = cfg.ch;
+   params.buildBvTree = true;
+
+   if(!dtCreateNavMeshData(&params, &navData, &navDataSize))
+   {
+      Con::errorf("Could not create dtNavMeshData for tile (%d, %d) of NavMesh %s",
+         tile.x, tile.y, getIdString());
+      return NULL;
+   }
+
+   dataSize = navDataSize;
+
+   return navData;
+}
+
+/// Queue every tile whose box overlaps the given box for rebuilding.
+/// Nothing is rebuilt here: matching tile indices are pushed onto
+/// mDirtyTiles, which buildNextTile() drains one entry at a time. Does
+/// nothing when no dtNavMesh has been built or loaded yet.
+/// @param box Torque-space box to test each tile against.
+void NavMesh::buildTiles(const Box3F &box)
+{
+   // Make sure we've already built or loaded.
+   if(!nm)
+      return;
+
+   const U32 tileCount = mTiles.size();
+   for(U32 idx = 0; idx < tileCount; ++idx)
+   {
+      // Mark overlapping tiles as dirty.
+      if(mTiles[idx].box.isOverlapped(box))
+         mDirtyTiles.push(idx);
+   }
+}
+
+// Console binding: forwards the box straight to NavMesh::buildTiles.
+DefineEngineMethod(NavMesh, buildTiles, void, (Box3F box),,
+   "@brief Rebuild the tiles overlapped by the input box.")
+{
+   return object->buildTiles(box);
+}
+
+/// Queue a single tile for rebuilding.
+/// @param tile Index into mTiles; out-of-range indices are ignored.
+void NavMesh::buildTile(const U32 &tile)
+{
+   if(tile >= mTiles.size())
+      return;
+
+   mDirtyTiles.push(tile);
+}
+
+/// Refill the debug drawer from the server object's dtNavMesh.
+/// The drawer is always cleared first. Group 0 receives the mesh, group 1
+/// the portals and group 2 the BV tree. Nothing is drawn when there is no
+/// server object or it has no dtNavMesh.
+void NavMesh::renderToDrawer()
+{
+   dd.clear();
+
+   // Recast debug draw
+   NetObject *serverObj = getServerObject();
+   if(!serverObj)
+      return;
+
+   NavMesh *serverMesh = static_cast<NavMesh*>(serverObj);
+   if(!serverMesh->nm)
+      return;
+
+   dd.beginGroup(0);
+   duDebugDrawNavMesh(&dd, *serverMesh->nm, 0);
+   dd.beginGroup(1);
+   duDebugDrawNavMeshPortals(&dd, *serverMesh->nm);
+   dd.beginGroup(2);
+   duDebugDrawNavMeshBVTree(&dd, *serverMesh->nm);
+}
+
+/// Submit a render instance that calls NavMesh::render for this object.
+/// @param state Scene render state whose render pass receives the instance.
+void NavMesh::prepRenderImage(SceneRenderState *state)
+{
+   RenderPassManager *pass = state->getRenderPass();
+
+   ObjectRenderInst *inst = pass->allocInst<ObjectRenderInst>();
+   inst->renderDelegate.bind(this, &NavMesh::render);
+   inst->type = RenderPassManager::RIT_Object;
+   inst->translucentSort = true;
+   inst->defaultKey = 1;
+
+   pass->addInst(inst);
+}
+
+/// Render delegate: draws the selection box and the cached debug-draw groups.
+/// Skipped for material overrides and reflection passes. All state is read
+/// from the server object, so nothing is drawn when there is none.
+/// @param ri          Unused.
+/// @param state       Current scene render state.
+/// @param overrideMat Non-NULL when rendering with a material override.
+void NavMesh::render(ObjectRenderInst *ri, SceneRenderState *state, BaseMatInstance *overrideMat)
+{
+   if(overrideMat || state->isReflectPass())
+      return;
+
+   PROFILE_SCOPE(NavMesh_Render);
+
+   // Recast debug draw
+   NetObject *serverObj = getServerObject();
+   if(!serverObj)
+      return;
+
+   NavMesh *mesh = static_cast<NavMesh*>(serverObj);
+
+   // Selection box: a translucent fill (red while building) plus a black wireframe.
+   if(mesh->isSelected())
+   {
+      GFXDrawUtil *drawUtil = GFX->getDrawUtil();
+
+      GFXStateBlockDesc desc;
+      desc.setZReadWrite(true, false);
+      desc.setBlend(true);
+      desc.setCullMode(GFXCullNone);
+
+      const ColorI fill = mesh->mBuilding
+         ? ColorI(255, 0, 0, 80)
+         : ColorI(136, 228, 255, 45);
+      drawUtil->drawCube(desc, getWorldBox(), fill);
+
+      desc.setFillModeWireframe();
+      drawUtil->drawCube(desc, getWorldBox(), ColorI::BLACK);
+   }
+
+   // While building, tint the mesh red; the tint is fainter unless the mesh
+   // is selected and the nav editor is open.
+   if(mesh->mBuilding)
+   {
+      const bool highlighted = mesh->isSelected() && Con::getBoolVariable("$Nav::EditorOpen");
+      dd.overrideColor(duRGBA(255, 0, 0, highlighted ? 80 : 20));
+   }
+   else
+   {
+      dd.cancelOverride();
+   }
+
+   // In the editor the mesh follows the editor toggle; otherwise it follows alwaysRender.
+   const bool showMesh = gEditingMission
+      ? Con::getBoolVariable("$Nav::Editor::renderMesh", 1)
+      : mesh->mAlwaysRender;
+
+   if(showMesh)
+      dd.renderGroup(0);
+   if(Con::getBoolVariable("$Nav::Editor::renderPortals"))
+      dd.renderGroup(1);
+   if(Con::getBoolVariable("$Nav::Editor::renderBVTree"))
+      dd.renderGroup(2);
+}
+
+/// Editor opened: make the object ghostable and, on the client, add it to
+/// the scene unless alwaysRender is set.
+void NavMesh::onEditorEnable()
+{
+   mNetFlags.set(Ghostable);
+
+   if(!isClientObject() || mAlwaysRender)
+      return;
+
+   addToScene();
+}
+
+/// Editor closed: unless alwaysRender is set, stop ghosting the object and,
+/// on the client, remove it from the scene.
+void NavMesh::onEditorDisable()
+{
+   if(mAlwaysRender)
+      return;
+
+   mNetFlags.clear(Ghostable);
+   if(isClientObject())
+      removeFromScene();
+}
+
+/// Write this object's ghost update.
+/// After the parent data, the transform, the scale and the alwaysRender flag
+/// are written unconditionally; mask is only forwarded to the parent.
+/// unpackUpdate reads the fields in the same order.
+/// @return The mask returned by Parent::packUpdate.
+U32 NavMesh::packUpdate(NetConnection *conn, U32 mask, BitStream *stream)
+{
+   U32 retMask = Parent::packUpdate(conn, mask, stream);
+
+   mathWrite(*stream, getTransform());
+   mathWrite(*stream, getScale());
+   stream->writeFlag(mAlwaysRender);
+
+   return retMask;
+}
+
+/// Read a ghost update written by packUpdate.
+/// Reads the transform, the scale and the alwaysRender flag in that order,
+/// applies the transform, then refreshes the debug-draw data.
+void NavMesh::unpackUpdate(NetConnection *conn, BitStream *stream)
+{
+   Parent::unpackUpdate(conn, stream);
+
+   mathRead(*stream, &mObjToWorld);
+   mathRead(*stream, &mObjScale);
+   mAlwaysRender = stream->readFlag();
+
+   // Go through setTransform so anything derived from the matrix is updated.
+   setTransform(mObjToWorld);
+
+   // Every update redraws the mesh into the drawer.
+   renderToDrawer();
+}
+
+// File signature checked by NavMesh::load and written by NavMesh::save.
+static const int NAVMESHSET_MAGIC = 'M'<<24 | 'S'<<16 | 'E'<<8 | 'T'; //'MSET';
+// File format version; load() rejects any other value.
+static const int NAVMESHSET_VERSION = 1;
+
+/// Header at the start of a saved navmesh file.
+/// load() and save() read and write it as raw bytes, so the file layout is
+/// whatever the compiler gives this struct.
+struct NavMeshSetHeader
+{
+   int magic;              ///< Must equal NAVMESHSET_MAGIC.
+   int version;            ///< Must equal NAVMESHSET_VERSION.
+   int numTiles;           ///< Number of tile records that follow the header.
+   dtNavMeshParams params; ///< Parameters passed to dtNavMesh::init on load.
+};
+
+/// Header preceding each tile's data in a saved navmesh file.
+struct NavMeshTileHeader
+{
+   dtTileRef tileRef;      ///< Tile reference handed back to addTile on load.
+   int dataSize;           ///< Size in bytes of the tile data that follows.
+};
+
+/// Load the navmesh from mFileName, replacing any existing dtNavMesh.
+/// The file holds a NavMeshSetHeader followed by numTiles records, each a
+/// NavMeshTileHeader plus its tile data. Tile reading stops at the first
+/// missing, truncated or invalid record; tiles read before that point are kept.
+/// @return false if no file name is set, the file cannot be opened, the
+///         header is unreadable or invalid, or the dtNavMesh cannot be
+///         allocated or initialised; true otherwise.
+bool NavMesh::load()
+{
+   if(!dStrlen(mFileName))
+      return false;
+
+   FILE* fp = fopen(mFileName, "rb");
+   if(!fp)
+      return false;
+
+   // Read header. A short read would leave the struct uninitialised, so the
+   // read itself is checked before any field is trusted.
+   NavMeshSetHeader header;
+   if(fread(&header, sizeof(NavMeshSetHeader), 1, fp) != 1
+      || header.magic != NAVMESHSET_MAGIC
+      || header.version != NAVMESHSET_VERSION
+      || header.numTiles < 0)
+   {
+      fclose(fp);
+      return false;
+   }
+
+   if(nm)
+      dtFreeNavMesh(nm);
+   nm = dtAllocNavMesh();
+   if(!nm)
+   {
+      fclose(fp);
+      return false;
+   }
+
+   dtStatus status = nm->init(&header.params);
+   if(dtStatusFailed(status))
+   {
+      // Do not leave an uninitialised mesh behind a non-NULL pointer.
+      dtFreeNavMesh(nm);
+      nm = NULL;
+      fclose(fp);
+      return false;
+   }
+
+   // Read tiles.
+   for(S32 i = 0; i < header.numTiles; ++i)
+   {
+      NavMeshTileHeader tileHeader;
+      if(fread(&tileHeader, sizeof(tileHeader), 1, fp) != 1)
+         break;
+      if(!tileHeader.tileRef || tileHeader.dataSize <= 0)
+         break;
+
+      unsigned char* data = (unsigned char*)dtAlloc(tileHeader.dataSize, DT_ALLOC_PERM);
+      if(!data) break;
+      memset(data, 0, tileHeader.dataSize);
+      if(fread(data, tileHeader.dataSize, 1, fp) != 1)
+      {
+         // Truncated tile: discard it rather than add partial data.
+         dtFree(data);
+         break;
+      }
+
+      // The navmesh takes the buffer only on success; free it here otherwise.
+      status = nm->addTile(data, tileHeader.dataSize, DT_TILE_FREE_DATA, tileHeader.tileRef, 0);
+      if(dtStatusFailed(status))
+         dtFree(data);
+   }
+
+   fclose(fp);
+
+   updateTiles();
+
+   if(isServerObject())
+   {
+      setMaskBits(LoadFlag);
+   }
+
+   return true;
+}
+
+// Console binding: returns the result of NavMesh::load.
+DefineEngineMethod(NavMesh, load, bool, (),,
+   "@brief Load this NavMesh from its file.")
+{
+   return object->load();
+}
+
+/// Save the dtNavMesh to mFileName in the format read by load().
+/// Writes a NavMeshSetHeader, then one NavMeshTileHeader plus tile data for
+/// every tile that has a header and a non-zero data size.
+/// @return false if no file name is set, there is no dtNavMesh, the file
+///         cannot be opened, or any write (or the final close) fails.
+bool NavMesh::save()
+{
+   if(!dStrlen(mFileName) || !nm)
+      return false;
+
+   // Save our navmesh into a file to load from next time
+   FILE* fp = fopen(mFileName, "wb");
+   if(!fp)
+      return false;
+
+   // Tiles are looked up by index through a const pointer, as the original
+   // code did with a cast.
+   const dtNavMesh* mesh = nm;
+   const S32 maxTiles = mesh->getMaxTiles();
+
+   // Store header. Zero it first so no uninitialised padding is written.
+   NavMeshSetHeader header;
+   dMemset(&header, 0, sizeof(header));
+   header.magic = NAVMESHSET_MAGIC;
+   header.version = NAVMESHSET_VERSION;
+   header.numTiles = 0;
+   for(S32 i = 0; i < maxTiles; ++i)
+   {
+      const dtMeshTile* tile = mesh->getTile(i);
+      if(!tile || !tile->header || !tile->dataSize) continue;
+      header.numTiles++;
+   }
+   memcpy(&header.params, mesh->getParams(), sizeof(dtNavMeshParams));
+   bool ok = fwrite(&header, sizeof(NavMeshSetHeader), 1, fp) == 1;
+
+   // Store tiles. Stop at the first failed write.
+   for(S32 i = 0; ok && i < maxTiles; ++i)
+   {
+      const dtMeshTile* tile = mesh->getTile(i);
+      if(!tile || !tile->header || !tile->dataSize) continue;
+
+      NavMeshTileHeader tileHeader;
+      dMemset(&tileHeader, 0, sizeof(tileHeader));
+      tileHeader.tileRef = mesh->getTileRef(tile);
+      tileHeader.dataSize = tile->dataSize;
+
+      ok = fwrite(&tileHeader, sizeof(tileHeader), 1, fp) == 1
+         && fwrite(tile->data, tile->dataSize, 1, fp) == 1;
+   }
+
+   // Buffered data may only reach the disk on close, so its result counts too.
+   if(fclose(fp) != 0)
+      ok = false;
+
+   return ok;
+}
+
+// Console binding: calls NavMesh::save. The method is declared void, so the
+// bool result of save() is not passed back to script.
+DefineEngineMethod(NavMesh, save, void, (),,
+   "@brief Save this NavMesh to its file.")
+{
+   object->save();
+}
+
+/// Write this object to a stream, saving the navmesh file first.
+/// The result of save() is ignored, so the object is written even if the
+/// navmesh file could not be saved.
+void NavMesh::write(Stream &stream, U32 tabStop, U32 flags)
+{
+   save();
+   Parent::write(stream, tabStop, flags);
+}
diff --git a/Engine/source/navigation/navMesh.h b/Engine/source/navigation/navMesh.h
new file mode 100644
index 000000000..0684aedf0
--- /dev/null
+++ b/Engine/source/navigation/navMesh.h
@@ -0,0 +1,274 @@
+//-----------------------------------------------------------------------------
+// Copyright (c) 2013 GarageGames, LLC
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to
+// deal in the Software without restriction, including without limitation the
+// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+// sell copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+// IN THE SOFTWARE.
+//-----------------------------------------------------------------------------
+
+#ifndef _NAVMESH_H_
+#define _NAVMESH_H_
+
+#include <queue>
+
+#include "torqueRecast.h"
+#include "scene/sceneObject.h"
+#include "recastPolyList.h"
+
+#include "duDebugDrawTorque.h"
+
+#include <Recast.h>
+#include <DetourNavMesh.h>
+#include <DetourNavMeshBuilder.h>
+#include <DebugDraw.h>
+
+/// @class NavMesh
+/// Represents a set of bounds within which a Recast navigation mesh is generated.
+/// @see NavMeshPolyList
+/// @see Trigger
+class NavMesh : public SceneObject {
+   typedef SceneObject Parent;
+   friend class NavPath;
+
+public:
+   /// @name NavMesh build
+   /// @{
+
+   /// Initiates the navmesh build process, which includes notifying the
+   /// clients and posting an event.
+   bool build(bool background = true, bool saveIntermediates = false);
+   /// Stop a build in progress.
+   void cancelBuild();
+
+   /// Save the navmesh to a file.
+   bool save();
+   /// Load a saved navmesh from a file.
+   bool load();
+
+   /// Queue the tiles that overlap the box for rebuilding. The tiles are
+   /// rebuilt one at a time by buildNextTile(), not immediately.
+   void buildTiles(const Box3F &box);
+
+   /// Queue a specific tile (index into the tile list) for rebuilding.
+   void buildTile(const U32 &tile);
+
+   /// Data file to store this nav mesh in. (From engine executable dir.)
+   StringTableEntry mFileName;
+
+   /// Cell width and height.
+   F32 mCellSize, mCellHeight;
+   /// @name Actor data
+   /// @{
+   F32 mWalkableHeight,
+      mWalkableClimb,
+      mWalkableRadius,
+      mWalkableSlope;
+   /// @}
+   /// @name Generation data
+   /// @{
+   U32 mBorderSize;
+   F32 mDetailSampleDist, mDetailSampleMaxError;
+   U32 mMaxEdgeLen;
+   F32 mMaxSimplificationError;
+   static const U32 mMaxVertsPerPoly;
+   U32 mMinRegionArea;
+   U32 mMergeRegionArea;
+   F32 mTileSize;
+   U32 mMaxPolysPerTile;
+   /// @}
+
+   /// @}
+
+   /// Return the index of the tile included by this point, or -1 if there
+   /// is none or a build is in progress.
+   S32 getTile(Point3F pos);
+
+   /// Return the box of a given tile, or Box3F::Invalid if the id is out of
+   /// range or a build is in progress.
+   Box3F getTileBox(U32 id);
+
+   /// @name SimObject
+   /// @{
+
+   virtual void onEditorEnable();
+   virtual void onEditorDisable();
+
+   /// Saves the navmesh file, then writes the object as usual.
+   void write(Stream &stream, U32 tabStop, U32 flags);
+
+   /// @}
+
+   /// @name SceneObject
+   /// @{
+
+   static void initPersistFields();
+
+   bool onAdd();
+   void onRemove();
+
+   /// Network mask bits added by this class.
+   enum flags {
+      BuildFlag    = Parent::NextFreeMask << 0,
+      LoadFlag     = Parent::NextFreeMask << 1,
+      NextFreeMask = Parent::NextFreeMask << 2,
+   };
+
+   U32 packUpdate(NetConnection *conn, U32 mask, BitStream *stream);
+   void unpackUpdate(NetConnection *conn, BitStream *stream);
+
+   void setTransform(const MatrixF &mat);
+   void setScale(const VectorF &scale);
+
+   /// @}
+
+   /// @name ProcessObject
+   /// @{
+
+   /// Builds at most one queued tile per tick.
+   void processTick(const Move *move);
+
+   /// @}
+
+   /// @name Rendering
+   /// @{
+
+   void prepRenderImage(SceneRenderState *state);
+   void render(ObjectRenderInst *ri, SceneRenderState *state, BaseMatInstance *overrideMat);
+
+   /// Render the mesh outside the editor as well.
+   bool mAlwaysRender;
+
+   /// @}
+
+   NavMesh();
+   ~NavMesh();
+   DECLARE_CONOBJECT(NavMesh);
+
+   void inspectPostApply();
+
+protected:
+
+   /// Read-only access to the Detour navmesh; NULL until one exists.
+   dtNavMesh const* getNavMesh() { return nm; }
+
+private:
+   /// Generates a navigation mesh for the collection of objects in this
+   /// mesh. Returns true if successful.
+   /// NOTE(review): the original comment said the mesh is stored in "tnm",
+   /// but this class declares no such member - presumably it ends up in nm;
+   /// confirm against the definition.
+   bool generateMesh();
+
+   /// Builds the next tile in the dirty list.
+   void buildNextTile();
+
+   /// @name Tiles
+   /// @{
+
+   struct Tile {
+      /// Torque-space world box of this tile.
+      Box3F box;
+      /// Local coordinates of this box.
+      U32 x, y;
+      /// Recast min and max points.
+      F32 bmin[3], bmax[3];
+      /// Default constructor.
+      Tile() : box(Box3F::Invalid), x(0), y(0)
+      {
+         bmin[0] = bmin[1] = bmin[2] = bmax[0] = bmax[1] = bmax[2] = 0.0f;
+      }
+      /// Value constructor.
+      Tile(const Box3F &b, U32 _x, U32 _y, const F32 *min, const F32 *max)
+         : box(b), x(_x), y(_y)
+      {
+         rcVcopy(bmin, min);
+         rcVcopy(bmax, max);
+      }
+   };
+
+   /// Intermediate data for tile creation.
+   /// Owns the Recast objects it points to and frees them on destruction.
+   /// NOTE(review): the struct is copyable while holding owning raw
+   /// pointers; copies would free the same objects twice - avoid copying.
+   struct TileData {
+      RecastPolyList          geom;
+      rcHeightfield        *hf;
+      rcCompactHeightfield *chf;
+      rcContourSet         *cs;
+      rcPolyMesh           *pm;
+      rcPolyMeshDetail     *pmd;
+      TileData()
+      {
+         hf = NULL;
+         chf = NULL;
+         cs = NULL;
+         pm = NULL;
+         pmd = NULL;
+      }
+      /// Release all intermediate data. Pointers are reset afterwards so a
+      /// second call (including the one from the destructor) is harmless.
+      void freeAll()
+      {
+         geom.clear();
+         rcFreeHeightField(hf);
+         hf = NULL;
+         rcFreeCompactHeightfield(chf);
+         chf = NULL;
+         rcFreeContourSet(cs);
+         cs = NULL;
+         rcFreePolyMesh(pm);
+         pm = NULL;
+         rcFreePolyMeshDetail(pmd);
+         pmd = NULL;
+      }
+      ~TileData()
+      {
+         freeAll();
+      }
+   };
+
+   /// List of tiles.
+   Vector<Tile> mTiles;
+
+   /// List of indices to the tile array which are dirty.
+   std::queue<U32> mDirtyTiles;
+
+   /// Update tile dimensions. When dirty is true, every tile is queued for
+   /// rebuilding.
+   void updateTiles(bool dirty = false);
+
+   /// Generates navmesh data for a single tile.
+   unsigned char *buildTileData(const Tile &tile, TileData &data, U32 &dataSize);
+
+   /// @}
+
+   /// @name Intermediate data
+   /// @{
+
+   /// Config struct.
+   rcConfig cfg;
+
+   /// Updates our config from console members.
+   void updateConfig();
+
+   /// The Detour navmesh; NULL until built or loaded.
+   dtNavMesh *nm;
+
+   /// @}
+
+   /// Used to perform non-standard validation. detailSampleDist can be 0, or >= 0.9.
+   static bool setProtectedDetailSampleDist(void *obj, const char *index, const char *data);
+
+   /// Updates the client when we check the alwaysRender option.
+   static bool setProtectedAlwaysRender(void *obj, const char *index, const char *data);
+
+   /// @name Threaded updates
+   /// @{
+
+   /// A simple flag to say we are building.
+   bool mBuilding;
+
+   /// @}
+
+   /// @name Rendering
+   /// @{
+
+   /// Cached debug-draw geometry, filled by renderToDrawer().
+   duDebugDrawTorque dd;
+
+   /// Refill dd from the server object's navmesh.
+   void renderToDrawer();
+
+   /// @}
+};
+
+#endif
diff --git a/Engine/source/navigation/navPath.cpp b/Engine/source/navigation/navPath.cpp
new file mode 100644
index 000000000..5e4978afc
--- /dev/null
+++ b/Engine/source/navigation/navPath.cpp
@@ -0,0 +1,625 @@
+//-----------------------------------------------------------------------------
+// Copyright (c) 2013 GarageGames, LLC
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to
+// deal in the Software without restriction, including without limitation the
+// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+// sell copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+// IN THE SOFTWARE.
+//-----------------------------------------------------------------------------
+
+#include "torqueRecast.h"
+#include "navPath.h"
+
+#include "console/consoleTypes.h"
+#include "console/engineAPI.h"
+#include "console/typeValidators.h"
+
+#include "scene/sceneRenderState.h"
+#include "gfx/gfxDrawUtil.h"
+#include "renderInstance/renderPassManager.h"
+#include "gfx/primBuilder.h"
+#include "core/stream/bitStream.h"
+#include "math/mathIO.h"
+
+#include <DetourDebugDraw.h>
+
+extern bool gEditingMission;
+
+IMPLEMENT_CO_NETOBJECT_V1(NavPath);
+
+/// Construct an empty path: no mesh, no waypoints, both endpoints at the
+/// origin and flagged as unset, and a freshly allocated Detour query.
+NavPath::NavPath() :
+   mFrom(0.0f, 0.0f, 0.0f),
+   mTo(0.0f, 0.0f, 0.0f)
+{
+   mTypeMask |= MarkerObjectType;
+
+   // Endpoints.
+   mFrom.set(0, 0, 0);
+   mTo.set(0, 0, 0);
+   mFromSet = mToSet = false;
+
+   // Linked objects.
+   mMesh = NULL;
+   mWaypoints = NULL;
+
+   // Path state.
+   mLength = 0.0f;
+   mIsLooping = false;
+
+   // Rendering options.
+   mAlwaysRender = mXray = false;
+
+   mQuery = dtAllocNavMeshQuery();
+}
+
+/// Release the Detour query allocated in the constructor.
+NavPath::~NavPath()
+{
+   // Required for Detour.
+   dtFreeNavMeshQuery(mQuery);
+   mQuery = NULL;
+}
+
+/// Field setter for "mesh": resolves data to a NavMesh and stores it.
+/// If no NavMesh by that name or id is found, the current mesh is kept.
+/// @return Always false, so the console does not also write the raw field.
+bool NavPath::setProtectedMesh(void *obj, const char *index, const char *data)
+{
+   NavPath *path = static_cast<NavPath*>(obj);
+
+   NavMesh *found = NULL;
+   if(Sim::findObject(data, found))
+      path->mMesh = found;
+
+   return false;
+}
+
+/// Field getter for "mesh".
+/// @return The mesh's name if it has one, otherwise its id string; an empty
+///         string when no mesh is set.
+const char *NavPath::getProtectedMesh(void *obj, const char *data)
+{
+   NavPath *path = static_cast<NavPath*>(obj);
+
+   if(path->mMesh.isNull())
+      return "";
+
+   const char *meshName = path->mMesh->getName();
+   return meshName ? meshName : path->mMesh->getIdString();
+}
+
+/// Field setter for "waypoints": resolves data to a SimPath::Path.
+/// On success the path is stored and isLooping is copied from it; otherwise
+/// the waypoints are cleared.
+/// @return Always false, so the console does not also write the raw field.
+bool NavPath::setProtectedWaypoints(void *obj, const char *index, const char *data)
+{
+   NavPath *path = static_cast<NavPath*>(obj);
+
+   SimPath::Path *found = NULL;
+   if(!Sim::findObject(data, found))
+   {
+      path->mWaypoints = NULL;
+      return false;
+   }
+
+   path->mWaypoints = found;
+   path->mIsLooping = found->isLooping();
+   return false;
+}
+
+/// Field setter for "from": records whether a start point was supplied.
+/// @return true for a non-empty value (the console then stores it in mFrom),
+///         false for an empty one.
+bool NavPath::setProtectedFrom(void *obj, const char *index, const char *data)
+{
+   NavPath *path = static_cast<NavPath*>(obj);
+
+   const bool hasValue = dStrcmp(data, "") != 0;
+   path->mFromSet = hasValue;
+   return hasValue;
+}
+
+/// Field setter for "to": records whether an end point was supplied.
+/// @return true for a non-empty value (the console then stores it in mTo),
+///         false for an empty one.
+bool NavPath::setProtectedTo(void *obj, const char *index, const char *data)
+{
+   NavPath *path = static_cast<NavPath*>(obj);
+
+   const bool hasValue = dStrcmp(data, "") != 0;
+   path->mToSet = hasValue;
+   return hasValue;
+}
+
+/// Field getter for "from".
+/// @return data unchanged when a start point is set, otherwise an empty string.
+const char *NavPath::getProtectedFrom(void *obj, const char *data)
+{
+   NavPath *path = static_cast<NavPath*>(obj);
+   return path->mFromSet ? data : "";
+}
+
+/// Field getter for "to".
+/// @return data unchanged when an end point is set, otherwise an empty string.
+const char *NavPath::getProtectedTo(void *obj, const char *data)
+{
+   NavPath *path = static_cast<NavPath*>(obj);
+   return path->mToSet ? data : "";
+}
+
+// Validator accepting integers from 1 to S32_MAX.
+// NOTE(review): not referenced by the field registrations that follow - confirm it is used elsewhere in this file.
+static IRangeValidator NaturalNumber(1, S32_MAX);
+
+/// Register NavPath's console-visible fields.
+/// "from", "to", "mesh" and "waypoints" are protected fields, so writes go
+/// through the setProtected* functions above.
+void NavPath::initPersistFields()
+{
+   addGroup("NavPath");
+
+   // Endpoints: the setters also track whether each one has been set.
+   addProtectedField("from", TypePoint3F, Offset(mFrom, NavPath),
+      &setProtectedFrom, &getProtectedFrom,
+      "World location this path starts at.");
+   addProtectedField("to", TypePoint3F, Offset(mTo, NavPath),
+      &setProtectedTo, &getProtectedTo,
+      "World location this path should end at.");
+
+   // Object references: the setters resolve names or ids to objects.
+   addProtectedField("mesh", TYPEID<NavMesh>(), Offset(mMesh, NavPath),
+      &setProtectedMesh, &getProtectedMesh,
+      "NavMesh object this path travels within.");
+   addProtectedField("waypoints", TYPEID<SimPath::Path>(), Offset(mWaypoints, NavPath),
+      &setProtectedWaypoints, &defaultProtectedGetFn,
+      "Path containing waypoints for this NavPath to visit.");
+
+   addField("isLooping", TypeBool, Offset(mIsLooping, NavPath),
+      "Does this path loop?");
+
+   endGroup("NavPath");
+
+   addGroup("NavPath Render");
+
+   addField("alwaysRender", TypeBool, Offset(mAlwaysRender, NavPath),
+      "Render this NavPath even when not selected.");
+   addField("xray", TypeBool, Offset(mXray, NavPath),
+      "Render this NavPath through other objects.");
+
+   endGroup("NavPath Render");
+
+   Parent::initPersistFields();
+}
+
+/// Called when the object is registered.
+/// Plans a path on the server, sizes the object to fit it and adds it to
+/// the scene. The result of plan() is ignored, so registration succeeds
+/// even when no path could be found.
+/// @return false only if Parent::onAdd fails.
+bool NavPath::onAdd()
+{
+   if(!Parent::onAdd())
+      return false;
+
+   // Ghost immediately if the editor's already open.
+   if(gEditingMission)
+      mNetFlags.set(Ghostable);
+
+   // Automatically find a path if we can.
+   if(isServerObject())
+      plan();
+
+   // Set initial world bounds and stuff.
+   resize();
+
+   // Finally, add us to the simulation.
+   addToScene();
+
+   return true;
+}
+
+/// Called when the object is unregistered; removes it from the scene.
+/// NOTE(review): Parent::onRemove runs before removeFromScene here - confirm
+/// this order is intended.
+void NavPath::onRemove()
+{
+   Parent::onRemove();
+
+   // Remove from simulation.
+   removeFromScene();
+}
+
+/// Prepare for planning: check the inputs, initialise the Detour query and
+/// build the list of points to visit.
+/// mVisitPoints is consumed from the back (visitNext reads its last two
+/// entries), so points are stored in reverse visiting order.
+/// @return false if there is no usable mesh, if neither both endpoints nor
+///         a non-empty waypoint path is available, or if the query cannot
+///         be initialised.
+bool NavPath::init()
+{
+   // Check that enough data is provided.
+   if(mMesh.isNull() || !mMesh->getNavMesh())
+      return false;
+   // Need either both endpoints or at least one waypoint.
+   if(!(mFromSet && mToSet) && !(!mWaypoints.isNull() && mWaypoints->size()))
+      return false;
+
+   // Initialise query in Detour.
+   if(dtStatusFailed(mQuery->init(mMesh->getNavMesh(), MaxPathLen)))
+      return false;
+
+   // Discard the results of any previous plan.
+   mPoints.clear();
+   mVisitPoints.clear();
+   mLength = 0.0f;
+
+   // Send path data to clients who are ghosting this object.
+   if(isServerObject())
+      setMaskBits(PathMask);
+
+   // Add points we need to visit in reverse order.
+   if(mWaypoints && mWaypoints->size())
+   {
+      // Add destination. For looping paths, that includes 'from'.
+      if(mIsLooping && mFromSet)
+         mVisitPoints.push_back(mFrom);
+      if(mToSet)
+         mVisitPoints.push_front(mTo);
+      // Add waypoints.
+      for(S32 i = mWaypoints->size() - 1; i >= 0; i--)
+      {
+         // Entries that are not SceneObjects are skipped.
+         SceneObject *s = dynamic_cast<SceneObject*>(mWaypoints->at(i));
+         if(s)
+         {
+            mVisitPoints.push_back(s->getPosition());
+            // This is potentially slow, but safe.
+            // With no 'from', a looping path returns to the first waypoint.
+            if(!i && mIsLooping && !mFromSet)
+               mVisitPoints.push_front(s->getPosition());
+         }
+      }
+      // Add source (only ever specified by 'from').
+      if(mFromSet)
+         mVisitPoints.push_back(mFrom);
+   }
+   else
+   {
+      // No waypoints: the check above guarantees both endpoints are set.
+      // Add (from,) to and from
+      if(mIsLooping)
+         mVisitPoints.push_back(mFrom);
+      mVisitPoints.push_back(mTo);
+      mVisitPoints.push_back(mFrom);
+   }
+
+   return true;
+}
+
+/// Recompute this object's bounds and position from the planned points.
+/// With no points, the object gets a unit box and an identity transform.
+/// Otherwise the position becomes the average of all points and the object
+/// box is the points' bounding box, padded by 0.5 on every side and
+/// expressed relative to that position.
+void NavPath::resize()
+{
+   if(!mPoints.size())
+   {
+      mObjBox.set(Point3F(-0.5f, -0.5f, -0.5f),
+                  Point3F( 0.5f,  0.5f,  0.5f));
+      resetWorldBox();
+      setTransform(MatrixF(true));
+      return;
+   }
+
+   // Grow a box to just fit over all our points and sum them for the
+   // average. The sum starts at the first point because the loop begins at
+   // index 1; starting from zero would leave that point out of the average.
+   Point3F max(mPoints[0]), min(mPoints[0]), pos(mPoints[0]);
+   for(U32 i = 1; i < mPoints.size(); i++)
+   {
+      Point3F p = mPoints[i];
+      max.x = getMax(max.x, p.x);
+      max.y = getMax(max.y, p.y);
+      max.z = getMax(max.z, p.z);
+      min.x = getMin(min.x, p.x);
+      min.y = getMin(min.y, p.y);
+      min.z = getMin(min.z, p.z);
+      pos += p;
+   }
+   pos /= mPoints.size();
+   min -= Point3F(0.5f, 0.5f, 0.5f);
+   max += Point3F(0.5f, 0.5f, 0.5f);
+
+   // The object box is stored relative to the object's position.
+   mObjBox.set(min - pos, max - pos);
+   MatrixF mat = Parent::getTransform();
+   mat.setPosition(pos);
+   Parent::setTransform(mat);
+}
+
+/// Plan the whole path in one call.
+/// Initialises the query, starts the first leg, runs update() until it
+/// reports no more work, finalises the result and resizes the object.
+/// @return false if initialisation, starting the first leg or finalising
+///         fails; true otherwise.
+bool NavPath::plan()
+{
+   if(!init())
+      return false;
+
+   // If the first leg cannot be started, mStatus was never set for this
+   // plan, so update() must not run on it.
+   if(!visitNext())
+      return false;
+
+   while(update());
+
+   if(!finalise())
+      return false;
+
+   resize();
+
+   return true;
+}
+
+// Begin a sliced Detour query for the last two entries of mVisitPoints.
+// Returns false if fewer than two visit points remain, if either point has no
+// nearby NavMesh polygon, or if the query could not be initialised.
+bool NavPath::visitNext()
+{
+   U32 s = mVisitPoints.size();
+   if(s < 2)
+      return false;
+
+   // Current leg of journey.
+   Point3F start = mVisitPoints[s-1];
+   Point3F end = mVisitPoints[s-2];
+
+   // Convert to Detour-friendly coordinates and data structures.
+   F32 from[] = {start.x, start.z, -start.y};
+   F32 to[] =   {end.x,   end.z,   -end.y};
+   F32 extents[] = {1.0f, 1.0f, 1.0f};
+   dtPolyRef startRef, endRef;
+
+   if(dtStatusFailed(mQuery->findNearestPoly(from, extents, &mFilter, &startRef, from)) || !startRef)
+   {
+      Con::errorf("No NavMesh polygon near visit point (%g, %g, %g) of NavPath %s",
+         start.x, start.y, start.z, getIdString());
+      return false;
+   }
+
+   if(dtStatusFailed(mQuery->findNearestPoly(to, extents, &mFilter, &endRef, to)) || !endRef)
+   {
+      Con::errorf("No NavMesh polygon near visit point (%g, %g, %g) of NavPath %s",
+         end.x, end.y, end.z, getIdString());
+      return false;
+   }
+
+   // Init sliced pathfind.
+   mStatus = mQuery->initSlicedFindPath(startRef, endRef, from, to, &mFilter);
+   return !dtStatusFailed(mStatus);
+}
+
+bool NavPath::update()
+{
+   // StatusInProgress means a query is underway.
+   if(dtStatusInProgress(mStatus))
+      mStatus = mQuery->updateSlicedFindPath(INT_MAX, NULL);
+   // StatusSucceeded means the query found its destination.
+   if(dtStatusSucceed(mStatus))
+   {
+      // Finalize the path. Need to use the static path length cap again.
+      dtPolyRef path[MaxPathLen];
+      S32 pathLen;
+      mStatus = mQuery->finalizeSlicedFindPath(path, &pathLen, MaxPathLen);
+      // Apparently stuff can go wrong during finalizing, so check the status again.
+      if(dtStatusSucceed(mStatus) && pathLen)
+      {
+         // These next few blocks are straight from Detour example code.
+         F32 straightPath[MaxPathLen * 3];
+         S32 straightPathLen;
+         dtPolyRef straightPathPolys[MaxPathLen];
+         U8 straightPathFlags[MaxPathLen];
+
+         U32 s = mVisitPoints.size();
+         Point3F start = mVisitPoints[s-1];
+         Point3F end = mVisitPoints[s-2];
+         F32 from[] = {start.x, start.z, -start.y};
+         F32 to[] =   {end.x,   end.z,   -end.y};
+
+         // Straightens out the path.
+         mQuery->findStraightPath(from, to, path, pathLen,
+            straightPath, straightPathFlags,
+            straightPathPolys, &straightPathLen, MaxPathLen);
+
+         // Convert Detour point path to list of Torque points.
+         s = mPoints.size();
+         mPoints.increment(straightPathLen);
+         for(U32 i = 0; i < straightPathLen; i++)
+         {
+            F32 *f = straightPath + i * 3;
+            mPoints[s + i] = RCtoDTS(f);
+            // Accumulate length if we're not the first vertex.
+            if(s > 0 || i > 0)
+               mLength += (mPoints[s+i] - mPoints[s+i-1]).len();
+         }
+
+         if(isServerObject())
+            setMaskBits(PathMask);
+      }
+      else
+         return false;
+      // Check to see where we still need to visit.
+      if(mVisitPoints.size() > 1)
+      {
+         //Next leg of the journey.
+         mVisitPoints.pop_back();
+         return visitNext();
+      }
+      else
+      {
+         // Finished!
+         return false;
+      }
+   }
+   else if(dtStatusFailed(mStatus))
+   {
+      // Something went wrong in planning.
+      return false;
+   }
+   return true;
+}
+
+bool NavPath::finalise()
+{
+   // Stop ticking.
+   setProcessTick(false);
+
+   // Reset world bounds and stuff.
+   resize();
+
+   return dtStatusSucceed(mStatus);
+}
+
+void NavPath::processTick(const Move *move)
+{
+   if(dtStatusInProgress(mStatus))
+      update();
+}
+
+Point3F NavPath::getNode(S32 idx)
+{
+   if(idx < getCount() && idx >= 0)
+      return mPoints[idx];
+   Con::errorf("Trying to access out-of-bounds path index %d (path length: %d)!", idx, getCount());
+   return Point3F(0,0,0);
+}
+
+S32 NavPath::getCount()
+{
+   return mPoints.size();
+}
+
+void NavPath::onEditorEnable()
+{
+   mNetFlags.set(Ghostable);
+}
+
+void NavPath::onEditorDisable()
+{
+   mNetFlags.clear(Ghostable);
+}
+
+void NavPath::inspectPostApply()
+{
+   plan();
+}
+
+void NavPath::onDeleteNotify(SimObject *obj)
+{
+   if(obj == (SimObject*)mMesh)
+   {
+      mMesh = NULL;
+      plan();
+   }
+}
+
+void NavPath::prepRenderImage(SceneRenderState *state)
+{
+   ObjectRenderInst *ri = state->getRenderPass()->allocInst<ObjectRenderInst>();
+   ri->renderDelegate.bind(this, &NavPath::renderSimple);
+   ri->type = RenderPassManager::RIT_Editor;      
+   ri->translucentSort = true;
+   ri->defaultKey = 1;
+   state->getRenderPass()->addInst(ri);
+}
+
+void NavPath::renderSimple(ObjectRenderInst *ri, SceneRenderState *state, BaseMatInstance *overrideMat)
+{
+   if(overrideMat)
+      return;
+
+   if(state->isReflectPass() || !(isSelected() || mAlwaysRender))
+      return;
+
+   GFXDrawUtil *drawer = GFX->getDrawUtil();
+   GFXStateBlockDesc desc;
+   desc.setZReadWrite(true, false);
+   desc.setBlend(true);
+   desc.setCullMode(GFXCullNone);
+
+   if(isSelected())
+   {
+      drawer->drawCube(desc, getWorldBox(), ColorI(136, 255, 228, 5));
+      desc.setFillModeWireframe();
+      drawer->drawCube(desc, getWorldBox(), ColorI::BLACK);
+   }
+
+   desc.setZReadWrite(!mXray, false);
+
+   ColorI pathColour(255, 0, 255);
+
+   if(!mIsLooping)
+   {
+      desc.setFillModeSolid();
+      if(mFromSet) drawer->drawCube(desc, Point3F(0.2f, 0.2f, 0.2f), mFrom, pathColour);
+      if(mToSet)   drawer->drawCube(desc, Point3F(0.2f, 0.2f, 0.2f), mTo, pathColour);
+   }
+
+   GFXStateBlockRef sb = GFX->createStateBlock(desc);
+   GFX->setStateBlock(sb);
+
+   PrimBuild::color3i(pathColour.red, pathColour.green, pathColour.blue);
+
+   PrimBuild::begin(GFXLineStrip, mPoints.size());
+   for (U32 i = 0; i < mPoints.size(); i++)
+      PrimBuild::vertex3fv(mPoints[i]);
+   PrimBuild::end();
+}
+
+U32 NavPath::packUpdate(NetConnection *conn, U32 mask, BitStream *stream)
+{
+   U32 retMask = Parent::packUpdate(conn, mask, stream);
+
+   stream->writeFlag(mIsLooping);
+   stream->writeFlag(mAlwaysRender);
+   stream->writeFlag(mXray);
+
+   if(stream->writeFlag(mFromSet))
+      mathWrite(*stream, mFrom);
+   if(stream->writeFlag(mToSet))
+      mathWrite(*stream, mTo);
+
+   if(stream->writeFlag(mask & PathMask))
+   {
+      stream->writeInt(mPoints.size(), 32);
+      for(U32 i = 0; i < mPoints.size(); i++)
+         mathWrite(*stream, mPoints[i]);
+   }
+
+   return retMask;
+}
+
+void NavPath::unpackUpdate(NetConnection *conn, BitStream *stream)
+{
+   Parent::unpackUpdate(conn, stream);
+
+   mIsLooping = stream->readFlag();
+   mAlwaysRender = stream->readFlag();
+   mXray = stream->readFlag();
+
+   if((mFromSet = stream->readFlag()) == true)
+      mathRead(*stream, &mFrom);
+   if((mToSet = stream->readFlag()) == true)
+      mathRead(*stream, &mTo);
+
+   if(stream->readFlag())
+   {
+      mPoints.clear();
+      mPoints.setSize(stream->readInt(32));
+      for(U32 i = 0; i < mPoints.size(); i++)
+      {
+         Point3F p;
+         mathRead(*stream, &p);
+         mPoints[i] = p;
+      }
+      resize();
+   }
+}
+
+DefineEngineMethod(NavPath, replan, bool, (),,
+   "@brief Find a path using the already-specified path properties.")
+{
+   return object->plan();
+}
+
+DefineEngineMethod(NavPath, getCount, S32, (),,
+   "@brief Return the number of nodes in this path.")
+{
+   return object->getCount();
+}
+
+DefineEngineMethod(NavPath, getNode, Point3F, (S32 idx),,
+   "@brief Get a specified node along the path.")
+{
+   return object->getNode(idx);
+}
+
+DefineEngineMethod(NavPath, getLength, F32, (),,
+   "@brief Get the length of this path in Torque units (i.e. the total distance it covers).")
+{
+   return object->getLength();
+}
diff --git a/Engine/source/navigation/navPath.h b/Engine/source/navigation/navPath.h
new file mode 100644
index 000000000..123a625d3
--- /dev/null
+++ b/Engine/source/navigation/navPath.h
@@ -0,0 +1,166 @@
+//-----------------------------------------------------------------------------
+// Copyright (c) 2013 GarageGames, LLC
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to
+// deal in the Software without restriction, including without limitation the
+// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+// sell copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+// IN THE SOFTWARE.
+//-----------------------------------------------------------------------------
+
+#ifndef _NAVPATH_H_
+#define _NAVPATH_H_
+
+#include "scene/sceneObject.h"
+#include "scene/simPath.h"
+#include "navMesh.h"
+#include <DetourNavMeshQuery.h>
+
+class NavPath: public SceneObject {
+   typedef SceneObject Parent;
+   /// Maximum size of Detour path.
+   static const U32 MaxPathLen = 1024;
+
+public:
+   /// @name NavPath
+   /// Functions for planning and accessing the path.
+   /// @{
+
+   SimObjectPtr<NavMesh> mMesh;
+   SimObjectPtr<SimPath::Path> mWaypoints;
+
+   /// Location to start at.
+   Point3F mFrom;
+   /// Has a starting location been set?
+   bool mFromSet;
+   /// Location to end at.
+   Point3F mTo;
+   /// Has an end been set?
+   bool mToSet;
+
+   /// This path should include a segment from the end to the start.
+   bool mIsLooping;
+
+   /// Render even when not selected in the editor.
+   bool mAlwaysRender;
+   /// Render on top of other objects.
+   bool mXray;
+
+   /// Plan the path.
+   bool plan();
+
+   /// Update a sliced plan.
+   /// @return True if we need to keep updating, false if we can stop.
+   bool update();
+
+   /// Finalise a sliced plan.
+   /// @return True if the plan was successful overall.
+   bool finalise();
+
+   /// @}
+
+   /// @name Path interface
+   /// @{
+
+   /// Return world-space position of a path node.
+   /// @param[in] idx Node index to retrieve.
+   Point3F getNode(S32 idx);
+
+   /// Return the number of nodes in this path.
+   S32 getCount();
+
+   /// Return the length of this path.
+   F32 getLength() { return mLength; };
+
+   /// @}
+
+   /// @name SceneObject
+   /// @{
+
+   static void initPersistFields();
+
+   bool onAdd();
+   void onRemove();
+
+   void onEditorEnable();
+   void onEditorDisable();
+   void inspectPostApply();
+
+   void onDeleteNotify(SimObject *object);
+
+   U32 packUpdate(NetConnection *conn, U32 mask, BitStream *stream);
+   void unpackUpdate(NetConnection *conn, BitStream *stream);
+
+   void prepRenderImage(SceneRenderState *state);
+   void renderSimple(ObjectRenderInst *ri, SceneRenderState *state, BaseMatInstance *overrideMat);
+
+   DECLARE_CONOBJECT(NavPath);
+
+   /// @}
+
+   /// @name ProcessObject
+   /// @{
+   void processTick(const Move *move);
+   /// @}
+
+   NavPath();
+   ~NavPath();
+
+protected:
+   enum masks {
+      PathMask     = Parent::NextFreeMask << 0,
+      NextFreeMask = Parent::NextFreeMask << 1
+   };
+
+private:
+   /// Create appropriate data structures and stuff.
+   bool init();
+
+   /// 'Visit' the most recent two points on our visit list.
+   bool visitNext();
+
+   /// Detour path query.
+   dtNavMeshQuery *mQuery;
+   /// Current status of our Detour query.
+   dtStatus mStatus;
+   /// Filter that provides the movement costs for paths.
+   dtQueryFilter mFilter;
+   
+   /// List of points the path should visit (waypoints, if you will).
+   Vector<Point3F> mVisitPoints;
+   /// List of points in the final path.
+   Vector<Point3F> mPoints;
+
+   /// Total length of path in world units.
+   F32 mLength;
+
+   /// Resets our world transform and bounds to fit our point list.
+   void resize();
+
+   /// @name Protected console getters/setters
+   /// @{
+   static bool setProtectedMesh(void *obj, const char *index, const char *data);
+   static const char *getProtectedMesh(void *obj, const char *data);
+   static bool setProtectedWaypoints(void *obj, const char *index, const char *data);
+
+   static bool setProtectedFrom(void *obj, const char *index, const char *data);
+   static const char *getProtectedFrom(void *obj, const char *data);
+
+   static bool setProtectedTo(void *obj, const char *index, const char *data);
+   static const char *getProtectedTo(void *obj, const char *data);
+   /// @}
+};
+
+#endif
diff --git a/Engine/source/navigation/recastPolyList.cpp b/Engine/source/navigation/recastPolyList.cpp
new file mode 100644
index 000000000..c5b3df51b
--- /dev/null
+++ b/Engine/source/navigation/recastPolyList.cpp
@@ -0,0 +1,182 @@
+//-----------------------------------------------------------------------------
+// Copyright (c) 2013 GarageGames, LLC
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to
+// deal in the Software without restriction, including without limitation the
+// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+// sell copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+// IN THE SOFTWARE.
+//-----------------------------------------------------------------------------
+
+#include "recastPolyList.h"
+#include "platform/platform.h"
+
+#include "gfx/gfxDevice.h"
+#include "gfx/primBuilder.h"
+#include "gfx/gfxStateBlock.h"
+
+RecastPolyList::RecastPolyList()
+{
+   nverts = 0;
+   verts = NULL;
+   vertcap = 0;
+
+   ntris = 0;
+   tris = NULL;
+   tricap = 0;
+}
+
+RecastPolyList::~RecastPolyList()
+{
+   clear();
+}
+
+void RecastPolyList::clear()
+{
+   nverts = 0;
+   delete[] verts;
+   verts = NULL;
+   vertcap = 0;
+
+   ntris = 0;
+   delete[] tris;
+   tris = NULL;
+   tricap = 0;
+}
+
+bool RecastPolyList::isEmpty() const
+{
+   return getTriCount() == 0;
+}
+
+// Transform p by mMatrix, append it as (x, z, -y) and return the new vertex index.
+U32 RecastPolyList::addPoint(const Point3F &p)
+{
+   // If we've reached the vertex cap, grow the array.
+   if(nverts == vertcap)
+   {
+      // vertcap starts at 16, otherwise it doubles.
+      vertcap = vertcap ? vertcap * 2 : 16;
+      F32 *newverts = new F32[vertcap*3];
+      if(!newverts)
+         return 0;
+      dMemcpy(newverts, verts, nverts*3 * sizeof(F32));
+      // Allocated with new[], so release with delete[] (as clear() does), not dFree.
+      delete[] verts;
+      verts = newverts;
+   }
+   Point3F v = p;
+   mMatrix.mulP(v);
+   // Insert the new vertex.
+   verts[nverts*3] = v.x;
+   verts[nverts*3+1] = v.z;
+   verts[nverts*3+2] = -v.y;
+   // Return nverts before incrementing it.
+   return nverts++;
+}
+
+U32 RecastPolyList::addPlane(const PlaneF &plane)
+{
+   planes.increment();
+   mPlaneTransformer.transform(plane, planes.last());
+   return planes.size() - 1;
+}
+
+// Start a new triangle, growing the index array first if it is full.
+void RecastPolyList::begin(BaseMatInstance *material, U32 surfaceKey)
+{
+   vidx = 0;
+   if(ntris == tricap)
+   {
+      // tricap starts at 16, otherwise it doubles.
+      tricap = tricap ? tricap * 2 : 16;
+      // Allocate new triangle index storage.
+      S32 *newtris = new S32[tricap*3];
+      if(!newtris)
+         return;
+      dMemcpy(newtris, tris, ntris*3 * sizeof(S32));
+      delete[] tris; // allocated with new[], so delete[] rather than dFree
+      tris = newtris;
+   }
+}
+
+void RecastPolyList::plane(U32 v1, U32 v2, U32 v3)
+{
+}
+
+void RecastPolyList::plane(const PlaneF& p)
+{
+}
+
+void RecastPolyList::plane(const U32 index)
+{
+}
+
+void RecastPolyList::vertex(U32 vi)
+{
+   if(vidx == 3)
+      return;
+   tris[ntris*3+2-vidx] = vi;
+   vidx++;
+}
+
+void RecastPolyList::end()
+{
+   ntris++;
+}
+
+U32 RecastPolyList::getVertCount() const
+{
+   return nverts;
+}
+
+const F32 *RecastPolyList::getVerts() const
+{
+   return verts;
+}
+
+U32 RecastPolyList::getTriCount() const
+{
+   return ntris;
+}
+
+const S32 *RecastPolyList::getTris() const
+{
+   return tris;
+}
+
+void RecastPolyList::renderWire() const
+{
+   GFXStateBlockDesc desc;
+   desc.setCullMode(GFXCullNone);
+   desc.setZReadWrite(false, false);
+   //desc.setBlend(true);
+   GFXStateBlockRef sb = GFX->createStateBlock(desc);
+   GFX->setStateBlock(sb);
+
+   PrimBuild::color3i(255, 0, 255);
+
+   for(U32 t = 0; t < getTriCount(); t++)
+   {
+      PrimBuild::begin(GFXLineStrip, 4);
+
+      PrimBuild::vertex3f(verts[tris[t*3]*3],   -verts[tris[t*3]*3+2],   verts[tris[t*3]*3+1]);
+      PrimBuild::vertex3f(verts[tris[t*3+1]*3], -verts[tris[t*3+1]*3+2], verts[tris[t*3+1]*3+1]);
+      PrimBuild::vertex3f(verts[tris[t*3+2]*3], -verts[tris[t*3+2]*3+2], verts[tris[t*3+2]*3+1]);
+      PrimBuild::vertex3f(verts[tris[t*3]*3],   -verts[tris[t*3]*3+2],   verts[tris[t*3]*3+1]);
+
+      PrimBuild::end();
+   }
+}
diff --git a/Engine/source/navigation/recastPolyList.h b/Engine/source/navigation/recastPolyList.h
new file mode 100644
index 000000000..2a3de4512
--- /dev/null
+++ b/Engine/source/navigation/recastPolyList.h
@@ -0,0 +1,99 @@
+//-----------------------------------------------------------------------------
+// Copyright (c) 2013 GarageGames, LLC
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to
+// deal in the Software without restriction, including without limitation the
+// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+// sell copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+// IN THE SOFTWARE.
+//-----------------------------------------------------------------------------
+
+#ifndef _RECAST_POLYLIST_H_
+#define _RECAST_POLYLIST_H_
+
+#include "collision/abstractPolyList.h"
+#include "core/util/tVector.h"
+
+/// Represents polygons in the same manner as the .obj file format. Handy for
+/// passing data to Recast, since it expects this data format. At the moment,
+/// this class only accepts triangles.
+/// @see AbstractPolyList
+class RecastPolyList : public AbstractPolyList {
+public:
+   /// @name AbstractPolyList
+   /// @{
+
+   bool isEmpty() const;
+
+   U32 addPoint(const Point3F &p);
+   U32 addPlane(const PlaneF &plane);
+
+   void begin(BaseMatInstance *material, U32 surfaceKey);
+
+   void plane(U32 v1, U32 v2, U32 v3);
+   void plane(const PlaneF& p);
+   void plane(const U32 index);
+
+   void vertex(U32 vi);
+
+   void end();
+
+   /// @}
+
+   /// @name Data interface
+   /// @{
+   U32 getVertCount() const;
+   const F32 *getVerts() const;
+
+   U32 getTriCount() const;
+   const S32 *getTris() const;
+
+   void clear();
+   /// @}
+
+   void renderWire() const;
+
+   /// Default constructor.
+   RecastPolyList();
+   /// Default destructor.
+   ~RecastPolyList();
+
+protected:
+   /// Number of vertices defined.
+   U32 nverts;
+   /// Array of vertex coordinates. Size nverts*3
+   F32 *verts;
+   /// Size of vertex array.
+   U32 vertcap;
+
+   /// Number of triangles defined.
+   U32 ntris;
+   /// Array of triangle vertex indices. Size ntris*3
+   S32 *tris;
+   /// Size of triangle array.
+   U32 tricap;
+
+   /// Index of vertex we're adding to the current triangle.
+   U8 vidx;
+
+   /// Store a list of planes - not actually used.
+   Vector<PlaneF> planes;
+   /// Another inherited utility function.
+   const PlaneF& getIndexedPlane(const U32 index) { return planes[index]; }
+
+private:
+};
+
+#endif
diff --git a/Engine/source/navigation/torqueRecast.h b/Engine/source/navigation/torqueRecast.h
new file mode 100644
index 000000000..0e79e5cf7
--- /dev/null
+++ b/Engine/source/navigation/torqueRecast.h
@@ -0,0 +1,72 @@
+//-----------------------------------------------------------------------------
+// Copyright (c) 2013 GarageGames, LLC
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to
+// deal in the Software without restriction, including without limitation the
+// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+// sell copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+// IN THE SOFTWARE.
+//-----------------------------------------------------------------------------
+
+#ifndef _TORQUE_RECAST_H_
+#define _TORQUE_RECAST_H_
+
+#include "console/simSet.h"
+#include "math/mPoint3.h"
+#include "math/mBox.h"
+
+inline Point3F DTStoRC(F32 x, F32 y, F32 z) { return Point3F(x, z, -y); }
+inline Point3F DTStoRC(Point3F point)       { return Point3F(point.x, point.z, -point.y); }
+inline Point3F RCtoDTS(const F32* xyz)      { return Point3F(xyz[0], -xyz[2], xyz[1]); }
+inline Point3F RCtoDTS(F32 x, F32 y, F32 z) { return Point3F(x, -z, y); }
+inline Point3F RCtoDTS(Point3F point)       { return Point3F(point.x, -point.z, point.y); }
+inline Box3F DTStoRC(Box3F box)
+{
+   return Box3F(box.minExtents.x, box.minExtents.z, -box.maxExtents.y,
+                  box.maxExtents.x, box.maxExtents.z, -box.minExtents.y);
+}
+inline Box3F RCtoDTS(const F32 *min, const F32 *max)
+{
+   return Box3F(min[0], -max[2], min[1], max[0], -min[2], max[1]);
+}
+
+/// Convert a Recast colour integer to RGBA components.
+inline void rcCol(unsigned int col, U8 &r, U8 &g, U8 &b, U8 &a)
+{
+   r = col % 256; col /= 256;
+   g = col % 256; col /= 256;
+   b = col % 256; col /= 256;
+   a = col % 256;
+}
+
+enum PolyAreas {
+   GroundArea,
+   WaterArea,
+   OffMeshArea,
+   NumAreas
+};
+
+enum PolyFlags {
+   WalkFlag = 1 << 0,
+   SwimFlag = 1 << 1,
+   JumpFlag = 1 << 2,
+   LedgeFlag = 1 << 3,
+   DropFlag = 1 << 4,
+   ClimbFlag = 1 << 5,
+   TeleportFlag = 1 << 6,
+   AllFlags = 0xffff
+};
+
+#endif
diff --git a/Engine/source/platform/input/oculusVR/barrelDistortionPostEffect.cpp b/Engine/source/platform/input/oculusVR/barrelDistortionPostEffect.cpp
index 2ef74ac1e..bc5fa8b1c 100644
--- a/Engine/source/platform/input/oculusVR/barrelDistortionPostEffect.cpp
+++ b/Engine/source/platform/input/oculusVR/barrelDistortionPostEffect.cpp
@@ -42,6 +42,7 @@ IMPLEMENT_CONOBJECT(BarrelDistortionPostEffect);
 BarrelDistortionPostEffect::BarrelDistortionPostEffect() 
    :  PostEffect(),
       mHmdWarpParamSC(NULL),
+      mHmdChromaAbSC(NULL),
       mScaleSC(NULL),
       mScaleInSC(NULL),
       mLensCenterSC(NULL),
@@ -85,23 +86,22 @@ void BarrelDistortionPostEffect::onRemove()
 
 void BarrelDistortionPostEffect::_setupConstants( const SceneRenderState *state )
 {
+   // Test if setup is required before calling the parent method as the parent method
+   // will set up the shader constants buffer for us.
+   bool setupRequired = mShaderConsts.isNull();
+
    Parent::_setupConstants(state);
 
    // Define the shader constants
-   if(!mHmdWarpParamSC)
+   if(setupRequired)
+   {
       mHmdWarpParamSC = mShader->getShaderConstHandle( "$HmdWarpParam" );
-
-   if(!mScaleSC)
+      mHmdChromaAbSC = mShader->getShaderConstHandle( "$HmdChromaAbParam" );
       mScaleSC = mShader->getShaderConstHandle( "$Scale" );
-
-   if(!mScaleInSC)
       mScaleInSC = mShader->getShaderConstHandle( "$ScaleIn" );
-
-   if(!mLensCenterSC)
       mLensCenterSC = mShader->getShaderConstHandle( "$LensCenter" );
-
-   if(!mScreenCenterSC)
       mScreenCenterSC = mShader->getShaderConstHandle( "$ScreenCenter" );
+   }
 
    const Point2I &resolution = GFX->getActiveRenderTarget()->getSize();
    F32 widthScale = 0.5f;
@@ -119,6 +119,12 @@ void BarrelDistortionPostEffect::_setupConstants( const SceneRenderState *state
          mShaderConsts->set( mHmdWarpParamSC, distortion );
       }
 
+      if(mHmdChromaAbSC->isValid())
+      {
+         const Point4F& correction = hmd->getChromaticAbCorrection();
+         mShaderConsts->set( mHmdChromaAbSC, correction );
+      }
+
       if(mScaleSC->isValid())
       {
          F32 scaleFactor = hmd->getDistortionScale();
@@ -149,6 +155,11 @@ void BarrelDistortionPostEffect::_setupConstants( const SceneRenderState *state
          mShaderConsts->set( mHmdWarpParamSC, Point4F(0.0f, 0.0f, 0.0f, 0.0f) );
       }
 
+      if(mHmdChromaAbSC->isValid())
+      {
+         mShaderConsts->set( mHmdChromaAbSC, Point4F(1.0f, 0.0f, 1.0f, 0.0f) );
+      }
+
       if(mScaleSC->isValid())
       {
          mShaderConsts->set( mScaleSC, Point2F(1.0f, 1.0f) );
diff --git a/Engine/source/platform/input/oculusVR/barrelDistortionPostEffect.h b/Engine/source/platform/input/oculusVR/barrelDistortionPostEffect.h
index 389bd96ff..986ff2a67 100644
--- a/Engine/source/platform/input/oculusVR/barrelDistortionPostEffect.h
+++ b/Engine/source/platform/input/oculusVR/barrelDistortionPostEffect.h
@@ -31,6 +31,7 @@ class BarrelDistortionPostEffect : public PostEffect
 
 protected:
    GFXShaderConstHandle *mHmdWarpParamSC;
+   GFXShaderConstHandle *mHmdChromaAbSC;
    GFXShaderConstHandle *mScaleSC;
    GFXShaderConstHandle *mScaleInSC;
    GFXShaderConstHandle *mLensCenterSC;
diff --git a/Engine/source/platform/input/oculusVR/oculusVRDevice.cpp b/Engine/source/platform/input/oculusVR/oculusVRDevice.cpp
index c6dd0513a..695880988 100644
--- a/Engine/source/platform/input/oculusVR/oculusVRDevice.cpp
+++ b/Engine/source/platform/input/oculusVR/oculusVRDevice.cpp
@@ -60,12 +60,16 @@ bool OculusVRDevice::smEnableDevice = true;
 
 bool OculusVRDevice::smSimulateHMD = true;
 
+bool OculusVRDevice::smUseChromaticAberrationCorrection = true;
+
 bool OculusVRDevice::smGenerateAngleAxisRotationEvents = true;
 bool OculusVRDevice::smGenerateEulerRotationEvents = false;
 
 bool OculusVRDevice::smGenerateRotationAsAxisEvents = false;
 F32 OculusVRDevice::smMaximumAxisAngle = 25.0f;
 
+bool OculusVRDevice::smGenerateSensorRawEvents = false;
+
 bool OculusVRDevice::smGenerateWholeFrameEvents = false;
 
 OculusVRDevice::OculusVRDevice()
@@ -99,6 +103,10 @@ void OculusVRDevice::staticInit()
       "@brief If true, the Oculus VR device will be enabled, if present.\n\n"
 	   "@ingroup Game");
 
+   Con::addVariable("pref::OculusVR::UseChromaticAberrationCorrection", TypeBool, &smUseChromaticAberrationCorrection, 
+      "@brief If true, Use the chromatic aberration correction version of the Oculus VR barrel distortion shader.\n\n"
+	   "@ingroup Game");
+
    Con::addVariable("OculusVR::GenerateAngleAxisRotationEvents", TypeBool, &smGenerateAngleAxisRotationEvents, 
       "@brief If true, broadcast sensor rotation events as angled axis.\n\n"
 	   "@ingroup Game");
@@ -114,6 +122,10 @@ void OculusVRDevice::staticInit()
       "Should range from 0 to 90 degrees.\n\n"
 	   "@ingroup Game");
 
+   Con::addVariable("OculusVR::GenerateSensorRawEvents", TypeBool, &smGenerateSensorRawEvents, 
+      "@brief If true, broadcast sensor raw data: acceleration, angular velocity, magnetometer reading.\n\n"
+	   "@ingroup Game");
+
    Con::addVariable("OculusVR::GenerateWholeFrameEvents", TypeBool, &smGenerateWholeFrameEvents, 
       "@brief Indicates that a whole frame event should be generated and frames should be buffered.\n\n"
 	   "@ingroup Game");
@@ -313,7 +325,7 @@ bool OculusVRDevice::process()
    // Process each sensor
    for(U32 i=0; i<mSensorDevices.size(); ++i)
    {
-      mSensorDevices[i]->process(mDeviceType, smGenerateAngleAxisRotationEvents, smGenerateEulerRotationEvents, smGenerateRotationAsAxisEvents, maxAxisRadius);
+      mSensorDevices[i]->process(mDeviceType, smGenerateAngleAxisRotationEvents, smGenerateEulerRotationEvents, smGenerateRotationAsAxisEvents, maxAxisRadius, smGenerateSensorRawEvents);
    }
 
    return true;
@@ -391,6 +403,22 @@ const OculusVRHMDDevice* OculusVRDevice::getHMDDevice(U32 index) const
    return mHMDDevices[index];
 }
 
+F32 OculusVRDevice::getHMDCurrentIPD(U32 index)
+{
+   if(index >= mHMDDevices.size())
+      return -1.0f;
+
+   return mHMDDevices[index]->getIPD();
+}
+
+void OculusVRDevice::setHMDCurrentIPD(U32 index, F32 ipd)
+{
+   // Silently ignore indices that do not name a known HMD.
+   if(index >= mHMDDevices.size())
+      return;
+   mHMDDevices[index]->setIPD(ipd, mScaleInputTexture);
+}
+
 //-----------------------------------------------------------------------------
 
 const OculusVRSensorDevice* OculusVRDevice::getSensorDevice(U32 index) const
@@ -409,6 +437,30 @@ EulerF OculusVRDevice::getSensorEulerRotation(U32 index)
    return mSensorDevices[index]->getEulerRotation();
 }
 
+VectorF OculusVRDevice::getSensorAcceleration(U32 index)
+{
+   if(index >= mSensorDevices.size())
+      return Point3F::Zero;
+
+   return mSensorDevices[index]->getAcceleration();
+}
+
+EulerF OculusVRDevice::getSensorAngularVelocity(U32 index)
+{
+   if(index >= mSensorDevices.size())
+      return Point3F::Zero;
+
+   return mSensorDevices[index]->getAngularVelocity();
+}
+
+VectorF OculusVRDevice::getSensorMagnetometer(U32 index)
+{
+   if(index >= mSensorDevices.size())
+      return Point3F::Zero;
+
+   return mSensorDevices[index]->getMagnetometer();
+}
+
 F32 OculusVRDevice::getSensorPredictionTime(U32 index)
 {
    const OculusVRSensorDevice* sensor = getSensorDevice(index);
@@ -438,6 +490,57 @@ void OculusVRDevice::setAllSensorPredictionTime(F32 dt)
    }
 }
 
+bool OculusVRDevice::getSensorGravityCorrection(U32 index)   // True if gravity correction is enabled on the sensor at 'index'
+{
+   const OculusVRSensorDevice* sensor = getSensorDevice(index);
+   if(!sensor || !sensor->isValid())
+      return false;   // missing or invalid sensor is reported as "correction off"
+
+   return sensor->getGravityCorrection();
+}
+
+void OculusVRDevice::setSensorGravityCorrection(U32 index, bool state)   // Enable/disable gravity correction on the sensor at 'index'
+{
+   if(index >= mSensorDevices.size())
+      return;   // unknown sensor index: silently ignored
+
+   OculusVRSensorDevice* sensor = mSensorDevices[index];   // indexed directly: getSensorDevice() only hands out a const pointer
+   if(!sensor->isValid())
+      return;   // nothing to configure on an invalid sensor
+
+   sensor->setGravityCorrection(state);
+}
+
+bool OculusVRDevice::getSensorYawCorrection(U32 index)   // True if yaw correction is enabled on the sensor at 'index'
+{
+   const OculusVRSensorDevice* sensor = getSensorDevice(index);
+   if(!sensor || !sensor->isValid())
+      return false;   // missing or invalid sensor is reported as "correction off"
+
+   return sensor->getYawCorrection();
+}
+
+void OculusVRDevice::setSensorYawCorrection(U32 index, bool state)   // Request a yaw correction state on the sensor at 'index'
+{
+   if(index >= mSensorDevices.size())
+      return;   // unknown sensor index: silently ignored
+
+   OculusVRSensorDevice* sensor = mSensorDevices[index];   // indexed directly: getSensorDevice() only hands out a const pointer
+   if(!sensor->isValid())
+      return;   // nothing to configure on an invalid sensor
+
+   sensor->setYawCorrection(state);   // the sensor ignores the request if correction was disabled externally or no calibration exists
+}
+
+bool OculusVRDevice::getSensorMagnetometerCalibrated(U32 index)   // True if the sensor at 'index' has magnetometer calibration data
+{
+   const OculusVRSensorDevice* sensor = getSensorDevice(index);
+   if(!sensor || !sensor->isValid())
+      return false;   // missing or invalid sensor is reported as "not calibrated"
+
+   return sensor->getMagnetometerCalibrationAvailable();
+}
+
 void OculusVRDevice::resetAllSensors()
 {
    // Reset each sensor
@@ -628,6 +731,46 @@ DefineEngineFunction(getOVRHMDDisplayDeviceName, const char*, (S32 index),,
    return hmd->getDisplayDeviceName();
 }
 
+DefineEngineFunction(getOVRHMDDisplayDeviceId, S32, (S32 index),,
+   "@brief MacOS display ID.\n\n"
+   "@param index The HMD index.\n"
+   "@return The ID of the HMD display device, if any.\n"
+   "@ingroup Game")
+{   // Console wrapper: returns -1 whenever the display ID cannot be looked up
+   if(!ManagedSingleton<OculusVRDevice>::instanceOrNull())
+   {
+      return -1;   // Oculus VR device singleton does not exist
+   }
+
+   const OculusVRHMDDevice* hmd = OCULUSVRDEV->getHMDDevice(index);   // NOTE(review): presumably NULL for an invalid index - confirm in getHMDDevice()
+   if(!hmd)
+   {
+      return -1;   // no HMD at this index
+   }
+
+   return hmd->getDisplayDeviceId();
+}
+
+DefineEngineFunction(getOVRHMDDisplayDesktopPos, Point2I, (S32 index),,
+   "@brief Desktop coordinate position of the screen (can be negative; may not be present on all platforms).\n\n"
+   "@param index The HMD index.\n"
+   "@return Position of the screen.\n"
+   "@ingroup Game")
+{   // Console wrapper: returns Point2I::Zero whenever the position cannot be looked up
+   if(!ManagedSingleton<OculusVRDevice>::instanceOrNull())
+   {
+      return Point2I::Zero;   // Oculus VR device singleton does not exist
+   }
+
+   const OculusVRHMDDevice* hmd = OCULUSVRDEV->getHMDDevice(index);   // NOTE(review): presumably NULL for an invalid index - confirm in getHMDDevice()
+   if(!hmd)
+   {
+      return Point2I::Zero;   // no HMD at this index; indistinguishable from a real (0, 0) position
+   }
+
+   return hmd->getDesktopPosition();
+}
+
 DefineEngineFunction(getOVRHMDResolution, Point2I, (S32 index),,
    "@brief Provides the OVR HMD screen resolution.\n\n"
    "@param index The HMD index.\n"
@@ -672,6 +815,78 @@ DefineEngineFunction(getOVRHMDDistortionCoefficients, String, (S32 index),,
    return buf;
 }
 
+DefineEngineFunction(getOVRHMDChromaticAbCorrection, String, (S32 index),,
+   "@brief Provides the OVR HMD chromatic aberration correction values.\n\n"
+   "@param index The HMD index.\n"
+   "@return A four component string with the chromatic aberration correction values.\n"
+   "@ingroup Game")
+{   // Console wrapper: formats the four coefficients as a space separated string
+   if(!ManagedSingleton<OculusVRDevice>::instanceOrNull())
+   {
+      return "1 0 1 0";   // fallback when the device singleton does not exist; presumably the "no correction" coefficients - TODO confirm against the shader
+   }
+
+   const OculusVRHMDDevice* hmd = OCULUSVRDEV->getHMDDevice(index);
+   if(!hmd)
+   {
+      return "1 0 1 0";   // same fallback when there is no HMD at this index
+   }
+
+   const Point4F& c = hmd->getChromaticAbCorrection();
+   char buf[256];   // scratch buffer; the size is repeated in the dSprintf() call below
+   dSprintf(buf, 256, "%g %g %g %g", c.x, c.y, c.z, c.w);
+
+   return buf;   // converted to the String return type, so the local buffer is not returned by address
+}
+
+DefineEngineFunction(getOVRHMDProfileIPD, F32, (S32 index),,
+   "@brief Physical distance between the user's eye centers as defined by the current profile.\n\n"
+   "@param index The HMD index.\n"
+   "@return The profile IPD.\n"
+   "@ingroup Game")
+{   // Console wrapper: returns -1.0f whenever the profile IPD cannot be looked up
+   if(!ManagedSingleton<OculusVRDevice>::instanceOrNull())
+   {
+      return -1.0f;   // Oculus VR device singleton does not exist
+   }
+
+   const OculusVRHMDDevice* hmd = OCULUSVRDEV->getHMDDevice(index);
+   if(!hmd)
+   {
+      return -1.0f;   // no HMD at this index
+   }
+
+   return hmd->getProfileIPD();   // the profile value; setIPD() does not modify it
+}
+
+DefineEngineFunction(getOVRHMDCurrentIPD, F32, (S32 index),,
+   "@brief Physical distance between the user's eye centers.\n\n"
+   "@param index The HMD index.\n"
+   "@return The current IPD.\n"
+   "@ingroup Game")
+{   // Console wrapper around OculusVRDevice::getHMDCurrentIPD(); -1.0f means "not available"
+   if(!ManagedSingleton<OculusVRDevice>::instanceOrNull())
+   {
+      return -1.0f;   // Oculus VR device singleton does not exist
+   }
+
+   return OCULUSVRDEV->getHMDCurrentIPD(index);   // takes U32: a negative script index wraps to a large value and fails the range check (-1.0f)
+}
+
+DefineEngineFunction(setOVRHMDCurrentIPD, void, (S32 index, F32 ipd),,
+   "@brief Set the physical distance between the user's eye centers.\n\n"
+   "@param index The HMD index.\n"
+   "@param ipd The IPD to use.\n"
+   "@ingroup Game")
+{   // Console wrapper around OculusVRDevice::setHMDCurrentIPD(); 'ipd' is forwarded without validation
+   if(!ManagedSingleton<OculusVRDevice>::instanceOrNull())
+   {
+      return;   // Oculus VR device singleton does not exist: nothing to set
+   }
+
+   OCULUSVRDEV->setHMDCurrentIPD(index, ipd);   // an out-of-range index is ignored by the device
+}
+
 DefineEngineFunction(getOVRHMDEyeXOffsets, Point2F, (S32 index),,
    "@brief Provides the OVR HMD eye x offsets in uv coordinates.\n\n"
    "@param index The HMD index.\n"
@@ -787,6 +1002,49 @@ DefineEngineFunction(getOVRSensorEulerRotation, Point3F, (S32 index),,
    return Point3F(mRadToDeg(rot.x), mRadToDeg(rot.y), mRadToDeg(rot.z));
 }
 
+DefineEngineFunction(getOVRSensorAcceleration, Point3F, (S32 index),,
+   "@brief Get the acceleration values for the given sensor index.\n\n"
+   "@param index The sensor index.\n"
+   "@return The acceleration values of the Oculus VR sensor, in m/s^2.\n"
+   "@ingroup Game")
+{   // Console wrapper around OculusVRDevice::getSensorAcceleration()
+   if(!ManagedSingleton<OculusVRDevice>::instanceOrNull())
+   {
+      return Point3F::Zero;   // Oculus VR device singleton does not exist
+   }
+
+   return OCULUSVRDEV->getSensorAcceleration(index);   // zero vector for an out-of-range index
+}
+
+DefineEngineFunction(getOVRSensorAngVelocity, Point3F, (S32 index),,
+   "@brief Get the angular velocity values for the given sensor index.\n\n"
+   "@param index The sensor index.\n"
+   "@return The angular velocity values of the Oculus VR sensor, in degrees/s.\n"
+   "@ingroup Game")
+{   // Console wrapper around OculusVRDevice::getSensorAngularVelocity(), converted for script use
+   if(!ManagedSingleton<OculusVRDevice>::instanceOrNull())
+   {
+      return Point3F::Zero;   // Oculus VR device singleton does not exist
+   }
+
+   EulerF rot = OCULUSVRDEV->getSensorAngularVelocity(index);   // zero for an out-of-range index
+   return Point3F(mRadToDeg(rot.x), mRadToDeg(rot.y), mRadToDeg(rot.z));   // per-component radians -> degrees, matching the documented degrees/s
+}
+
+DefineEngineFunction(getOVRSensorMagnetometer, Point3F, (S32 index),,
+   "@brief Get the magnetometer reading (direction and field strength) for the given sensor index.\n\n"
+   "@param index The sensor index.\n"
+   "@return The magnetometer reading (direction and field strength) of the Oculus VR sensor, in Gauss.\n"
+   "@ingroup Game")
+{   // Console wrapper around OculusVRDevice::getSensorMagnetometer()
+   if(!ManagedSingleton<OculusVRDevice>::instanceOrNull())
+   {
+      return Point3F::Zero;   // Oculus VR device singleton does not exist
+   }
+
+   return OCULUSVRDEV->getSensorMagnetometer(index);   // zero vector for an out-of-range index
+}
+
 DefineEngineFunction(getOVRSensorPredictionTime, F32, (S32 index),,
    "@brief Get the prediction time set for the given sensor index.\n\n"
    "@param index The sensor index.\n"
@@ -828,6 +1086,78 @@ DefineEngineFunction(setAllSensorPredictionTime, void, (F32 dt),,
    OCULUSVRDEV->setAllSensorPredictionTime(dt);
 }
 
+DefineEngineFunction(getOVRSensorGravityCorrection, bool, (S32 index),,
+   "@brief Get the gravity correction state for the given sensor index.\n\n"
+   "@param index The sensor index.\n"
+   "@return True if gravity correction (for pitch and roll) is active.\n"
+   "@ingroup Game")
+{   // Console wrapper around OculusVRDevice::getSensorGravityCorrection()
+   if(!ManagedSingleton<OculusVRDevice>::instanceOrNull())
+   {
+      return false;   // Oculus VR device singleton does not exist
+   }
+
+   return OCULUSVRDEV->getSensorGravityCorrection(index);   // also false for a missing or invalid sensor
+}
+
+DefineEngineFunction(setOVRSensorGravityCorrection, void, (S32 index, bool state),,
+   "@brief Set the gravity correction state for the given sensor index.\n\n"
+   "@param index The sensor index.\n"
+   "@param state The gravity correction state to change to.\n"
+   "@ingroup Game")
+{   // Console wrapper around OculusVRDevice::setSensorGravityCorrection()
+   if(!ManagedSingleton<OculusVRDevice>::instanceOrNull())
+   {
+      return;   // Oculus VR device singleton does not exist: nothing to set
+   }
+
+   OCULUSVRDEV->setSensorGravityCorrection(index, state);   // ignored for an out-of-range index or an invalid sensor
+}
+
+DefineEngineFunction(getOVRSensorYawCorrection, bool, (S32 index),,
+   "@brief Get the yaw correction state for the given sensor index.\n\n"
+   "@param index The sensor index.\n"
+   "@return True if yaw correction (using magnetometer calibration data) is active.\n"
+   "@ingroup Game")
+{   // Console wrapper around OculusVRDevice::getSensorYawCorrection()
+   if(!ManagedSingleton<OculusVRDevice>::instanceOrNull())
+   {
+      return false;   // Oculus VR device singleton does not exist
+   }
+
+   return OCULUSVRDEV->getSensorYawCorrection(index);   // also false for a missing or invalid sensor
+}
+
+DefineEngineFunction(setOVRSensorYawCorrection, void, (S32 index, bool state),,
+   "@brief Set the yaw correction state for the given sensor index.\n\n"
+   "@param index The sensor index.\n"
+   "@param state The yaw correction state to change to.\n"
+   "@note Yaw correction cannot be enabled if the user has disabled it through "
+   "the Oculus VR control panel.\n"
+   "@ingroup Game")
+{   // Console wrapper around OculusVRDevice::setSensorYawCorrection()
+   if(!ManagedSingleton<OculusVRDevice>::instanceOrNull())
+   {
+      return;   // Oculus VR device singleton does not exist: nothing to set
+   }
+
+   OCULUSVRDEV->setSensorYawCorrection(index, state);   // the request may be dropped silently; read the state back to confirm it took effect
+}
+
+DefineEngineFunction(getOVRSensorMagnetometerCalibrated, bool, (S32 index),,
+   "@brief Get the magnetometer calibrated data state for the given sensor index.\n\n"
+   "@param index The sensor index.\n"
+   "@return True if magnetometer calibration data is available.\n"
+   "@ingroup Game")
+{   // Console wrapper around OculusVRDevice::getSensorMagnetometerCalibrated()
+   if(!ManagedSingleton<OculusVRDevice>::instanceOrNull())
+   {
+      return false;   // Oculus VR device singleton does not exist
+   }
+
+   return OCULUSVRDEV->getSensorMagnetometerCalibrated(index);   // also false for a missing or invalid sensor
+}
+
 DefineEngineFunction(ovrResetAllSensors, void, (),,
    "@brief Resets all Oculus VR sensors.\n\n"
    "This resets all sensor orientations such that their 'normal' rotation "
diff --git a/Engine/source/platform/input/oculusVR/oculusVRDevice.h b/Engine/source/platform/input/oculusVR/oculusVRDevice.h
index 4ca37a6cc..429702322 100644
--- a/Engine/source/platform/input/oculusVR/oculusVRDevice.h
+++ b/Engine/source/platform/input/oculusVR/oculusVRDevice.h
@@ -44,6 +44,10 @@ public:
    // If no HMD is present simulate it being available
    static bool smSimulateHMD;
 
+   // Use the chromatic aberration correction version of the barrel
+   // distortion shader.
+   static bool smUseChromaticAberrationCorrection;
+
    // Type of rotation events to broadcast
    static bool smGenerateAngleAxisRotationEvents;
    static bool smGenerateEulerRotationEvents;
@@ -55,6 +59,9 @@ public:
    // as measured from a vector pointing straight up (in degrees)
    static F32 smMaximumAxisAngle;
 
+   // Broadcast sensor raw data: acceleration, angular velocity, magnetometer reading
+   static bool smGenerateSensorRawEvents;
+
    // Indicates that a whole frame event should be generated and frames
    // should be buffered.
    static bool smGenerateWholeFrameEvents;
@@ -131,14 +138,24 @@ public:
    // HMDs
    U32 getHMDCount() const { return mHMDDevices.size(); }
    const OculusVRHMDDevice* getHMDDevice(U32 index) const;
+   F32 getHMDCurrentIPD(U32 index);
+   void setHMDCurrentIPD(U32 index, F32 ipd);
 
    // Sensors
    U32 getSensorCount() const { return mSensorDevices.size(); }
    const OculusVRSensorDevice* getSensorDevice(U32 index) const;
    EulerF getSensorEulerRotation(U32 index);
+   VectorF getSensorAcceleration(U32 index);
+   EulerF getSensorAngularVelocity(U32 index);
+   VectorF getSensorMagnetometer(U32 index);
    F32 getSensorPredictionTime(U32 index);
    void setSensorPredictionTime(U32 index, F32 dt);
    void setAllSensorPredictionTime(F32 dt);
+   bool getSensorGravityCorrection(U32 index);
+   void setSensorGravityCorrection(U32 index, bool state);
+   bool getSensorYawCorrection(U32 index);
+   void setSensorYawCorrection(U32 index, bool state);
+   bool getSensorMagnetometerCalibrated(U32 index);
    void resetAllSensors();
 
 public:
diff --git a/Engine/source/platform/input/oculusVR/oculusVRHMDDevice.cpp b/Engine/source/platform/input/oculusVR/oculusVRHMDDevice.cpp
index 96919ff81..1a2a5a0c0 100644
--- a/Engine/source/platform/input/oculusVR/oculusVRHMDDevice.cpp
+++ b/Engine/source/platform/input/oculusVR/oculusVRHMDDevice.cpp
@@ -58,6 +58,10 @@ void OculusVRHMDDevice::set(OVR::HMDDevice* hmd, OVR::HMDInfo& info, bool calcul
    mVersion = info.Version;
 
    mDisplayDeviceName = info.DisplayDeviceName;
+   mDisplayId = info.DisplayId;
+
+   mDesktopPosition.x = info.DesktopX;
+   mDesktopPosition.y = info.DesktopY;
 
    mResolution.x = info.HResolution;
    mResolution.y = info.VResolution;
@@ -68,13 +72,19 @@ void OculusVRHMDDevice::set(OVR::HMDDevice* hmd, OVR::HMDInfo& info, bool calcul
    mVerticalEyeCenter = info.VScreenCenter;
    mEyeToScreen = info.EyeToScreenDistance;
    mLensSeparation = info.LensSeparationDistance;
-   mInterpupillaryDistance = info.InterpupillaryDistance;
+   mProfileInterpupillaryDistance = info.InterpupillaryDistance;
+   mInterpupillaryDistance = mProfileInterpupillaryDistance;
 
    mKDistortion.x = info.DistortionK[0];
    mKDistortion.y = info.DistortionK[1];
    mKDistortion.z = info.DistortionK[2];
    mKDistortion.w = info.DistortionK[3];
 
+   mChromaticAbCorrection.x = info.ChromaAbCorrection[0];
+   mChromaticAbCorrection.y = info.ChromaAbCorrection[1];
+   mChromaticAbCorrection.z = info.ChromaAbCorrection[2];
+   mChromaticAbCorrection.w = info.ChromaAbCorrection[3];
+
    // Calculated values
    calculateValues(calculateDistortionScale);
 
@@ -109,13 +119,27 @@ void OculusVRHMDDevice::createSimulatedPreviewRift(bool calculateDistortionScale
    mVerticalEyeCenter = 0.046799999f;
    mEyeToScreen = 0.041000001f;
    mLensSeparation = 0.064000003f;
-   mInterpupillaryDistance = 0.064000003f;
+   mProfileInterpupillaryDistance = 0.064000003f;
+   mInterpupillaryDistance = mProfileInterpupillaryDistance;
 
    mKDistortion.x = 1.0000000f;
    mKDistortion.y = 0.22000000f;
    mKDistortion.z = 0.23999999f;
    mKDistortion.w = 0.00000000f;
 
+   mChromaticAbCorrection.x = 0.995999f;
+   mChromaticAbCorrection.y = -0.004f;
+   mChromaticAbCorrection.z = 1.014f;
+   mChromaticAbCorrection.w = 0.0f;
+
+   calculateValues(calculateDistortionScale);
+}
+
+void OculusVRHMDDevice::setIPD(F32 ipd, bool calculateDistortionScale)   // Override the current IPD and refresh the values derived from it
+{
+   mInterpupillaryDistance = ipd;   // stored as given (no range check); mProfileInterpupillaryDistance is left untouched
+
+   // Recalculate as some values rely on the IPD
    calculateValues(calculateDistortionScale);
 }
 
diff --git a/Engine/source/platform/input/oculusVR/oculusVRHMDDevice.h b/Engine/source/platform/input/oculusVR/oculusVRHMDDevice.h
index dce8234eb..ddea2a48e 100644
--- a/Engine/source/platform/input/oculusVR/oculusVRHMDDevice.h
+++ b/Engine/source/platform/input/oculusVR/oculusVRHMDDevice.h
@@ -54,6 +54,12 @@ protected:
    // Windows display device name used in EnumDisplaySettings/CreateDC
    String   mDisplayDeviceName;
 
+   // MacOS display ID
+   S32      mDisplayId;
+
+   // Desktop coordinate position of the screen (can be negative; may not be present on all platforms)
+   Point2I  mDesktopPosition;
+
    // Whole screen resolution
    Point2I  mResolution;
 
@@ -70,6 +76,9 @@ protected:
    // Physical distance between lens centers, in meters
    F32      mLensSeparation;
 
+   // Physical distance between the user's eye centers as defined in the current profile
+   F32      mProfileInterpupillaryDistance;
+
    // Physical distance between the user's eye centers
    F32      mInterpupillaryDistance;
 
@@ -79,6 +88,9 @@ protected:
    // Radial distortion correction coefficients used by the barrel distortion shader
    Point4F  mKDistortion;
 
+   // Chromatic aberration correction coefficients
+   Point4F mChromaticAbCorrection;
+
    // Calculated values of eye x offset from center in normalized (uv) coordinates
    // where each eye is 0..1.  Used for the mono to stereo postFX to simulate an
    // eye offset of the camera.  The x component is the left eye, the y component
@@ -137,6 +149,12 @@ public:
    // Windows display device name used in EnumDisplaySettings/CreateDC
    const char* getDisplayDeviceName() const { return mDisplayDeviceName.c_str(); }
 
+   // MacOS display ID
+   S32 getDisplayDeviceId() const { return mDisplayId; }
+
+   // Desktop coordinate position of the screen (can be negative; may not be present on all platforms)
+   const Point2I& getDesktopPosition() const { return mDesktopPosition; }
+
    // Whole screen resolution
    const Point2I& getResolution() const { return mResolution; }
 
@@ -153,15 +171,24 @@ public:
    // Physical distance between lens centers, in meters
    F32 getLensSeparation() const { return mLensSeparation; }
 
+   // Physical distance between the user's eye centers as defined by the current profile
+   F32 getProfileIPD() const { return mProfileInterpupillaryDistance; }
+
    // Physical distance between the user's eye centers
    F32 getIPD() const { return mInterpupillaryDistance; }
 
+   // Set a new physical distance between the user's eye centers
+   void setIPD(F32 ipd, bool calculateDistortionScale);
+
    // Provides the IPD of one eye as a Point3F
    const Point3F& getEyeWorldOffset() const { return mEyeWorldOffset; }
 
    // Radial distortion correction coefficients used by the barrel distortion shader
    const Point4F& getKDistortion() const { return mKDistortion; }
 
+   // Chromatic aberration correction coefficients used by the barrel distortion shader
+   const Point4F& getChromaticAbCorrection() const { return mChromaticAbCorrection; }
+
    // Calculated values of eye x offset from center in normalized (uv) coordinates.
    const Point2F& getEyeUVOffset() const { return mEyeUVOffset; }
 
diff --git a/Engine/source/platform/input/oculusVR/oculusVRSensorData.cpp b/Engine/source/platform/input/oculusVR/oculusVRSensorData.cpp
index 8d3a44b80..62ff80e67 100644
--- a/Engine/source/platform/input/oculusVR/oculusVRSensorData.cpp
+++ b/Engine/source/platform/input/oculusVR/oculusVRSensorData.cpp
@@ -34,7 +34,7 @@ void OculusVRSensorData::reset()
    mDataSet = false;
 }
 
-void OculusVRSensorData::setData(const OVR::SensorFusion& data, const F32& maxAxisRadius)
+void OculusVRSensorData::setData(OVR::SensorFusion& data, const F32& maxAxisRadius)
 {
    // Sensor rotation
    OVR::Quatf orientation;
@@ -56,6 +56,24 @@ void OculusVRSensorData::setData(const OVR::SensorFusion& data, const F32& maxAx
    // Sensor rotation as axis
    OculusVRUtil::calculateAxisRotation(mRot, maxAxisRadius, mRotAxis);
 
+   // Sensor raw values
+   OVR::Vector3f accel = data.GetAcceleration();
+   OculusVRUtil::convertAcceleration(accel, mAcceleration);
+
+   OVR::Vector3f angVel = data.GetAngularVelocity();
+   OculusVRUtil::convertAngularVelocity(angVel, mAngVelocity);
+
+   OVR::Vector3f mag;
+   if(data.HasMagCalibration() && data.IsYawCorrectionEnabled())
+   {
+      mag = data.GetCalibratedMagnetometer();
+   }
+   else
+   {
+      mag = data.GetMagnetometer();
+   }
+   OculusVRUtil::convertMagnetometer(mag, mMagnetometer);
+
    mDataSet = true;
 }
 
@@ -69,10 +87,15 @@ void OculusVRSensorData::simulateData(const F32& maxAxisRadius)
    // Sensor rotation as axis
    OculusVRUtil::calculateAxisRotation(mRot, maxAxisRadius, mRotAxis);
 
+   // Sensor raw values
+   mAcceleration.zero();
+   mAngVelocity.zero();
+   mMagnetometer.zero();
+
    mDataSet = true;
 }
 
-U32 OculusVRSensorData::compare(OculusVRSensorData* other)
+U32 OculusVRSensorData::compare(OculusVRSensorData* other, bool doRawCompare)
 {
    S32 result = DIFF_NONE;
 
@@ -92,5 +115,22 @@ U32 OculusVRSensorData::compare(OculusVRSensorData* other)
       result |= DIFF_ROTAXISY;
    }
 
+   // Check raw values
+   if(doRawCompare)
+   {
+      if(mAcceleration.x != other->mAcceleration.x || mAcceleration.y != other->mAcceleration.y || mAcceleration.z != other->mAcceleration.z || !mDataSet)
+      {
+         result |= DIFF_ACCEL;
+      }
+      if(mAngVelocity.x != other->mAngVelocity.x || mAngVelocity.y != other->mAngVelocity.y || mAngVelocity.z != other->mAngVelocity.z || !mDataSet)
+      {
+         result |= DIFF_ANGVEL;
+      }
+      if(mMagnetometer.x != other->mMagnetometer.x || mMagnetometer.y != other->mMagnetometer.y || mMagnetometer.z != other->mMagnetometer.z || !mDataSet)
+      {
+         result |= DIFF_MAG;
+      }
+   }
+
    return result;
 }
diff --git a/Engine/source/platform/input/oculusVR/oculusVRSensorData.h b/Engine/source/platform/input/oculusVR/oculusVRSensorData.h
index 8f73e4531..796e09cc2 100644
--- a/Engine/source/platform/input/oculusVR/oculusVRSensorData.h
+++ b/Engine/source/platform/input/oculusVR/oculusVRSensorData.h
@@ -36,8 +36,12 @@ struct OculusVRSensorData
       DIFF_ROT             = (1<<0),
       DIFF_ROTAXISX        = (1<<1),
       DIFF_ROTAXISY        = (1<<2),
+      DIFF_ACCEL           = (1<<3),
+      DIFF_ANGVEL          = (1<<4),
+      DIFF_MAG             = (1<<5),
 
       DIFF_ROTAXIS = (DIFF_ROTAXISX | DIFF_ROTAXISY),
+      DIFF_RAW = (DIFF_ACCEL | DIFF_ANGVEL | DIFF_MAG),
    };
 
    bool mDataSet;
@@ -50,19 +54,24 @@ struct OculusVRSensorData
    // Controller rotation as axis x, y
    Point2F mRotAxis;
 
+   // Raw values
+   VectorF mAcceleration;
+   EulerF  mAngVelocity;
+   VectorF mMagnetometer;
+
    OculusVRSensorData();
 
    /// Reset the data
    void reset();
 
    /// Set data based on given sensor fusion
-   void setData(const OVR::SensorFusion& data, const F32& maxAxisRadius);
+   void setData(OVR::SensorFusion& data, const F32& maxAxisRadius);
 
    /// Simulate valid data
    void simulateData(const F32& maxAxisRadius);
 
    /// Compare this data and given and return differences
-   U32 compare(OculusVRSensorData* other);
+   U32 compare(OculusVRSensorData* other, bool doRawCompare);
 };
 
 #endif   // _OCULUSVRSENSORDATA_H_
diff --git a/Engine/source/platform/input/oculusVR/oculusVRSensorDevice.cpp b/Engine/source/platform/input/oculusVR/oculusVRSensorDevice.cpp
index dad2bba20..54b5c8467 100644
--- a/Engine/source/platform/input/oculusVR/oculusVRSensorDevice.cpp
+++ b/Engine/source/platform/input/oculusVR/oculusVRSensorDevice.cpp
@@ -29,6 +29,9 @@ U32 OculusVRSensorDevice::OVR_SENSORROT[OculusVRConstants::MaxSensors] = {0};
 U32 OculusVRSensorDevice::OVR_SENSORROTANG[OculusVRConstants::MaxSensors] = {0};
 U32 OculusVRSensorDevice::OVR_SENSORROTAXISX[OculusVRConstants::MaxSensors] = {0};
 U32 OculusVRSensorDevice::OVR_SENSORROTAXISY[OculusVRConstants::MaxSensors] = {0};
+U32 OculusVRSensorDevice::OVR_SENSORACCELERATION[OculusVRConstants::MaxSensors] = {0};
+U32 OculusVRSensorDevice::OVR_SENSORANGVEL[OculusVRConstants::MaxSensors] = {0};
+U32 OculusVRSensorDevice::OVR_SENSORMAGNETOMETER[OculusVRConstants::MaxSensors] = {0};
 
 OculusVRSensorDevice::OculusVRSensorDevice()
 {
@@ -74,6 +77,7 @@ void OculusVRSensorDevice::set(OVR::SensorDevice* sensor, OVR::SensorInfo& info,
 
    mDevice = sensor;
    mSensorFusion.AttachToSensor(sensor);
+   mYawCorrectionDisabled = !mSensorFusion.IsYawCorrectionEnabled();
 
    // DeviceInfo
    mProductName = info.ProductName;
@@ -110,6 +114,7 @@ void OculusVRSensorDevice::createSimulatedPreviewRift(S32 actionCodeIndex)
 {
    mIsValid = false;
    mIsSimulation = true;
+   mYawCorrectionDisabled = true;
 
    // DeviceInfo
    mProductName = "Tracker DK";
@@ -145,6 +150,10 @@ void OculusVRSensorDevice::buildCodeTable()
 
       OVR_SENSORROTAXISX[i] = INPUTMGR->getNextDeviceCode();
       OVR_SENSORROTAXISY[i] = INPUTMGR->getNextDeviceCode();
+
+      OVR_SENSORACCELERATION[i] = INPUTMGR->getNextDeviceCode();
+      OVR_SENSORANGVEL[i] = INPUTMGR->getNextDeviceCode();
+      OVR_SENSORMAGNETOMETER[i] = INPUTMGR->getNextDeviceCode();
    }
 
    // Build out the virtual map
@@ -155,16 +164,27 @@ void OculusVRSensorDevice::buildCodeTable()
       INPUTMGR->addVirtualMap( buffer, SI_ROT, OVR_SENSORROT[i] );
 
       dSprintf(buffer, 64, "ovr_sensorrotang%d", i);
-      INPUTMGR->addVirtualMap( buffer, SI_ROT, OVR_SENSORROTANG[i] );
+      INPUTMGR->addVirtualMap( buffer, SI_POS, OVR_SENSORROTANG[i] );
 
       dSprintf(buffer, 64, "ovr_sensorrotaxisx%d", i);
       INPUTMGR->addVirtualMap( buffer, SI_AXIS, OVR_SENSORROTAXISX[i] );
       dSprintf(buffer, 64, "ovr_sensorrotaxisy%d", i);
       INPUTMGR->addVirtualMap( buffer, SI_AXIS, OVR_SENSORROTAXISY[i] );
+
+      dSprintf(buffer, 64, "ovr_sensoracceleration%d", i);
+      INPUTMGR->addVirtualMap( buffer, SI_POS, OVR_SENSORACCELERATION[i] );
+
+      dSprintf(buffer, 64, "ovr_sensorangvel%d", i);
+      INPUTMGR->addVirtualMap( buffer, SI_POS, OVR_SENSORANGVEL[i] );
+
+      dSprintf(buffer, 64, "ovr_sensormagnetometer%d", i);
+      INPUTMGR->addVirtualMap( buffer, SI_POS, OVR_SENSORMAGNETOMETER[i] );
    }
 }
 
-bool OculusVRSensorDevice::process(U32 deviceType, bool generateRotAsAngAxis, bool generateRotAsEuler, bool generateRotationAsAxisEvents, F32 maxAxisRadius)
+//-----------------------------------------------------------------------------
+
+bool OculusVRSensorDevice::process(U32 deviceType, bool generateRotAsAngAxis, bool generateRotAsEuler, bool generateRotationAsAxisEvents, F32 maxAxisRadius, bool generateRawSensor)
 {
    if(!mIsValid)
       return false;
@@ -180,7 +200,7 @@ bool OculusVRSensorDevice::process(U32 deviceType, bool generateRotAsAngAxis, bo
    {
       currentBuffer->simulateData(maxAxisRadius);
    }
-   diff = mPrevData->compare(currentBuffer);
+   diff = mPrevData->compare(currentBuffer, generateRawSensor);
 
    // Update the previous data pointer.  We do this here in case someone calls our
    // console functions during one of the input events below.
@@ -215,9 +235,32 @@ bool OculusVRSensorDevice::process(U32 deviceType, bool generateRotAsAngAxis, bo
          INPUTMGR->buildInputEvent(deviceType, OculusVRConstants::DefaultOVRBase, SI_AXIS, OVR_SENSORROTAXISY[mActionCodeIndex], SI_MOVE, currentBuffer->mRotAxis.y);
    }
 
+   // Raw sensor events: one SI_POS event for each raw quantity flagged as changed in 'diff'
+   if(generateRawSensor && (diff & OculusVRSensorData::DIFF_RAW))
+   {
+      if(diff & OculusVRSensorData::DIFF_ACCEL)   // acceleration differs from the previous sample
+         INPUTMGR->buildInputEvent(deviceType, OculusVRConstants::DefaultOVRBase, SI_POS, OVR_SENSORACCELERATION[mActionCodeIndex], SI_MOVE, currentBuffer->mAcceleration);
+
+      if(diff & OculusVRSensorData::DIFF_ANGVEL)   // angular velocity differs from the previous sample
+      {
+         // Convert angles to degrees
+         VectorF angles;
+         for(U32 i=0; i<3; ++i)
+         {
+            angles[i] = mRadToDeg(currentBuffer->mAngVelocity[i]);
+         }
+         INPUTMGR->buildInputEvent(deviceType, OculusVRConstants::DefaultOVRBase, SI_POS, OVR_SENSORANGVEL[mActionCodeIndex], SI_MOVE, angles);
+      }
+
+      if(diff & OculusVRSensorData::DIFF_MAG)   // magnetometer reading differs from the previous sample
+         INPUTMGR->buildInputEvent(deviceType, OculusVRConstants::DefaultOVRBase, SI_POS, OVR_SENSORMAGNETOMETER[mActionCodeIndex], SI_MOVE, currentBuffer->mMagnetometer);
+   }
+
    return true;
 }
 
+//-----------------------------------------------------------------------------
+
 void OculusVRSensorDevice::reset()
 {
    if(!mIsValid)
@@ -242,6 +285,51 @@ void OculusVRSensorDevice::setPredictionTime(F32 dt)
    mSensorFusion.SetPrediction(dt);
 }
 
+bool OculusVRSensorDevice::getGravityCorrection() const   // True if the sensor fusion reports gravity correction as enabled
+{
+   if(!mIsValid)
+      return false;   // an invalid sensor is reported as "correction off"
+
+   return mSensorFusion.IsGravityEnabled();
+}
+
+void OculusVRSensorDevice::setGravityCorrection(bool state)   // Enable/disable gravity correction in the sensor fusion
+{
+   if(!mIsValid)
+      return;   // no-op on an invalid sensor
+
+   mSensorFusion.SetGravityEnabled(state);
+}
+
+bool OculusVRSensorDevice::getYawCorrection() const   // True if the sensor fusion reports yaw correction as enabled
+{
+   if(!mIsValid)
+      return false;   // an invalid sensor is reported as "correction off"
+
+   return mSensorFusion.IsYawCorrectionEnabled();
+}
+
+void OculusVRSensorDevice::setYawCorrection(bool state)   // Request a yaw correction state; silently ignored when not permitted
+{
+   if(!mIsValid)
+      return;   // no-op on an invalid sensor
+
+   if(mYawCorrectionDisabled || !mSensorFusion.HasMagCalibration())
+      return;   // flag was captured when the sensor was attached; this guard rejects both enabling and disabling
+
+   mSensorFusion.SetYawCorrectionEnabled(state);
+}
+
+bool OculusVRSensorDevice::getMagnetometerCalibrationAvailable() const   // True if the sensor fusion has magnetometer calibration data
+{
+   if(!mIsValid)
+      return false;   // an invalid sensor is reported as "not calibrated"
+
+   return mSensorFusion.HasMagCalibration();
+}
+
+//-----------------------------------------------------------------------------
+
 EulerF OculusVRSensorDevice::getEulerRotation()
 {
    if(!mIsValid)
@@ -263,3 +351,82 @@ EulerF OculusVRSensorDevice::getEulerRotation()
 
    return rot;
 }
+
+EulerF OculusVRSensorDevice::getRawEulerRotation()   // Current sensor orientation as Euler angles, read straight from GetOrientation()
+{
+   if(!mIsValid)
+      return Point3F::Zero;   // an invalid sensor reports zero rotation
+
+   OVR::Quatf orientation;
+   orientation = mSensorFusion.GetOrientation();   // NOTE(review): presumably the non-predicted orientation, unlike getEulerRotation() - confirm against the OVR SDK
+
+   // Sensor rotation in Euler format
+   EulerF rot;
+   OculusVRUtil::convertRotation(orientation, rot);
+
+   return rot;
+}
+
+VectorF OculusVRSensorDevice::getAcceleration()   // Current acceleration reading, remapped by OculusVRUtil::convertAcceleration()
+{
+   if(!mIsValid)
+      return VectorF::Zero;   // an invalid sensor reports zero acceleration
+
+   OVR::Vector3f a = mSensorFusion.GetAcceleration();   // reading in the OVR SDK's own vector type
+   
+   // Sensor acceleration in VectorF format
+   VectorF acceleration;
+   OculusVRUtil::convertAcceleration(a, acceleration);
+
+   return acceleration;
+}
+
+EulerF OculusVRSensorDevice::getAngularVelocity()   // Current angular velocity reading, remapped by OculusVRUtil::convertAngularVelocity()
+{
+   if(!mIsValid)
+      return EulerF::Zero;   // an invalid sensor reports zero angular velocity
+
+   OVR::Vector3f v = mSensorFusion.GetAngularVelocity();   // reading in the OVR SDK's own vector type
+   
+   // Sensor angular velocity in EulerF format
+   EulerF vel;
+   OculusVRUtil::convertAngularVelocity(v, vel);   // only axes/signs are remapped; no unit conversion happens here
+
+   return vel;
+}
+
+VectorF OculusVRSensorDevice::getMagnetometer()   // Current magnetometer reading; calibrated when possible, raw otherwise
+{
+   if(!mIsValid)
+      return VectorF::Zero;   // an invalid sensor reports a zero vector
+
+   OVR::Vector3f m;
+   if(mSensorFusion.HasMagCalibration() && mSensorFusion.IsYawCorrectionEnabled())
+   {
+      m = mSensorFusion.GetCalibratedMagnetometer();   // calibration data exists and yaw correction is on
+   }
+   else
+   {
+      m = mSensorFusion.GetMagnetometer();   // fall back to the uncalibrated reading
+   }
+   
+   // Sensor magnetometer reading in VectorF format
+   VectorF mag;
+   OculusVRUtil::convertMagnetometer(m, mag);
+
+   return mag;
+}
+
+VectorF OculusVRSensorDevice::getRawMagnetometer()   // Current magnetometer reading, always taken from GetMagnetometer() (never the calibrated value)
+{
+   if(!mIsValid)
+      return VectorF::Zero;   // an invalid sensor reports a zero vector
+
+   OVR::Vector3f m = mSensorFusion.GetMagnetometer();   // reading in the OVR SDK's own vector type
+   
+   // Sensor magnetometer reading in VectorF format
+   VectorF mag;
+   OculusVRUtil::convertMagnetometer(m, mag);
+
+   return mag;
+}
diff --git a/Engine/source/platform/input/oculusVR/oculusVRSensorDevice.h b/Engine/source/platform/input/oculusVR/oculusVRSensorDevice.h
index c5ed30322..fcaca44d5 100644
--- a/Engine/source/platform/input/oculusVR/oculusVRSensorDevice.h
+++ b/Engine/source/platform/input/oculusVR/oculusVRSensorDevice.h
@@ -50,6 +50,10 @@ public:
    static U32 OVR_SENSORROTAXISX[OculusVRConstants::MaxSensors];  // SI_AXIS
    static U32 OVR_SENSORROTAXISY[OculusVRConstants::MaxSensors];
 
+   static U32 OVR_SENSORACCELERATION[OculusVRConstants::MaxSensors];    // SI_POS
+   static U32 OVR_SENSORANGVEL[OculusVRConstants::MaxSensors];          // SI_POS but is EulerF
+   static U32 OVR_SENSORMAGNETOMETER[OculusVRConstants::MaxSensors];    // SI_POS
+
 protected:
    bool mIsValid;
 
@@ -69,6 +73,9 @@ protected:
    U16      mProductId;
    String   mSerialNumber;
 
+   // Has yaw correction been disabled by the control panel
+   bool     mYawCorrectionDisabled;
+
    // Assigned by the OculusVRDevice
    S32 mActionCodeIndex;
 
@@ -99,7 +106,7 @@ public:
    bool isValid() const {return mIsValid;}
    bool isSimulated() {return mIsSimulation;}
 
-   bool process(U32 deviceType, bool generateRotAsAngAxis, bool generateRotAsEuler, bool generateRotationAsAxisEvents, F32 maxAxisRadius);
+   bool process(U32 deviceType, bool generateRotAsAngAxis, bool generateRotAsEuler, bool generateRotationAsAxisEvents, F32 maxAxisRadius, bool generateRawSensor);
 
    void reset();
 
@@ -109,6 +116,26 @@ public:
    // Set the prediction time for the sensor fusion.  The time is in seconds.
    void setPredictionTime(F32 dt);
 
+   // Is gravity correction enabled for pitch and roll
+   bool getGravityCorrection() const;
+
+   // Set the pitch and roll gravity correction
+   void setGravityCorrection(bool state);
+
+   // Has yaw correction been disabled using the control panel
+   bool getYawCorrectionUserDisabled() const { return mYawCorrectionDisabled; }
+
+   // Is yaw correction enabled
+   bool getYawCorrection() const;
+
+   // Set the yaw correction. Note: if magnetometer calibration data is not present,
+   // or user has disabled yaw correction in the control panel, this method will
+   // not enable it.
+   void setYawCorrection(bool state);
+
+   // Is magnetometer calibration data available for this sensor
+   bool getMagnetometerCalibrationAvailable() const;
+
    const char* getProductName() { return mProductName.c_str(); }
    const char* getManufacturer() { return mManufacturer.c_str(); }
    U32 getVersion() { return mVersion; }
@@ -116,7 +143,24 @@ public:
    U16 getProductId() { return mProductId; }
    const char* getSerialNumber() { return mSerialNumber; }
 
+   // Get the current rotation of the sensor.  Uses prediction if set.
    EulerF getEulerRotation();
+
+   // Get the current rotation of the sensor.
+   EulerF getRawEulerRotation();
+
+   // Get the current absolute acceleration reading, in m/s^2
+   VectorF getAcceleration();
+
+   // Get the current angular velocity reading, in rad/s
+   EulerF getAngularVelocity();
+
+   // Get the current magnetometer reading (direction and field strength), in Gauss.
+   // Uses magnetometer calibration if set.
+   VectorF getMagnetometer();
+
+   // Get the current raw magnetometer reading (direction and field strength), in Gauss
+   VectorF getRawMagnetometer();
 };
 
 #endif   // _OCULUSVRSENSORDEVICE_H_
diff --git a/Engine/source/platform/input/oculusVR/oculusVRUtil.cpp b/Engine/source/platform/input/oculusVR/oculusVRUtil.cpp
index 46b4c7a34..69ddbc380 100644
--- a/Engine/source/platform/input/oculusVR/oculusVRUtil.cpp
+++ b/Engine/source/platform/input/oculusVR/oculusVRUtil.cpp
@@ -73,4 +73,19 @@ void calculateAxisRotation(const MatrixF& inRotation, const F32& maxAxisRadius,
    outRotation.y = axis.y;
 }
 
-}
\ No newline at end of file
+void convertAcceleration(OVR::Vector3f& inAcceleration, VectorF& outAcceleration)
+{
+   outAcceleration.set(inAcceleration.x, -inAcceleration.z, inAcceleration.y);
+}
+
+void convertAngularVelocity(OVR::Vector3f& inAngVel, EulerF& outAngVel)
+{
+   outAngVel.set(-inAngVel.x, inAngVel.z, -inAngVel.y);
+}
+
+void convertMagnetometer(OVR::Vector3f& inMagnetometer, VectorF& outMagnetometer)
+{
+   outMagnetometer.set(inMagnetometer.x, -inMagnetometer.z, inMagnetometer.y);
+}
+
+}
diff --git a/Engine/source/platform/input/oculusVR/oculusVRUtil.h b/Engine/source/platform/input/oculusVR/oculusVRUtil.h
index f4088f3c1..ccf6018e9 100644
--- a/Engine/source/platform/input/oculusVR/oculusVRUtil.h
+++ b/Engine/source/platform/input/oculusVR/oculusVRUtil.h
@@ -37,6 +37,15 @@ namespace OculusVRUtil
 
    /// Calcualte a sensor's rotation as if it were a thumb stick axis
    void calculateAxisRotation(const MatrixF& inRotation, const F32& maxAxisRadius, Point2F& outRotation);
+
+   /// Convert an OVR sensor's acceleration to Torque 3D vector (in m/s^2)
+   void convertAcceleration(OVR::Vector3f& inAcceleration, VectorF& outAcceleration);
+
+   /// Convert OVR sensor's angular velocity to Torque 3D Euler angles (in radians/s)
+   void convertAngularVelocity(OVR::Vector3f& inAngVel, EulerF& outAngVel);
+
+   /// Convert an OVR sensor's magnetometer reading (direction and field strength) to Torque 3D vector (in Gauss)
+   void convertMagnetometer(OVR::Vector3f& inMagnetometer, VectorF& outMagnetometer);
 }
 
 #endif   // _OCULUSVRUTIL_H_
diff --git a/Engine/source/platformWin32/videoInfo/wmiVideoInfo.cpp b/Engine/source/platformWin32/videoInfo/wmiVideoInfo.cpp
index ad3bb3f99..23f63635d 100644
--- a/Engine/source/platformWin32/videoInfo/wmiVideoInfo.cpp
+++ b/Engine/source/platformWin32/videoInfo/wmiVideoInfo.cpp
@@ -568,9 +568,20 @@ bool WMIVideoInfo::_queryPropertyWMI( const PVIQueryType queryType, const U32 ad
             LONG longVal = v.lVal;
 
             if( queryType == PVI_VRAM )
+            {
                longVal = longVal >> 20; // Convert to megabytes
 
-            *outValue = String::ToString( (S32)longVal );
+               // While this value is reported as a signed integer, it is possible
+               // for video cards to have 2GB or more.  In those cases the sign bit is
+               // set and the shift above sign-extends to a negative number.  Keeping only
+               // the 12 bits that remain after the shift lets us handle video cards with up
+               // to 4GB of memory.  After that we'll need a new solution from Microsoft.
+               *outValue = String::ToString( (U32)longVal & 0xFFFu );
+            }
+            else
+            {
+               *outValue = String::ToString( (S32)longVal );
+            }
             break;
          }
 
diff --git a/Engine/source/postFx/postEffect.cpp b/Engine/source/postFx/postEffect.cpp
index 1dcfae1ed..aff1e6f2c 100644
--- a/Engine/source/postFx/postEffect.cpp
+++ b/Engine/source/postFx/postEffect.cpp
@@ -45,6 +45,7 @@
 #include "postFx/postEffectManager.h"
 #include "postFx/postEffectVis.h"
 
+using namespace Torque;
 
 ConsoleDocClass( PostEffect, 
    "@brief A fullscreen shader effect.\n\n"
@@ -111,6 +112,15 @@ ImplementEnumType( PFXTargetClear,
    { PFXTargetClear_OnDraw, "PFXTargetClear_OnDraw", "Clear before every draw.\n" },
 EndImplementEnumType;
 
+ImplementEnumType( PFXTargetViewport,
+   "Specifies how the viewport should be set up for a PostEffect's target.\n"
+   "@note Applies to both the diffuse target and the depth target (if defined).\n"
+   "@ingroup Rendering\n\n")
+   { PFXTargetViewport_TargetSize, "PFXTargetViewport_TargetSize", "Set viewport to match target size (default).\n" },
+   { PFXTargetViewport_GFXViewport, "PFXTargetViewport_GFXViewport", "Use the current GFX viewport (scaled to match target size).\n" },
+   { PFXTargetViewport_NamedInTexture0, "PFXTargetViewport_NamedInTexture0", "Use the input texture 0 if it is named (scaled to match target size), otherwise revert to PFXTargetViewport_TargetSize if there is none.\n" },
+EndImplementEnumType;
+
 
 GFXImplementVertexFormat( PFXVertex )
 {
@@ -234,6 +244,7 @@ PostEffect::PostEffect()
       mStateBlockData( NULL ),
       mAllowReflectPass( false ),
       mTargetClear( PFXTargetClear_None ),
+      mTargetViewport( PFXTargetViewport_TargetSize ),
       mTargetScale( Point2F::One ),
       mTargetSize( Point2I::Zero ),
       mTargetFormat( GFXFormatR8G8B8A8 ),
@@ -245,6 +256,7 @@ PostEffect::PostEffect()
       mRTSizeSC( NULL ),
       mOneOverRTSizeSC( NULL ),
       mViewportOffsetSC( NULL ),
+      mTargetViewportSC( NULL ),
       mFogDataSC( NULL ),
       mFogColorSC( NULL ),
       mEyePosSC( NULL ),
@@ -254,6 +266,7 @@ PostEffect::PostEffect()
       mNearFarSC( NULL ),
       mInvNearFarSC( NULL ),
       mWorldToScreenScaleSC( NULL ),
+      mProjectionOffsetSC( NULL ),
       mWaterColorSC( NULL ),
       mWaterFogDataSC( NULL ),
       mAmbientColorSC( NULL ),
@@ -311,6 +324,9 @@ void PostEffect::initPersistFields()
    addField( "targetClear", TYPEID< PFXTargetClear >(), Offset( mTargetClear, PostEffect ),
       "Describes when the target texture should be cleared." );
 
+   addField( "targetViewport", TYPEID< PFXTargetViewport >(), Offset( mTargetViewport, PostEffect ),
+      "Specifies how the viewport should be set up for a target texture." );
+
    addField( "texture", TypeImageFilename, Offset( mTexFilename, PostEffect ), NumTextures,
       "Input textures to this effect ( samplers ).\n"
       "@see PFXTextureIdentifiers" );
@@ -527,6 +543,8 @@ void PostEffect::_setupConstants( const SceneRenderState *state )
 
       //mViewportSC = shader->getShaderConstHandle( "$viewport" );
 
+      mTargetViewportSC = mShader->getShaderConstHandle( "$targetViewport" );
+
       mFogDataSC = mShader->getShaderConstHandle( ShaderGenVars::fogData );
       mFogColorSC = mShader->getShaderConstHandle( ShaderGenVars::fogColor );
 
@@ -540,6 +558,8 @@ void PostEffect::_setupConstants( const SceneRenderState *state )
       mMatScreenToWorldSC = mShader->getShaderConstHandle( "$matScreenToWorld" );
       mMatPrevScreenToWorldSC = mShader->getShaderConstHandle( "$matPrevScreenToWorld" );
 
+      mProjectionOffsetSC = mShader->getShaderConstHandle( "$projectionOffset" );
+
       mWaterColorSC = mShader->getShaderConstHandle( "$waterColor" );
       mAmbientColorSC = mShader->getShaderConstHandle( "$ambientColor" );
       mWaterFogDataSC = mShader->getShaderConstHandle( "$waterFogData" );
@@ -607,6 +627,27 @@ void PostEffect::_setupConstants( const SceneRenderState *state )
       mShaderConsts->set( mRenderTargetParamsSC[i], rtParams );
    }
 
+   // Target viewport (in target space)
+   if ( mTargetViewportSC->isValid() )
+   {
+      const Point2I& targetSize = GFX->getActiveRenderTarget()->getSize();
+      Point3I size(targetSize.x, targetSize.y, 0);
+      const RectI& viewport = GFX->getViewport();
+
+      Point2F offset((F32)viewport.point.x / (F32)targetSize.x, (F32)viewport.point.y / (F32)targetSize.y );
+      Point2F scale((F32)viewport.extent.x / (F32)targetSize.x, (F32)viewport.extent.y / (F32)targetSize.y );
+
+      const Point2F halfPixel( 0.5f / targetSize.x, 0.5f / targetSize.y );
+
+      Point4F targetParams;
+      targetParams.x = offset.x + halfPixel.x;
+      targetParams.y = offset.y + halfPixel.y;
+      targetParams.z = offset.x + scale.x - halfPixel.x;
+      targetParams.w = offset.y + scale.y - halfPixel.y;
+
+      mShaderConsts->set( mTargetViewportSC, targetParams );
+   }
+
    // Set the fog data.
    if ( mFogDataSC->isValid() )
    {
@@ -678,6 +719,7 @@ void PostEffect::_setupConstants( const SceneRenderState *state )
       mShaderConsts->setSafe( mNearFarSC, Point2F( state->getNearPlane(), state->getFarPlane() ) );
       mShaderConsts->setSafe( mInvNearFarSC, Point2F( 1.0f / state->getNearPlane(), 1.0f / state->getFarPlane() ) );
       mShaderConsts->setSafe( mWorldToScreenScaleSC, state->getWorldToScreenScale() );
+      mShaderConsts->setSafe( mProjectionOffsetSC, state->getCameraFrustum().getProjectionOffset() );
       mShaderConsts->setSafe( mFogColorSC, state->getSceneManager()->getFogData().color );
 
       if ( mWaterColorSC->isValid() )
@@ -708,7 +750,7 @@ void PostEffect::_setupConstants( const SceneRenderState *state )
       {
          // Grab our projection matrix
          // from the frustum.
-         Frustum frust = state->getFrustum();
+         Frustum frust = state->getCameraFrustum();
          MatrixF proj( true );
          frust.getProjectionMatrix( &proj );
 
@@ -926,7 +968,33 @@ void PostEffect::_setupTarget( const SceneRenderState *state, bool *outClearTarg
          if ( mTargetClear == PFXTargetClear_OnCreate )
             *outClearTarget = true;
 
-         mNamedTarget.setViewport( RectI( 0, 0, targetSize.x, targetSize.y ) );
+         if(mTargetViewport == PFXTargetViewport_GFXViewport)
+         {
+            // We may need to scale the GFX viewport to fit within
+            // our target texture size
+            GFXTarget *oldTarget = GFX->getActiveRenderTarget();
+            const Point2I &oldTargetSize = oldTarget->getSize();
+            Point2F scale(targetSize.x / F32(oldTargetSize.x), targetSize.y / F32(oldTargetSize.y));
+
+            const RectI &viewport = GFX->getViewport();
+
+            mNamedTarget.setViewport( RectI( viewport.point.x*scale.x, viewport.point.y*scale.y, viewport.extent.x*scale.x, viewport.extent.y*scale.y ) );
+         }
+         else if(mTargetViewport == PFXTargetViewport_NamedInTexture0 && mActiveNamedTarget[0] && mActiveNamedTarget[0]->getTexture())
+         {
+            // Scale the named input texture's viewport to match our target
+            const Point3I &namedTargetSize = mActiveNamedTarget[0]->getTexture()->getSize();
+            Point2F scale(targetSize.x / F32(namedTargetSize.x), targetSize.y / F32(namedTargetSize.y));
+
+            const RectI &viewport = mActiveNamedTarget[0]->getViewport();
+
+            mNamedTarget.setViewport( RectI( viewport.point.x*scale.x, viewport.point.y*scale.y, viewport.extent.x*scale.x, viewport.extent.y*scale.y ) );
+         }
+         else
+         {
+            // PFXTargetViewport_TargetSize
+            mNamedTarget.setViewport( RectI( 0, 0, targetSize.x, targetSize.y ) );
+         }
       }
    }
    else
@@ -972,7 +1040,33 @@ void PostEffect::_setupTarget( const SceneRenderState *state, bool *outClearTarg
          if ( mTargetClear == PFXTargetClear_OnCreate )
             *outClearTarget = true;
 
-         mNamedTargetDepthStencil.setViewport( RectI( 0, 0, targetSize.x, targetSize.y ) );
+         if(mTargetViewport == PFXTargetViewport_GFXViewport)
+         {
+            // We may need to scale the GFX viewport to fit within
+            // our target texture size
+            GFXTarget *oldTarget = GFX->getActiveRenderTarget();
+            const Point2I &oldTargetSize = oldTarget->getSize();
+            Point2F scale(targetSize.x / F32(oldTargetSize.x), targetSize.y / F32(oldTargetSize.y));
+
+            const RectI &viewport = GFX->getViewport();
+
+            mNamedTargetDepthStencil.setViewport( RectI( viewport.point.x*scale.x, viewport.point.y*scale.y, viewport.extent.x*scale.x, viewport.extent.y*scale.y ) );
+         }
+         else if(mTargetViewport == PFXTargetViewport_NamedInTexture0 && mActiveNamedTarget[0] && mActiveNamedTarget[0]->getTexture())
+         {
+            // Scale the named input texture's viewport to match our target
+            const Point3I &namedTargetSize = mActiveNamedTarget[0]->getTexture()->getSize();
+            Point2F scale(targetSize.x / F32(namedTargetSize.x), targetSize.y / F32(namedTargetSize.y));
+
+            const RectI &viewport = mActiveNamedTarget[0]->getViewport();
+
+            mNamedTargetDepthStencil.setViewport( RectI( viewport.point.x*scale.x, viewport.point.y*scale.y, viewport.extent.x*scale.x, viewport.extent.y*scale.y ) );
+         }
+         else
+         {
+            // PFXTargetViewport_TargetSize
+            mNamedTargetDepthStencil.setViewport( RectI( 0, 0, targetSize.x, targetSize.y ) );
+         }
       }
    }
    else
@@ -1063,6 +1157,9 @@ void PostEffect::process(  const SceneRenderState *state,
          GFX->getActiveRenderTarget()->preserve();
 #endif
 
+      const RectI &oldViewport = GFX->getViewport();
+      GFXTarget *oldTarget = GFX->getActiveRenderTarget();
+
       GFX->pushActiveRenderTarget();
       mTarget->attachTexture( GFXTextureTarget::Color0, mTargetTex );
 
@@ -1072,7 +1169,38 @@ void PostEffect::process(  const SceneRenderState *state,
       else
          mTarget->attachTexture( GFXTextureTarget::DepthStencil, mTargetDepthStencil );
 
-      GFX->setActiveRenderTarget( mTarget );
+      // Set the render target but not its viewport.  We'll do that below.
+      GFX->setActiveRenderTarget( mTarget, false );
+
+      if(mNamedTarget.isRegistered())
+      {
+         // Always use the named target's viewport, if available.  It was set up in _setupTarget().
+         GFX->setViewport(mNamedTarget.getViewport());
+      }
+      else if(mTargetViewport == PFXTargetViewport_GFXViewport)
+      {
+         // Go with the current viewport as scaled against our render target.
+         const Point2I &oldTargetSize = oldTarget->getSize();
+         const Point2I &targetSize = mTarget->getSize();
+         Point2F scale(targetSize.x / F32(oldTargetSize.x), targetSize.y / F32(oldTargetSize.y));
+         GFX->setViewport( RectI( oldViewport.point.x*scale.x, oldViewport.point.y*scale.y, oldViewport.extent.x*scale.x, oldViewport.extent.y*scale.y ) );
+      }
+      else if(mTargetViewport == PFXTargetViewport_NamedInTexture0 && mActiveNamedTarget[0] && mActiveNamedTarget[0]->getTexture())
+      {
+         // Go with the first input texture, if it is named.  Scale the named input texture's viewport to match our target
+         const Point3I &namedTargetSize = mActiveNamedTarget[0]->getTexture()->getSize();
+         const Point2I &targetSize = mTarget->getSize();
+         Point2F scale(targetSize.x / F32(namedTargetSize.x), targetSize.y / F32(namedTargetSize.y));
+
+         const RectI &viewport = mActiveNamedTarget[0]->getViewport();
+
+         GFX->setViewport( RectI( viewport.point.x*scale.x, viewport.point.y*scale.y, viewport.extent.x*scale.x, viewport.extent.y*scale.y ) );
+      }
+      else
+      {
+         // Default to using the whole target as the viewport
+         GFX->setViewport( RectI( Point2I::Zero, mTarget->getSize() ) );
+      }
    }
 
    if ( clearTarget )
@@ -1091,7 +1219,7 @@ void PostEffect::process(  const SceneRenderState *state,
 
    Frustum frustum;
    if ( state )
-      frustum = state->getFrustum();
+      frustum = state->getCameraFrustum();
    else
    {
       // If we don't have a scene state then setup
diff --git a/Engine/source/postFx/postEffect.h b/Engine/source/postFx/postEffect.h
index 71ee725e8..4ea75595f 100644
--- a/Engine/source/postFx/postEffect.h
+++ b/Engine/source/postFx/postEffect.h
@@ -118,6 +118,8 @@ protected:
 
    GFXShaderConstHandle *mViewportOffsetSC;
 
+   GFXShaderConstHandle *mTargetViewportSC;
+
    GFXShaderConstHandle *mFogDataSC;
    GFXShaderConstHandle *mFogColorSC;
    GFXShaderConstHandle *mEyePosSC;
@@ -127,6 +129,7 @@ protected:
    GFXShaderConstHandle *mNearFarSC;
    GFXShaderConstHandle *mInvNearFarSC;   
    GFXShaderConstHandle *mWorldToScreenScaleSC;
+   GFXShaderConstHandle *mProjectionOffsetSC;
    GFXShaderConstHandle *mWaterColorSC;
    GFXShaderConstHandle *mWaterFogDataSC;     
    GFXShaderConstHandle *mAmbientColorSC;
@@ -170,6 +173,7 @@ protected:
 
    PFXRenderTime mRenderTime;
    PFXTargetClear mTargetClear;
+   PFXTargetViewport mTargetViewport;
 
    String mRenderBin;
 
diff --git a/Engine/source/postFx/postEffectCommon.h b/Engine/source/postFx/postEffectCommon.h
index dcbbdda27..00bd2d4d4 100644
--- a/Engine/source/postFx/postEffectCommon.h
+++ b/Engine/source/postFx/postEffectCommon.h
@@ -67,6 +67,24 @@ enum PFXTargetClear
 
 DefineEnumType( PFXTargetClear );
 
+
+/// PFXTargetViewport specifies how the viewport should be
+/// set up for a PostEffect's target.
+enum PFXTargetViewport
+{
+   /// The default viewport set up to match the target size
+   PFXTargetViewport_TargetSize,
+
+   /// Use the current GFX viewport
+   PFXTargetViewport_GFXViewport,
+
+   /// Use the input texture 0 if it is named, otherwise
+   /// revert to PFXTargetViewport_TargetSize if there is none
+   PFXTargetViewport_NamedInTexture0,
+};
+
+DefineEnumType( PFXTargetViewport );
+
 ///
 struct PFXFrameState
 {
diff --git a/Engine/source/renderInstance/renderGlowMgr.cpp b/Engine/source/renderInstance/renderGlowMgr.cpp
index 547d20a7f..86bbadcbc 100644
--- a/Engine/source/renderInstance/renderGlowMgr.cpp
+++ b/Engine/source/renderInstance/renderGlowMgr.cpp
@@ -151,6 +151,9 @@ void RenderGlowMgr::render( SceneRenderState *state )
 
    GFXTransformSaver saver;
 
+   // Respect the current viewport
+   mNamedTarget.setViewport(GFX->getViewport());
+
    // Tell the superclass we're about to render, preserve contents
    const bool isRenderingToTarget = _onPreRender( state, true );
 
diff --git a/Engine/source/renderInstance/renderImposterMgr.cpp b/Engine/source/renderInstance/renderImposterMgr.cpp
index c225108b5..8bd168cbd 100644
--- a/Engine/source/renderInstance/renderImposterMgr.cpp
+++ b/Engine/source/renderInstance/renderImposterMgr.cpp
@@ -288,8 +288,12 @@ void RenderImposterMgr::_innerRender( const SceneRenderState *state, RenderPrePa
                   smBatches++;
                
                   vb.set( GFX, stateCount*4, GFXBufferTypeVolatile );
-                  dMemcpy( vb.lock(), mBuffer, stateCount * 4 * sizeof( ImposterState ) );
-                  vb.unlock();
+                  ImposterState *buf = vb.lock();
+                  if(buf)
+                  {
+                     dMemcpy( buf, mBuffer, stateCount * 4 * sizeof( ImposterState ) );
+                     vb.unlock();
+                  }
                
                   //GFX->setVertexBuffer( mCornerVB, 0, stateCount * 4 );
                   GFX->setVertexBuffer( vb );
@@ -327,8 +331,12 @@ void RenderImposterMgr::_innerRender( const SceneRenderState *state, RenderPrePa
                smBatches++;
 
                vb.set( GFX, stateCount*4, GFXBufferTypeVolatile );
-               dMemcpy( vb.lock(), mBuffer, stateCount * 4 * sizeof( ImposterState ) );
-               vb.unlock();
+               ImposterState *buf = vb.lock();
+               if(buf)
+               {
+                  dMemcpy( buf, mBuffer, stateCount * 4 * sizeof( ImposterState ) );
+                  vb.unlock();
+               }
                
                //GFX->setVertexBuffer( mCornerVB, 0, stateCount * 4 );
                GFX->setVertexBuffer( vb );
diff --git a/Engine/source/scene/culling/sceneCullingState.cpp b/Engine/source/scene/culling/sceneCullingState.cpp
index b8608160d..f3b4022fa 100644
--- a/Engine/source/scene/culling/sceneCullingState.cpp
+++ b/Engine/source/scene/culling/sceneCullingState.cpp
@@ -68,17 +68,21 @@ SceneCullingState::SceneCullingState( SceneManager* sceneManager, const SceneCam
    mZoneVisibilityFlags.setSize( numZones );
    mZoneVisibilityFlags.clear();
 
+   // Culling frustum
+
+   mCullingFrustum = mCameraState.getFrustum();
+   mCullingFrustum.bakeProjectionOffset();
+
    // Construct the root culling volume from
-   // the camera's view frustum.  Omit the frustum's
+   // the culling frustum.  Omit the frustum's
    // near and far plane so we don't test it repeatedly.
 
-   const Frustum& frustum = mCameraState.getFrustum();
    PlaneF* planes = allocateData< PlaneF >( 4 );
 
-   planes[ 0 ] = frustum.getPlanes()[ Frustum::PlaneLeft ];
-   planes[ 1 ] = frustum.getPlanes()[ Frustum::PlaneRight ];
-   planes[ 2 ] = frustum.getPlanes()[ Frustum::PlaneTop];
-   planes[ 3 ] = frustum.getPlanes()[ Frustum::PlaneBottom ];
+   planes[ 0 ] = mCullingFrustum.getPlanes()[ Frustum::PlaneLeft ];
+   planes[ 1 ] = mCullingFrustum.getPlanes()[ Frustum::PlaneRight ];
+   planes[ 2 ] = mCullingFrustum.getPlanes()[ Frustum::PlaneTop];
+   planes[ 3 ] = mCullingFrustum.getPlanes()[ Frustum::PlaneBottom ];
 
    mRootVolume = SceneCullingVolume(
       SceneCullingVolume::Includer,
@@ -219,7 +223,7 @@ bool SceneCullingState::createCullingVolume( const Point3F* vertices, U32 numVer
 {
    const Point3F& viewPos = getCameraState().getViewPosition();
    const Point3F& viewDir = getCameraState().getViewDirection();
-   const bool isOrtho = getFrustum().isOrtho();
+   const bool isOrtho = getCullingFrustum().isOrtho();
 
    //TODO: check if we need to handle penetration of the near plane for occluders specially
 
@@ -440,8 +444,8 @@ bool SceneCullingState::createCullingVolume( const Point3F* vertices, U32 numVer
 
    if( type == SceneCullingVolume::Occluder )
    {
-      const F32 widthEstimatePercentage = widthEstimate / getFrustum().getWidth();
-      const F32 heightEstimatePercentage = heightEstimate / getFrustum().getHeight();
+      const F32 widthEstimatePercentage = widthEstimate / getCullingFrustum().getWidth();
+      const F32 heightEstimatePercentage = heightEstimate / getCullingFrustum().getHeight();
 
       if( widthEstimatePercentage < smOccluderMinWidthPercentage ||
           heightEstimatePercentage < smOccluderMinHeightPercentage )
@@ -614,7 +618,7 @@ inline SceneZoneCullingState::CullingTestResult SceneCullingState::_test( const
 
    if( disableZoneCulling() )
    {
-      if( !OCCLUDERS_ONLY && !getFrustum().isCulled( bounds ) )
+      if( !OCCLUDERS_ONLY && !getCullingFrustum().isCulled( bounds ) )
          return SceneZoneCullingState::CullingTestPositiveByInclusion;
 
       return SceneZoneCullingState::CullingTestNegative;
@@ -631,7 +635,7 @@ inline SceneZoneCullingState::CullingTestResult SceneCullingState::_test( const
    }
    else
    {
-      const PlaneF* frustumPlanes = getFrustum().getPlanes();
+      const PlaneF* frustumPlanes = getCullingFrustum().getPlanes();
 
       return _test(
          bounds,
@@ -715,8 +719,8 @@ U32 SceneCullingState::cullObjects( SceneObject** objects, U32 numObjects, U32 c
 
    // We test near and far planes separately in order to not do the tests
    // repeatedly, so fetch the planes now.
-   const PlaneF& nearPlane = getFrustum().getPlanes()[ Frustum::PlaneNear ];
-   const PlaneF& farPlane = getFrustum().getPlanes()[ Frustum::PlaneFar ];
+   const PlaneF& nearPlane = getCullingFrustum().getPlanes()[ Frustum::PlaneNear ];
+   const PlaneF& farPlane = getCullingFrustum().getPlanes()[ Frustum::PlaneFar ];
 
    for( U32 i = 0; i < numObjects; ++ i )
    {
@@ -766,7 +770,7 @@ U32 SceneCullingState::cullObjects( SceneObject** objects, U32 numObjects, U32 c
                ( object->getTypeMask() & CULLING_EXCLUDE_TYPEMASK ) ||
                disableZoneCulling() )
       {
-         isCulled = getFrustum().isCulled( object->getWorldBox() );
+         isCulled = getCullingFrustum().isCulled( object->getWorldBox() );
       }
 
       // Go through the zones that the object is assigned to and
@@ -881,8 +885,8 @@ void SceneCullingState::debugRenderCullingVolumes() const
    const ColorI occluderColor( 255, 0, 0, 255 );
    const ColorI includerColor( 0, 255, 0, 255 );
 
-   const PlaneF& nearPlane = getFrustum().getPlanes()[ Frustum::PlaneNear ];
-   const PlaneF& farPlane = getFrustum().getPlanes()[ Frustum::PlaneFar ];
+   const PlaneF& nearPlane = getCullingFrustum().getPlanes()[ Frustum::PlaneNear ];
+   const PlaneF& farPlane = getCullingFrustum().getPlanes()[ Frustum::PlaneFar ];
 
    DebugDrawer* drawer = DebugDrawer::get();
    const SceneZoneSpaceManager* zoneManager = mSceneManager->getZoneManager();
diff --git a/Engine/source/scene/culling/sceneCullingState.h b/Engine/source/scene/culling/sceneCullingState.h
index b7a231da8..b63b219c2 100644
--- a/Engine/source/scene/culling/sceneCullingState.h
+++ b/Engine/source/scene/culling/sceneCullingState.h
@@ -100,9 +100,12 @@ class SceneCullingState
       /// The viewing state that defines how the scene is being viewed.
       SceneCameraState mCameraState;
 
-      /// The root culling volume corresponding to the camera frustum.
+      /// The root culling volume corresponding to the culling frustum.
       SceneCullingVolume mRootVolume;
 
+      /// The root culling frustum, which may be different from the camera frustum
+      Frustum mCullingFrustum;
+
       /// Occluders that have been added to this render state.  Adding an occluder does not
       /// necessarily result in an occluder volume being added.  To not repeatedly try to
       /// process the same occluder object, all objects that are added are recorded here.
@@ -136,7 +139,10 @@ class SceneCullingState
       SceneManager* getSceneManager() const { return mSceneManager; }
 
       /// Return the root frustum which is used to set up scene visibility.
-      const Frustum& getFrustum() const { return getCameraState().getFrustum(); }
+      const Frustum& getCullingFrustum() const { return mCullingFrustum; }
+
+      /// Return the camera's view frustum, which may differ from the culling frustum.
+      const Frustum& getCameraFrustum() const { return getCameraState().getFrustum(); }
 
       /// Return the viewing state that defines how the scene is being viewed.
       const SceneCameraState& getCameraState() const { return mCameraState; }
diff --git a/Engine/source/scene/fogStructs.h b/Engine/source/scene/fogStructs.h
index dd003db4f..874d2542e 100644
--- a/Engine/source/scene/fogStructs.h
+++ b/Engine/source/scene/fogStructs.h
@@ -59,7 +59,7 @@ struct WaterFogData
       wetDepth = 0.0f;
       wetDarkening = 0.0f;
       color.set( 0.5f, 0.5f, 0.5f, 1.0f );
-      plane.set( 0.0f, 0.0f, 1.0f );
+      plane.set( 0.0f, 0.0f, 1.0f, 1e10 ); // Default to global bounds distance
       depthGradMax = 0.0f;
    }
 };
diff --git a/Engine/source/scene/reflectionManager.cpp b/Engine/source/scene/reflectionManager.cpp
index 98eaee120..c6b78ad65 100644
--- a/Engine/source/scene/reflectionManager.cpp
+++ b/Engine/source/scene/reflectionManager.cpp
@@ -290,6 +290,7 @@ bool ReflectionManager::_handleDeviceEvent( GFXDevice::GFXDeviceEventType evt )
    switch( evt )
    {
    case GFXDevice::deStartOfFrame:
+   case GFXDevice::deStartOfField:
 
       mUpdateRefract = true;
       break;
diff --git a/Engine/source/scene/reflector.cpp b/Engine/source/scene/reflector.cpp
index df2e7f580..2b96737e6 100644
--- a/Engine/source/scene/reflector.cpp
+++ b/Engine/source/scene/reflector.cpp
@@ -420,7 +420,7 @@ void CubeReflector::updateFace( const ReflectParams &params, U32 faceidx )
    reflectRenderState.disableAdvancedLightingBins(true);
 
    // render scene
-   LIGHTMGR->registerGlobalLights( &reflectRenderState.getFrustum(), false );
+   LIGHTMGR->registerGlobalLights( &reflectRenderState.getCullingFrustum(), false );
    gClientSceneGraph->renderSceneNoLights( &reflectRenderState, mDesc->objectTypeMask );
    LIGHTMGR->unregisterAllLights();
 
@@ -546,7 +546,12 @@ void PlaneReflector::updateReflection( const ReflectParams &params )
    // store current matrices
    GFXTransformSaver saver;
    
-   F32 aspectRatio = F32( params.viewportExtent.x ) / F32( params.viewportExtent.y );
+   Point2I viewport(params.viewportExtent);
+   if(GFX->getCurrentRenderStyle() == GFXDevice::RS_StereoSideBySide)
+   {
+      viewport.x *= 0.5f;
+   }
+   F32 aspectRatio = F32( viewport.x ) / F32( viewport.y );
 
    Frustum frustum;
    frustum.set(false, params.query->fov, aspectRatio, params.query->nearPlane, params.query->farPlane);
@@ -562,12 +567,138 @@ void PlaneReflector::updateReflection( const ReflectParams &params )
    mLastDir = params.query->cameraMatrix.getForwardVector();
    mLastPos = params.query->cameraMatrix.getPosition();
 
+   setGFXMatrices( params.query->cameraMatrix );
+
+   // Adjust the detail amount
+   F32 detailAdjustBackup = TSShapeInstance::smDetailAdjust;
+   TSShapeInstance::smDetailAdjust *= mDesc->detailAdjust;
+
+
+   if(reflectTarget.isNull())
+      reflectTarget = GFX->allocRenderToTextureTarget();
+   reflectTarget->attachTexture( GFXTextureTarget::Color0, reflectTex );
+   reflectTarget->attachTexture( GFXTextureTarget::DepthStencil, depthBuff );
+   GFX->pushActiveRenderTarget();
+   GFX->setActiveRenderTarget( reflectTarget );   
+
+   U32 objTypeFlag = -1;
+   SceneCameraState reflectCameraState = SceneCameraState::fromGFX();
+   LIGHTMGR->registerGlobalLights( &reflectCameraState.getFrustum(), false );
+
+   // Since we can sometimes be rendering a reflection for 1 or 2 frames before
+   // it gets updated due to the lag associated with getting the results from
+   // a HOQ we can sometimes see into parts of the reflection texture that
+   // have nothing but clear color ( eg. under the water ).
+   // To make this look less crappy use the ambient color of the sun.
+   //
+   // In the future we may want to fix this instead by having the scatterSky
+   // render a skirt or something in its lower half.
+   //
+   ColorF clearColor = gClientSceneGraph->getAmbientLightColor();
+   GFX->clear( GFXClearZBuffer | GFXClearStencil | GFXClearTarget, clearColor, 1.0f, 0 );
+
+   if(GFX->getCurrentRenderStyle() == GFXDevice::RS_StereoSideBySide)
+   {
+      // Store previous values
+      RectI originalVP = GFX->getViewport();
+
+      Point2F projOffset = GFX->getCurrentProjectionOffset();
+      Point3F eyeOffset = GFX->getStereoEyeOffset();
+
+      // Render left half of display
+      RectI leftVP = originalVP;
+      leftVP.extent.x *= 0.5;
+      GFX->setViewport(leftVP);
+
+      MatrixF leftWorldTrans(true);
+      leftWorldTrans.setPosition(Point3F(eyeOffset.x, eyeOffset.y, eyeOffset.z));
+      MatrixF leftWorld(params.query->cameraMatrix);
+      leftWorld.mulL(leftWorldTrans);
+
+      Frustum gfxFrustum = GFX->getFrustum();
+      gfxFrustum.setProjectionOffset(Point2F(projOffset.x, projOffset.y));
+      GFX->setFrustum(gfxFrustum);
+
+      setGFXMatrices( leftWorld );
+
+      SceneCameraState cameraStateLeft = SceneCameraState::fromGFX();
+      SceneRenderState renderStateLeft( gClientSceneGraph, SPT_Reflect, cameraStateLeft );
+      renderStateLeft.setSceneRenderStyle(SRS_SideBySide);
+      renderStateLeft.setSceneRenderField(0);
+      renderStateLeft.getMaterialDelegate().bind( REFLECTMGR, &ReflectionManager::getReflectionMaterial );
+      renderStateLeft.setDiffuseCameraTransform( params.query->cameraMatrix );
+      renderStateLeft.disableAdvancedLightingBins(true);
+
+      gClientSceneGraph->renderSceneNoLights( &renderStateLeft, objTypeFlag );
+
+      // Render right half of display
+      RectI rightVP = originalVP;
+      rightVP.extent.x *= 0.5;
+      rightVP.point.x += rightVP.extent.x;
+      GFX->setViewport(rightVP);
+
+      MatrixF rightWorldTrans(true);
+      rightWorldTrans.setPosition(Point3F(-eyeOffset.x, eyeOffset.y, eyeOffset.z));
+      MatrixF rightWorld(params.query->cameraMatrix);
+      rightWorld.mulL(rightWorldTrans);
+
+      gfxFrustum = GFX->getFrustum();
+      gfxFrustum.setProjectionOffset(Point2F(-projOffset.x, projOffset.y));
+      GFX->setFrustum(gfxFrustum);
+
+      setGFXMatrices( rightWorld );
+
+      SceneCameraState cameraStateRight = SceneCameraState::fromGFX();
+      SceneRenderState renderStateRight( gClientSceneGraph, SPT_Reflect, cameraStateRight );
+      renderStateRight.setSceneRenderStyle(SRS_SideBySide);
+      renderStateRight.setSceneRenderField(1);
+      renderStateRight.getMaterialDelegate().bind( REFLECTMGR, &ReflectionManager::getReflectionMaterial );
+      renderStateRight.setDiffuseCameraTransform( params.query->cameraMatrix );
+      renderStateRight.disableAdvancedLightingBins(true);
+
+      gClientSceneGraph->renderSceneNoLights( &renderStateRight, objTypeFlag );
+
+      // Restore previous values
+      gfxFrustum.clearProjectionOffset();
+      GFX->setFrustum(gfxFrustum);
+      GFX->setViewport(originalVP);
+   }
+   else
+   {
+      SceneRenderState reflectRenderState
+      (
+         gClientSceneGraph,
+         SPT_Reflect,
+         SceneCameraState::fromGFX()
+      );
+
+      reflectRenderState.getMaterialDelegate().bind( REFLECTMGR, &ReflectionManager::getReflectionMaterial );
+      reflectRenderState.setDiffuseCameraTransform( params.query->cameraMatrix );
+      reflectRenderState.disableAdvancedLightingBins(true);
+
+      gClientSceneGraph->renderSceneNoLights( &reflectRenderState, objTypeFlag );
+   }
+
+   LIGHTMGR->unregisterAllLights();
+
+   // Clean up.
+   reflectTarget->resolve();
+   GFX->popActiveRenderTarget();
+
+   // Restore detail adjust amount.
+   TSShapeInstance::smDetailAdjust = detailAdjustBackup;
+
+   mIsRendering = false;
+}
+
+void PlaneReflector::setGFXMatrices( const MatrixF &camTrans )
+{
    if ( objectSpace )
    {
       // set up camera transform relative to object
       MatrixF invObjTrans = mObject->getRenderTransform();
       invObjTrans.inverse();
-      MatrixF relCamTrans = invObjTrans * params.query->cameraMatrix;
+      MatrixF relCamTrans = invObjTrans * camTrans;
 
       MatrixF camReflectTrans = getCameraReflection( relCamTrans );
       MatrixF camTrans = mObject->getRenderTransform() * camReflectTrans;
@@ -586,8 +717,6 @@ void PlaneReflector::updateReflection( const ReflectParams &params )
    }    
    else
    {
-      MatrixF camTrans = params.query->cameraMatrix;
-
       // set world mat from new camera view
       MatrixF camReflectTrans = getCameraReflection( camTrans );
       camReflectTrans.inverse();
@@ -598,60 +727,9 @@ void PlaneReflector::updateReflection( const ReflectParams &params )
       MatrixF clipProj = getFrustumClipProj( camReflectTrans );
       GFX->setProjectionMatrix( clipProj );
    }   
-
-   // Adjust the detail amount
-   F32 detailAdjustBackup = TSShapeInstance::smDetailAdjust;
-   TSShapeInstance::smDetailAdjust *= mDesc->detailAdjust;
-
-
-   if(reflectTarget.isNull())
-      reflectTarget = GFX->allocRenderToTextureTarget();
-   reflectTarget->attachTexture( GFXTextureTarget::Color0, reflectTex );
-   reflectTarget->attachTexture( GFXTextureTarget::DepthStencil, depthBuff );
-   GFX->pushActiveRenderTarget();
-   GFX->setActiveRenderTarget( reflectTarget );   
-
-   SceneRenderState reflectRenderState
-   (
-      gClientSceneGraph,
-      SPT_Reflect,
-      SceneCameraState::fromGFX()
-   );
-
-   reflectRenderState.getMaterialDelegate().bind( REFLECTMGR, &ReflectionManager::getReflectionMaterial );
-   reflectRenderState.setDiffuseCameraTransform( params.query->cameraMatrix );
-   reflectRenderState.disableAdvancedLightingBins(true);
-
-   U32 objTypeFlag = -1;
-   LIGHTMGR->registerGlobalLights( &reflectRenderState.getFrustum(), false );
-
-   // Since we can sometime be rendering a reflection for 1 or 2 frames before
-   // it gets updated do to the lag associated with getting the results from
-   // a HOQ we can sometimes see into parts of the reflection texture that
-   // have nothing but clear color ( eg. under the water ).
-   // To make this look less crappy use the ambient color of the sun.
-   //
-   // In the future we may want to fix this instead by having the scatterSky
-   // render a skirt or something in its lower half.
-   //
-   ColorF clearColor = reflectRenderState.getAmbientLightColor();
-   GFX->clear( GFXClearZBuffer | GFXClearStencil | GFXClearTarget, clearColor, 1.0f, 0 );
-
-   gClientSceneGraph->renderSceneNoLights( &reflectRenderState, objTypeFlag );
-
-   LIGHTMGR->unregisterAllLights();
-
-   // Clean up.
-   reflectTarget->resolve();
-   GFX->popActiveRenderTarget();
-
-   // Restore detail adjust amount.
-   TSShapeInstance::smDetailAdjust = detailAdjustBackup;
-
-   mIsRendering = false;
 }
 
-MatrixF PlaneReflector::getCameraReflection( MatrixF &camTrans )
+MatrixF PlaneReflector::getCameraReflection( const MatrixF &camTrans )
 {
    Point3F normal = refplane;
 
diff --git a/Engine/source/scene/reflector.h b/Engine/source/scene/reflector.h
index 25c6399a8..bc5dca698 100644
--- a/Engine/source/scene/reflector.h
+++ b/Engine/source/scene/reflector.h
@@ -202,8 +202,11 @@ public:
    virtual F32 calcScore( const ReflectParams &params );
    virtual void updateReflection( const ReflectParams &params ); 
 
+   /// Set up the GFX matrices
+   void setGFXMatrices( const MatrixF &camTrans );
+
    /// Set up camera matrix for a reflection on the plane
-   MatrixF getCameraReflection( MatrixF &camTrans );
+   MatrixF getCameraReflection( const MatrixF &camTrans );
 
    /// Oblique frustum clipping - use near plane of zbuffer as a clip plane
    MatrixF getFrustumClipProj( MatrixF &modelview );
diff --git a/Engine/source/scene/sceneContainer.cpp b/Engine/source/scene/sceneContainer.cpp
index 27eabbe6c..e99f5be11 100644
--- a/Engine/source/scene/sceneContainer.cpp
+++ b/Engine/source/scene/sceneContainer.cpp
@@ -243,7 +243,7 @@ void SceneContainer::insertIntoBins(SceneObject* obj)
 
    // For huge objects, dump them into the overflow bin.  Otherwise, everything
    //  goes into the grid...
-   if ((maxX - minX + 1) < csmNumBins || (maxY - minY + 1) < csmNumBins && !obj->isGlobalBounds())
+   if (!obj->isGlobalBounds() && ((maxX - minX + 1) < csmNumBins || (maxY - minY + 1) < csmNumBins))
    {
       SceneObjectRef** pCurrInsert = &obj->mBinRefHead;
 
diff --git a/Engine/source/scene/sceneContainer.h b/Engine/source/scene/sceneContainer.h
index b79317aff..0033d7cab 100644
--- a/Engine/source/scene/sceneContainer.h
+++ b/Engine/source/scene/sceneContainer.h
@@ -96,6 +96,11 @@ enum PolyListContext
    /// selection from an editor or other tool.
    PLC_Selection,
 
+   /// A hint that the polylist is used for
+   /// building a representation of the environment
+   /// used for navigation.
+   PLC_Navigation,
+
    /// A hint that the polyist will be used
    /// to export geometry and would like to have
    /// texture coords and materials.   
diff --git a/Engine/source/scene/sceneManager.cpp b/Engine/source/scene/sceneManager.cpp
index 5210d5345..3fcd37422 100644
--- a/Engine/source/scene/sceneManager.cpp
+++ b/Engine/source/scene/sceneManager.cpp
@@ -191,7 +191,7 @@ void SceneManager::renderScene( SceneRenderState* renderState, U32 objectMask, S
    // Get the lights for rendering the scene.
 
    PROFILE_START( SceneGraph_registerLights );
-      LIGHTMGR->registerGlobalLights( &renderState->getFrustum(), false );
+      LIGHTMGR->registerGlobalLights( &renderState->getCullingFrustum(), false );
    PROFILE_END();
 
    // If its a diffuse pass, update the current ambient light level.
@@ -242,6 +242,9 @@ void SceneManager::renderScene( SceneRenderState* renderState, U32 objectMask, S
       Point2F projOffset = GFX->getCurrentProjectionOffset();
       Point3F eyeOffset = GFX->getStereoEyeOffset();
 
+      // Indicate that we're about to start a field
+      GFX->beginField();
+
       // Render left half of display
       RectI leftVP = originalVP;
       leftVP.extent.x *= 0.5;
@@ -264,6 +267,12 @@ void SceneManager::renderScene( SceneRenderState* renderState, U32 objectMask, S
 
       renderSceneNoLights( &renderStateLeft, objectMask, baseObject, baseZone );
 
+      // Indicate that we've just finished a field
+      GFX->endField();
+
+      // Indicate that we're about to start a field
+      GFX->beginField();
+
       // Render right half of display
       RectI rightVP = originalVP;
       rightVP.extent.x *= 0.5;
@@ -287,6 +296,9 @@ void SceneManager::renderScene( SceneRenderState* renderState, U32 objectMask, S
 
       renderSceneNoLights( &renderStateRight, objectMask, baseObject, baseZone );
 
+      // Indicate that we've just finished a field
+      GFX->endField();
+
       // Restore previous values
       GFX->setWorldMatrix(originalWorld);
       gfxFrustum.clearProjectionOffset();
@@ -392,7 +404,7 @@ void SceneManager::_renderScene( SceneRenderState* state, U32 objectMask, SceneZ
    // the opportunity to render editor visualizations even if
    // they are otherwise not in view.
 
-   if( !state->getFrustum().getBounds().isOverlapped( state->getRenderArea() ) )
+   if( !state->getCullingFrustum().getBounds().isOverlapped( state->getRenderArea() ) )
    {
       // This handles fringe cases like flying backwards into a zone where you
       // end up pretty much standing on a zone border and looking directly into
@@ -403,7 +415,7 @@ void SceneManager::_renderScene( SceneRenderState* state, U32 objectMask, SceneZ
       return;
    }
 
-   Box3F queryBox = state->getFrustum().getBounds();
+   Box3F queryBox = state->getCullingFrustum().getBounds();
    if( !gEditingMission )
    {
       queryBox.minExtents.setMax( state->getRenderArea().minExtents );
diff --git a/Engine/source/scene/sceneRenderState.h b/Engine/source/scene/sceneRenderState.h
index 7a66e40aa..4b8fd200d 100644
--- a/Engine/source/scene/sceneRenderState.h
+++ b/Engine/source/scene/sceneRenderState.h
@@ -145,8 +145,11 @@ class SceneRenderState
       const SceneCullingState& getCullingState() const { return mCullingState; }
       SceneCullingState& getCullingState() { return mCullingState; }
 
-      /// Returns the root frustum.
-      const Frustum& getFrustum() const { return getCullingState().getFrustum(); }
+      /// Returns the root culling frustum.
+      const Frustum& getCullingFrustum() const { return getCullingState().getCullingFrustum(); }
+
+      /// Returns the root camera frustum.
+      const Frustum& getCameraFrustum() const { return getCullingState().getCameraFrustum(); }
 
       /// @}
 
@@ -262,10 +265,10 @@ class SceneRenderState
       const MatrixF& getCameraTransform() const { return getCullingState().getCameraState().getViewWorldMatrix(); }
 
       /// Returns the minimum distance something must be from the camera to not be culled.
-      F32 getNearPlane() const { return getFrustum().getNearDist();   }
+      F32 getNearPlane() const { return getCullingFrustum().getNearDist();   }
 
       /// Returns the maximum distance something can be from the camera to not be culled.
-      F32 getFarPlane() const { return getFrustum().getFarDist();    }
+      F32 getFarPlane() const { return getCullingFrustum().getFarDist();    }
 
       /// Returns the camera vector normalized to 1 / far distance.
       const Point3F& getVectorEye() const { return mVectorEye; }
diff --git a/Engine/source/sfx/fmod/sfxFMODDevice.cpp b/Engine/source/sfx/fmod/sfxFMODDevice.cpp
index aeb42709b..7875abf67 100644
--- a/Engine/source/sfx/fmod/sfxFMODDevice.cpp
+++ b/Engine/source/sfx/fmod/sfxFMODDevice.cpp
@@ -33,7 +33,6 @@
 bool                 SFXFMODDevice::smPrefDisableSoftware = false;
 bool                 SFXFMODDevice::smPrefUseSoftwareOcclusion = true;
 bool                 SFXFMODDevice::smPrefUseSoftwareHRTF = true;
-bool                 SFXFMODDevice::smPrefUseSoftwareReverbLowmem = false;
 bool                 SFXFMODDevice::smPrefEnableProfile = false;
 bool                 SFXFMODDevice::smPrefGeometryUseClosest = false;
 const char*          SFXFMODDevice::smPrefDSoundHRTF = "full";
@@ -248,8 +247,6 @@ bool SFXFMODDevice::_init()
          flags |= FMOD_INIT_OCCLUSION_LOWPASS;
       if( smPrefUseSoftwareHRTF )
          flags |= FMOD_INIT_HRTF_LOWPASS;
-      if( smPrefUseSoftwareReverbLowmem )
-         flags |= FMOD_INIT_SOFTWARE_REVERB_LOWMEM;
       if( smPrefEnableProfile )
          flags |= FMOD_INIT_ENABLE_PROFILE;
       if( smPrefGeometryUseClosest )
diff --git a/Engine/source/sfx/fmod/sfxFMODDevice.h b/Engine/source/sfx/fmod/sfxFMODDevice.h
index 929851a9f..df90eae0d 100644
--- a/Engine/source/sfx/fmod/sfxFMODDevice.h
+++ b/Engine/source/sfx/fmod/sfxFMODDevice.h
@@ -294,9 +294,6 @@ class SFXFMODDevice : public SFXDevice
       ///
       static bool smPrefUseSoftwareHRTF;
       
-      ///
-      static bool smPrefUseSoftwareReverbLowmem;
-      
       ///
       static bool smPrefEnableProfile;
       
diff --git a/Engine/source/sfx/fmod/sfxFMODProvider.cpp b/Engine/source/sfx/fmod/sfxFMODProvider.cpp
index 5ce41bb9f..f11996c97 100644
--- a/Engine/source/sfx/fmod/sfxFMODProvider.cpp
+++ b/Engine/source/sfx/fmod/sfxFMODProvider.cpp
@@ -60,10 +60,6 @@ public:
          "This will add a lowpass filter effect to the DSP effect chain of all sounds mixed in software.\n\n"
          "@note Only applies when using an %FMOD sound device.\n\n"
          "@ingroup SFXFMOD" );
-      Con::addVariable( "$pref::SFX::FMOD::useSoftwareReverbLowmem", TypeBool, &SFXFMODDevice::smPrefUseSoftwareReverbLowmem,
-         "If true, %FMOD's SFX reverb is run using 22/24kHz delay buffers, halving the memory required.\n\n"
-         "@note Only applies when using an %FMOD sound device.\n\n"
-         "@ingroup SFXFMOD" );
       Con::addVariable( "$pref::SFX::FMOD::enableProfile", TypeBool, &SFXFMODDevice::smPrefEnableProfile,
          "Whether to enable support for %FMOD's profiler.\n\n"
          "@note Only applies when using an %FMOD sound device.\n\n"
diff --git a/Engine/source/sfx/sfxProfile.cpp b/Engine/source/sfx/sfxProfile.cpp
index 80bc5a698..bbb40c249 100644
--- a/Engine/source/sfx/sfxProfile.cpp
+++ b/Engine/source/sfx/sfxProfile.cpp
@@ -31,6 +31,7 @@
 #include "core/resourceManager.h"
 #include "console/engineAPI.h"
 
+using namespace Torque;
 
 IMPLEMENT_CO_DATABLOCK_V1( SFXProfile );
 
diff --git a/Engine/source/shaderGen/shaderGenVars.cpp b/Engine/source/shaderGen/shaderGenVars.cpp
index cf01e5aab..3e5a577ee 100644
--- a/Engine/source/shaderGen/shaderGenVars.cpp
+++ b/Engine/source/shaderGen/shaderGenVars.cpp
@@ -65,6 +65,7 @@ const String ShaderGenVars::lightSpotAngle("$inLightSpotAngle");
 const String ShaderGenVars::lightSpotFalloff("$inLightSpotFalloff");
 const String ShaderGenVars::specularColor("$specularColor");
 const String ShaderGenVars::specularPower("$specularPower");
+const String ShaderGenVars::specularStrength("$specularStrength");
 
 // These are ignored by the D3D layers.
 const String ShaderGenVars::fogMap("$fogMap");
diff --git a/Engine/source/shaderGen/shaderGenVars.h b/Engine/source/shaderGen/shaderGenVars.h
index 80a308909..a98940101 100644
--- a/Engine/source/shaderGen/shaderGenVars.h
+++ b/Engine/source/shaderGen/shaderGenVars.h
@@ -78,6 +78,7 @@ struct ShaderGenVars
    const static String lightSpotFalloff;
    const static String specularColor;
    const static String specularPower;
+   const static String specularStrength;
    
    // Textures
    const static String fogMap;
diff --git a/Engine/source/sim/actionMap.cpp b/Engine/source/sim/actionMap.cpp
index 125225d99..43169f471 100644
--- a/Engine/source/sim/actionMap.cpp
+++ b/Engine/source/sim/actionMap.cpp
@@ -1508,9 +1508,9 @@ bool ActionMap::processAction(const InputEventInfo* pEvent)
             else
             {
                if( value > 0 )
-                  value = ( value - pNode->deadZoneBegin ) * ( 1.f / ( 1.f - pNode->deadZoneBegin ) );
+                  value = ( value - pNode->deadZoneEnd ) * ( 1.f / ( 1.f - pNode->deadZoneEnd ) );
                else
-                  value = ( value + pNode->deadZoneBegin ) * ( 1.f / ( 1.f - pNode->deadZoneBegin ) );
+                  value = ( value - pNode->deadZoneBegin ) * ( 1.f / ( 1.f + pNode->deadZoneBegin ) );
             }
          }
 
diff --git a/Engine/source/terrain/terrCollision.cpp b/Engine/source/terrain/terrCollision.cpp
index 5ff849f4d..0b148445d 100644
--- a/Engine/source/terrain/terrCollision.cpp
+++ b/Engine/source/terrain/terrCollision.cpp
@@ -480,7 +480,7 @@ static void clrbuf(U32* p, U32 s)
       *p++ = U32_MAX;
 }
 
-bool TerrainBlock::buildPolyList(PolyListContext, AbstractPolyList* polyList, const Box3F &box, const SphereF&)
+bool TerrainBlock::buildPolyList(PolyListContext context, AbstractPolyList* polyList, const Box3F &box, const SphereF&)
 {
 	PROFILE_SCOPE( TerrainBlock_buildPolyList );
 
@@ -530,12 +530,25 @@ bool TerrainBlock::buildPolyList(PolyListContext, AbstractPolyList* polyList, co
 
       swap(vb[0],vb[1]);
       clrbuf(vb[1],xExt + 1);
+
+      F32 wy1 = y * mSquareSize, wy2 = (y + 1) * mSquareSize;
+      if(context == PLC_Navigation &&
+         ((wy1 > osBox.maxExtents.y && wy2 > osBox.maxExtents.y) ||
+          (wy1 < osBox.minExtents.y && wy2 < osBox.minExtents.y)))
+         continue;
+
       //
       for (S32 x = xStart; x < xEnd; x++) 
       {
          S32 xi = x & BlockMask;
          const TerrainSquare *sq = mFile->findSquare( 0, xi, yi );
 
+         F32 wx1 = x * mSquareSize, wx2 = (x + 1) * mSquareSize;
+         if(context == PLC_Navigation &&
+            ((wx1 > osBox.maxExtents.x && wx2 > osBox.maxExtents.x) ||
+             (wx1 < osBox.minExtents.x && wx2 < osBox.minExtents.x)))
+            continue;
+
          if ( x != xi || y != yi )
             continue;
 
diff --git a/Engine/source/terrain/terrData.cpp b/Engine/source/terrain/terrData.cpp
index bb77fcf4f..9db97f28a 100644
--- a/Engine/source/terrain/terrData.cpp
+++ b/Engine/source/terrain/terrData.cpp
@@ -1070,6 +1070,9 @@ void TerrainBlock::setTransform(const MatrixF & mat)
 
    setRenderTransform( mat );
    setMaskBits( TransformMask );
+
+   if(isClientObject())
+      smUpdateSignal.trigger( HeightmapUpdate, this, Point2I::Zero, Point2I::Max );
 }
 
 void TerrainBlock::setScale( const VectorF &scale )
diff --git a/Engine/source/terrain/terrImport.cpp b/Engine/source/terrain/terrImport.cpp
index ede106402..ad9a8d98b 100644
--- a/Engine/source/terrain/terrImport.cpp
+++ b/Engine/source/terrain/terrImport.cpp
@@ -31,6 +31,7 @@
 #include "util/noise2d.h"
 #include "core/volume.h"
 
+using namespace Torque;
 
 ConsoleStaticMethod( TerrainBlock, createNew, S32, 5, 5, 
    "TerrainBlock.create( String terrainName, U32 resolution, String materialName, bool genNoise )\n"
diff --git a/Engine/source/terrain/terrRender.cpp b/Engine/source/terrain/terrRender.cpp
index bc308e3ef..5d7146fec 100644
--- a/Engine/source/terrain/terrRender.cpp
+++ b/Engine/source/terrain/terrRender.cpp
@@ -219,8 +219,12 @@ void TerrainBlock::_updateBaseTexture( bool writeToCache )
       points[3].texCoord   = Point2F(  1.0, needsYFlip ? 0.0f : 1.0f );
 
       vb.set( GFX, 4, GFXBufferTypeVolatile );
-      dMemcpy( vb.lock(), points, sizeof(GFXVertexPT) * 4 );
-      vb.unlock();
+      GFXVertexPT *ptr = vb.lock();
+      if(ptr)
+      {
+         dMemcpy( ptr, points, sizeof(GFXVertexPT) * 4 );
+         vb.unlock();
+      }
    }
 
    GFXTexHandle blendTex;
diff --git a/Engine/source/ts/tsAnimate.cpp b/Engine/source/ts/tsAnimate.cpp
index e2332b699..8c80fea18 100644
--- a/Engine/source/ts/tsAnimate.cpp
+++ b/Engine/source/ts/tsAnimate.cpp
@@ -293,6 +293,7 @@ void TSShapeInstance::updateTransitionNodeTransforms(TSIntegerSet& transitionNod
    transitionNodes.overlap(mTransitionRotationNodes);
    transitionNodes.overlap(mTransitionTranslationNodes);
    transitionNodes.overlap(mTransitionScaleNodes);
+   transitionNodes.takeAway(mHandsOffNodes);
 
    // Decompose transforms for nodes affected by the transition. Only need to do
    // for blended or scale-animated nodes, as all others are already up to date
@@ -578,6 +579,7 @@ void TSShapeInstance::handleBlendSequence(TSThread * thread, S32 a, S32 b)
    TSIntegerSet nodeMatters = thread->getSequence()->translationMatters;
    nodeMatters.overlap(thread->getSequence()->rotationMatters);
    nodeMatters.overlap(thread->getSequence()->scaleMatters);
+   nodeMatters.takeAway(mHandsOffNodes);
    S32 start = nodeMatters.start();
    S32 end   = b;
    for (S32 nodeIndex=start; nodeIndex<end; nodeMatters.next(nodeIndex))
diff --git a/Engine/source/ts/tsMesh.cpp b/Engine/source/ts/tsMesh.cpp
index aa430a557..d857c0547 100644
--- a/Engine/source/ts/tsMesh.cpp
+++ b/Engine/source/ts/tsMesh.cpp
@@ -876,18 +876,33 @@ bool TSMesh::castRayRendered( S32 frame, const Point3F & start, const Point3F &
 
 bool TSMesh::addToHull( U32 idx0, U32 idx1, U32 idx2 )
 {
-   Point3F normal;
-   mCross(mVertexData[idx2].vert()-mVertexData[idx0].vert(),mVertexData[idx1].vert()-mVertexData[idx0].vert(),&normal);
-   if ( mDot( normal, normal ) < 0.001f )
+   // calculate the normal of this triangle... remember, we lose precision
+   // when we subtract two large numbers that are very close to each other,
+   // so depending on how we calculate the normal, we could get a 
+   // different result. so, we will calculate the normal three different
+   // ways and take the one that gives us the largest vector before we
+   // normalize.
+   Point3F normal1, normal2, normal3;
+   mCross(mVertexData[idx2].vert()-mVertexData[idx0].vert(),mVertexData[idx1].vert()-mVertexData[idx0].vert(),&normal1);
+   mCross(mVertexData[idx0].vert()-mVertexData[idx1].vert(),mVertexData[idx2].vert()-mVertexData[idx1].vert(),&normal2);
+   mCross(mVertexData[idx1].vert()-mVertexData[idx2].vert(),mVertexData[idx0].vert()-mVertexData[idx2].vert(),&normal3);
+   Point3F normal = normal1;
+   F32 greatestMagSquared = mDot(normal1, normal1);
+   F32 magSquared = mDot(normal2, normal2);
+   if (magSquared > greatestMagSquared)
    {
-      mCross( mVertexData[idx0].vert() - mVertexData[idx1].vert(), mVertexData[idx2].vert() - mVertexData[idx1].vert(), &normal );
-      if ( mDot( normal, normal ) < 0.001f )
-      {
-         mCross( mVertexData[idx1].vert() - mVertexData[idx2].vert(), mVertexData[idx0].vert() - mVertexData[idx2].vert(), &normal );
-         if ( mDot( normal, normal ) < 0.001f )
-            return false;
-      }
+      normal = normal2;
+      greatestMagSquared = magSquared;
    }
+   magSquared = mDot(normal3, normal3);
+   if (magSquared > greatestMagSquared)
+   {
+      normal = normal3;
+      greatestMagSquared = magSquared;
+   }
+   if (mDot(normal, normal) < 0.00000001f)
+       return false;
+
    normal.normalize();
    F32 k = mDot( normal, mVertexData[idx0].vert() );
    for ( S32 i = 0; i < planeNormals.size(); i++ ) 
@@ -1254,6 +1269,7 @@ void TSSkinMesh::updateSkin( const Vector<MatrixF> &transforms, TSVertexBufferHa
 
       // Lock, and skin directly into the final memory destination
       outPtr = (U8 *)instanceVB.lock();
+      if(!outPtr) return;
 #endif
       // Set position/normal to zero so we can accumulate
       zero_vert_normal_bulk(mNumVerts, outPtr, outStride);
@@ -2379,6 +2395,7 @@ void TSMesh::_createVBIB( TSVertexBufferHandle &vb, GFXPrimitiveBufferHandle &pb
 
       // Copy from aligned memory right into GPU memory
       U8 *vertData = (U8*)vb.lock();
+      if(!vertData) return;
 #if defined(TORQUE_OS_XENON)
       XMemCpyStreaming_WriteCombined( vertData, mVertexData.address(), mVertexData.mem_size() );
 #else
diff --git a/Engine/source/ts/tsShape.cpp b/Engine/source/ts/tsShape.cpp
index b7f791bed..726efabff 100644
--- a/Engine/source/ts/tsShape.cpp
+++ b/Engine/source/ts/tsShape.cpp
@@ -1177,6 +1177,11 @@ void TSShape::assembleShape()
       {
          TSMesh::smVertsList[i]  = mesh->verts.address();
          TSMesh::smTVertsList[i] = mesh->tverts.address();
+         if (smReadVersion >= 26)
+         {
+            TSMesh::smTVerts2List[i] = mesh->tverts2.address();
+            TSMesh::smColorsList[i] = mesh->colors.address();
+         }
          TSMesh::smNormsList[i]  = mesh->norms.address();
          TSMesh::smEncodedNormsList[i] = mesh->encodedNorms.address();
          TSMesh::smDataCopied[i] = !skip; // as long as we didn't skip this mesh, the data should be in shape now
diff --git a/Engine/source/ts/tsShapeInstance.cpp b/Engine/source/ts/tsShapeInstance.cpp
index a7cebb027..cbe2ffc94 100644
--- a/Engine/source/ts/tsShapeInstance.cpp
+++ b/Engine/source/ts/tsShapeInstance.cpp
@@ -259,8 +259,10 @@ void TSShapeInstance::cloneMaterialList( const FeatureSet *features )
    if ( mOwnMaterialList )
       return;
 
+   Material::sAllowTextureTargetAssignment = true;
    mMaterialList = new TSMaterialList(mMaterialList);
    initMaterialList( features );
+   Material::sAllowTextureTargetAssignment = false;
 
    mOwnMaterialList = true;
 }
diff --git a/README.md b/README.md
index 1672ecd2a..016232d4f 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-Torque 3D v3.0
+Torque 3D v3.5
 ==============
 
 MIT Licensed Open Source version of [Torque 3D](http://www.garagegames.com/products/torque-3d) from [GarageGames](http://www.garagegames.com)
diff --git a/Templates/Empty/game/core/art/datablocks/defaultparticle.cs b/Templates/Empty/game/core/art/datablocks/defaultparticle.cs
index 017d30648..7f973d525 100644
--- a/Templates/Empty/game/core/art/datablocks/defaultparticle.cs
+++ b/Templates/Empty/game/core/art/datablocks/defaultparticle.cs
@@ -62,5 +62,5 @@ datablock ParticleEmitterData(DefaultEmitter)
    particles = "DefaultParticle";
    blendStyle = "ADDITIVE";
    softParticles = "0";
-   softnessDistance = "1000";
+   softnessDistance = "1";
 };
diff --git a/Templates/Empty/game/core/scripts/client/oculusVR.cs b/Templates/Empty/game/core/scripts/client/oculusVR.cs
index 81015b40f..f0035999c 100644
--- a/Templates/Empty/game/core/scripts/client/oculusVR.cs
+++ b/Templates/Empty/game/core/scripts/client/oculusVR.cs
@@ -63,7 +63,14 @@ function enableOculusVRDisplay(%gameConnection, %trueStereoRendering)
    
    if(%trueStereoRendering)
    {
-      OVRBarrelDistortionPostFX.isEnabled = true;
+      if($pref::OculusVR::UseChromaticAberrationCorrection)
+      {
+         OVRBarrelDistortionChromaPostFX.isEnabled = true;
+      }
+      else
+      {
+         OVRBarrelDistortionPostFX.isEnabled = true;
+      }
    }
    else
    {
@@ -81,6 +88,7 @@ function disableOculusVRDisplay(%gameConnection)
    %gameConnection.clearDisplayDevice();
    PlayGui.renderStyle = "standard";
    OVRBarrelDistortionPostFX.isEnabled = false;
+   OVRBarrelDistortionChromaPostFX.isEnabled = false;
    OVRBarrelDistortionMonoPostFX.isEnabled = false;
 }
 
@@ -112,7 +120,7 @@ function setStandardOculusVRControlScheme(%gameConnection)
 function setVideoModeForOculusVRDisplay(%fullscreen)
 {
    %res = getOVRHMDResolution(0);
-   Canvas.setVideoMode(%res.x, %res.y, %fullscreen, 32, 0);
+   Canvas.setVideoMode(%res.x, %res.y, %fullscreen, 32, 4);
 }
 
 //-----------------------------------------------------------------------------
diff --git a/Templates/Empty/game/core/scripts/client/postFx/caustics.cs b/Templates/Empty/game/core/scripts/client/postFx/caustics.cs
index c24176e48..c6a694c51 100644
--- a/Templates/Empty/game/core/scripts/client/postFx/caustics.cs
+++ b/Templates/Empty/game/core/scripts/client/postFx/caustics.cs
@@ -20,7 +20,6 @@
 // IN THE SOFTWARE.
 //-----------------------------------------------------------------------------
 
-
 singleton GFXStateBlockData( PFX_CausticsStateBlock : PFX_DefaultStateBlock )
 {
    blendDefined = true;
@@ -42,16 +41,11 @@ singleton ShaderData( PFX_CausticsShader )
    //OGLVertexShaderFile  = "shaders/common/postFx/gl//postFxV.glsl";
    //OGLPixelShaderFile   = "shaders/common/postFx/gl/passthruP.glsl";
       
-   samplerNames[0] = "$prepassTex";
-   samplerNames[1] = "$causticsTex1";
-   samplerNames[2] = "$causticsTex2";
-   
    pixVersion = 3.0;
 };
 
 singleton PostEffect( CausticsPFX )
 {
-   requirements = "None";
    isEnabled = false;
    renderTime = "PFXBeforeBin";
    renderBin = "ObjTranslucentBin";      
@@ -63,17 +57,4 @@ singleton PostEffect( CausticsPFX )
    texture[1] = "textures/caustics_1";
    texture[2] = "textures/caustics_2";
    target = "$backBuffer";
-   
 };
-
-// this effects the timing of the animation -
-
-$CausticsPFX::refTime = getSimTime();
-
-function CausticsPFX::setShaderConsts(%this)
-{
-   //echo($Sim::time - %this.timeStart);
-   //echo(%this.timeConst);
-   %this.setShaderConst( "$refTime", $CausticsPFX::refTime ); 
-}
-
diff --git a/Templates/Empty/game/core/scripts/client/postFx/fog.cs b/Templates/Empty/game/core/scripts/client/postFx/fog.cs
index 44539cb0b..5257db595 100644
--- a/Templates/Empty/game/core/scripts/client/postFx/fog.cs
+++ b/Templates/Empty/game/core/scripts/client/postFx/fog.cs
@@ -117,3 +117,16 @@ singleton PostEffect( UnderwaterFogPostFx )
    isEnabled = true;
 };
 
+function UnderwaterFogPostFx::onEnabled( %this )
+{
+   TurbulenceFx.enable();
+   CausticsPFX.enable();
+   return true;
+}
+
+function UnderwaterFogPostFx::onDisabled( %this )
+{
+   TurbulenceFx.disable();
+   CausticsPFX.disable();
+   return false;
+}
diff --git a/Templates/Empty/game/core/scripts/client/postFx/ovrBarrelDistortion.cs b/Templates/Empty/game/core/scripts/client/postFx/ovrBarrelDistortion.cs
index 7978a6bc8..cbd72ec84 100644
--- a/Templates/Empty/game/core/scripts/client/postFx/ovrBarrelDistortion.cs
+++ b/Templates/Empty/game/core/scripts/client/postFx/ovrBarrelDistortion.cs
@@ -44,6 +44,14 @@ singleton ShaderData( OVRBarrelDistortionShader )
    pixVersion = 2.0;   
 };
 
+singleton ShaderData( OVRBarrelDistortionChromaShader )
+{
+   DXVertexShaderFile 	= "shaders/common/postFx/postFxV.hlsl";
+   DXPixelShaderFile 	= "shaders/common/postFx/oculusvr/barrelDistortionChromaP.hlsl";
+
+   pixVersion = 2.0;   
+};
+
 //-----------------------------------------------------------------------------
 // GFX state blocks
 //-----------------------------------------------------------------------------
@@ -78,6 +86,32 @@ singleton BarrelDistortionPostEffect( OVRBarrelDistortionPostFX )
    scaleOutput = 1.25;
 };
 
+//-----------------------------------------------------------------------------
+// Barrel Distortion with Chromatic Aberration Correction PostFx
+//
+// To be used with the Oculus Rift.
+// Expects a stereo pair to exist on the back buffer and then applies the
+// appropriate barrel distortion.
+// This version applies a chromatic aberration correction during the
+// barrel distortion.
+//-----------------------------------------------------------------------------
+singleton BarrelDistortionPostEffect( OVRBarrelDistortionChromaPostFX )
+{
+   isEnabled = false;
+   allowReflectPass = false;
+   
+   renderTime = "PFXAfterDiffuse";
+   renderPriority = 100;
+
+   // The barrel distortion   
+   shader = OVRBarrelDistortionChromaShader;
+   stateBlock = OVRBarrelDistortionStateBlock;
+   
+   texture[0] = "$backBuffer";
+   
+   scaleOutput = 1.25;
+};
+
 //-----------------------------------------------------------------------------
 // Barrel Distortion Mono PostFx
 //
diff --git a/Templates/Empty/game/core/scripts/client/postFx/ssao.cs b/Templates/Empty/game/core/scripts/client/postFx/ssao.cs
index 69977f352..cd5bfc598 100644
--- a/Templates/Empty/game/core/scripts/client/postFx/ssao.cs
+++ b/Templates/Empty/game/core/scripts/client/postFx/ssao.cs
@@ -190,6 +190,7 @@ singleton PostEffect( SSAOPostFx )
    
    target = "$outTex";
    targetScale = "0.5 0.5";
+   targetViewport = "PFXTargetViewport_NamedInTexture0";
    
    singleton PostEffect()
    {
diff --git a/Templates/Empty/game/core/scripts/client/postFx/turbulence.cs b/Templates/Empty/game/core/scripts/client/postFx/turbulence.cs
index af4d7dc7f..767470e67 100644
--- a/Templates/Empty/game/core/scripts/client/postFx/turbulence.cs
+++ b/Templates/Empty/game/core/scripts/client/postFx/turbulence.cs
@@ -20,44 +20,34 @@
 // IN THE SOFTWARE.
 //-----------------------------------------------------------------------------
 
+singleton GFXStateBlockData( PFX_TurbulenceStateBlock : PFX_DefaultStateBlock)  
+{  
+   zDefined = false;
+   zEnable = false;  
+   zWriteEnable = false;  
+        
+   samplersDefined = true;  
+   samplerStates[0] = SamplerClampLinear;
+};  
+  
 singleton ShaderData( PFX_TurbulenceShader )
 {   
    DXVertexShaderFile 	= "shaders/common/postFx/postFxV.hlsl";
    DXPixelShaderFile 	= "shaders/common/postFx/turbulenceP.hlsl";
            
-   samplerNames[0] = "$inputTex";
    pixVersion = 3.0;
 };
 
 singleton PostEffect( TurbulenceFx )  
 {  
-   requirements = "None";
    isEnabled = false;    
    allowReflectPass = true;  
          
-   renderTime = "PFXAfterDiffuse";  
-   renderBin = "ObjTranslucentBin";     
+   renderTime = "PFXAfterBin";
+   renderBin = "GlowBin";
+   renderPriority = 0.5; // Render after the glows themselves
      
    shader = PFX_TurbulenceShader;  
-   stateBlock = PFX_myShaderStateBlock;  
-   texture[0] = "$backBuffer";  
-      
-   renderPriority = 0.1;  
+   stateBlock=PFX_TurbulenceStateBlock;
+   texture[0] = "$backBuffer";      
  };
-
-function TurbulenceFx::setShaderConsts(%this)
-{
-   %this.setShaderConst(%this.timeConst, $Sim::time - %this.timeStart); 
-}
-
-function UnderwaterFogPostFx::onEnabled( %this )
-{
-   TurbulenceFx.enable();
-   return true;
-}
-
-function UnderwaterFogPostFx::onDisabled( %this )
-{
-   TurbulenceFx.disable();
-   return false;
-}
\ No newline at end of file
diff --git a/Templates/Empty/game/core/scripts/server/game.cs b/Templates/Empty/game/core/scripts/server/game.cs
index a7198cafb..d80dd4468 100644
--- a/Templates/Empty/game/core/scripts/server/game.cs
+++ b/Templates/Empty/game/core/scripts/server/game.cs
@@ -34,8 +34,8 @@ function onServerCreated()
    
    // Load up any objects or datablocks saved to the editor managed scripts
    %datablockFiles = new ArrayObject();
-   %datablockFiles.add( "art/shapes/particles/managedParticleData.cs" );
-   %datablockFiles.add( "art/shapes/particles/managedParticleEmitterData.cs" );
+   %datablockFiles.add( "art/particles/managedParticleData.cs" );
+   %datablockFiles.add( "art/particles/managedParticleEmitterData.cs" );
    %datablockFiles.add( "art/decals/managedDecalData.cs" );
    %datablockFiles.add( "art/datablocks/managedDatablocks.cs" );
    %datablockFiles.add( "art/forest/managedItemData.cs" );
diff --git a/Templates/Empty/game/scripts/client/init.cs b/Templates/Empty/game/scripts/client/init.cs
index 4698b8c47..fa9a8b8f9 100644
--- a/Templates/Empty/game/scripts/client/init.cs
+++ b/Templates/Empty/game/scripts/client/init.cs
@@ -110,7 +110,7 @@ function initClient()
    setDefaultFov( $pref::Player::defaultFov );
    setZoomSpeed( $pref::Player::zoomSpeed );
 
-   if( isFile( "./audioData.cs" ) )
+   if( isScriptFile( expandFilename("./audioData.cs") ) )
       exec( "./audioData.cs" );
 
    // Start up the main menu... this is separated out into a
diff --git a/Templates/Empty/game/scripts/server/game.cs b/Templates/Empty/game/scripts/server/game.cs
index 0cda449a1..4826c0de2 100644
--- a/Templates/Empty/game/scripts/server/game.cs
+++ b/Templates/Empty/game/scripts/server/game.cs
@@ -144,8 +144,8 @@ function onServerCreated()
    
    // Load up any objects or datablocks saved to the editor managed scripts
    %datablockFiles = new ArrayObject();
-   %datablockFiles.add( "art/shapes/particles/managedParticleData.cs" );
-   %datablockFiles.add( "art/shapes/particles/managedParticleEmitterData.cs" );
+   %datablockFiles.add( "art/particles/managedParticleData.cs" );
+   %datablockFiles.add( "art/particles/managedParticleEmitterData.cs" );
    %datablockFiles.add( "art/decals/managedDecalData.cs" );
    %datablockFiles.add( "art/datablocks/managedDatablocks.cs" );
    %datablockFiles.add( "art/forest/managedItemData.cs" );
diff --git a/Templates/Empty/game/shaders/common/lighting/advanced/gl/pointLightP.glsl b/Templates/Empty/game/shaders/common/lighting/advanced/gl/pointLightP.glsl
index f38d62fa5..b135f1aa8 100644
--- a/Templates/Empty/game/shaders/common/lighting/advanced/gl/pointLightP.glsl
+++ b/Templates/Empty/game/shaders/common/lighting/advanced/gl/pointLightP.glsl
@@ -221,7 +221,7 @@ void main()
                                     normal, 
                                     normalize( -eyeRay ), 
                                     constantSpecularPower, 
-                                    lightColor.a * lightBrightness );
+                                    shadowed * atten * lightBrightness );
     
    // N.L * Attenuation
    float Sat_NL_Att = clamp( nDotL * atten * shadowed, 0.0, 1.0 );
diff --git a/Templates/Empty/game/shaders/common/lighting/advanced/gl/spotLightP.glsl b/Templates/Empty/game/shaders/common/lighting/advanced/gl/spotLightP.glsl
index 1f7e949d9..d29f5edb0 100644
--- a/Templates/Empty/game/shaders/common/lighting/advanced/gl/spotLightP.glsl
+++ b/Templates/Empty/game/shaders/common/lighting/advanced/gl/spotLightP.glsl
@@ -148,7 +148,7 @@ void main()
                                     normal, 
                                     normalize( -eyeRay ), 
                                     constantSpecularPower, 
-                                    lightColor.a * lightBrightness );
+                                    shadowed * atten * lightBrightness );
     
    // N.L * Attenuation
    float Sat_NL_Att = clamp( nDotL * atten * shadowed, 0.0, 1.0 );
diff --git a/Templates/Empty/game/shaders/common/lighting/advanced/gl/vectorLightP.glsl b/Templates/Empty/game/shaders/common/lighting/advanced/gl/vectorLightP.glsl
index 579f04ad4..bbd567fd0 100644
--- a/Templates/Empty/game/shaders/common/lighting/advanced/gl/vectorLightP.glsl
+++ b/Templates/Empty/game/shaders/common/lighting/advanced/gl/vectorLightP.glsl
@@ -196,7 +196,7 @@ void main()
                                     normal, 
                                     normalize(-wsEyeRay), 
                                     constantSpecularPower, 
-                                    lightColor.a * lightBrightness );
+                                    shadowed * lightBrightness );
    
    float Sat_NL_Att = clamp(dotNL, 0.0, 1.0) * shadowed;
    
diff --git a/Templates/Empty/game/shaders/common/lighting/advanced/pointLightP.hlsl b/Templates/Empty/game/shaders/common/lighting/advanced/pointLightP.hlsl
index 96d00b78d..fbfced097 100644
--- a/Templates/Empty/game/shaders/common/lighting/advanced/pointLightP.hlsl
+++ b/Templates/Empty/game/shaders/common/lighting/advanced/pointLightP.hlsl
@@ -213,7 +213,7 @@ float4 main(   ConvexConnectP IN,
    // Specular term
    float specular = AL_CalcSpecular(   lightVec, 
                                        normal, 
-                                       normalize( -eyeRay ) ) * lightColor.a;
+                                       normalize( -eyeRay ) ) * lightBrightness * atten * shadowed;
 
    float Sat_NL_Att = saturate( nDotL * atten * shadowed ) * lightBrightness;
    float3 lightColorOut = lightMapParams.rgb * lightColor.rgb;
diff --git a/Templates/Empty/game/shaders/common/lighting/advanced/spotLightP.hlsl b/Templates/Empty/game/shaders/common/lighting/advanced/spotLightP.hlsl
index 1cf1f13f5..88e35ad3a 100644
--- a/Templates/Empty/game/shaders/common/lighting/advanced/spotLightP.hlsl
+++ b/Templates/Empty/game/shaders/common/lighting/advanced/spotLightP.hlsl
@@ -141,7 +141,7 @@ float4 main(   ConvexConnectP IN,
    // Specular term
    float specular = AL_CalcSpecular(   -lightToPxlVec, 
                                        normal, 
-                                       normalize( -eyeRay ) ) * lightColor.a;
+                                       normalize( -eyeRay ) ) * lightBrightness * atten * shadowed;
 
    float Sat_NL_Att = saturate( nDotL * atten * shadowed ) * lightBrightness;
    float3 lightColorOut = lightMapParams.rgb * lightColor.rgb;
diff --git a/Templates/Empty/game/shaders/common/lighting/advanced/vectorLightP.hlsl b/Templates/Empty/game/shaders/common/lighting/advanced/vectorLightP.hlsl
index 4be1bc9bd..5ddb5586b 100644
--- a/Templates/Empty/game/shaders/common/lighting/advanced/vectorLightP.hlsl
+++ b/Templates/Empty/game/shaders/common/lighting/advanced/vectorLightP.hlsl
@@ -198,7 +198,7 @@ float4 main( FarFrustumQuadConnectP IN,
    // Specular term
    float specular = AL_CalcSpecular(   -lightDirection, 
                                        normal, 
-                                       normalize(-IN.vsEyeRay) ) * lightColor.a;
+                                       normalize(-IN.vsEyeRay) ) * lightBrightness * shadowed;
                                     
    float Sat_NL_Att = saturate( dotNL * shadowed ) * lightBrightness;
    float3 lightColorOut = lightMapParams.rgb * lightColor.rgb;
diff --git a/Templates/Empty/game/shaders/common/postFx/caustics/causticsP.hlsl b/Templates/Empty/game/shaders/common/postFx/caustics/causticsP.hlsl
index f9242734b..c7635027d 100644
--- a/Templates/Empty/game/shaders/common/postFx/caustics/causticsP.hlsl
+++ b/Templates/Empty/game/shaders/common/postFx/caustics/causticsP.hlsl
@@ -28,23 +28,33 @@ uniform float4    rtParams0;
 uniform float4    waterFogPlane;
 uniform float     accumTime;
 
+float distanceToPlane(float4 plane, float3 pos)
+{
+   return (plane.x * pos.x + plane.y * pos.y + plane.z * pos.z) + plane.w;
+}
+
 float4 main( PFXVertToPix IN, 
              uniform sampler2D prepassTex :register(S0),
              uniform sampler2D causticsTex0 :register(S1),
-             uniform sampler2D causticsTex1 :register(S2),
-             uniform float2 targetSize : register(C0) ) : COLOR
+             uniform sampler2D causticsTex1 :register(S2) ) : COLOR
 {   
    //Sample the pre-pass
-   float2 prepassCoord = ( IN.uv0.xy * rtParams0.zw ) + rtParams0.xy;  
-   float4 prePass = prepassUncondition( prepassTex, prepassCoord );
+   float4 prePass = prepassUncondition( prepassTex, IN.uv0 );
    
    //Get depth
    float depth = prePass.w;   
-   clip( 0.9999 - depth );
+   if(depth > 0.9999)
+      return float4(0,0,0,0);
    
    //Get world position
    float3 pos = eyePosWorld + IN.wsEyeRay * depth;
    
+   // Check the water depth
+   float waterDepth = -distanceToPlane(waterFogPlane, pos);
+   if(waterDepth < 0)
+      return float4(0,0,0,0);
+   waterDepth = saturate(waterDepth);
+   
    //Use world position X and Y to calculate caustics UV 
    float2 causticsUV0 = (abs(pos.xy * 0.25) % float2(1, 1));
    float2 causticsUV1 = (abs(pos.xy * 0.2) % float2(1, 1));
@@ -59,7 +69,7 @@ float4 main( PFXVertToPix IN,
    caustics *= tex2D(causticsTex1, causticsUV1);
    
    //Use normal Z to modulate caustics  
-   float waterDepth = 1 - saturate(pos.z + waterFogPlane.w + 1);
+   //waterDepth is now derived from the distance to waterFogPlane above (was: 1 - saturate(pos.z + waterFogPlane.w + 1))
    caustics *= saturate(prePass.z) * pow(1-depth, 64) * waterDepth; 
       
    return caustics;   
diff --git a/Templates/Empty/game/shaders/common/postFx/oculusvr/barrelDistortionChromaP.hlsl b/Templates/Empty/game/shaders/common/postFx/oculusvr/barrelDistortionChromaP.hlsl
new file mode 100644
index 000000000..726fb0afc
--- /dev/null
+++ b/Templates/Empty/game/shaders/common/postFx/oculusvr/barrelDistortionChromaP.hlsl
@@ -0,0 +1,95 @@
+//-----------------------------------------------------------------------------
+// Copyright (c) 2012 GarageGames, LLC
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to
+// deal in the Software without restriction, including without limitation the
+// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+// sell copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+// IN THE SOFTWARE.
+//-----------------------------------------------------------------------------
+
+#include "shadergen:/autogenConditioners.h"  
+#include "../postFx.hlsl"  
+#include "../../torque.hlsl"
+
+uniform sampler2D backBuffer : register(S0);
+
+uniform float3 LensCenter;    // x=Left X, y=Right X, z=Y
+uniform float2 ScreenCenter;
+uniform float2 Scale;
+uniform float2 ScaleIn;
+uniform float4 HmdWarpParam;
+uniform float4 HmdChromaAbParam; // Chromatic aberration correction
+
+float4 main( PFXVertToPix IN ) : COLOR0  
+{
+   float2 texCoord;
+   float xOffset;
+   float2 lensCenter;
+   lensCenter.y = LensCenter.z;
+   if(IN.uv0.x < 0.5)
+   {
+      texCoord.x = IN.uv0.x;
+      texCoord.y = IN.uv0.y;
+      xOffset = 0.0;
+      lensCenter.x = LensCenter.x;
+   }
+   else
+   {
+      texCoord.x = IN.uv0.x - 0.5;
+      texCoord.y = IN.uv0.y;
+      xOffset = 0.5;
+      lensCenter.x = LensCenter.y;
+   }
+   
+   // Scales input texture coordinates for distortion.
+   // ScaleIn maps texture coordinates to the range [-1, 1], although top/bottom will be
+   // larger due to aspect ratio.
+   float2 theta = (texCoord - lensCenter) * ScaleIn; // Scales to [-1, 1]
+   float rSq = theta.x * theta.x + theta.y * theta.y;
+   float2 theta1 = theta * (HmdWarpParam.x + HmdWarpParam.y * rSq + HmdWarpParam.z * rSq * rSq + HmdWarpParam.w * rSq * rSq * rSq);
+
+   // Detect whether blue texture coordinates are out of range
+   // since these will be scaled out the furthest.
+   float2 thetaBlue = theta1 * (HmdChromaAbParam.z + HmdChromaAbParam.w * rSq);
+   float2 tcBlue = lensCenter + Scale * thetaBlue;
+   
+   float4 color;
+   if (any(clamp(tcBlue, ScreenCenter-float2(0.25,0.5), ScreenCenter+float2(0.25, 0.5)) - tcBlue))
+   {
+      color = float4(0,0,0,0);
+   }
+   else
+   {
+      // Now do blue texture lookup.
+      tcBlue.x += xOffset;
+      float blue = tex2D(backBuffer, tcBlue).b;
+
+      // Do green lookup (no scaling).
+      float2 tcGreen = lensCenter + Scale * theta1;
+      tcGreen.x += xOffset;
+      float green = tex2D(backBuffer, tcGreen).g;
+
+      // Do red scale and lookup.
+      float2 thetaRed = theta1 * (HmdChromaAbParam.x + HmdChromaAbParam.y * rSq);
+      float2 tcRed = lensCenter + Scale * thetaRed;
+      tcRed.x += xOffset;
+      float red = tex2D(backBuffer, tcRed).r;
+
+      color = float4(red, green, blue, 1);
+   }
+
+   return color;    
+}
diff --git a/Templates/Empty/game/shaders/common/postFx/turbulenceP.hlsl b/Templates/Empty/game/shaders/common/postFx/turbulenceP.hlsl
index a8f79cfee..0fb38ea07 100644
--- a/Templates/Empty/game/shaders/common/postFx/turbulenceP.hlsl
+++ b/Templates/Empty/game/shaders/common/postFx/turbulenceP.hlsl
@@ -23,23 +23,20 @@
 #include "./postFx.hlsl"
 
 uniform float  accumTime;
+uniform float2 projectionOffset;
+uniform float4 targetViewport;
 
 float4 main( PFXVertToPix IN, uniform sampler2D inputTex : register(S0) ) : COLOR
 {
-	float reduction = 128;	
-	float power = 1.0;
-	float speed = 3.0;
-	float frequency=8;
+	float speed = 2.0;
+	float distortion = 6.0;
 	
-	float backbuffer_edge_coef=0.98;
-	float2 screen_center = float2(0.5, 0.5);	
-	float2 cPos = (IN.uv0 - screen_center);
+	float y = IN.uv0.y + (cos((IN.uv0.y+projectionOffset.y) * distortion + accumTime * speed) * 0.01);
+   float x = IN.uv0.x + (sin((IN.uv0.x+projectionOffset.x) * distortion + accumTime * speed) * 0.01);
+
+   // Clamp the calculated uv values to be within the target's viewport
+	y = clamp(y, targetViewport.y, targetViewport.w);
+	x = clamp(x, targetViewport.x, targetViewport.z);
 	
-	float len = 1.0 - length(cPos);		
-	float2 uv = clamp((cPos / len * cos(len * frequency - (accumTime * speed)) * (power / reduction)), 0, 1);
-	return tex2D(inputTex, IN.uv0 * backbuffer_edge_coef + uv);
-
-//    float4 color = tex2D(inputTex, IN.uv0 * backbuffer_edge_coef+(sin*right));           
-//	return color;
-
-}
\ No newline at end of file
+    return tex2D (inputTex, float2(x, y));
+}
diff --git a/Templates/Empty/game/shaders/common/postFx/underwaterFogP.hlsl b/Templates/Empty/game/shaders/common/postFx/underwaterFogP.hlsl
index a0aa90ecd..c01467e29 100644
--- a/Templates/Empty/game/shaders/common/postFx/underwaterFogP.hlsl
+++ b/Templates/Empty/game/shaders/common/postFx/underwaterFogP.hlsl
@@ -132,7 +132,7 @@ float4 main( PFXVertToPix IN ) : COLOR
    inColor.rgb *= 1.0 - saturate( abs( planeDist ) / WET_DEPTH ) * WET_DARKENING;
    //return float4( inColor, 1 );
    
-   float3 outColor = lerp( inColor, fogColor, fogAmt );
+   float3 outColor = lerp( inColor, fogColor.rgb, fogAmt );
    
    return float4( hdrEncode( outColor ), 1 );        
 }
\ No newline at end of file
diff --git a/Templates/Empty/game/tools/classIcons/NavMesh.png b/Templates/Empty/game/tools/classIcons/NavMesh.png
new file mode 100644
index 000000000..056d3c3ac
Binary files /dev/null and b/Templates/Empty/game/tools/classIcons/NavMesh.png differ
diff --git a/Templates/Empty/game/tools/classIcons/NavPath.png b/Templates/Empty/game/tools/classIcons/NavPath.png
new file mode 100644
index 000000000..35b8372ae
Binary files /dev/null and b/Templates/Empty/game/tools/classIcons/NavPath.png differ
diff --git a/Templates/Empty/game/tools/materialEditor/gui/guiMaterialPropertiesWindow.ed.gui b/Templates/Empty/game/tools/materialEditor/gui/guiMaterialPropertiesWindow.ed.gui
index c6153eadf..90298419b 100644
--- a/Templates/Empty/game/tools/materialEditor/gui/guiMaterialPropertiesWindow.ed.gui
+++ b/Templates/Empty/game/tools/materialEditor/gui/guiMaterialPropertiesWindow.ed.gui
@@ -1522,7 +1522,7 @@
                         profile = "ToolsGuiTransparentProfile";
                         isContainer = "1";
                         position = "0 0";
-                        Extent = "185 22";
+                        Extent = "185 44";
                         HorizSizing = "width";
                         
                         new GuiCheckBoxCtrl() {
@@ -1568,6 +1568,14 @@
                            useMouseEvents = "0";
                         };
                         
+                        new GuiTextCtrl() {
+                           HorizSizing = "right";
+                           VertSizing = "bottom";
+                           position = "9 26";
+                           Extent = "72 16";
+                           text = "Spec strength";
+                        };
+
                         new GuiControl() {
                            class = "AggregateControl";
                            position = "91 4";
@@ -1589,7 +1597,7 @@
                               Command = "MaterialEditorGui.updateActiveMaterial(\"specularPower[\" @ MaterialEditorGui.currentLayer @ \"]\", mCeil($ThisControl.getValue()), true, true);";
                               AltCommand = "$ThisControl.getParent().updateFromChild($ThisControl); MaterialEditorGui.updateActiveMaterial(\"specularPower[\" @ MaterialEditorGui.currentLayer @ \"]\", mCeil($ThisControl.getValue()), true, false);";
                               tooltipprofile = "ToolsGuiDefaultProfile";
-                              ToolTip = "Sets the strength of the Pixel Specular value.";
+                              ToolTip = "Sets the hardness of the Pixel Specular value.";
                               hovertime = "1000";
                               range = "1 128";
                               ticks = "0";
@@ -1618,6 +1626,57 @@
                               maxLength = "3";
                            };
                         };
+
+                        new GuiControl() {
+                           class = "AggregateControl";
+                           position = "91 26";
+                           Extent = "96 20";
+
+                           new GuiSliderCtrl() {
+                              canSaveDynamicFields = "0";
+                              internalName = "specularStrengthSlider";
+                              Enabled = "1";
+                              isContainer = "0";
+                              Profile = "ToolsGuiSliderProfile";
+                              HorizSizing = "right";
+                              VertSizing = "bottom";
+                              position = "0 1";
+                              Extent = "61 14";
+                              MinExtent = "8 2";
+                              canSave = "1";
+                              Visible = "1";
+                              Command = "MaterialEditorGui.updateActiveMaterial(\"specularStrength[\" @ MaterialEditorGui.currentLayer @ \"]\", $ThisControl.getValue(), true, true);";
+                              AltCommand = "$ThisControl.getParent().updateFromChild($ThisControl); MaterialEditorGui.updateActiveMaterial(\"specularStrength[\" @ MaterialEditorGui.currentLayer @ \"]\", $ThisControl.getValue(), true, false);";
+                              tooltipprofile = "ToolsGuiDefaultProfile";
+                              ToolTip = "Sets the strength of the Pixel Specular value.";
+                              hovertime = "1000";
+                              range = "0 5";
+                              ticks = "0";
+                              value = "1";
+                           };
+                           new GuiTextEditCtrl() {
+                              canSaveDynamicFields = "0";
+                              internalName = "specularStrengthTextEdit";
+                              Enabled = "1";
+                              isContainer = "0";
+                              Profile = "ToolsGuiTextEditProfile";
+                              HorizSizing = "right";
+                              VertSizing = "bottom";
+                              position = "64 0";
+                              Extent = "29 18";
+                              MinExtent = "8 2";
+                              canSave = "1";
+                              Visible = "1";
+                              Command = "$ThisControl.getParent().updateFromChild($ThisControl); MaterialEditorGui.updateActiveMaterial(\"specularStrength[\" @ MaterialEditorGui.currentLayer @ \"]\", $ThisControl.getValue());";
+                              hovertime = "1000";
+                              AnchorTop = "1";
+                              AnchorBottom = "0";
+                              AnchorLeft = "1";
+                              AnchorRight = "0";
+                              text = "1";
+                              maxLength = "3";
+                           };
+                        };
                      };
                      new GuiContainer(){ // glow emissive
                         profile = "ToolsGuiTransparentProfile";
diff --git a/Templates/Empty/game/tools/materialEditor/scripts/materialEditor.ed.cs b/Templates/Empty/game/tools/materialEditor/scripts/materialEditor.ed.cs
index b13b20b46..ec1020fa4 100644
--- a/Templates/Empty/game/tools/materialEditor/scripts/materialEditor.ed.cs
+++ b/Templates/Empty/game/tools/materialEditor/scripts/materialEditor.ed.cs
@@ -894,6 +894,8 @@ function MaterialEditorGui::guiSync( %this, %material )
    
    MaterialEditorPropertiesWindow-->specularPowerTextEdit.setText((%material).specularPower[%layer]);
    MaterialEditorPropertiesWindow-->specularPowerSlider.setValue((%material).specularPower[%layer]);
+   MaterialEditorPropertiesWindow-->specularStrengthTextEdit.setText((%material).specularStrength[%layer]);
+   MaterialEditorPropertiesWindow-->specularStrengthSlider.setValue((%material).specularStrength[%layer]);
    MaterialEditorPropertiesWindow-->pixelSpecularCheckbox.setValue((%material).pixelSpecular[%layer]);
    MaterialEditorPropertiesWindow-->glowCheckbox.setValue((%material).glow[%layer]);
    MaterialEditorPropertiesWindow-->emissiveCheckbox.setValue((%material).emissive[%layer]);
@@ -2142,9 +2144,7 @@ function MaterialEditorGui::changeMaterial(%this, %fromMaterial, %toMaterial)
       
       MaterialEditorGui.currentObject.changeMaterial( %materialTarget, %fromMaterial.getName(), %toMaterial.getName() );
       
-      if( MaterialEditorGui.currentObject.interiorFile !$= "" )
-         %sourcePath = MaterialEditorGui.currentObject.interiorFile;
-      else if( MaterialEditorGui.currentObject.shapeName !$= "" ) 
+      if( MaterialEditorGui.currentObject.shapeName !$= "" ) 
          %sourcePath = MaterialEditorGui.currentObject.shapeName;
       else if( MaterialEditorGui.currentObject.isMethod("getDatablock") )
       {
diff --git a/Templates/Empty/game/tools/particleEditor/ParticleEditor.ed.gui b/Templates/Empty/game/tools/particleEditor/ParticleEditor.ed.gui
index cbc17ca61..865334dbd 100644
--- a/Templates/Empty/game/tools/particleEditor/ParticleEditor.ed.gui
+++ b/Templates/Empty/game/tools/particleEditor/ParticleEditor.ed.gui
@@ -1069,7 +1069,53 @@ $PE_guielement_ext_colorpicker = "18 18";
                            position = $PE_guielement_pos_value;
                            Extent = $PE_guielement_ext_value;
                            altCommand = "$ThisControl.getParent().updateFromChild($ThisControl); PE_EmitterEditor.updateEmitter( \"ejectionOffset\", $ThisControl.getText());";
-                        };             
+                        };
+					 };
+                     new GuiControl(){ // Emitter Offset Variance
+                        class = "AggregateControl";
+                        isContainer = "1";
+                        HorizSizing = "width";
+                        VertSizing = "bottom";
+                        Position = $PE_guielement_pos_single_container ;
+                        Extent = $PE_guielement_ext_single_container ;
+                        
+                        new GuiTextCtrl() {
+                           Profile = "ToolsGuiTextProfile";
+                           HorizSizing = "width";
+                           VertSizing = "bottom";
+                           position = $PE_guielement_pos_name;
+                           Extent = $PE_guielement_ext_name;
+                           text = "OffsetVariance";
+                        };
+                        new GuiSliderCtrl(PEE_ejectionOffsetVariance) {
+                           internalName = "PEE_ejectionOffsetVariance_slider";
+                           canSaveDynamicFields = "0";
+                           Enabled = "1";
+                           isContainer = "0";
+                           Profile = "ToolsGuiSliderProfile";
+                           HorizSizing = "left";
+                           VertSizing = "bottom";
+                           position = $PE_guielement_pos_slider;
+                           Extent = $PE_guielement_ext_slider;
+                           MinExtent = "8 2";
+                           canSave = "1";
+                           Visible = "1";
+                           Command = "PE_EmitterEditor.updateEmitter( \"ejectionOffsetVariance\", $ThisControl.getValue(), true, true );";
+                           altCommand = "$ThisControl.getParent().updateFromChild($ThisControl); PE_EmitterEditor.updateEmitter( \"ejectionOffsetVariance\", $ThisControl.getValue(), true, false );";
+                           hovertime = "1000";
+                           range = "0 25";
+                           ticks = "0";
+                           value = "0";
+                        };
+                        new GuiTextEditCtrl() {
+                           internalName = "PEE_ejectionOffsetVariance_textEdit";
+                           Profile = "ToolsGuiTextEditProfile";
+                           HorizSizing = "left";
+                           VertSizing = "bottom";
+                           position = $PE_guielement_pos_value;
+                           Extent = $PE_guielement_ext_value;
+                           altCommand = "$ThisControl.getParent().updateFromChild($ThisControl); PE_EmitterEditor.updateEmitter( \"ejectionOffsetVariance\", $ThisControl.getText());";
+                        };               
                      };
                   }; // end stack
                }; // end "spread" rollout
diff --git a/Templates/Empty/game/tools/worldEditor/gui/EditorGui.ed.gui b/Templates/Empty/game/tools/worldEditor/gui/EditorGui.ed.gui
index 484d65fe9..445bacf63 100644
--- a/Templates/Empty/game/tools/worldEditor/gui/EditorGui.ed.gui
+++ b/Templates/Empty/game/tools/worldEditor/gui/EditorGui.ed.gui
@@ -531,7 +531,7 @@
       selectionHidden = "1";
       renderVertexSelection = "1";
       processUsesBrush = "0";
-      maxBrushSize = "40 40";
+      maxBrushSize = "256 256";
       adjustHeightVal = "10";
       setHeightVal = "100";
       scaleVal = "1";
diff --git a/Templates/Empty/game/tools/worldEditor/gui/TerrainEditToolbar.ed.gui b/Templates/Empty/game/tools/worldEditor/gui/TerrainEditToolbar.ed.gui
index 34e880497..cc9d6983b 100644
--- a/Templates/Empty/game/tools/worldEditor/gui/TerrainEditToolbar.ed.gui
+++ b/Templates/Empty/game/tools/worldEditor/gui/TerrainEditToolbar.ed.gui
@@ -517,7 +517,7 @@ new GuiMouseEventCtrl(TerrainBrushSizeSliderCtrlContainer,EditorGuiGroup) {
       canSave = "1";
       Visible = "1";
       AltCommand = "TerrainBrushSizeTextEditContainer-->textEdit.setValue(mCeil($ThisControl.getValue())); ETerrainEditor.setBrushSize( $ThisControl.value );";
-      range = "1 40";
+      range = "1 256";
       ticks = "0";
       value = "0";
    };
diff --git a/Templates/Empty/game/tools/worldEditor/gui/TerrainPainterToolbar.ed.gui b/Templates/Empty/game/tools/worldEditor/gui/TerrainPainterToolbar.ed.gui
index 3f98f8b60..8cdba481d 100644
--- a/Templates/Empty/game/tools/worldEditor/gui/TerrainPainterToolbar.ed.gui
+++ b/Templates/Empty/game/tools/worldEditor/gui/TerrainPainterToolbar.ed.gui
@@ -494,7 +494,7 @@ new GuiMouseEventCtrl(PaintBrushSizeSliderCtrlContainer,EditorGuiGroup) {
       canSave = "1";
       Visible = "1";
       AltCommand = "PaintBrushSizeTextEditContainer-->textEdit.setValue(mFloatLength( ($ThisControl.getValue()), 2 )); ETerrainEditor.setBrushSize( $ThisControl.value );";
-      range = "1 40";
+      range = "1 256";
       ticks = "0";
       value = "0";
    };
diff --git a/Templates/Empty/game/tools/worldEditor/scripts/editors/creator.ed.cs b/Templates/Empty/game/tools/worldEditor/scripts/editors/creator.ed.cs
index 18493ef4a..2e0a3165c 100644
--- a/Templates/Empty/game/tools/worldEditor/scripts/editors/creator.ed.cs
+++ b/Templates/Empty/game/tools/worldEditor/scripts/editors/creator.ed.cs
@@ -82,6 +82,8 @@ function EWCreatorWindow::init( %this )
       %this.registerMissionObject( "SpawnSphere",  "Observer Spawn Sphere", "ObserverDropPoint" );
       %this.registerMissionObject( "SFXSpace",      "Sound Space" );
       %this.registerMissionObject( "OcclusionVolume", "Occlusion Volume" );
+      %this.registerMissionObject("NavMesh", "Navigation mesh");
+      %this.registerMissionObject("NavPath", "Path");
       
    %this.endGroup();
    
diff --git a/Templates/Full/game/art/datablocks/player.cs b/Templates/Full/game/art/datablocks/player.cs
index aceadc610..a942f23de 100644
--- a/Templates/Full/game/art/datablocks/player.cs
+++ b/Templates/Full/game/art/datablocks/player.cs
@@ -385,6 +385,7 @@ datablock ParticleData(LightPuff)
    times[0]      = 0.3;
    times[1]      = 1.0;
    times[2] = 1.0;
+   textureName = "art/particles/dustParticle.png";
 };
 
 datablock ParticleEmitterData(LightPuffEmitter)
diff --git a/Templates/Full/game/art/terrains/materials.cs b/Templates/Full/game/art/terrains/materials.cs
index 7929b03df..e49e568f0 100644
--- a/Templates/Full/game/art/terrains/materials.cs
+++ b/Templates/Full/game/art/terrains/materials.cs
@@ -29,9 +29,12 @@ singleton Material(TerrainFX_grass1)
    mapTo = "grass1";  
    footstepSoundId = 0;  
    terrainMaterials = "1";  
-   ShowDust = "false";  
-   showFootprints = "false"; 
+   ShowDust = "1";  
+   showFootprints = "1"; 
    materialTag0 = "Terrain"; 
+   effectColor[0] = "0.42 0.42 0 1";
+   effectColor[1] = "0.42 0.42 0 1";
+   impactSoundId = "0";
 };  
 
 new TerrainMaterial()
@@ -51,9 +54,10 @@ singleton Material(TerrainFX_grass1dry)
    mapTo = "grass1-dry";  
    footstepSoundId = 0;  
    terrainMaterials = "1";  
-   ShowDust = "false";  
-   showFootprints = "false";
+   ShowDust = "1";  
+   showFootprints = "1";
    materialTag0 = "Terrain";  
+   effectColor[0] = "0.63 0.55 0 1";
 };  
 
 new TerrainMaterial()
@@ -75,9 +79,11 @@ singleton Material(TerrainFX_dirt_grass)
    mapTo = "dirt_grass";  
    footstepSoundId = 0;  
    terrainMaterials = "1";  
-   ShowDust = "false";  
-   showFootprints = "false";
+   ShowDust = "1";  
+   showFootprints = "1";
    materialTag0 = "Terrain";  
+   effectColor[0] = "0.63 0.55 0 1";
+   effectColor[1] = "0.63 0.55 0 0";
 };  
 
 new TerrainMaterial()
@@ -100,11 +106,14 @@ new TerrainMaterial()
 singleton Material(TerrainFX_rocktest)  
 {  
    mapTo = "rocktest";  
-   footstepSoundId = 0;  
+   footstepSoundId = "1";  
    terrainMaterials = "1";  
-   ShowDust = "false";  
-   showFootprints = "false"; 
+   ShowDust = "1";  
+   showFootprints = "1"; 
    materialTag0 = "Terrain"; 
+   impactSoundId = "1";
+   effectColor[0] = "0.25 0.25 0.25 1";
+   effectColor[1] = "0.25 0.25 0.25 0";
 };  
 
 new TerrainMaterial()
@@ -127,11 +136,14 @@ new TerrainMaterial()
 singleton Material(TerrainFX_sand)  
 {  
    mapTo = "sand";  
-   footstepSoundId = 0;  
+   footstepSoundId = "3";  
    terrainMaterials = "1";  
-   ShowDust = "false";  
-   showFootprints = "false"; 
+   ShowDust = "1";  
+   showFootprints = "1"; 
    materialTag0 = "Terrain";  
+   specularPower[0] = "1";
+   effectColor[0] = "0.84 0.71 0.5 1";
+   effectColor[1] = "0.84 0.71 0.5 0.349";
 };  
 
 new TerrainMaterial()
diff --git a/Templates/Full/game/core/art/datablocks/defaultparticle.cs b/Templates/Full/game/core/art/datablocks/defaultparticle.cs
index 017d30648..7f973d525 100644
--- a/Templates/Full/game/core/art/datablocks/defaultparticle.cs
+++ b/Templates/Full/game/core/art/datablocks/defaultparticle.cs
@@ -62,5 +62,5 @@ datablock ParticleEmitterData(DefaultEmitter)
    particles = "DefaultParticle";
    blendStyle = "ADDITIVE";
    softParticles = "0";
-   softnessDistance = "1000";
+   softnessDistance = "1";
 };
diff --git a/Templates/Full/game/core/scripts/client/oculusVR.cs b/Templates/Full/game/core/scripts/client/oculusVR.cs
index 81015b40f..f0035999c 100644
--- a/Templates/Full/game/core/scripts/client/oculusVR.cs
+++ b/Templates/Full/game/core/scripts/client/oculusVR.cs
@@ -63,7 +63,14 @@ function enableOculusVRDisplay(%gameConnection, %trueStereoRendering)
    
    if(%trueStereoRendering)
    {
-      OVRBarrelDistortionPostFX.isEnabled = true;
+      if($pref::OculusVR::UseChromaticAberrationCorrection)
+      {
+         OVRBarrelDistortionChromaPostFX.isEnabled = true;
+      }
+      else
+      {
+         OVRBarrelDistortionPostFX.isEnabled = true;
+      }
    }
    else
    {
@@ -81,6 +88,7 @@ function disableOculusVRDisplay(%gameConnection)
    %gameConnection.clearDisplayDevice();
    PlayGui.renderStyle = "standard";
    OVRBarrelDistortionPostFX.isEnabled = false;
+   OVRBarrelDistortionChromaPostFX.isEnabled = false;
    OVRBarrelDistortionMonoPostFX.isEnabled = false;
 }
 
@@ -112,7 +120,7 @@ function setStandardOculusVRControlScheme(%gameConnection)
 function setVideoModeForOculusVRDisplay(%fullscreen)
 {
    %res = getOVRHMDResolution(0);
-   Canvas.setVideoMode(%res.x, %res.y, %fullscreen, 32, 0);
+   Canvas.setVideoMode(%res.x, %res.y, %fullscreen, 32, 4);
 }
 
 //-----------------------------------------------------------------------------
diff --git a/Templates/Full/game/core/scripts/client/postFx/caustics.cs b/Templates/Full/game/core/scripts/client/postFx/caustics.cs
index e73e112a5..c6a694c51 100644
--- a/Templates/Full/game/core/scripts/client/postFx/caustics.cs
+++ b/Templates/Full/game/core/scripts/client/postFx/caustics.cs
@@ -41,16 +41,11 @@ singleton ShaderData( PFX_CausticsShader )
    //OGLVertexShaderFile  = "shaders/common/postFx/gl//postFxV.glsl";
    //OGLPixelShaderFile   = "shaders/common/postFx/gl/passthruP.glsl";
       
-   samplerNames[0] = "$prepassTex";
-   samplerNames[1] = "$causticsTex1";
-   samplerNames[2] = "$causticsTex2";
-   
    pixVersion = 3.0;
 };
 
 singleton PostEffect( CausticsPFX )
 {
-   requirements = "None";
    isEnabled = false;
    renderTime = "PFXBeforeBin";
    renderBin = "ObjTranslucentBin";      
@@ -62,17 +57,4 @@ singleton PostEffect( CausticsPFX )
    texture[1] = "textures/caustics_1";
    texture[2] = "textures/caustics_2";
    target = "$backBuffer";
-   
 };
-
-// this effects the timing of the animation -
-
-$CausticsPFX::refTime = getSimTime();
-
-function CausticsPFX::setShaderConsts(%this)
-{
-   //echo($Sim::time - %this.timeStart);
-   //echo(%this.timeConst);
-   %this.setShaderConst( "$refTime", $CausticsPFX::refTime ); 
-}
-
diff --git a/Templates/Full/game/core/scripts/client/postFx/fog.cs b/Templates/Full/game/core/scripts/client/postFx/fog.cs
index 44539cb0b..5257db595 100644
--- a/Templates/Full/game/core/scripts/client/postFx/fog.cs
+++ b/Templates/Full/game/core/scripts/client/postFx/fog.cs
@@ -117,3 +117,16 @@ singleton PostEffect( UnderwaterFogPostFx )
    isEnabled = true;
 };
 
+function UnderwaterFogPostFx::onEnabled( %this )
+{
+   TurbulenceFx.enable();
+   CausticsPFX.enable();
+   return true;
+}
+
+function UnderwaterFogPostFx::onDisabled( %this )
+{
+   TurbulenceFx.disable();
+   CausticsPFX.disable();
+   return false;
+}
diff --git a/Templates/Full/game/core/scripts/client/postFx/ovrBarrelDistortion.cs b/Templates/Full/game/core/scripts/client/postFx/ovrBarrelDistortion.cs
index 7978a6bc8..cbd72ec84 100644
--- a/Templates/Full/game/core/scripts/client/postFx/ovrBarrelDistortion.cs
+++ b/Templates/Full/game/core/scripts/client/postFx/ovrBarrelDistortion.cs
@@ -44,6 +44,14 @@ singleton ShaderData( OVRBarrelDistortionShader )
    pixVersion = 2.0;   
 };
 
+singleton ShaderData( OVRBarrelDistortionChromaShader )
+{
+   DXVertexShaderFile 	= "shaders/common/postFx/postFxV.hlsl";
+   DXPixelShaderFile 	= "shaders/common/postFx/oculusvr/barrelDistortionChromaP.hlsl";
+
+   pixVersion = 2.0;   
+};
+
 //-----------------------------------------------------------------------------
 // GFX state blocks
 //-----------------------------------------------------------------------------
@@ -78,6 +86,32 @@ singleton BarrelDistortionPostEffect( OVRBarrelDistortionPostFX )
    scaleOutput = 1.25;
 };
 
+//-----------------------------------------------------------------------------
+// Barrel Distortion with Chromatic Aberration Correction PostFx
+//
+// To be used with the Oculus Rift.
+// Expects a stereo pair to exist on the back buffer and then applies the
+// appropriate barrel distortion.
+// This version applies a chromatic aberration correction during the
+// barrel distortion.
+//-----------------------------------------------------------------------------
+singleton BarrelDistortionPostEffect( OVRBarrelDistortionChromaPostFX )
+{
+   isEnabled = false;
+   allowReflectPass = false;
+   
+   renderTime = "PFXAfterDiffuse";
+   renderPriority = 100;
+
+   // The barrel distortion   
+   shader = OVRBarrelDistortionChromaShader;
+   stateBlock = OVRBarrelDistortionStateBlock;
+   
+   texture[0] = "$backBuffer";
+   
+   scaleOutput = 1.25;
+};
+
 //-----------------------------------------------------------------------------
 // Barrel Distortion Mono PostFx
 //
diff --git a/Templates/Full/game/core/scripts/client/postFx/ssao.cs b/Templates/Full/game/core/scripts/client/postFx/ssao.cs
index 69977f352..cd5bfc598 100644
--- a/Templates/Full/game/core/scripts/client/postFx/ssao.cs
+++ b/Templates/Full/game/core/scripts/client/postFx/ssao.cs
@@ -190,6 +190,7 @@ singleton PostEffect( SSAOPostFx )
    
    target = "$outTex";
    targetScale = "0.5 0.5";
+   targetViewport = "PFXTargetViewport_NamedInTexture0";
    
    singleton PostEffect()
    {
diff --git a/Templates/Full/game/core/scripts/client/postFx/turbulence.cs b/Templates/Full/game/core/scripts/client/postFx/turbulence.cs
index af4d7dc7f..767470e67 100644
--- a/Templates/Full/game/core/scripts/client/postFx/turbulence.cs
+++ b/Templates/Full/game/core/scripts/client/postFx/turbulence.cs
@@ -20,44 +20,34 @@
 // IN THE SOFTWARE.
 //-----------------------------------------------------------------------------
 
+singleton GFXStateBlockData( PFX_TurbulenceStateBlock : PFX_DefaultStateBlock)  
+{  
+   zDefined = false;
+   zEnable = false;  
+   zWriteEnable = false;  
+        
+   samplersDefined = true;  
+   samplerStates[0] = SamplerClampLinear;
+};  
+  
 singleton ShaderData( PFX_TurbulenceShader )
 {   
    DXVertexShaderFile 	= "shaders/common/postFx/postFxV.hlsl";
    DXPixelShaderFile 	= "shaders/common/postFx/turbulenceP.hlsl";
            
-   samplerNames[0] = "$inputTex";
    pixVersion = 3.0;
 };
 
 singleton PostEffect( TurbulenceFx )  
 {  
-   requirements = "None";
    isEnabled = false;    
    allowReflectPass = true;  
          
-   renderTime = "PFXAfterDiffuse";  
-   renderBin = "ObjTranslucentBin";     
+   renderTime = "PFXAfterBin";
+   renderBin = "GlowBin";
+   renderPriority = 0.5; // Render after the glows themselves
      
    shader = PFX_TurbulenceShader;  
-   stateBlock = PFX_myShaderStateBlock;  
-   texture[0] = "$backBuffer";  
-      
-   renderPriority = 0.1;  
+   stateBlock=PFX_TurbulenceStateBlock;
+   texture[0] = "$backBuffer";      
  };
-
-function TurbulenceFx::setShaderConsts(%this)
-{
-   %this.setShaderConst(%this.timeConst, $Sim::time - %this.timeStart); 
-}
-
-function UnderwaterFogPostFx::onEnabled( %this )
-{
-   TurbulenceFx.enable();
-   return true;
-}
-
-function UnderwaterFogPostFx::onDisabled( %this )
-{
-   TurbulenceFx.disable();
-   return false;
-}
\ No newline at end of file
diff --git a/Templates/Full/game/scripts/client/init.cs b/Templates/Full/game/scripts/client/init.cs
index 29a10976b..3f7f2d96a 100644
--- a/Templates/Full/game/scripts/client/init.cs
+++ b/Templates/Full/game/scripts/client/init.cs
@@ -124,7 +124,7 @@ function initClient()
    setDefaultFov( $pref::Player::defaultFov );
    setZoomSpeed( $pref::Player::zoomSpeed );
 
-   if( isFile( "./audioData.cs" ) )
+   if( isScriptFile( expandFilename("./audioData.cs") ) )
       exec( "./audioData.cs" );
 
    // Start up the main menu... this is separated out into a
diff --git a/Templates/Full/game/scripts/server/aiPlayer.cs b/Templates/Full/game/scripts/server/aiPlayer.cs
index 1fd0802bf..b9f5a66a6 100644
--- a/Templates/Full/game/scripts/server/aiPlayer.cs
+++ b/Templates/Full/game/scripts/server/aiPlayer.cs
@@ -140,26 +140,31 @@ function AIPlayer::followPath(%this,%path,%node)
 
 function AIPlayer::moveToNextNode(%this)
 {
-   if (%this.targetNode < 0 || %this.currentNode < %this.targetNode)
-   {
-      if (%this.currentNode < %this.path.getCount() - 1)
-         %this.moveToNode(%this.currentNode + 1);
-      else
-         %this.moveToNode(0);
+   %pathNodeCount=%this.path.getCount();
+   %slowdown=0;
+   // Candidate destination: the node right after the current one.
+   %targetNode=%this.currentNode + 1;
+   // Looping paths wrap around to node 0; other paths clamp to the final node.
+   if (%this.path.isLooping) {
+      %targetNode %= %pathNodeCount;
+   } else {
+      if (%targetNode >= %pathNodeCount-1) {
+         %targetNode=%pathNodeCount-1;
+         // Flag a slowdown only while the final node is still ahead of us.
+         if (%this.currentNode < %targetNode)
+            %slowdown=1;
+      }
    }
-   else
-      if (%this.currentNode == 0)
-         %this.moveToNode(%this.path.getCount() - 1);
-      else
-         %this.moveToNode(%this.currentNode - 1);
+
+   %this.moveToNode(%targetNode, %slowdown);
 }
 
-function AIPlayer::moveToNode(%this,%index)
+function AIPlayer::moveToNode(%this,%index,%slowdown)
 {
    // Move to the given path node index
    %this.currentNode = %index;
    %node = %this.path.getObject(%index);
-   %this.setMoveDestination(%node.getTransform());
+   %this.setMoveDestination(%node.getTransform(),%slowdown);
 }
 
 //-----------------------------------------------------------------------------
diff --git a/Templates/Full/game/scripts/server/shapeBase.cs b/Templates/Full/game/scripts/server/shapeBase.cs
index 4af13dcf5..128278883 100644
--- a/Templates/Full/game/scripts/server/shapeBase.cs
+++ b/Templates/Full/game/scripts/server/shapeBase.cs
@@ -101,7 +101,7 @@ function ShapeBase::clearDamageDt(%this)
 // ShapeBase datablock
 //-----------------------------------------------------------------------------
 
-function ShapeBaseData::damage(%this, %obj, %position, %source, %amount, %damageType)
+function ShapeBaseData::damage(%this, %obj, %source, %position, %amount, %damageType)
 {
    // Ignore damage by default. This empty method is here to
    // avoid console warnings.
diff --git a/Templates/Full/game/shaders/common/lighting/advanced/gl/pointLightP.glsl b/Templates/Full/game/shaders/common/lighting/advanced/gl/pointLightP.glsl
index f38d62fa5..b135f1aa8 100644
--- a/Templates/Full/game/shaders/common/lighting/advanced/gl/pointLightP.glsl
+++ b/Templates/Full/game/shaders/common/lighting/advanced/gl/pointLightP.glsl
@@ -221,7 +221,7 @@ void main()
                                     normal, 
                                     normalize( -eyeRay ), 
                                     constantSpecularPower, 
-                                    lightColor.a * lightBrightness );
+                                    shadowed * atten * lightBrightness );
     
    // N.L * Attenuation
    float Sat_NL_Att = clamp( nDotL * atten * shadowed, 0.0, 1.0 );
diff --git a/Templates/Full/game/shaders/common/lighting/advanced/gl/spotLightP.glsl b/Templates/Full/game/shaders/common/lighting/advanced/gl/spotLightP.glsl
index 1f7e949d9..d29f5edb0 100644
--- a/Templates/Full/game/shaders/common/lighting/advanced/gl/spotLightP.glsl
+++ b/Templates/Full/game/shaders/common/lighting/advanced/gl/spotLightP.glsl
@@ -148,7 +148,7 @@ void main()
                                     normal, 
                                     normalize( -eyeRay ), 
                                     constantSpecularPower, 
-                                    lightColor.a * lightBrightness );
+                                    shadowed * atten * lightBrightness );
     
    // N.L * Attenuation
    float Sat_NL_Att = clamp( nDotL * atten * shadowed, 0.0, 1.0 );
diff --git a/Templates/Full/game/shaders/common/lighting/advanced/gl/vectorLightP.glsl b/Templates/Full/game/shaders/common/lighting/advanced/gl/vectorLightP.glsl
index 579f04ad4..bbd567fd0 100644
--- a/Templates/Full/game/shaders/common/lighting/advanced/gl/vectorLightP.glsl
+++ b/Templates/Full/game/shaders/common/lighting/advanced/gl/vectorLightP.glsl
@@ -196,7 +196,7 @@ void main()
                                     normal, 
                                     normalize(-wsEyeRay), 
                                     constantSpecularPower, 
-                                    lightColor.a * lightBrightness );
+                                    shadowed * lightBrightness );
    
    float Sat_NL_Att = clamp(dotNL, 0.0, 1.0) * shadowed;
    
diff --git a/Templates/Full/game/shaders/common/lighting/advanced/pointLightP.hlsl b/Templates/Full/game/shaders/common/lighting/advanced/pointLightP.hlsl
index 96d00b78d..fbfced097 100644
--- a/Templates/Full/game/shaders/common/lighting/advanced/pointLightP.hlsl
+++ b/Templates/Full/game/shaders/common/lighting/advanced/pointLightP.hlsl
@@ -213,7 +213,7 @@ float4 main(   ConvexConnectP IN,
    // Specular term
    float specular = AL_CalcSpecular(   lightVec, 
                                        normal, 
-                                       normalize( -eyeRay ) ) * lightColor.a;
+                                       normalize( -eyeRay ) ) * lightBrightness * atten * shadowed;
 
    float Sat_NL_Att = saturate( nDotL * atten * shadowed ) * lightBrightness;
    float3 lightColorOut = lightMapParams.rgb * lightColor.rgb;
diff --git a/Templates/Full/game/shaders/common/lighting/advanced/spotLightP.hlsl b/Templates/Full/game/shaders/common/lighting/advanced/spotLightP.hlsl
index 1cf1f13f5..88e35ad3a 100644
--- a/Templates/Full/game/shaders/common/lighting/advanced/spotLightP.hlsl
+++ b/Templates/Full/game/shaders/common/lighting/advanced/spotLightP.hlsl
@@ -141,7 +141,7 @@ float4 main(   ConvexConnectP IN,
    // Specular term
    float specular = AL_CalcSpecular(   -lightToPxlVec, 
                                        normal, 
-                                       normalize( -eyeRay ) ) * lightColor.a;
+                                       normalize( -eyeRay ) ) * lightBrightness * atten * shadowed;
 
    float Sat_NL_Att = saturate( nDotL * atten * shadowed ) * lightBrightness;
    float3 lightColorOut = lightMapParams.rgb * lightColor.rgb;
diff --git a/Templates/Full/game/shaders/common/lighting/advanced/vectorLightP.hlsl b/Templates/Full/game/shaders/common/lighting/advanced/vectorLightP.hlsl
index 4be1bc9bd..5ddb5586b 100644
--- a/Templates/Full/game/shaders/common/lighting/advanced/vectorLightP.hlsl
+++ b/Templates/Full/game/shaders/common/lighting/advanced/vectorLightP.hlsl
@@ -198,7 +198,7 @@ float4 main( FarFrustumQuadConnectP IN,
    // Specular term
    float specular = AL_CalcSpecular(   -lightDirection, 
                                        normal, 
-                                       normalize(-IN.vsEyeRay) ) * lightColor.a;
+                                       normalize(-IN.vsEyeRay) ) * lightBrightness * shadowed;
                                     
    float Sat_NL_Att = saturate( dotNL * shadowed ) * lightBrightness;
    float3 lightColorOut = lightMapParams.rgb * lightColor.rgb;
diff --git a/Templates/Full/game/shaders/common/postFx/caustics/causticsP.hlsl b/Templates/Full/game/shaders/common/postFx/caustics/causticsP.hlsl
index f9242734b..c7635027d 100644
--- a/Templates/Full/game/shaders/common/postFx/caustics/causticsP.hlsl
+++ b/Templates/Full/game/shaders/common/postFx/caustics/causticsP.hlsl
@@ -28,23 +28,33 @@ uniform float4    rtParams0;
 uniform float4    waterFogPlane;
 uniform float     accumTime;
 
+float distanceToPlane(float4 plane, float3 pos)
+{
+   return (plane.x * pos.x + plane.y * pos.y + plane.z * pos.z) + plane.w;
+}
+
 float4 main( PFXVertToPix IN, 
              uniform sampler2D prepassTex :register(S0),
              uniform sampler2D causticsTex0 :register(S1),
-             uniform sampler2D causticsTex1 :register(S2),
-             uniform float2 targetSize : register(C0) ) : COLOR
+             uniform sampler2D causticsTex1 :register(S2) ) : COLOR
 {   
    //Sample the pre-pass
-   float2 prepassCoord = ( IN.uv0.xy * rtParams0.zw ) + rtParams0.xy;  
-   float4 prePass = prepassUncondition( prepassTex, prepassCoord );
+   float4 prePass = prepassUncondition( prepassTex, IN.uv0 );
    
    //Get depth
    float depth = prePass.w;   
-   clip( 0.9999 - depth );
+   if(depth > 0.9999)
+      return float4(0,0,0,0);
    
    //Get world position
    float3 pos = eyePosWorld + IN.wsEyeRay * depth;
    
+   // Check the water depth
+   float waterDepth = -distanceToPlane(waterFogPlane, pos);
+   if(waterDepth < 0)
+      return float4(0,0,0,0);
+   waterDepth = saturate(waterDepth);
+   
    //Use world position X and Y to calculate caustics UV 
    float2 causticsUV0 = (abs(pos.xy * 0.25) % float2(1, 1));
    float2 causticsUV1 = (abs(pos.xy * 0.2) % float2(1, 1));
@@ -59,7 +69,7 @@ float4 main( PFXVertToPix IN,
    caustics *= tex2D(causticsTex1, causticsUV1);
    
    //Use normal Z to modulate caustics  
-   float waterDepth = 1 - saturate(pos.z + waterFogPlane.w + 1);
+   //float waterDepth = 1 - saturate(pos.z + waterFogPlane.w + 1);
    caustics *= saturate(prePass.z) * pow(1-depth, 64) * waterDepth; 
       
    return caustics;   
diff --git a/Templates/Full/game/shaders/common/postFx/oculusvr/barrelDistortionChromaP.hlsl b/Templates/Full/game/shaders/common/postFx/oculusvr/barrelDistortionChromaP.hlsl
new file mode 100644
index 000000000..726fb0afc
--- /dev/null
+++ b/Templates/Full/game/shaders/common/postFx/oculusvr/barrelDistortionChromaP.hlsl
@@ -0,0 +1,95 @@
+//-----------------------------------------------------------------------------
+// Copyright (c) 2012 GarageGames, LLC
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to
+// deal in the Software without restriction, including without limitation the
+// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+// sell copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+// IN THE SOFTWARE.
+//-----------------------------------------------------------------------------
+
+#include "shadergen:/autogenConditioners.h"  
+#include "../postFx.hlsl"  
+#include "../../torque.hlsl"
+
+uniform sampler2D backBuffer : register(S0);
+
+uniform float3 LensCenter;    // x=Left X, y=Right X, z=Y
+uniform float2 ScreenCenter;
+uniform float2 Scale;
+uniform float2 ScaleIn;
+uniform float4 HmdWarpParam;
+uniform float4 HmdChromaAbParam; // Chromatic aberration correction
+
+float4 main( PFXVertToPix IN ) : COLOR0  
+{
+   float2 texCoord;
+   float xOffset;
+   float2 lensCenter;
+   lensCenter.y = LensCenter.z;
+   if(IN.uv0.x < 0.5)
+   {
+      texCoord.x = IN.uv0.x;
+      texCoord.y = IN.uv0.y;
+      xOffset = 0.0;
+      lensCenter.x = LensCenter.x;
+   }
+   else
+   {
+      texCoord.x = IN.uv0.x - 0.5;
+      texCoord.y = IN.uv0.y;
+      xOffset = 0.5;
+      lensCenter.x = LensCenter.y;
+   }
+   
+   // Scales input texture coordinates for distortion.
+   // ScaleIn maps texture coordinates to the range [-1, 1], although top/bottom will be
+   // larger due to aspect ratio.
+   float2 theta = (texCoord - lensCenter) * ScaleIn; // Scales to [-1, 1]
+   float rSq = theta.x * theta.x + theta.y * theta.y;
+   float2 theta1 = theta * (HmdWarpParam.x + HmdWarpParam.y * rSq + HmdWarpParam.z * rSq * rSq + HmdWarpParam.w * rSq * rSq * rSq);
+
+   // Detect whether blue texture coordinates are out of range
+   // since these will be scaled out the furthest.
+   float2 thetaBlue = theta1 * (HmdChromaAbParam.z + HmdChromaAbParam.w * rSq);
+   float2 tcBlue = lensCenter + Scale * thetaBlue;
+   
+   float4 color;
+   if (any(clamp(tcBlue, ScreenCenter-float2(0.25,0.5), ScreenCenter+float2(0.25, 0.5)) - tcBlue))
+   {
+      color = float4(0,0,0,0);
+   }
+   else
+   {
+      // Now do blue texture lookup.
+      tcBlue.x += xOffset;
+      float blue = tex2D(backBuffer, tcBlue).b;
+
+      // Do green lookup (no scaling).
+      float2 tcGreen = lensCenter + Scale * theta1;
+      tcGreen.x += xOffset;
+      float green = tex2D(backBuffer, tcGreen).g;
+
+      // Do red scale and lookup.
+      float2 thetaRed = theta1 * (HmdChromaAbParam.x + HmdChromaAbParam.y * rSq);
+      float2 tcRed = lensCenter + Scale * thetaRed;
+      tcRed.x += xOffset;
+      float red = tex2D(backBuffer, tcRed).r;
+
+      color = float4(red, green, blue, 1);
+   }
+
+   return color;    
+}
diff --git a/Templates/Full/game/shaders/common/postFx/turbulenceP.hlsl b/Templates/Full/game/shaders/common/postFx/turbulenceP.hlsl
index a8f79cfee..0fb38ea07 100644
--- a/Templates/Full/game/shaders/common/postFx/turbulenceP.hlsl
+++ b/Templates/Full/game/shaders/common/postFx/turbulenceP.hlsl
@@ -23,23 +23,20 @@
 #include "./postFx.hlsl"
 
 uniform float  accumTime;
+uniform float2 projectionOffset;
+uniform float4 targetViewport;
 
 float4 main( PFXVertToPix IN, uniform sampler2D inputTex : register(S0) ) : COLOR
 {
-	float reduction = 128;	
-	float power = 1.0;
-	float speed = 3.0;
-	float frequency=8;
+	float speed = 2.0;
+	float distortion = 6.0;
 	
-	float backbuffer_edge_coef=0.98;
-	float2 screen_center = float2(0.5, 0.5);	
-	float2 cPos = (IN.uv0 - screen_center);
+	float y = IN.uv0.y + (cos((IN.uv0.y+projectionOffset.y) * distortion + accumTime * speed) * 0.01);
+   float x = IN.uv0.x + (sin((IN.uv0.x+projectionOffset.x) * distortion + accumTime * speed) * 0.01);
+
+   // Clamp the calculated uv values to be within the target's viewport
+	y = clamp(y, targetViewport.y, targetViewport.w);
+	x = clamp(x, targetViewport.x, targetViewport.z);
 	
-	float len = 1.0 - length(cPos);		
-	float2 uv = clamp((cPos / len * cos(len * frequency - (accumTime * speed)) * (power / reduction)), 0, 1);
-	return tex2D(inputTex, IN.uv0 * backbuffer_edge_coef + uv);
-
-//    float4 color = tex2D(inputTex, IN.uv0 * backbuffer_edge_coef+(sin*right));           
-//	return color;
-
-}
\ No newline at end of file
+    return tex2D (inputTex, float2(x, y));
+}
diff --git a/Templates/Full/game/shaders/common/postFx/underwaterFogP.hlsl b/Templates/Full/game/shaders/common/postFx/underwaterFogP.hlsl
index a0aa90ecd..c01467e29 100644
--- a/Templates/Full/game/shaders/common/postFx/underwaterFogP.hlsl
+++ b/Templates/Full/game/shaders/common/postFx/underwaterFogP.hlsl
@@ -132,7 +132,7 @@ float4 main( PFXVertToPix IN ) : COLOR
    inColor.rgb *= 1.0 - saturate( abs( planeDist ) / WET_DEPTH ) * WET_DARKENING;
    //return float4( inColor, 1 );
    
-   float3 outColor = lerp( inColor, fogColor, fogAmt );
+   float3 outColor = lerp( inColor, fogColor.rgb, fogAmt );
    
    return float4( hdrEncode( outColor ), 1 );        
 }
\ No newline at end of file
diff --git a/Templates/Full/game/tools/classIcons/NavMesh.png b/Templates/Full/game/tools/classIcons/NavMesh.png
new file mode 100644
index 000000000..056d3c3ac
Binary files /dev/null and b/Templates/Full/game/tools/classIcons/NavMesh.png differ
diff --git a/Templates/Full/game/tools/classIcons/NavPath.png b/Templates/Full/game/tools/classIcons/NavPath.png
new file mode 100644
index 000000000..35b8372ae
Binary files /dev/null and b/Templates/Full/game/tools/classIcons/NavPath.png differ
diff --git a/Templates/Full/game/tools/materialEditor/gui/guiMaterialPropertiesWindow.ed.gui b/Templates/Full/game/tools/materialEditor/gui/guiMaterialPropertiesWindow.ed.gui
index c6153eadf..90298419b 100644
--- a/Templates/Full/game/tools/materialEditor/gui/guiMaterialPropertiesWindow.ed.gui
+++ b/Templates/Full/game/tools/materialEditor/gui/guiMaterialPropertiesWindow.ed.gui
@@ -1522,7 +1522,7 @@
                         profile = "ToolsGuiTransparentProfile";
                         isContainer = "1";
                         position = "0 0";
-                        Extent = "185 22";
+                        Extent = "185 44";
                         HorizSizing = "width";
                         
                         new GuiCheckBoxCtrl() {
@@ -1568,6 +1568,14 @@
                            useMouseEvents = "0";
                         };
                         
+                        new GuiTextCtrl() {
+                           HorizSizing = "right";
+                           VertSizing = "bottom";
+                           position = "9 26";
+                           Extent = "72 16";
+                           text = "Spec strength";
+                        };
+
                         new GuiControl() {
                            class = "AggregateControl";
                            position = "91 4";
@@ -1589,7 +1597,7 @@
                               Command = "MaterialEditorGui.updateActiveMaterial(\"specularPower[\" @ MaterialEditorGui.currentLayer @ \"]\", mCeil($ThisControl.getValue()), true, true);";
                               AltCommand = "$ThisControl.getParent().updateFromChild($ThisControl); MaterialEditorGui.updateActiveMaterial(\"specularPower[\" @ MaterialEditorGui.currentLayer @ \"]\", mCeil($ThisControl.getValue()), true, false);";
                               tooltipprofile = "ToolsGuiDefaultProfile";
-                              ToolTip = "Sets the strength of the Pixel Specular value.";
+                              ToolTip = "Sets the hardness of the Pixel Specular value.";
                               hovertime = "1000";
                               range = "1 128";
                               ticks = "0";
@@ -1618,6 +1626,57 @@
                               maxLength = "3";
                            };
                         };
+
+                        new GuiControl() {
+                           class = "AggregateControl";
+                           position = "91 26";
+                           Extent = "96 20";
+
+                           new GuiSliderCtrl() {
+                              canSaveDynamicFields = "0";
+                              internalName = "specularStrengthSlider";
+                              Enabled = "1";
+                              isContainer = "0";
+                              Profile = "ToolsGuiSliderProfile";
+                              HorizSizing = "right";
+                              VertSizing = "bottom";
+                              position = "0 1";
+                              Extent = "61 14";
+                              MinExtent = "8 2";
+                              canSave = "1";
+                              Visible = "1";
+                              Command = "MaterialEditorGui.updateActiveMaterial(\"specularStrength[\" @ MaterialEditorGui.currentLayer @ \"]\", $ThisControl.getValue(), true, true);";
+                              AltCommand = "$ThisControl.getParent().updateFromChild($ThisControl); MaterialEditorGui.updateActiveMaterial(\"specularStrength[\" @ MaterialEditorGui.currentLayer @ \"]\", $ThisControl.getValue(), true, false);";
+                              tooltipprofile = "ToolsGuiDefaultProfile";
+                              ToolTip = "Sets the strength of the Pixel Specular value.";
+                              hovertime = "1000";
+                              range = "0 5";
+                              ticks = "0";
+                              value = "1";
+                           };
+                           new GuiTextEditCtrl() {
+                              canSaveDynamicFields = "0";
+                              internalName = "specularStrengthTextEdit";
+                              Enabled = "1";
+                              isContainer = "0";
+                              Profile = "ToolsGuiTextEditProfile";
+                              HorizSizing = "right";
+                              VertSizing = "bottom";
+                              position = "64 0";
+                              Extent = "29 18";
+                              MinExtent = "8 2";
+                              canSave = "1";
+                              Visible = "1";
+                              Command = "$ThisControl.getParent().updateFromChild($ThisControl); MaterialEditorGui.updateActiveMaterial(\"specularStrength[\" @ MaterialEditorGui.currentLayer @ \"]\", $ThisControl.getValue());";
+                              hovertime = "1000";
+                              AnchorTop = "1";
+                              AnchorBottom = "0";
+                              AnchorLeft = "1";
+                              AnchorRight = "0";
+                              text = "1";
+                              maxLength = "3";
+                           };
+                        };
                      };
                      new GuiContainer(){ // glow emissive
                         profile = "ToolsGuiTransparentProfile";
diff --git a/Templates/Full/game/tools/materialEditor/scripts/materialEditor.ed.cs b/Templates/Full/game/tools/materialEditor/scripts/materialEditor.ed.cs
index 169fae5e4..ec1020fa4 100644
--- a/Templates/Full/game/tools/materialEditor/scripts/materialEditor.ed.cs
+++ b/Templates/Full/game/tools/materialEditor/scripts/materialEditor.ed.cs
@@ -894,6 +894,8 @@ function MaterialEditorGui::guiSync( %this, %material )
    
    MaterialEditorPropertiesWindow-->specularPowerTextEdit.setText((%material).specularPower[%layer]);
    MaterialEditorPropertiesWindow-->specularPowerSlider.setValue((%material).specularPower[%layer]);
+   MaterialEditorPropertiesWindow-->specularStrengthTextEdit.setText((%material).specularStrength[%layer]);
+   MaterialEditorPropertiesWindow-->specularStrengthSlider.setValue((%material).specularStrength[%layer]);
    MaterialEditorPropertiesWindow-->pixelSpecularCheckbox.setValue((%material).pixelSpecular[%layer]);
    MaterialEditorPropertiesWindow-->glowCheckbox.setValue((%material).glow[%layer]);
    MaterialEditorPropertiesWindow-->emissiveCheckbox.setValue((%material).emissive[%layer]);
diff --git a/Templates/Full/game/tools/particleEditor/ParticleEditor.ed.gui b/Templates/Full/game/tools/particleEditor/ParticleEditor.ed.gui
index cbc17ca61..865334dbd 100644
--- a/Templates/Full/game/tools/particleEditor/ParticleEditor.ed.gui
+++ b/Templates/Full/game/tools/particleEditor/ParticleEditor.ed.gui
@@ -1069,7 +1069,53 @@ $PE_guielement_ext_colorpicker = "18 18";
                            position = $PE_guielement_pos_value;
                            Extent = $PE_guielement_ext_value;
                            altCommand = "$ThisControl.getParent().updateFromChild($ThisControl); PE_EmitterEditor.updateEmitter( \"ejectionOffset\", $ThisControl.getText());";
-                        };             
+                        };
+                     };
+                     new GuiControl(){ // Emitter Offset Variance
+                        class = "AggregateControl";
+                        isContainer = "1";
+                        HorizSizing = "width";
+                        VertSizing = "bottom";
+                        Position = $PE_guielement_pos_single_container ;
+                        Extent = $PE_guielement_ext_single_container ;
+                        
+                        new GuiTextCtrl() {
+                           Profile = "ToolsGuiTextProfile";
+                           HorizSizing = "width";
+                           VertSizing = "bottom";
+                           position = $PE_guielement_pos_name;
+                           Extent = $PE_guielement_ext_name;
+                           text = "OffsetVariance";
+                        };
+                        new GuiSliderCtrl(PEE_ejectionOffsetVariance) {
+                           internalName = "PEE_ejectionOffsetVariance_slider";
+                           canSaveDynamicFields = "0";
+                           Enabled = "1";
+                           isContainer = "0";
+                           Profile = "ToolsGuiSliderProfile";
+                           HorizSizing = "left";
+                           VertSizing = "bottom";
+                           position = $PE_guielement_pos_slider;
+                           Extent = $PE_guielement_ext_slider;
+                           MinExtent = "8 2";
+                           canSave = "1";
+                           Visible = "1";
+                           Command = "PE_EmitterEditor.updateEmitter( \"ejectionOffsetVariance\", $ThisControl.getValue(), true, true );";
+                           altCommand = "$ThisControl.getParent().updateFromChild($ThisControl); PE_EmitterEditor.updateEmitter( \"ejectionOffsetVariance\", $ThisControl.getValue(), true, false );";
+                           hovertime = "1000";
+                           range = "0 25";
+                           ticks = "0";
+                           value = "0";
+                        };
+                        new GuiTextEditCtrl() {
+                           internalName = "PEE_ejectionOffsetVariance_textEdit";
+                           Profile = "ToolsGuiTextEditProfile";
+                           HorizSizing = "left";
+                           VertSizing = "bottom";
+                           position = $PE_guielement_pos_value;
+                           Extent = $PE_guielement_ext_value;
+                           altCommand = "$ThisControl.getParent().updateFromChild($ThisControl); PE_EmitterEditor.updateEmitter( \"ejectionOffsetVariance\", $ThisControl.getText());";
+                        };               
                      };
                   }; // end stack
                }; // end "spread" rollout
diff --git a/Templates/Full/game/tools/worldEditor/gui/EditorGui.ed.gui b/Templates/Full/game/tools/worldEditor/gui/EditorGui.ed.gui
index 484d65fe9..445bacf63 100644
--- a/Templates/Full/game/tools/worldEditor/gui/EditorGui.ed.gui
+++ b/Templates/Full/game/tools/worldEditor/gui/EditorGui.ed.gui
@@ -531,7 +531,7 @@
       selectionHidden = "1";
       renderVertexSelection = "1";
       processUsesBrush = "0";
-      maxBrushSize = "40 40";
+      maxBrushSize = "256 256";
       adjustHeightVal = "10";
       setHeightVal = "100";
       scaleVal = "1";
diff --git a/Templates/Full/game/tools/worldEditor/gui/TerrainEditToolbar.ed.gui b/Templates/Full/game/tools/worldEditor/gui/TerrainEditToolbar.ed.gui
index 34e880497..cc9d6983b 100644
--- a/Templates/Full/game/tools/worldEditor/gui/TerrainEditToolbar.ed.gui
+++ b/Templates/Full/game/tools/worldEditor/gui/TerrainEditToolbar.ed.gui
@@ -517,7 +517,7 @@ new GuiMouseEventCtrl(TerrainBrushSizeSliderCtrlContainer,EditorGuiGroup) {
       canSave = "1";
       Visible = "1";
       AltCommand = "TerrainBrushSizeTextEditContainer-->textEdit.setValue(mCeil($ThisControl.getValue())); ETerrainEditor.setBrushSize( $ThisControl.value );";
-      range = "1 40";
+      range = "1 256";
       ticks = "0";
       value = "0";
    };
diff --git a/Templates/Full/game/tools/worldEditor/gui/TerrainPainterToolbar.ed.gui b/Templates/Full/game/tools/worldEditor/gui/TerrainPainterToolbar.ed.gui
index 3f98f8b60..8cdba481d 100644
--- a/Templates/Full/game/tools/worldEditor/gui/TerrainPainterToolbar.ed.gui
+++ b/Templates/Full/game/tools/worldEditor/gui/TerrainPainterToolbar.ed.gui
@@ -494,7 +494,7 @@ new GuiMouseEventCtrl(PaintBrushSizeSliderCtrlContainer,EditorGuiGroup) {
       canSave = "1";
       Visible = "1";
       AltCommand = "PaintBrushSizeTextEditContainer-->textEdit.setValue(mFloatLength( ($ThisControl.getValue()), 2 )); ETerrainEditor.setBrushSize( $ThisControl.value );";
-      range = "1 40";
+      range = "1 256";
       ticks = "0";
       value = "0";
    };
diff --git a/Templates/Full/game/tools/worldEditor/scripts/editors/creator.ed.cs b/Templates/Full/game/tools/worldEditor/scripts/editors/creator.ed.cs
index 18493ef4a..2e0a3165c 100644
--- a/Templates/Full/game/tools/worldEditor/scripts/editors/creator.ed.cs
+++ b/Templates/Full/game/tools/worldEditor/scripts/editors/creator.ed.cs
@@ -82,6 +82,8 @@ function EWCreatorWindow::init( %this )
       %this.registerMissionObject( "SpawnSphere",  "Observer Spawn Sphere", "ObserverDropPoint" );
       %this.registerMissionObject( "SFXSpace",      "Sound Space" );
       %this.registerMissionObject( "OcclusionVolume", "Occlusion Volume" );
+      %this.registerMissionObject( "NavMesh", "Navigation mesh" );
+      %this.registerMissionObject( "NavPath", "Path" );
       
    %this.endGroup();
    
diff --git a/Tools/projectGenerator/libs/librecast.conf b/Tools/projectGenerator/libs/librecast.conf
new file mode 100644
index 000000000..bb0aa66f7
--- /dev/null
+++ b/Tools/projectGenerator/libs/librecast.conf
@@ -0,0 +1,42 @@
+<?php
+//-----------------------------------------------------------------------------
+// Copyright (c) 2013 GarageGames, LLC
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to
+// deal in the Software without restriction, including without limitation the
+// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+// sell copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+// IN THE SOFTWARE.
+//-----------------------------------------------------------------------------
+
+beginLibConfig( 'librecast', '{F2C0209B-1B90-4F73-816A-A0920FF8B107}' );
+
+   // Source
+   addSrcDir( Generator::getLibSrcDir() . 'recast/DebugUtils/Source', true );
+   addSrcDir( Generator::getLibSrcDir() . 'recast/Recast/Source', true );
+   addSrcDir( Generator::getLibSrcDir() . 'recast/Detour/Source', true );
+   addSrcDir( Generator::getLibSrcDir() . 'recast/DetourCrowd/Source', true );
+   addSrcDir( Generator::getLibSrcDir() . 'recast/DetourTileCache/Source', true );
+
+   // Additional includes
+   addLibIncludePath( 'recast/DebugUtils/Include' );
+   addLibIncludePath( 'recast/Recast/Include' );
+   addLibIncludePath( 'recast/Detour/Include' );
+   addLibIncludePath( 'recast/DetourTileCache/Include' );
+   addLibIncludePath( 'recast/DetourCrowd/Include' );
+
+endLibConfig();
+
+?>
diff --git a/Tools/projectGenerator/modules/navigation.inc b/Tools/projectGenerator/modules/navigation.inc
new file mode 100644
index 000000000..8118253f6
--- /dev/null
+++ b/Tools/projectGenerator/modules/navigation.inc
@@ -0,0 +1,44 @@
+<?php
+//-----------------------------------------------------------------------------
+// Copyright (c) 2013 GarageGames, LLC
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to
+// deal in the Software without restriction, including without limitation the
+// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+// sell copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+// IN THE SOFTWARE.
+//-----------------------------------------------------------------------------
+
+beginModule( 'navigation' );
+
+   addProjectDefine( 'TORQUE_NAVIGATION_ENABLED' );
+   addSrcDir( getEngineSrcDir() . 'navigation', true );
+
+   includeLib( 'librecast' );
+   addLibIncludePath( 'recast/DebugUtils/Include' );
+   addLibIncludePath( 'recast/Recast/Include' );
+   addLibIncludePath( 'recast/Detour/Include' );
+   addLibIncludePath( 'recast/DetourTileCache/Include' );
+   addLibIncludePath( 'recast/DetourCrowd/Include' );
+
+   if (inProjectConfig())
+   {
+      addProjectDependency( 'librecast' );
+      addSolutionProjectRef( 'librecast' );
+   }
+
+endModule();
+
+?>