diff --git a/CMakeLists.txt b/CMakeLists.txt
index 07a14c6fc..9f23458a3 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -58,6 +58,65 @@ else()
     endif()
 endif()
 
+# ---------------------------
+# Linux / GCC / Clang SIMD flags
+# ---------------------------
+# NOTE(review): every flag the compiler accepts is added to the whole build, so
+# the compiler may emit those instructions anywhere, not only in the
+# runtime-dispatched math paths. Compiler support for a flag says nothing about
+# the CPU the binary will run on - confirm this is intended.
+if(NOT MSVC)
+    check_simd_flag("SSE2" "-msse2")
+    check_simd_flag("SSE3" "-msse3")
+    check_simd_flag("SSSE3" "-mssse3")
+    check_simd_flag("SSE4_1" "-msse4.1")
+    check_simd_flag("SSE4_2" "-msse4.2")
+    check_simd_flag("AVX" "-mavx")
+    check_simd_flag("AVX2" "-mavx2")
+    check_simd_flag("AVX512F" "-mavx512f")
+endif()
+
+# ---------------------------
+# Windows / MSVC SIMD detection
+# ---------------------------
+if(MSVC)
+    # Baseline SSE2 (guaranteed). On 64-bit targets SSE2 is always available and
+    # some MSVC versions warn that /arch:SSE2 is an unknown option, so the switch
+    # is only requested for 32-bit builds.
+    set(torque_msvc_arch "")
+    if(CMAKE_SIZEOF_VOID_P EQUAL 4)
+        set(torque_msvc_arch "SSE2")
+    endif()
+    add_definitions(-DTORQUE_HAVE_SSE2)
+
+    # SSE4.1 detection: check for availability
+    check_include_file("smmintrin.h" HAVE_SSE4_1_HEADER)
+    if(HAVE_SSE4_1_HEADER)
+        message(STATUS "MSVC: SSE4.1 header available")
+        add_definitions(-DTORQUE_HAVE_SSE4_1)
+    else()
+        message(STATUS "MSVC: SSE4.1 header NOT available")
+    endif()
+
+    # AVX
+    check_include_file("immintrin.h" HAVE_AVX_HEADER)
+    if(HAVE_AVX_HEADER)
+        message(STATUS "MSVC: AVX header available")
+        add_definitions(-DTORQUE_HAVE_AVX)
+        set(torque_msvc_arch "AVX")
+        if(MSVC_VERSION GREATER_EQUAL 1800) # assume avx2 support if post VS2013
+            add_definitions(-DTORQUE_HAVE_AVX2)
+            set(torque_msvc_arch "AVX2")
+        endif()
+    endif()
+
+    # Request only the highest selected level: stacking several /arch switches
+    # makes MSVC warn that one option overrides another.
+    if(NOT "${torque_msvc_arch}" STREQUAL "")
+        add_compile_options("/arch:${torque_msvc_arch}")
+    endif()
+    unset(torque_msvc_arch)
+endif()
+
 if(NOT TORQUE_SCRIPT_EXTENSION)
     set(TORQUE_SCRIPT_EXTENSION "tscript" CACHE STRING "The default script extension to use for TorqueScript files")
 endif()
diff --git a/Engine/source/math/mMath_CPU.cpp b/Engine/source/math/mMath_CPU.cpp
index 4bd4034e6..a3391c911 100644
--- a/Engine/source/math/mMath_CPU.cpp
+++ b/Engine/source/math/mMath_CPU.cpp
@@ -196,12 +196,14 @@ float math_backend::float4::dot(const float* a, const float* b)
     // ---- SSE4.1 : dedicated dot instruction ----
     case ISA::AVX2:
     case ISA::AVX:
-    case ISA::SSE41: {
+    case ISA::SSE41:
+#ifdef TORQUE_HAVE_SSE4_1 // Set by the build when SSE4.1 is enabled; otherwise these cases fall through to SSE2.
+    {
         __m128 va = _mm_loadu_ps(a);
         __m128 vb = _mm_loadu_ps(b);
         return _mm_cvtss_f32(_mm_dp_ps(va, vb, 0xFF));
     }
-
+#endif
     // ---- SSE2 fallback (no horizontal ops available) ----
     case ISA::SSE2: {
         __m128 va = _mm_loadu_ps(a);
diff --git a/Tools/CMake/torque_macros.cmake b/Tools/CMake/torque_macros.cmake
index 48bb6896e..6ed586f3c 100644
--- a/Tools/CMake/torque_macros.cmake
+++ b/Tools/CMake/torque_macros.cmake
@@ -133,4 +133,33 @@ macro(addFramework framework)
     find_library(_${framework}_FRAMEWORK_PATH ${framework} PATHS /System/Library/Frameworks /Library/Frameworks)
     set(TORQUE_LINK_FRAMEWORKS ${TORQUE_LINK_FRAMEWORKS} "${_${framework}_FRAMEWORK_PATH}")
   endif()
-endmacro()
\ No newline at end of file
+endmacro()
+
+include(CheckCXXCompilerFlag)
+include(CheckIncludeFile)
+
+# check_simd_flag(<FLAG_NAME> <GCC_FLAG>)
+#
+# Probes whether the C++ compiler accepts the GCC/Clang-style option <GCC_FLAG>.
+# On success the option is added via add_compile_options() and
+# TORQUE_HAVE_<FLAG_NAME> (upper-cased) is defined via add_definitions().
+# Under MSVC nothing is probed; only a status message is printed.
+#
+# Example: check_simd_flag("SSE4_1" "-msse4.1")
+function(check_simd_flag FLAG_NAME GCC_FLAG)
+    string(TOUPPER "${FLAG_NAME}" flag_upper)
+    if(MSVC)
+        message(STATUS "MSVC: skipping GCC-style flag ${GCC_FLAG}")
+        return()
+    endif()
+
+    # The probe result is stored under this name, so keep the spelling stable.
+    check_cxx_compiler_flag("${GCC_FLAG}" COMPILER_SUPPORTS_${flag_upper})
+    if(COMPILER_SUPPORTS_${flag_upper})
+        message(STATUS "Compiler supports ${FLAG_NAME}: ${GCC_FLAG}")
+        add_compile_options("${GCC_FLAG}")
+        add_definitions(-DTORQUE_HAVE_${flag_upper})
+    else()
+        message(STATUS "Compiler does NOT support ${FLAG_NAME}: ${GCC_FLAG}")
+    endif()
+endfunction()