fix linux compile

Added compiler flags for the different levels of SIMD availability.
The Linux build complained without them (as usual).
This commit is contained in:
marauder2k7 2026-02-25 18:05:39 +00:00
parent a59ba751eb
commit 8908bbe462
3 changed files with 70 additions and 3 deletions

View file

@ -58,6 +58,51 @@ else()
endif()
endif()
# ---------------------------
# Linux / GCC / Clang SIMD flags
# ---------------------------
if(NOT MSVC)
  # Probe the x86 SIMD levels from lowest to highest. For every level the
  # compiler accepts, check_simd_flag() adds the switch to the build and
  # defines TORQUE_HAVE_<ISA>.
  # NOTE(review): the switches are added with add_compile_options(), i.e. for
  # the whole directory rather than per source file. Presumably that lets the
  # compiler use the highest accepted ISA anywhere in the build; confirm this
  # matches the CPUs the binaries must run on.
  foreach(torque_simd_isa IN ITEMS SSE2 SSE3 SSSE3 SSE4_1 SSE4_2 AVX AVX2 AVX512F)
    # The GCC/Clang switch is "-m" + the lower-cased ISA name, with '_'
    # written as '.' (SSE4_1 -> -msse4.1).
    string(TOLOWER "${torque_simd_isa}" torque_simd_switch)
    string(REPLACE "_" "." torque_simd_switch "${torque_simd_switch}")
    check_simd_flag("${torque_simd_isa}" "-m${torque_simd_switch}")
  endforeach()
  unset(torque_simd_switch)
endif()
# ---------------------------
# Windows / MSVC SIMD detection
# ---------------------------
if(MSVC)
  # cl.exe honours a single /arch level; when several are passed the last one
  # wins and the earlier ones only produce "overriding" warnings. Track the
  # highest level selected below and add it exactly once at the end.
  set(torque_msvc_arch "")

  # Baseline SSE2 (guaranteed)
  add_definitions(-DTORQUE_HAVE_SSE2)
  if(CMAKE_SIZEOF_VOID_P EQUAL 4)
    # /arch:SSE2 is an x86 (32-bit) switch. 64-bit targets already use SSE2 as
    # their baseline, and x64 toolsets may report the switch as unknown.
    set(torque_msvc_arch "/arch:SSE2")
  endif()

  # SSE4.1 detection: check for <smmintrin.h> availability
  check_include_file("smmintrin.h" HAVE_SSE4_1_HEADER)
  if(HAVE_SSE4_1_HEADER)
    message(STATUS "MSVC: SSE4.1 header available")
    add_definitions(-DTORQUE_HAVE_SSE4_1)
  else()
    message(STATUS "MSVC: SSE4.1 header NOT available")
  endif()

  # AVX: check for <immintrin.h> availability
  check_include_file("immintrin.h" HAVE_AVX_HEADER)
  if(HAVE_AVX_HEADER)
    message(STATUS "MSVC: AVX header available")
    add_definitions(-DTORQUE_HAVE_AVX)
    set(torque_msvc_arch "/arch:AVX")
    # MSVC_VERSION 1800 is Visual Studio 2013; assume AVX2 support from that
    # release onwards.
    if(MSVC_VERSION GREATER_EQUAL 1800)
      add_definitions(-DTORQUE_HAVE_AVX2)
      set(torque_msvc_arch "/arch:AVX2")
    endif()
  endif()

  # NOTE(review): the /arch switch is applied to every target in this
  # directory, so the compiler may assume that instruction set everywhere.
  # Confirm this is intended if the engine selects its ISA at run time.
  if(NOT "${torque_msvc_arch}" STREQUAL "")
    add_compile_options("${torque_msvc_arch}")
  endif()
  unset(torque_msvc_arch)
endif()
# Provide a default TorqueScript file extension when none has been configured.
if(NOT TORQUE_SCRIPT_EXTENSION)
  set(TORQUE_SCRIPT_EXTENSION
      "tscript"
      CACHE STRING
      "The default script extension to use for TorqueScript files")
endif()

View file

@ -196,12 +196,14 @@ float math_backend::float4::dot(const float* a, const float* b)
// ---- SSE4.1 : dedicated dot instruction ----
case ISA::AVX2:
case ISA::AVX:
case ISA::SSE41: {
case ISA::SSE41:
#ifdef TORQUE_HAVE_SSE4_1 // Linux macro required in case sse4 does not exist.
{
__m128 va = _mm_loadu_ps(a);
__m128 vb = _mm_loadu_ps(b);
return _mm_cvtss_f32(_mm_dp_ps(va, vb, 0xFF));
}
#endif
// ---- SSE2 fallback (no horizontal ops available) ----
case ISA::SSE2: {
__m128 va = _mm_loadu_ps(a);

View file

@ -133,4 +133,24 @@ macro(addFramework framework)
find_library(_${framework}_FRAMEWORK_PATH ${framework} PATHS /System/Library/Frameworks /Library/Frameworks)
set(TORQUE_LINK_FRAMEWORKS ${TORQUE_LINK_FRAMEWORKS} "${_${framework}_FRAMEWORK_PATH}")
endif()
endmacro()
endmacro()
include(CheckCXXCompilerFlag)
include(CheckIncludeFile)
# check_simd_flag(<FLAG_NAME> <GCC_FLAG>)
#
# Tests whether the C++ compiler accepts the GCC/Clang-style switch <GCC_FLAG>.
# On success the switch is added with add_compile_options() and
# TORQUE_HAVE_<FLAG_NAME upper-cased> is defined with add_definitions(); both
# apply to the directory the call is made from. The probe result is stored in
# the cache variable COMPILER_SUPPORTS_<FLAG_NAME upper-cased>.
# Under MSVC the probe is skipped and only a status message is printed.
#
# Arguments:
#   FLAG_NAME - ISA label used in messages and in the define, e.g. "SSE4_1".
#   GCC_FLAG  - compiler switch to probe, e.g. "-msse4.1".
#
# Example:
#   check_simd_flag("AVX2" "-mavx2")
#
# Implemented as a function (not a macro) so its temporaries do not leak into,
# or overwrite variables in, the caller's scope.
function(check_simd_flag FLAG_NAME GCC_FLAG)
  if(MSVC)
    message(STATUS "MSVC: skipping GCC-style flag ${GCC_FLAG}")
    return()
  endif()

  string(TOUPPER "${FLAG_NAME}" flag_upper)
  set(support_var "COMPILER_SUPPORTS_${flag_upper}")

  check_cxx_compiler_flag("${GCC_FLAG}" ${support_var})
  if(${support_var})
    message(STATUS "Compiler supports ${FLAG_NAME}: ${GCC_FLAG}")
    add_compile_options("${GCC_FLAG}")
    add_definitions(-DTORQUE_HAVE_${flag_upper})
  else()
    message(STATUS "Compiler does NOT support ${FLAG_NAME}: ${GCC_FLAG}")
  endif()
endfunction()