; Torque3D/Engine/source/math/mMath_ASM.asm
;
; 240 lines
; 5 KiB
; NASM
; Raw Normal View History
;
; 2012-09-19 15:15:01 +00:00
;-----------------------------------------------------------------------------
; Copyright (c) 2012 GarageGames, LLC
;
; Permission is hereby granted, free of charge, to any person obtaining a copy
; of this software and associated documentation files (the "Software"), to
; deal in the Software without restriction, including without limitation the
; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
; sell copies of the Software, and to permit persons to whom the Software is
; furnished to do so, subject to the following conditions:
;
; The above copyright notice and this permission notice shall be included in
; all copies or substantial portions of the Software.
;
; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
; IN THE SOFTWARE.
;-----------------------------------------------------------------------------
;
; NASM version of optimized funcs in mMath_C
;
; The following funcs are included:
; m_ceil_ASM, m_ceilD_ASM, m_floor_ASM, m_floorD_ASM
; m_fmod_ASM, m_fmodD_ASM, m_mulDivS32_ASM, m_mulDivU32_ASM
; m_sincos_ASM, m_sincosD_ASM
; The other funcs from mMath_C were determined to compile into fast
; code using MSVC --Paul Bowman
section .data

; Scratch slots and floating-point constants shared by the routines below.
temp_int64:     dq 0.0                  ; 8-byte slot written by fistp / read by fild
const_0pt5_D:   dq 0.4999999999995      ; just under one half, double precision (rnd_adjD)
temp_int32:     dd 0                    ; 4-byte slot; no reader in the visible part of the file
const_0pt5:     dd 0.49999995           ; just under one half, single precision (rnd_adj)
const_neg1:     dd -1.0                 ; multiplier the fmod routines use to flip the sign
segment .text

;
; export_fn <name>
;
; Declares <name> as a global symbol and drops its label at the current
; position. ELF output uses the bare name; every other output format gets
; a leading underscore on the symbol.
;
; Both "elf" and "elf32" are recognised: NASM reports the format name that
; was passed to -f, so matching only "elf" would give "-f elf32" builds
; underscore-prefixed symbols.
;
%macro export_fn 1
%ifidn __OUTPUT_FORMAT__, elf
    ; No underscore needed for ELF object files
    global %1
%1:
%elifidn __OUTPUT_FORMAT__, elf32
    ; Same rule when the format is spelled out as elf32
    global %1
%1:
%else
    global _%1
_%1:
%endif
%endmacro
; Operand aliases shared by the rounding routines.
%define rnd_adjD qword [const_0pt5_D]
%define rnd_adj  dword [const_0pt5]
%define val      dword [esp+4]
%define val64    qword [esp+4]

;
; static F32 m_ceil_ASM(F32 val)
;
; Adds the just-under-0.5 constant to val and converts the sum to an
; integer and back, so the result is whatever the FPU's current rounding
; mode produces for (val + ~0.5).
; NOTE(review): this only behaves as a ceiling if the FPU is in
; round-to-nearest mode - confirm with the callers.
;
; In:    [esp+4] = val
; Out:   st0     = rounded value
; Clobb: flags
;
; The integer is bounced through 8 bytes of stack rather than a shared
; global, so concurrent or reentrant callers cannot overwrite each other's
; intermediate value.
;
export_fn m_ceil_ASM
        fld     val                     ; read the argument before esp moves
        fadd    rnd_adj
        sub     esp, 8                  ; private 8-byte scratch slot
        fistp   qword [esp]             ; store as 64-bit integer, pop
        fild    qword [esp]             ; reload the integer as the return value
        add     esp, 8
        ret
;
; static F64 m_ceilD_ASM(F64 val64)
;
; Double-precision counterpart of m_ceil_ASM: adds the just-under-0.5
; double constant and round-trips the sum through a 64-bit integer.
; NOTE(review): only a true ceiling if the FPU is in round-to-nearest
; mode - confirm with the callers.
;
; In:    [esp+4] = val64 (8 bytes)
; Out:   st0     = rounded value
; Clobb: flags
;
; Uses stack scratch instead of a shared global so the routine is safe to
; call from several threads at once.
;
export_fn m_ceilD_ASM
        fld     val64                   ; read the argument before esp moves
        fadd    rnd_adjD
        sub     esp, 8                  ; private 8-byte scratch slot
        fistp   qword [esp]
        fild    qword [esp]
        add     esp, 8
        ret
;
; static F32 m_floor_ASM(F32 val)
;
; Subtracts the just-under-0.5 constant from val and converts the
; difference to an integer and back.
; NOTE(review): only a true floor if the FPU is in round-to-nearest mode -
; confirm with the callers.
;
; In:    [esp+4] = val
; Out:   st0     = rounded value
; Clobb: flags
;
; Uses stack scratch instead of a shared global so the routine is safe to
; call from several threads at once.
;
export_fn m_floor_ASM
        fld     val                     ; read the argument before esp moves
        fsub    rnd_adj
        sub     esp, 8                  ; private 8-byte scratch slot
        fistp   qword [esp]
        fild    qword [esp]
        add     esp, 8
        ret
;
; static F32 m_floorD_ASM( F64 val64 )
;
; NOTE(review): the return type is written as F32 here, while the sibling
; m_ceilD_ASM is documented as returning F64 - confirm against the C
; declaration. The value is returned in st0 either way.
;
; Double-precision counterpart of m_floor_ASM: subtracts the just-under-0.5
; double constant and round-trips the difference through a 64-bit integer.
;
; In:    [esp+4] = val64 (8 bytes)
; Out:   st0     = rounded value
; Clobb: flags
;
; Uses stack scratch instead of a shared global so the routine is safe to
; call from several threads at once.
;
export_fn m_floorD_ASM
        fld     val64                   ; read the argument before esp moves
        fsub    rnd_adjD
        sub     esp, 8                  ; private 8-byte scratch slot
        fistp   qword [esp]
        fild    qword [esp]
        add     esp, 8
        ret
; Stack slots of the three 32-bit integer arguments.
%define arg_a dword [esp+4]
%define arg_b dword [esp+8]
%define arg_c dword [esp+12]

;
; static S32 m_mulDivS32_ASM( S32 a, S32 b, S32 c )
;
; Computes a * b / c with a full 64-bit intermediate product:
;     return (S32) ((S64)a*(S64)b) / (S64)c;
;
; Note: the results differ from (but are more correct than) the C version,
; which appears to overflow. With a, b and c all equal to 1 million the C
; code returns -727, whereas this routine returns 1 million.
;
; In:    [esp+4] = a, [esp+8] = b, [esp+12] = c
; Out:   eax = quotient
; Clobb: edx (holds the remainder), flags
; A zero divisor, or a quotient that does not fit in 32 bits, raises the
; CPU divide error; no check is made here.
;
export_fn m_mulDivS32_ASM
        mov     eax, arg_b
        imul    arg_a                   ; edx:eax = a * b (signed, 64-bit)
        idiv    arg_c                   ; eax = edx:eax / c
        ret
;
; static U32 m_mulDivU32_ASM( U32 a, U32 b, U32 c )
;
; Unsigned variant of m_mulDivS32_ASM: a * b / c with a 64-bit
; intermediate product.
;
; Note: as with the signed routine, the C version overflows where this
; one does not.
;
; In:    [esp+4] = a, [esp+8] = b, [esp+12] = c
; Out:   eax = quotient
; Clobb: edx (holds the remainder), flags
; A zero divisor, or a quotient that does not fit in 32 bits, raises the
; CPU divide error; no check is made here.
;
export_fn m_mulDivU32_ASM
        mov     eax, arg_b
        mul     arg_a                   ; edx:eax = a * b (unsigned, 64-bit)
        div     arg_c                   ; eax = edx:eax / c
        ret
; val is already defined above to be esp+4
%define modulo dword [esp+8]

;
; static F32 m_fmod_ASM(F32 val, F32 modulo)
;
; Computes |modulo| * frac(|val| / |modulo|) and then gives the result the
; sign of val. The integer part of the quotient is obtained by subtracting
; the just-under-0.5 constant and round-tripping through a 64-bit integer.
; NOTE(review): that step only truncates correctly if the FPU is in
; round-to-nearest mode - confirm with the callers.
;
; In:    [esp+4] = val, [esp+8] = modulo
; Out:   st0 = result
; Clobb: eax, flags
;
; The integer round-trip uses 8 bytes of stack rather than a shared global,
; so the routine is safe to call from several threads at once.
;
export_fn m_fmod_ASM
        mov     eax, val                ; raw bits of val, kept for the sign test
        fld     modulo
        fabs                            ; st0 = |modulo|
        fld     val
        fabs                            ; st0 = |val|, st1 = |modulo|
        fdiv    st0, st1                ; st0 = q = |val| / |modulo|
        fld     st0                     ; keep a second copy of q
        fsub    rnd_adj
        sub     esp, 8                  ; every argument has been read; esp may move
        fistp   qword [esp]
        fild    qword [esp]             ; st0 = whole part, st1 = q, st2 = |modulo|
        add     esp, 8
        fsubp   st1, st0                ; st0 = q - whole part
        fmulp   st1, st0                ; st0 = |modulo| * fraction
        ; // sign bit can be read as integer high bit,
        ; // as long as # isn't 0x80000000
        cmp     eax, 0x80000000
        jbe     .notneg                 ; unsigned <=: sign bit clear, or val is -0.0
        fmul    dword [const_neg1]
.notneg:
        ret
; Stack slots of the two double arguments; val64hi is the upper dword of
; val64, which carries its sign bit.
%define val64hi  dword [esp+8]
%define val64    qword [esp+4]
%define modulo64 qword [esp+12]

;
; static F32 m_fmodD_ASM(F64 val, F64 modulo)
;
; NOTE(review): the return type is written as F32 here although both
; arguments are F64 - confirm against the C declaration. The value is
; returned in st0 either way.
;
; Double-precision counterpart of m_fmod_ASM: computes
; |modulo| * frac(|val| / |modulo|) and gives the result the sign of val.
; NOTE(review): the integer-part step only truncates correctly if the FPU
; is in round-to-nearest mode - confirm with the callers.
;
; In:    [esp+4] = val (8 bytes), [esp+12] = modulo (8 bytes)
; Out:   st0 = result
; Clobb: eax, flags
;
; The integer round-trip uses 8 bytes of stack rather than a shared global,
; so the routine is safe to call from several threads at once.
;
export_fn m_fmodD_ASM
        mov     eax, val64hi            ; upper dword of val, kept for the sign test
        fld     modulo64
        fabs                            ; st0 = |modulo|
        fld     val64
        fabs                            ; st0 = |val|, st1 = |modulo|
        fdiv    st0, st1                ; st0 = q = |val| / |modulo|
        fld     st0                     ; keep a second copy of q
        fsub    rnd_adjD
        sub     esp, 8                  ; every argument has been read; esp may move
        fistp   qword [esp]
        fild    qword [esp]             ; st0 = whole part, st1 = q, st2 = |modulo|
        add     esp, 8
        fsubp   st1, st0                ; st0 = q - whole part
        fmulp   st1, st0                ; st0 = |modulo| * fraction
        ; Test the sign bit itself. Comparing the upper dword against
        ; 0x80000000 would misread negative subnormals (upper dword exactly
        ; 0x80000000, lower dword non-zero) as positive. A val of -0.0 now
        ; yields -0.0.
        test    eax, eax
        jns     .notneg
        fmul    dword [const_neg1]
.notneg:
        ret
; Stack slots of the single-precision sincos arguments.
%define angle   dword [esp+4]
%define res_sin dword [esp+8]
%define res_cos dword [esp+12]

;
; static void m_sincos_ASM( F32 angle, F32 *s, F32 *c )
;
; Evaluates sine and cosine of angle with one fsincos and stores them
; through the two pointers as 32-bit floats.
;
; In:    [esp+4] = angle, [esp+8] = s, [esp+12] = c
; Out:   *c = cosine, *s = sine (written in that order)
; Clobb: eax
;
export_fn m_sincos_ASM
        fld     angle
        fsincos                         ; st0 = cosine, st1 = sine
        mov     eax, res_cos
        fstp    dword [eax]             ; *c = cosine, pop
        mov     eax, res_sin
        fstp    dword [eax]             ; *s = sine, pop
        ret
; Stack slots of the double-precision sincos arguments; the 8-byte angle
; pushes the two pointers to +12 and +16.
%define angle64   qword [esp+4]
%define res_sin64 dword [esp+12]
%define res_cos64 dword [esp+16]

;
; static void m_sincosD_ASM( F64 angle, F64 *s, F64 *c )
;
; Evaluates sine and cosine of angle with one fsincos and stores them
; through the two pointers as 64-bit doubles.
;
; In:    [esp+4] = angle (8 bytes), [esp+12] = s, [esp+16] = c
; Out:   *c = cosine, *s = sine (written in that order)
; Clobb: eax
;
export_fn m_sincosD_ASM
        fld     angle64
        fsincos                         ; st0 = cosine, st1 = sine
        mov     eax, res_cos64
        fstp    qword [eax]             ; *c = cosine, pop
        mov     eax, res_sin64
        fstp    qword [eax]             ; *s = sine, pop
        ret