; Torque3D/Engine/source/math/mMath_ASM.asm

;-----------------------------------------------------------------------------
; Copyright (c) 2012 GarageGames, LLC
;
; Permission is hereby granted, free of charge, to any person obtaining a copy
; of this software and associated documentation files (the "Software"), to
; deal in the Software without restriction, including without limitation the
; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
; sell copies of the Software, and to permit persons to whom the Software is
; furnished to do so, subject to the following conditions:
;
; The above copyright notice and this permission notice shall be included in
; all copies or substantial portions of the Software.
;
; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
; IN THE SOFTWARE.
;-----------------------------------------------------------------------------

;
; NASM version of optimized funcs in mMath_C
;

; The following funcs are included:
;  m_ceil_ASM, m_ceilD_ASM, m_floor_ASM, m_floorD_ASM
;  m_fmod_ASM, m_fmodD_ASM, m_mulDivS32_ASM, m_mulDivU32_ASM
;  m_sincos_ASM, m_sincosD_ASM

; The other funcs from mMath_C were determined to compile into fast
;  code using MSVC --Paul Bowman


segment .data


; 64-bit scratch slot available for fistp/fild round-trips.  It is a single
; shared location, so any routine that uses it is not safe to run
; concurrently from several threads.
temp_int64			dq		0.0
; Rounding bias just below 0.5 (double precision); referenced as rnd_adjD.
const_0pt5_D		dq		0.4999999999995
; 32-bit scratch slot.  NOTE(review): not referenced by any routine visible
; in this file - confirm before removing.
temp_int32			dd		0
; Rounding bias just below 0.5 (single precision); referenced as rnd_adj.
const_0pt5			dd		0.49999995
; -1.0f, multiplied in by the fmod routines to negate their result.
const_neg1			dd		-1.0


segment .text

;
; export_fn <name>
;
; Declares <name> as a global symbol and emits its label at the current
; position.  ELF object files use the bare name; every other output format
; gets a leading underscore.
;
; __OUTPUT_FORMAT__ holds the format name selected with -f, so both spellings
; of 32-bit ELF ("elf" and "elf32") have to be matched.  Without the elf32
; case a build using "-f elf32" would fall through to the underscore branch
; and export the wrong symbol names.
;
%macro export_fn 1
   %ifidn __OUTPUT_FORMAT__, elf
   ; No underscore needed for ELF object files
   global %1
   %1:
   %elifidn __OUTPUT_FORMAT__, elf32
   ; Same as above: "elf32" is the explicit name of the 32-bit ELF format
   global %1
   %1:
   %else
   global _%1
   _%1:
   %endif
%endmacro

; Memory operands for the "just below 0.5" rounding biases in the data
; segment: rnd_adjD is the double-precision one, rnd_adj the single-precision
; one.
%define rnd_adjD	qword [const_0pt5_D]
%define rnd_adj		dword [const_0pt5]


; First stack argument as seen on entry to a routine (the return address sits
; at [esp]): val reads it as a 32-bit value, val64 as a 64-bit value.  These
; are only valid while esp still has its entry value.
%define val		dword [esp+4]
%define val64	qword [esp+4]
;
; static F32 m_ceil_ASM(F32 val)
;
; Approximates ceil(val): adds a bias just below 0.5 and converts to a 64-bit
; integer and back, so the FPU's rounding does the work.  The result is left
; in st0.
;
; NOTE(review): assumes the x87 control word selects round-to-nearest;
; confirm callers never change the rounding mode.
;
; The integer round-trip goes through a stack-local slot rather than a shared
; global, so the routine can be called from several threads at once.
;
export_fn m_ceil_ASM
    fld     val                     ; read the argument before esp moves
    fadd    rnd_adj                 ; st0 = val + (almost 0.5)
    sub     esp, 8                  ; private 8-byte scratch slot
    fistp   qword [esp]             ; round to integer, pop
    fild    qword [esp]             ; st0 = rounded value as a float
    add     esp, 8                  ; release the scratch slot
    ret

;
; static F64 m_ceilD_ASM(F64 val64)
;
; Double-precision variant of the ceil approximation: adds a bias just below
; 0.5 and converts to a 64-bit integer and back.  The result is left in st0.
;
; NOTE(review): assumes the x87 control word selects round-to-nearest;
; confirm callers never change the rounding mode.
;
; The integer round-trip goes through a stack-local slot rather than a shared
; global, so the routine can be called from several threads at once.
;
export_fn m_ceilD_ASM
    fld     val64                   ; read the argument before esp moves
    fadd    rnd_adjD                ; st0 = val64 + (almost 0.5)
    sub     esp, 8                  ; private 8-byte scratch slot
    fistp   qword [esp]             ; round to integer, pop
    fild    qword [esp]             ; st0 = rounded value as a float
    add     esp, 8                  ; release the scratch slot
    ret

;
; static F32 m_floor_ASM(F32 val)
;
; Approximates floor(val): subtracts a bias just below 0.5 and converts to a
; 64-bit integer and back, so the FPU's rounding does the work.  The result
; is left in st0.
;
; NOTE(review): assumes the x87 control word selects round-to-nearest;
; confirm callers never change the rounding mode.
;
; The integer round-trip goes through a stack-local slot rather than a shared
; global, so the routine can be called from several threads at once.
;
export_fn m_floor_ASM
    fld     val                     ; read the argument before esp moves
    fsub    rnd_adj                 ; st0 = val - (almost 0.5)
    sub     esp, 8                  ; private 8-byte scratch slot
    fistp   qword [esp]             ; round to integer, pop
    fild    qword [esp]             ; st0 = rounded value as a float
    add     esp, 8                  ; release the scratch slot
    ret


;
; static F32 m_floorD_ASM( F64 val64 )
;
; Double-precision-input variant of the floor approximation: subtracts a bias
; just below 0.5 and converts to a 64-bit integer and back.  The result is
; left in st0.
;
; NOTE(review): assumes the x87 control word selects round-to-nearest;
; confirm callers never change the rounding mode.
;
; The integer round-trip goes through a stack-local slot rather than a shared
; global, so the routine can be called from several threads at once.
;
export_fn m_floorD_ASM
    fld     val64                   ; read the argument before esp moves
    fsub    rnd_adjD                ; st0 = val64 - (almost 0.5)
    sub     esp, 8                  ; private 8-byte scratch slot
    fistp   qword [esp]             ; round to integer, pop
    fild    qword [esp]             ; st0 = rounded value as a float
    add     esp, 8                  ; release the scratch slot
    ret


; Three 32-bit stack arguments as seen on entry (return address at [esp]).
%define arg_a       dword [esp+4]
%define arg_b       dword [esp+8]
%define arg_c       dword [esp+12]

;
; static S32 m_mulDivS32_ASM( S32 a, S32 b, S32 c )
;
; Signed a * b / c.  The product is formed as a full 64-bit value in edx:eax
; and then divided by c; the quotient is returned in eax.
;
; C equivalent:
;     return (S32) ((S64)a*(S64)b) / (S64)c;
;
; Note: the result can differ from (and is more correct than) the C version,
; which apparently overflows - with a, b and c all equal to 1 million the C
; code returns -727, while this routine returns 1 million.
;
export_fn m_mulDivS32_ASM
    mov     eax, arg_b
    imul    arg_a                   ; edx:eax = b * a (signed, 64-bit)
    idiv    arg_c                   ; eax = edx:eax / c
    ret

;
; static U32 m_mulDivU32_ASM( U32 a, U32 b, U32 c )
;
; Unsigned a * b / c.  The product is formed as a full 64-bit value in
; edx:eax and then divided by c; the quotient is returned in eax.
;
; Note: as with the signed variant, the C version overflows where this one
; does not.
;
export_fn m_mulDivU32_ASM
    mov     eax, arg_b
    mul     arg_a                   ; edx:eax = b * a (unsigned, 64-bit)
    div     arg_c                   ; eax = edx:eax / c
    ret


; val is already defined above to be esp+4
%define		modulo	dword [esp+8]


;
; static F32 m_fmod_ASM(F32 val, F32 modulo)
;
; Floating-point remainder: computes q = |val| / |modulo|, takes the
; fractional part of q and scales it back by |modulo|.  The result is negated
; when val is negative, and is left in st0.
;
; The whole part of q is obtained by subtracting a bias just below 0.5 and
; rounding to an integer.
; NOTE(review): assumes the x87 control word selects round-to-nearest.
;
; The integer round-trip goes through a stack-local slot rather than a shared
; global, so the routine can be called from several threads at once.
;
export_fn m_fmod_ASM
    mov     eax, val                ; raw bits of val, kept for the sign test
    fld     modulo
    fabs                            ; st0 = |modulo|
    fld     val
    fabs                            ; st0 = |val|, st1 = |modulo|
    fdiv    st0, st1                ; st0 = q
    fld     st0                     ; duplicate q
    fsub	rnd_adj                 ; st0 = q - (almost 0.5)
    sub     esp, 8                  ; all stack arguments have been read
    fistp   qword [esp]             ; round to integer, pop
    fild    qword [esp]             ; st0 = whole part of q, st1 = q
    add     esp, 8                  ; release the scratch slot
    fsubp   st1, st0                ; st0 = q - whole part
    fmulp   st1, st0                ; st0 = fraction * |modulo|

;    // sign bit can be read as integer high bit, 
;    //  as long as # isn't 0x80000000
    cmp     eax, 0x80000000
    jbe     notneg                  ; non-negative (or -0.0): keep the sign

    fmul    dword [const_neg1]      ; val was negative: negate the result

notneg:
	ret


; val64hi is the upper 32 bits of the first 64-bit argument (holds the sign
; bit); modulo64 is the second 64-bit argument.
%define val64hi		dword [esp+8]
%define val64		qword [esp+4]
%define modulo64	qword [esp+12]

;
; static F32 m_fmodD_ASM(F64 val, F64 modulo)
;
; Floating-point remainder for double-precision inputs: computes
; q = |val| / |modulo|, takes the fractional part of q and scales it back by
; |modulo|.  The result is negated when val is negative, and is left in st0.
;
; The whole part of q is obtained by subtracting a bias just below 0.5 and
; rounding to an integer.
; NOTE(review): assumes the x87 control word selects round-to-nearest.
; NOTE(review): the sign test looks only at the upper 32 bits of val, so a
; negative value whose upper half is exactly 0x80000000 (e.g. a negative
; denormal) is treated as non-negative - confirm this is acceptable.
;
; The integer round-trip goes through a stack-local slot rather than a shared
; global, so the routine can be called from several threads at once.
;
export_fn m_fmodD_ASM
    mov     eax, val64hi            ; upper half of val, kept for the sign test
    fld     modulo64
    fabs                            ; st0 = |modulo|
    fld     val64
    fabs                            ; st0 = |val|, st1 = |modulo|
    fdiv    st0, st1                ; st0 = q
    fld     st0                     ; duplicate q
    fsub	rnd_adjD                ; st0 = q - (almost 0.5)
    sub     esp, 8                  ; all stack arguments have been read
    fistp   qword [esp]             ; round to integer, pop
    fild    qword [esp]             ; st0 = whole part of q, st1 = q
    add     esp, 8                  ; release the scratch slot
    fsubp   st1, st0                ; st0 = q - whole part
    fmulp   st1, st0                ; st0 = fraction * |modulo|

;    // sign bit can be read as integer high bit, 
;    //  as long as # isn't 0x80000000
    cmp     eax, 0x80000000
    jbe     notnegD                 ; treated as non-negative: keep the sign

    fmul    dword [const_neg1]      ; val was negative: negate the result

notnegD:
	ret

	 
; Stack arguments on entry: the angle, then the two output pointers.
%define angle       dword [esp+4]
%define res_sin     dword [esp+8]
%define res_cos     dword [esp+12]

;
; static void m_sincos_ASM( F32 angle, F32 *s, F32 *c )
;
; Computes the sine and cosine of angle with one fsincos instruction and
; stores them as 32-bit floats through s and c.  The cosine is written
; first, then the sine.
;
export_fn m_sincos_ASM
    fld     angle
    fsincos                         ; st0 = cos(angle), st1 = sin(angle)
    mov     eax, res_cos
    fstp    dword [eax]             ; *c = cos, pop
    mov     eax, res_sin
    fstp    dword [eax]             ; *s = sin, pop
    ret


; Stack arguments on entry: the 64-bit angle, then the two output pointers.
%define angle64     qword [esp+4]
%define res_sin64   dword [esp+12]
%define res_cos64   dword [esp+16]
;
; static void m_sincosD_ASM( F64 angle, F64 *s, F64 *c )
;
; Computes the sine and cosine of angle with one fsincos instruction and
; stores them as 64-bit floats through s and c.  The cosine is written
; first, then the sine.
;
export_fn m_sincosD_ASM
    fld     angle64
    fsincos                         ; st0 = cos(angle), st1 = sin(angle)
    mov     eax, res_cos64
    fstp    qword [eax]             ; *c = cos, pop
    mov     eax, res_sin64
    fstp    qword [eax]             ; *s = sin, pop
    ret
Engine directory for ticket #1 2012-09-19 15:15:01 +00:00			`;-----------------------------------------------------------------------------`
			`; Copyright (c) 2012 GarageGames, LLC`
			`;`
			`; Permission is hereby granted, free of charge, to any person obtaining a copy`
			`; of this software and associated documentation files (the "Software"), to`
			`; deal in the Software without restriction, including without limitation the`
			`; rights to use, copy, modify, merge, publish, distribute, sublicense, and/or`
			`; sell copies of the Software, and to permit persons to whom the Software is`
			`; furnished to do so, subject to the following conditions:`
			`;`
			`; The above copyright notice and this permission notice shall be included in`
			`; all copies or substantial portions of the Software.`
			`;`
			`; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR`
			`; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,`
			`; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE`
			`; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER`
			`; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING`
			`; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS`
			`; IN THE SOFTWARE.`
			`;-----------------------------------------------------------------------------`

			`;`
			`; NASM version of optimized funcs in mMath_C`
			`;`

			`; The following funcs are included:`
			`; m_ceil_ASM, m_ceilD_ASM, m_floor_ASM, m_floorD_ASM`
			`; m_fmod_ASM, m_fmodD_ASM, m_mulDivS32_ASM, m_mulDivU32_ASM`
			`; m_sincos_ASM, m_sincosD_ASM`

			`; The other funcs from mMath_C were determined to compile into fast`
			`; code using MSVC --Paul Bowman`


			`segment .data`


			`temp_int64 dq 0.0`
			`const_0pt5_D dq 0.4999999999995`
			`temp_int32 dd 0`
			`const_0pt5 dd 0.49999995`
			`const_neg1 dd -1.0`


			`segment .text`

			`%macro export_fn 1`
			`%ifidn __OUTPUT_FORMAT__, elf`
			`; No underscore needed for ELF object files`
			`global %1`
			`%1:`
			`%else`
			`global _%1`
			`_%1:`
			`%endif`
			`%endmacro`

			`%define rnd_adjD qword [const_0pt5_D]`
			`%define rnd_adj dword [const_0pt5]`


			`%define val dword [esp+4]`
			`%define val64 qword [esp+4]`
			`;`
			`; static F32 m_ceil_ASM(F32 val)`
			`;`
			`export_fn m_ceil_ASM`
			`fld val`
			`fadd rnd_adj`
			`fistp qword [temp_int64]`
			`fild qword [temp_int64]`
			`ret`

			`;`
			`; static F64 m_ceilD_ASM(F64 val64)`
			`;`
			`export_fn m_ceilD_ASM`
			`fld val64`
			`fadd rnd_adjD`
			`fistp qword [temp_int64]`
			`fild qword [temp_int64]`
			`ret`

			`;`
			`; static F32 m_floor_ASM(F32 val)`
			`;`
			`export_fn m_floor_ASM`
			`fld val`
			`fsub rnd_adj`
			`fistp qword [temp_int64]`
			`fild qword [temp_int64]`
			`ret`


			`;`
			`; static F32 m_floorD_ASM( F64 val64 )`
			`;`
			`export_fn m_floorD_ASM`
			`fld val64`
			`fsub rnd_adjD`
			`fistp qword [temp_int64]`
			`fild qword [temp_int64]`
			`ret`



			`%define arg_a dword [esp+4]`
			`%define arg_b dword [esp+8]`
			`%define arg_c dword [esp+12]`

			`;`
			`; static S32 m_mulDivS32_ASM( S32 a, S32 b, S32 c )`
			`;`
			`; // Note: this returns different (but correct) values than the C`
			`; // version. C code must be overflowing...returns -727`
			`; // if a b and c are 1 million, for instance. This version returns`
			`; // 1 million.`
			`; return (S32) ((S64)a*(S64)b) / (S64)c;`
			`;`
			`export_fn m_mulDivS32_ASM`
			`mov eax, arg_a`
			`imul arg_b`
			`idiv arg_c`
			`ret`

			`;`
			`; static U32 m_mulDivU32_ASM( U32 a, U32 b, U32 c )`
			`;`
			`; // Note: again, C version overflows`
			`;`
			`export_fn m_mulDivU32_ASM`
			`mov eax, arg_a`
			`mul arg_b`
			`div arg_c`
			`ret`



			`; val is already defined above to be esp+4`
			`%define modulo dword [esp+8]`


			`;`
			`; static F32 m_fmod_ASM(F32 val, F32 modulo)`
			`;`
			`export_fn m_fmod_ASM`
			`mov eax, val`
			`fld modulo`
			`fabs`
			`fld val`
			`fabs`
			`fdiv st0, st1`
			`fld st0`
			`fsub rnd_adj`
			`fistp qword [temp_int64]`
			`fild qword [temp_int64]`
			`fsubp st1, st0`
			`fmulp st1, st0`

			`; // sign bit can be read as integer high bit,`
			`; // as long as # isn't 0x80000000`
			`cmp eax, 0x80000000`
			`jbe notneg`

			`fmul dword [const_neg1]`

			`notneg:`
			`ret`


			`%define val64hi dword [esp+8]`
			`%define val64 qword [esp+4]`
			`%define modulo64 qword [esp+12]`

			`;`
			`; static F32 m_fmodD_ASM(F64 val, F64 modulo)`
			`;`
			`export_fn m_fmodD_ASM`
			`mov eax, val64hi`
			`fld modulo64`
			`fabs`
			`fld val64`
			`fabs`
			`fdiv st0, st1`
			`fld st0`
			`fsub rnd_adjD`
			`fistp qword [temp_int64]`
			`fild qword [temp_int64]`
			`fsubp st1, st0`
			`fmulp st1, st0`

			`; // sign bit can be read as integer high bit,`
			`; // as long as # isn't 0x80000000`
			`cmp eax, 0x80000000`
			`jbe notnegD`

			`fmul dword [const_neg1]`

			`notnegD:`
			`ret`



			`%define angle dword [esp+4]`
			`%define res_sin dword [esp+8]`
			`%define res_cos dword [esp+12]`

			`;`
			`;static void m_sincos_ASM( F32 angle, F32 s, F32 c )`
			`;`
			`export_fn m_sincos_ASM`
			`mov eax, res_cos`
			`fld angle`
			`fsincos`
			`fstp dword [eax]`
			`mov eax, res_sin`
			`fstp dword [eax]`
			`ret`



			`%define angle64 qword [esp+4]`
			`%define res_sin64 dword [esp+12]`
			`%define res_cos64 dword [esp+16]`
			`;`
			`;static void m_sincosD_ASM( F64 angle, F64 s, F64 c )`
			`;`
			`export_fn m_sincosD_ASM`
			`mov eax, res_cos64`
			`fld angle64`
			`fsincos`
			`fstp qword [eax]`
			`mov eax, res_sin64`
			`fstp qword [eax]`
			`ret`