mirror of
https://github.com/tribes2/engine.git
synced 2026-03-03 20:40:26 +00:00
t2 engine svn checkout
This commit is contained in:
commit
ff569bd2ae
988 changed files with 394180 additions and 0 deletions
387
platformLinux/mSolver_ASM.asm
Normal file
387
platformLinux/mSolver_ASM.asm
Normal file
|
|
@ -0,0 +1,387 @@
|
|||
;
|
||||
; ASM implementations of mSolver code
|
||||
;
|
||||
|
||||
; externs
|
||||
extern mSolveCubic_c__FffffPf
|
||||
|
||||
; data
|
||||
segment .data
|
||||
|
||||
; float constants
|
||||
f_quarter dd 0.25
|
||||
f_half dd 0.5
|
||||
f_n_half dd -0.5
|
||||
f_two dd 2.0
|
||||
f_n_three dd -3.0
|
||||
f_four dd 4.0
|
||||
f_n_four dd -4.0
|
||||
f_n_six dd -6.0
|
||||
f_eight dd 8.0
|
||||
|
||||
; integer constants
|
||||
i_n_one dd 1
|
||||
|
||||
; variables
|
||||
i dd 0
|
||||
nreal dd 0
|
||||
w dd 0
|
||||
b0 dd 0
|
||||
b1 dd 0
|
||||
b2 dd 0
|
||||
d0 dd 0
|
||||
d1 dd 0
|
||||
h dd 0
|
||||
t dd 0
|
||||
z dd 0
|
||||
px dd 0
|
||||
cubeA dd 0
|
||||
cubeB dd 0
|
||||
cubeC dd 0
|
||||
cubeD dd 0
|
||||
|
||||
p dd 0
|
||||
q dd 0
|
||||
dis dd 0
|
||||
phi dd 0
|
||||
|
||||
segment .text
|
||||
|
||||
; functions
|
||||
|
||||
;
|
||||
; U32 mSolveQuartic_ASM(F32 A, F32 B, F32 C, F32 D, F32 E, F32* x)
|
||||
;
|
||||
|
||||
%define A [ebp+8]
|
||||
%define B [ebp+12]
|
||||
%define C [ebp+16]
|
||||
%define D [ebp+20]
|
||||
%define E [ebp+24]
|
||||
%define x [ebp+28]
|
||||
|
||||
global mSolveQuartic_ASM
|
||||
|
||||
mSolveQuartic_ASM:
|
||||
|
||||
push ebp
|
||||
mov ebp, esp
|
||||
|
||||
; if (A==0.0)
|
||||
; return mSolveCubic(B, C, D, E, x);
|
||||
fld dword A
|
||||
fldz
|
||||
fcompp
|
||||
fnstsw ax
|
||||
and ah, 68
|
||||
xor ah, 64
|
||||
jne .L1
|
||||
|
||||
push dword x
|
||||
push dword E
|
||||
push dword D
|
||||
push dword C
|
||||
push dword B
|
||||
call mSolveCubic_c__FffffPf
|
||||
add esp, byte 20
|
||||
jmp .L2
|
||||
|
||||
.L1:
|
||||
; w = B/(4*A); /* offset */
|
||||
fld dword A
|
||||
fmul dword [f_four]
|
||||
fld dword B
|
||||
fxch
|
||||
fdivp st1, st0
|
||||
fstp dword [w]
|
||||
|
||||
; b2 = -6*square(w) + C/A; /* coeffs. of shifted polynomial */
|
||||
fld dword [w]
|
||||
fmul st0, st0
|
||||
fld dword [f_n_six]
|
||||
fmulp st1, st0
|
||||
fld dword C
|
||||
fld dword A
|
||||
fdivp st1, st0
|
||||
faddp st1, st0
|
||||
fstp dword [b2]
|
||||
|
||||
; b1 = (8*square(w) - 2*C/A)*w + D/A;
|
||||
fld dword [w]
|
||||
fmul st0, st0
|
||||
fmul dword [f_eight]
|
||||
fld dword C
|
||||
fld dword A
|
||||
fdivp st1, st0
|
||||
fmul dword [f_two]
|
||||
fsubp st1, st0
|
||||
fmul dword [w]
|
||||
fld dword D
|
||||
fld dword A
|
||||
fdivp st1, st0
|
||||
faddp st1, st0
|
||||
fstp dword [b1]
|
||||
|
||||
; b0 = ((-3*square(w) + C/A)*w - D/A)*w + E/A;
|
||||
fld dword [w]
|
||||
fmul st0, st0
|
||||
fmul dword [f_n_three]
|
||||
fld dword C
|
||||
fld dword A
|
||||
fdivp st1, st0
|
||||
faddp st1, st0
|
||||
fmul dword [w]
|
||||
fld dword D
|
||||
fld dword A
|
||||
fdivp st1, st0
|
||||
fsubp st1, st0
|
||||
fmul dword [w]
|
||||
fld dword E
|
||||
fld dword A
|
||||
fdivp st1, st0
|
||||
faddp st1, st0
|
||||
fstp dword [b0]
|
||||
|
||||
; // cubic resolvent
|
||||
; F32 cubeA = 1.0;
|
||||
; F32 cubeB = b2;
|
||||
; F32 cubeC = -4 * b0;
|
||||
; F32 cubeD = square(b1) - 4 * b0 * b2;
|
||||
fld1
|
||||
fstp dword [cubeA]
|
||||
fld dword [b2]
|
||||
fstp dword [cubeB]
|
||||
fld dword [b0]
|
||||
fmul dword [f_n_four]
|
||||
fstp dword [cubeC]
|
||||
fld dword [b1]
|
||||
fmul st0, st0
|
||||
fld dword [f_four]
|
||||
fmul dword [b0]
|
||||
fmul dword [b2]
|
||||
fsubp st1, st0
|
||||
fstp dword [cubeD]
|
||||
|
||||
; mSolveCubic(cubeA, cubeB, cubeC, cubeD, x);
|
||||
push dword x
|
||||
push dword [cubeD]
|
||||
push dword [cubeC]
|
||||
push dword [cubeB]
|
||||
push dword [cubeA]
|
||||
call mSolveCubic_c__FffffPf
|
||||
add esp, byte 20
|
||||
|
||||
mov eax, dword x
|
||||
; z = x[0];
|
||||
fld dword [eax+0]
|
||||
fstp dword [z]
|
||||
; nreal = 0;
|
||||
xor dword [nreal], 0
|
||||
; px = x;
|
||||
mov dword [px], eax
|
||||
; t = mSqrt(0.25 * square(z) - b0);
|
||||
fld dword [z]
|
||||
fmul st0, st0
|
||||
fmul dword [f_quarter]
|
||||
fsub dword [b0]
|
||||
fsqrt
|
||||
fstp dword [t]
|
||||
|
||||
; for (i=-1; i<=1; i+=2) {
|
||||
mov ecx, dword [i_n_one]
|
||||
.L3
|
||||
; d0 = -0.5*z + i*t; /* coeffs. of quadratic factor */
|
||||
fld dword [f_n_half]
|
||||
fmul dword [z]
|
||||
fld dword [i]
|
||||
fmul dword [t]
|
||||
faddp st1, st0
|
||||
fstp dword [d0]
|
||||
; d1 = (t!=0.0)? -i*0.5*b1/t : i*mSqrt(-z - b2);
|
||||
fild dword [i]
|
||||
fld dword [t]
|
||||
fldz
|
||||
fcompp
|
||||
fnstsw ax
|
||||
;; if t == 0.0 jump to .L4
|
||||
;; if C3 is set, jump to .L4
|
||||
;; if ah & C3 is not-zero, jump to .L4
|
||||
and ah, 64
|
||||
jnz .L4
|
||||
|
||||
fchs
|
||||
fmul dword [f_half]
|
||||
fmul dword [b1]
|
||||
fdiv dword [t]
|
||||
jmp .L5
|
||||
|
||||
.L4:
|
||||
fld dword [z]
|
||||
fchs
|
||||
fsub dword [b2]
|
||||
fsqrt
|
||||
fmulp st1, st0
|
||||
|
||||
.L5:
|
||||
fstp dword [d1]
|
||||
|
||||
; h = 0.25 * square(d1) - d0;
|
||||
fld dword [d1]
|
||||
fmul st0, st0
|
||||
fmul dword [f_quarter]
|
||||
fsub dword [d0]
|
||||
fst dword [h]
|
||||
|
||||
; if (h>=0.0) {
|
||||
; h = mSqrt(h);
|
||||
; nreal += 2;
|
||||
; *px = -0.5*d1 - h - w;
|
||||
; px++;
|
||||
; *px = -0.5*d1 + h - w;
|
||||
; px++;
|
||||
; }
|
||||
fldz
|
||||
fcomp st1
|
||||
; if h < 0.0, jump to .L6
|
||||
; if C0 is 0, jump to .L6
|
||||
; if ( ah & 0x1 ) == 0.0, jump to .L6
|
||||
fnstsw ax
|
||||
and ah, 0x1
|
||||
jz .L6
|
||||
|
||||
fsqrt
|
||||
fstp dword [h]
|
||||
add dword [nreal], 2
|
||||
|
||||
fld dword [d1]
|
||||
fmul dword [f_n_half]
|
||||
fsub dword [h]
|
||||
fsub dword [w]
|
||||
fstp dword [px]
|
||||
mov eax, dword [px]
|
||||
add eax, 4
|
||||
mov dword [px], eax
|
||||
fld dword [d1]
|
||||
fmul dword [f_n_half]
|
||||
fadd dword [h]
|
||||
fsub dword [w]
|
||||
fstp dword [px]
|
||||
mov eax, dword [px]
|
||||
add eax, 4
|
||||
mov dword [px], eax
|
||||
|
||||
; }
|
||||
.L6:
|
||||
add ecx, 2
|
||||
cmp ecx, 1
|
||||
jle near .L3
|
||||
|
||||
; // sort results in ascending order
|
||||
; if (nreal == 4)
|
||||
; {
|
||||
mov ecx, dword [nreal]
|
||||
cmp ecx, 4
|
||||
jnz near .L7
|
||||
|
||||
mov ecx, x
|
||||
|
||||
; if (x[2] < x[0])
|
||||
fld dword [ecx]
|
||||
fld dword [ecx+8]
|
||||
fcom st1
|
||||
fnstsw ax
|
||||
and ah, 0x1
|
||||
jz .L8
|
||||
|
||||
; swap(x[0], x[2]);
|
||||
fstp dword [ecx]
|
||||
fstp dword [ecx+8]
|
||||
jmp .L9
|
||||
|
||||
.L8:
|
||||
fstp st0
|
||||
fstp st0
|
||||
|
||||
.L9:
|
||||
; if (x[3] < x[1])
|
||||
fld dword [ecx+4]
|
||||
fld dword [ecx+12]
|
||||
fcom st1
|
||||
fnstsw ax
|
||||
and ah, 0x1
|
||||
jz .L10
|
||||
|
||||
; swap(x[1], x[3]);
|
||||
fstp dword [ecx+4]
|
||||
fstp dword [ecx+12]
|
||||
jmp .L11
|
||||
|
||||
.L10:
|
||||
fstp st0
|
||||
fstp st0
|
||||
|
||||
.L11:
|
||||
; if (x[1] < x[0])
|
||||
fld dword [ecx]
|
||||
fld dword [ecx+4]
|
||||
fcom st1
|
||||
fnstsw ax
|
||||
and ah, 0x1
|
||||
jz .L12
|
||||
|
||||
; swap(x[0], x[1]);
|
||||
fstp dword [ecx]
|
||||
fstp dword [ecx+4]
|
||||
jmp .L13
|
||||
|
||||
.L12:
|
||||
fstp st0
|
||||
fstp st0
|
||||
|
||||
.L13:
|
||||
; if (x[3] < x[2])
|
||||
fld dword [ecx+12]
|
||||
fld dword [ecx+8]
|
||||
fcom st1
|
||||
fnstsw ax
|
||||
and ah, 0x1
|
||||
jz .L14
|
||||
|
||||
; swap(x[2], x[3]);
|
||||
fstp dword [ecx+12]
|
||||
fstp dword [ecx+8]
|
||||
jmp .L15
|
||||
|
||||
.L14:
|
||||
fstp st0
|
||||
fstp st0
|
||||
|
||||
.L15:
|
||||
; if (x[2] < x[1])
|
||||
fld dword [ecx+4]
|
||||
fld dword [ecx+8]
|
||||
fcom st1
|
||||
fnstsw ax
|
||||
and ah, 0x1
|
||||
jz .L16
|
||||
|
||||
; swap(x[1], x[2]);
|
||||
fstp dword [ecx+4]
|
||||
fstp dword [ecx+8]
|
||||
jmp .L17
|
||||
|
||||
.L16:
|
||||
fstp st0
|
||||
fstp st0
|
||||
|
||||
.L17
|
||||
; }
|
||||
;
|
||||
|
||||
.L7
|
||||
; return(nreal);
|
||||
mov eax, dword [nreal]
|
||||
|
||||
.L2:
|
||||
pop ebp
|
||||
ret
|
||||
Loading…
Add table
Add a link
Reference in a new issue