| __m128 __declspec(naked) __stdcall am_exp_ps(__m128 x)
{
__asm
{
minps xmm0, _ps_exp_hi
movaps xmm1, _ps_exp_rln2
maxps xmm0, _ps_exp_lo
mulps xmm1, xmm0
xorps xmm7, xmm7
mov ecx, esp
addps xmm1, _ps_am_0p5
movaps xmm2, xmm1
cvttps2pi mm0, xmm1
cmpltps xmm2, xmm7
ASM_MOVE_H2L(xmm1)
andps xmm2, _ps_am_1
cvttps2pi mm1, xmm1
movq mm5, _pi32_0x7f
cvtps2pi mm2, xmm2 // needn't truncate
ASM_MOVE_H2L(xmm2)
cvtps2pi mm3, xmm2 // needn't truncate
psubd mm0, mm2
psubd mm1, mm3
and ecx, ~15
cvtpi2ps xmm1, mm1
ASM_MOVE_L2H(xmm1)
paddd mm1, mm5
cvtpi2ps xmm1, mm0
paddd mm0, mm5
movaps xmm2, xmm1
mulps xmm1, _ps_exp_c1
mulps xmm2, _ps_exp_c2
subps xmm0, xmm1
pslld mm0, 23
subps xmm0, xmm2
pslld mm1, 23
movaps xmm2, xmm0
movq [ecx - 16], mm0
mulps xmm2, xmm2
movq [ecx - 16 + 8], mm1
movaps xmm6, _ps_exp_q0
movaps xmm4, _ps_exp_p0
mulps xmm6, xmm2
movaps xmm7, _ps_exp_q1
mulps xmm4, xmm2
movaps xmm5, _ps_exp_p1
addps xmm6, xmm7
addps xmm4, xmm5
movaps xmm7, _ps_exp_q2
mulps xmm6, xmm2
mulps xmm4, xmm2
addps xmm6, xmm7
mulps xmm4, xmm0
movaps xmm7, _ps_exp_q3
mulps xmm6, xmm2
addps xmm4, xmm0
addps xmm6, xmm7
movaps xmm0, [ecx - 16]
subps xmm6, xmm4
movaps xmm7, _ps_am_1
rcpps xmm6, xmm6
mulps xmm4, xmm6
addps xmm4, xmm4
addps xmm4, xmm7
mulps xmm0, xmm4
ret 16
}
} |