; Example: double a float that lives in memory, then print it.
; Loads the single-precision value at `a`, adds it to itself, stores the
; result back to `a`, and calls farray_print(&a, 1).
; NOTE(review): there is no entry label here -- presumably this snippet is
; placed inside a function body by the surrounding harness, so rsp % 16 == 8
; on entry; confirm before reusing it elsewhere.
        extern  farray_print

        movss   xmm0, [rel a]           ; load one float from memory
        addss   xmm0, xmm0              ; add to itself (double it)
        movss   [rel a], xmm0           ; store back to memory

        lea     rdi, [rel a]            ; arg 1: address of our float (RIP-relative, PIE-safe)
        mov     esi, 1                  ; arg 2: number of floats to print (32-bit write zeroes upper rsi)
        sub     rsp, 8                  ; align stack for farray_print
        call    farray_print
        add     rsp, 8                  ; clean up stack
        ret                             ; done with function

        section .data
a:      dd      1.234

; The full list of single-float instructions is below. There are also double
; precision instructions, and some very interesting parallel instructions
; (we'll talk about these next week).
|  | Instruction | Comments |
|---|---|---|
| Arithmetic | addss | sub, mul, div all work the same way |
| Compare | minss | max works the same way |
| Sqrt | sqrtss | Square root (sqrt), reciprocal (rcp), and reciprocal-square-root (rsqrt) all work the same way |
| Move | movss | Copy DWORD sized data to and from memory. |
| Convert | cvtss2si cvttss2si | Convert to ("2", get it?) Single Integer (si, stored in register like eax). "cvtt" versions do truncation (round toward zero, like C++ default); "cvt" versions round to nearest. |
| Compare to flags | ucomiss | Sets CPU flags like normal x86 "cmp" instruction, but from SSE registers. Use with "jb", "jbe", "je", "jae", or "ja" for normal comparisons (but not jl, jle, jg, or jge, because ucomiss reports its result in the carry and zero flags, the way an unsigned compare does). Sets "pf", the parity flag, if either input is a NaN. |
; Example: compute 2*pi in an SSE register and return it as an integer.
; Loads the single-precision constant `pi`, doubles it, and converts the
; result to a 32-bit integer in eax. cvtss2si rounds rather than truncates
; (to nearest under the default rounding mode), so 6.28... comes back as 6.
        movss   xmm3, [rel pi]          ; load up constant
        addss   xmm3, xmm3              ; add pi to itself
        cvtss2si eax, xmm3              ; round to integer
        ret

        section .data
pi:     dd      3.14159265358979        ; constant (dd stores it as a single-precision float)

; Here we're using ucomiss to compare two floats:
; Example: branch on a floating-point comparison with ucomiss.
; Compares the floats `a` and `b` and returns 3 in eax when the jbe branch
; is taken (a <= b), otherwise 1. With a = 1.23 and b = 1.27 the branch is
; taken, so the result is 3.
        movss   xmm3, [rel a]           ; xmm3 = a
        ucomiss xmm3, [rel b]           ; set flags from comparing a with b
        jbe     wejumped                ; unsigned-style condition: taken when a <= b
                                        ; (also taken if either value is NaN)
        mov     eax, 1                  ; fall-through: a > b
        ret
wejumped:
        mov     eax, 3                  ; branch taken: a <= b
        ret

        section .data                   ; keep the operands out of the code section
a:      dd      1.23
b:      dd      1.27