Ddoc
$(SPEC_S D x86 Inline Assembler,
$(P D, being a systems programming language, provides an inline
assembler.
The inline assembler is standardized for D implementations across
the same CPU family; for example, the Intel Pentium inline assembler
for a Win32 D compiler will be syntax compatible with the inline
assembler for Linux running on an Intel Pentium.
)
$(P Implementations of D on different architectures, however, are
free to innovate upon the memory model, function call/return conventions,
argument passing conventions, etc.
)
$(P This document describes the x86 implementation of the inline
assembler.
)
$(GRAMMAR
$(GNAME AsmInstruction):
$(I Identifier) $(B :) $(I AsmInstruction)
$(B align) $(GLINK IntegerExpression)
$(B even)
$(B naked)
$(B db) $(I Operands)
$(B ds) $(I Operands)
$(B di) $(I Operands)
$(B dl) $(I Operands)
$(B df) $(I Operands)
$(B dd) $(I Operands)
$(B de) $(I Operands)
$(I Opcode)
$(I Opcode Operands)
$(GNAME Operands):
$(I Operand)
$(I Operand) $(B ,) $(I Operands)
)
Labels
$(P Assembler instructions can be labeled just like other statements.
They can be the target of goto statements.
For example:
)
--------------
void *pc;
asm
{
call L1 ;
L1: ;
pop EBX ;
mov pc[EBP],EBX ; // pc now points to code at L1
}
--------------
align $(I IntegerExpression)
$(GRAMMAR
$(GNAME IntegerExpression):
$(LINK2 lex.html#IntegerLiteral, $(I IntegerLiteral))
$(I Identifier)
)
$(P Causes the assembler to emit NOP instructions to align the next
assembler instruction on an $(I IntegerExpression) boundary.
$(I IntegerExpression) must evaluate at compile time to an integer that is
a power of 2.
)
$(P Aligning the start of a loop body can sometimes have a dramatic
effect on the execution speed.
)
even
$(P Causes the assembler to emit NOP instructions to align the next
assembler instruction on an even boundary.
)
naked
$(P Causes the compiler to not generate the function prolog and epilog
sequences. This means that generating them is the responsibility of the
inline assembly programmer. $(B naked) is normally used when the entire
function is to be written in assembler.
)
db, ds, di, dl, df, dd, de
These pseudo ops are for inserting raw data directly into
the code.
$(B db) is for bytes,
$(B ds) is for 16 bit words,
$(B di) is for 32 bit words,
$(B dl) is for 64 bit words,
$(B df) is for 32 bit floats,
$(B dd) is for 64 bit doubles,
and $(B de) is for 80 bit extended reals.
Each can have multiple operands.
If an operand is a string literal, it is as if there were $(I length)
operands, where $(I length) is the number of characters in the string.
One character is used per operand.
For example:
--------------
asm
{
db 5,6,0x83; // insert bytes 0x05, 0x06, and 0x83 into code
ds 0x1234; // insert bytes 0x34, 0x12
di 0x1234; // insert bytes 0x34, 0x12, 0x00, 0x00
dl 0x1234; // insert bytes 0x34, 0x12, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
df 1.234; // insert float 1.234
dd 1.234; // insert double 1.234
de 1.234; // insert real 1.234
db "abc"; // insert bytes 0x61, 0x62, and 0x63
ds "abc"; // insert bytes 0x61, 0x00, 0x62, 0x00, 0x63, 0x00
}
--------------
Opcodes
A list of supported opcodes is at the end.
The following registers are supported. Register names
are always in upper case.
$(GRAMMAR
$(GNAME Register):
$(B AL) $(B AH) $(B AX) $(B EAX)
$(B BL) $(B BH) $(B BX) $(B EBX)
$(B CL) $(B CH) $(B CX) $(B ECX)
$(B DL) $(B DH) $(B DX) $(B EDX)
$(B BP) $(B EBP)
$(B SP) $(B ESP)
$(B DI) $(B EDI)
$(B SI) $(B ESI)
$(B ES) $(B CS) $(B SS) $(B DS) $(B GS) $(B FS)
$(B CR0) $(B CR2) $(B CR3) $(B CR4)
$(B DR0) $(B DR1) $(B DR2) $(B DR3) $(B DR6) $(B DR7)
$(B TR3) $(B TR4) $(B TR5) $(B TR6) $(B TR7)
$(B ST)
$(B ST(0)) $(B ST(1)) $(B ST(2)) $(B ST(3)) $(B ST(4)) $(B ST(5)) $(B ST(6)) $(B ST(7))
$(B MM0) $(B MM1) $(B MM2) $(B MM3) $(B MM4) $(B MM5) $(B MM6) $(B MM7)
$(B XMM0) $(B XMM1) $(B XMM2) $(B XMM3) $(B XMM4) $(B XMM5) $(B XMM6) $(B XMM7)
)
Special Cases
$(DL
$(DT $(B lock), $(B rep), $(B repe), $(B repne),
$(B repnz), $(B repz))
$(DD These prefix instructions do not appear in the same statement
as the instructions they prefix; they appear in their own statement.
For example:
--------------
asm
{
rep ;
movsb ;
}
--------------
)
$(DT $(B pause))
$(DD This opcode is not supported by the assembler; instead, use
--------------
{
rep ;
nop ;
}
--------------
which produces the same result.
)
$(DT $(B floating point ops))
$(DD Use the two operand form of the instruction format:
--------------
fdiv ST(1); // wrong
fmul ST; // wrong
fdiv ST,ST(1); // right
fmul ST,ST(0); // right
--------------
)
)
Operands
$(GRAMMAR
$(GNAME Operand):
$(I AsmExp)
$(GNAME AsmExp):
$(I AsmLogOrExp)
$(I AsmLogOrExp) $(B ?) $(I AsmExp) $(B :) $(I AsmExp)
$(GNAME AsmLogOrExp):
$(I AsmLogAndExp)
$(I AsmLogAndExp) $(B ||) $(I AsmLogAndExp)
$(GNAME AsmLogAndExp):
$(I AsmOrExp)
$(I AsmOrExp) $(B &&) $(I AsmOrExp)
$(GNAME AsmOrExp):
$(I AsmXorExp)
$(I AsmXorExp) $(B |) $(I AsmXorExp)
$(GNAME AsmXorExp):
$(I AsmAndExp)
$(I AsmAndExp) $(B ^) $(I AsmAndExp)
$(GNAME AsmAndExp):
$(I AsmEqualExp)
$(I AsmEqualExp) $(B &) $(I AsmEqualExp)
$(GNAME AsmEqualExp):
$(I AsmRelExp)
$(I AsmRelExp) $(B ==) $(I AsmRelExp)
$(I AsmRelExp) $(B !=) $(I AsmRelExp)
$(GNAME AsmRelExp):
$(I AsmShiftExp)
$(I AsmShiftExp) $(B <) $(I AsmShiftExp)
$(I AsmShiftExp) $(B <=) $(I AsmShiftExp)
$(I AsmShiftExp) $(B >) $(I AsmShiftExp)
$(I AsmShiftExp) $(B >=) $(I AsmShiftExp)
$(GNAME AsmShiftExp):
$(I AsmAddExp)
$(I AsmAddExp) $(B <<) $(I AsmAddExp)
$(I AsmAddExp) $(B >>) $(I AsmAddExp)
$(I AsmAddExp) $(B >>>) $(I AsmAddExp)
$(GNAME AsmAddExp):
$(I AsmMulExp)
$(I AsmMulExp) $(B +) $(I AsmMulExp)
$(I AsmMulExp) $(B -) $(I AsmMulExp)
$(GNAME AsmMulExp):
$(I AsmBrExp)
$(I AsmBrExp) $(B *) $(I AsmBrExp)
$(I AsmBrExp) $(B /) $(I AsmBrExp)
$(I AsmBrExp) $(B %) $(I AsmBrExp)
$(GNAME AsmBrExp):
$(I AsmUnaExp)
$(I AsmBrExp) $(B [) $(I AsmExp) $(B ])
$(GNAME AsmUnaExp):
$(I AsmTypePrefix) $(I AsmExp)
$(B offsetof) $(I AsmExp)
$(B seg) $(I AsmExp)
$(B +) $(I AsmUnaExp)
$(B -) $(I AsmUnaExp)
$(B !) $(I AsmUnaExp)
$(B ~) $(I AsmUnaExp)
$(I AsmPrimaryExp)
$(GNAME AsmPrimaryExp):
$(LINK2 lex.html#IntegerLiteral, $(I IntegerLiteral))
$(LINK2 lex.html#FloatLiteral, $(I FloatLiteral))
$(B __LOCAL_SIZE)
$(B $)
$(GLINK Register)
$(I DotIdentifier)
$(GNAME DotIdentifier):
$(I Identifier)
$(I Identifier) $(B .) $(I DotIdentifier)
)
$(P The operand syntax more or less follows the Intel CPU documentation
conventions.
In particular, the convention is that for two operand instructions
the source is the right operand and the destination is the left
operand.
The syntax differs from Intel's in order to be compatible
with the D language tokenizer and to simplify parsing.
)
$(P The $(B seg) operator means load the segment number that the symbol is
in. This is not relevant for flat model code.
Instead, do a move from the relevant segment register.
)
Operand Types
$(GRAMMAR
$(GNAME AsmTypePrefix):
$(B near ptr)
$(B far ptr)
$(B byte ptr)
$(B short ptr)
$(B int ptr)
$(B word ptr)
$(B dword ptr)
$(B qword ptr)
$(B float ptr)
$(B double ptr)
$(B real ptr)
)
$(P In cases where the operand size is ambiguous, as in:)
--------------
add [EAX],3 ;
--------------
$(P it can be disambiguated by using an $(I AsmTypePrefix):)
--------------
add byte ptr [EAX],3 ;
add int ptr [EAX],7 ;
--------------
$(P $(B far ptr) is not relevant for flat model code.
)
Struct/Union/Class Member Offsets
$(P To access members of an aggregate when a pointer to the aggregate
is in a register, use the qualified name of the member:
)
--------------
struct Foo { int a,b,c; }
int bar(Foo *f)
{
asm
{ mov EBX,f ;
mov EAX,Foo.b[EBX] ;
}
}
--------------
Stack Variables
$(P Stack variables (variables local to a function and allocated
on the stack) are accessed via the name of the variable indexed
by EBP:
)
---
int foo(int x)
{
asm
{
mov EAX,x[EBP] ; // loads value of parameter x into EAX
mov EAX,x ; // does the same thing
}
}
---
$(P If the [EBP] is omitted, it is assumed for local variables.
If $(B naked) is used, this no longer holds.
)
Special Symbols
$(DL
$(DT $(B $))
$(DD Represents the program counter of the start of the next
instruction. So,
--------------
jmp $ ;
--------------
branches to the instruction following the jmp instruction.
The $(B $) can only appear as the target of a jmp or call
instruction.
)
$(DT $(B __LOCAL_SIZE))
$(DD This gets replaced by the number of local bytes in the local
stack frame. It is most handy when $(B naked) is used
and a custom stack frame is programmed.
)
)
Opcodes Supported
$(TABLE1
aaa |
aad |
aam |
aas |
adc |
add |
addpd |
addps |
addsd |
addss |
and |
andnpd |
andnps |
andpd |
andps |
arpl |
bound |
bsf |
bsr |
bswap |
bt |
btc |
btr |
bts |
call |
cbw |
cdq |
clc |
cld |
clflush |
cli |
clts |
cmc |
cmova |
cmovae |
cmovb |
cmovbe |
cmovc |
cmove |
cmovg |
cmovge |
cmovl |
cmovle |
cmovna |
cmovnae |
cmovnb |
cmovnbe |
cmovnc |
cmovne |
cmovng |
cmovnge |
cmovnl |
cmovnle |
cmovno |
cmovnp |
cmovns |
cmovnz |
cmovo |
cmovp |
cmovpe |
cmovpo |
cmovs |
cmovz |
cmp |
cmppd |
cmpps |
cmps |
cmpsb |
cmpsd |
cmpss |
cmpsw |
cmpxch8b |
cmpxchg |
comisd |
comiss |
cpuid |
cvtdq2pd |
cvtdq2ps |
cvtpd2dq |
cvtpd2pi |
cvtpd2ps |
cvtpi2pd |
cvtpi2ps |
cvtps2dq |
cvtps2pd |
cvtps2pi |
cvtsd2si |
cvtsd2ss |
cvtsi2sd |
cvtsi2ss |
cvtss2sd |
cvtss2si |
cvttpd2dq |
cvttpd2pi |
cvttps2dq |
cvttps2pi |
cvttsd2si |
cvttss2si |
cwd |
cwde |
da |
daa |
das |
db |
dd |
de |
dec |
df |
di |
div |
divpd |
divps |
divsd |
divss |
dl |
dq |
ds |
dt |
dw |
emms |
enter |
f2xm1 |
fabs |
fadd |
faddp |
fbld |
fbstp |
fchs |
fclex |
fcmovb |
fcmovbe |
fcmove |
fcmovnb |
fcmovnbe |
fcmovne |
fcmovnu |
fcmovu |
fcom |
fcomi |
fcomip |
fcomp |
fcompp |
fcos |
fdecstp |
fdisi |
fdiv |
fdivp |
fdivr |
fdivrp |
feni |
ffree |
fiadd |
ficom |
ficomp |
fidiv |
fidivr |
fild |
fimul |
fincstp |
finit |
fist |
fistp |
fisub |
fisubr |
fld |
fld1 |
fldcw |
fldenv |
fldl2e |
fldl2t |
fldlg2 |
fldln2 |
fldpi |
fldz |
fmul |
fmulp |
fnclex |
fndisi |
fneni |
fninit |
fnop |
fnsave |
fnstcw |
fnstenv |
fnstsw |
fpatan |
fprem |
fprem1 |
fptan |
frndint |
frstor |
fsave |
fscale |
fsetpm |
fsin |
fsincos |
fsqrt |
fst |
fstcw |
fstenv |
fstp |
fstsw |
fsub |
fsubp |
fsubr |
fsubrp |
ftst |
fucom |
fucomi |
fucomip |
fucomp |
fucompp |
fwait |
fxam |
fxch |
fxrstor |
fxsave |
fxtract |
fyl2x |
fyl2xp1 |
hlt |
idiv |
imul |
in |
inc |
ins |
insb |
insd |
insw |
int |
into |
invd |
invlpg |
iret |
iretd |
ja |
jae |
jb |
jbe |
jc |
jcxz |
je |
jecxz |
jg |
jge |
jl |
jle |
jmp |
jna |
jnae |
jnb |
jnbe |
jnc |
jne |
jng |
jnge |
jnl |
jnle |
jno |
jnp |
jns |
jnz |
jo |
jp |
jpe |
jpo |
js |
jz |
lahf |
lar |
ldmxcsr |
lds |
lea |
leave |
les |
lfence |
lfs |
lgdt |
lgs |
lidt |
lldt |
lmsw |
lock |
lods |
lodsb |
lodsd |
lodsw |
loop |
loope |
loopne |
loopnz |
loopz |
lsl |
lss |
ltr |
maskmovdqu |
maskmovq |
maxpd |
maxps |
maxsd |
maxss |
mfence |
minpd |
minps |
minsd |
minss |
mov |
movapd |
movaps |
movd |
movdq2q |
movdqa |
movdqu |
movhlps |
movhpd |
movhps |
movlhps |
movlpd |
movlps |
movmskpd |
movmskps |
movntdq |
movnti |
movntpd |
movntps |
movntq |
movq |
movq2dq |
movs |
movsb |
movsd |
movss |
movsw |
movsx |
movupd |
movups |
movzx |
mul |
mulpd |
mulps |
mulsd |
mulss |
neg |
nop |
not |
or |
orpd |
orps |
out |
outs |
outsb |
outsd |
outsw |
packssdw |
packsswb |
packuswb |
paddb |
paddd |
paddq |
paddsb |
paddsw |
paddusb |
paddusw |
paddw |
pand |
pandn |
pavgb |
pavgw |
pcmpeqb |
pcmpeqd |
pcmpeqw |
pcmpgtb |
pcmpgtd |
pcmpgtw |
pextrw |
pinsrw |
pmaddwd |
pmaxsw |
pmaxub |
pminsw |
pminub |
pmovmskb |
pmulhuw |
pmulhw |
pmullw |
pmuludq |
pop |
popa |
popad |
popf |
popfd |
por |
prefetchnta |
prefetcht0 |
prefetcht1 |
prefetcht2 |
psadbw |
pshufd |
pshufhw |
pshuflw |
pshufw |
pslld |
pslldq |
psllq |
psllw |
psrad |
psraw |
psrld |
psrldq |
psrlq |
psrlw |
psubb |
psubd |
psubq |
psubsb |
psubsw |
psubusb |
psubusw |
psubw |
punpckhbw |
punpckhdq |
punpckhqdq |
punpckhwd |
punpcklbw |
punpckldq |
punpcklqdq |
punpcklwd |
push |
pusha |
pushad |
pushf |
pushfd |
pxor |
rcl |
rcpps |
rcpss |
rcr |
rdmsr |
rdpmc |
rdtsc |
rep |
repe |
repne |
repnz |
repz |
ret |
retf |
rol |
ror |
rsm |
rsqrtps |
rsqrtss |
sahf |
sal |
sar |
sbb |
scas |
scasb |
scasd |
scasw |
seta |
setae |
setb |
setbe |
setc |
sete |
setg |
setge |
setl |
setle |
setna |
setnae |
setnb |
setnbe |
setnc |
setne |
setng |
setnge |
setnl |
setnle |
setno |
setnp |
setns |
setnz |
seto |
setp |
setpe |
setpo |
sets |
setz |
sfence |
sgdt |
shl |
shld |
shr |
shrd |
shufpd |
shufps |
sidt |
sldt |
smsw |
sqrtpd |
sqrtps |
sqrtsd |
sqrtss |
stc |
std |
sti |
stmxcsr |
stos |
stosb |
stosd |
stosw |
str |
sub |
subpd |
subps |
subsd |
subss |
sysenter |
sysexit |
test |
ucomisd |
ucomiss |
ud2 |
unpckhpd |
unpckhps |
unpcklpd |
unpcklps |
verr |
verw |
wait |
wbinvd |
wrmsr |
xadd |
xchg |
xlat |
xlatb |
xor |
xorpd |
xorps |
|
|
|
|
)
Pentium 4 (Prescott) Opcodes Supported
$(TABLE1
addsubpd |
addsubps |
fisttp |
haddpd |
haddps |
hsubpd |
hsubps |
lddqu |
monitor |
movddup |
movshdup |
movsldup |
mwait |
|
|
)
AMD Opcodes Supported
$(TABLE1
pavgusb |
pf2id |
pfacc |
pfadd |
pfcmpeq |
pfcmpge |
pfcmpgt |
pfmax |
pfmin |
pfmul |
pfnacc |
pfpnacc |
pfrcp |
pfrcpit1 |
pfrcpit2 |
pfrsqit1 |
pfrsqrt |
pfsub |
pfsubr |
pi2fd |
pmulhrw |
pswapd |
)
$(COMMENT
SSE4.1
blendpd
blendps
blendvpd
blendvps
dppd
dpps
extractps
insertps
movntdqa
mpsadbw
packusdw
pblendvb
pblendw
pcmpeqq
pextrb
pextrd
pextrq
pextrw
phminposuw
pinsrb
pinsrd
pinsrq
pmaxsb
pmaxsd
pmaxud
pmaxuw
pminsb
pminsd
pminud
pminuw
pmovsxbd
pmovsxbq
pmovsxbw
pmovsxwd
pmovsxwq
pmovsxdq
pmovzxbd
pmovzxbq
pmovzxbw
pmovzxwd
pmovzxwq
pmovzxdq
pmuldq
pmulld
ptest
roundpd
roundps
roundsd
roundss
SSE4.2
crc32
pcmpestri
pcmpestrm
pcmpistri
pcmpistrm
pcmpgtq
popcnt
VMX
invept
invvpid
vmcall
vmclear
vmlaunch
vmresume
vmptrld
vmptrst
vmread
vmwrite
vmxoff
vmxon
SMX
getsec
)
)
Macros:
TITLE=Inline Assembler
WIKI=IAsm