import of working prototype
This commit is contained in:
commit
e408356e4c
383
asm.awk
Normal file
383
asm.awk
Normal file
@ -0,0 +1,383 @@
|
||||
#!/usr/bin/awk -f
|
||||
# One-time setup: assembler state and the lookup tables used by the
# encoding rules further down.
BEGIN {
    # Output format; may be preset by the caller. The rest of the script
    # tests for "lst" (listing, the default), "hex" and "sym".
    if (!of) of="lst"
    pos=0       # current assembly position, advanced by submit()
    errors=0    # incremented by err(); non-zero makes END exit with status 1

    # Target CPU flags, changed by the "cpu" pseudo-op. 8086/8088 is the default.
    x86=1
    x80=0
    z80=0

    # lookup table for ord(): character -> character code, for codes 32..128
    for (i = 32; i <= 128; i++) {
        t=sprintf("%c", i)
        _ord_[t]=i
    }

    # 8-bit general purpose registers -> register number
    r8["al"]=0
    r8["cl"]=1
    r8["dl"]=2
    r8["bl"]=3
    r8["ah"]=4
    r8["ch"]=5
    r8["dh"]=6
    r8["bh"]=7

    # 16-bit general purpose registers -> register number
    r16["ax"]=0
    r16["cx"]=1
    r16["dx"]=2
    r16["bx"]=3
    r16["sp"]=4
    r16["bp"]=5
    r16["si"]=6
    r16["di"]=7

    # segment registers -> register number
    sreg["es"]=0
    sreg["cs"]=1
    sreg["ss"]=2
    sreg["ds"]=3

    # indirect access register combinations -> r/m field value.
    # Two-register forms are listed in both orders because modrm() joins
    # the register names in whatever order it encounters them.
    modstr["bxsi"]=0
    modstr["sibx"]=0
    modstr["bxdi"]=1
    modstr["dibx"]=1
    modstr["bpsi"]=2
    modstr["sibp"]=2
    modstr["bpdi"]=3
    modstr["dibp"]=3
    modstr["si"]=4
    modstr["di"]=5
    modstr["bx"]=7

    # ALU operations -> operation number (multiplied by 8 to form the opcode,
    # or used as the ModR/M spare field for the immediate forms)
    alu["add"]=0
    alu["or"]=1
    alu["adc"]=2
    alu["sbb"]=3
    alu["and"]=4
    alu["sub"]=5
    alu["xor"]=6
    alu["cmp"]=7

    # near conditional jumps: opcode = 112 (0x70) + condition code
    ops_rel8["jo"]=112+0
    ops_rel8["jno"]=112+1
    ops_rel8["jb"]=112+2
    ops_rel8["jc"]=112+2
    ops_rel8["jnae"]=112+2
    ops_rel8["jae"]=112+3
    ops_rel8["jnb"]=112+3
    ops_rel8["jnc"]=112+3
    ops_rel8["je"]=112+4
    ops_rel8["jz"]=112+4
    ops_rel8["jne"]=112+5
    ops_rel8["jnz"]=112+5
    ops_rel8["jbe"]=112+6
    ops_rel8["jna"]=112+6
    ops_rel8["ja"]=112+7
    ops_rel8["jnbe"]=112+7
    ops_rel8["js"]=112+8
    ops_rel8["jns"]=112+9
    ops_rel8["jp"]=112+10
    ops_rel8["jpe"]=112+10
    ops_rel8["jnp"]=112+11
    ops_rel8["jpo"]=112+11
    ops_rel8["jl"]=112+12
    ops_rel8["jnge"]=112+12
    ops_rel8["jge"]=112+13
    ops_rel8["jnl"]=112+13
    # "jbl" was a misspelling of "jnl"; kept so existing sources still assemble
    ops_rel8["jbl"]=112+13
    ops_rel8["jle"]=112+14
    ops_rel8["jng"]=112+14
    ops_rel8["jg"]=112+15
    ops_rel8["jnle"]=112+15

    # single-byte opcodes without operands
    ops_sb["stosb"]=170     # AA
    ops_sb["stosw"]=171     # AB
    ops_sb["lodsb"]=172     # AC
    ops_sb["lodsw"]=173     # AD
    ops_sb["ret"]=195       # C3
    ops_sb["retf"]=203      # CB
}
|
||||
# Report an assembly error.
#   str - message text
# Counts the error in the global "errors" (checked in END) and writes an
# "ERR:" line to standard output, so in listing mode it appears inline.
function err(str) {
    errors += 1
    printf "ERR: %s:\n", str
}
|
||||
# Submit the assembled bytes of the current line to the output.
#   off - address reported for this line; also the value given to the
#         line's label, if it has one
# Reads the globals "hex" (bytes collected by pushb/pushw), "label" and "of".
# Afterwards "pos" has advanced by the number of bytes and "hex" is empty.
function submit(off) {
    if (of == "lst") {
        printf "%04X %-10s %s\n", off, hex, $0
    } else if (of == "hex") {
        if (hex) printf "%s", hex
    }

    if (label) {
        if (of == "sym") {
            printf "%s\tsym\t%d\n", label, off
        }
        # prevsym holds the values loaded from the previous pass; a label
        # that lands somewhere else now means the passes disagree
        if (label in prevsym) {
            if (prevsym[label] != off) {
                err(label " different during second pass")
            }
        }
        sym[label] = off
    }

    # two hex digits per emitted byte
    pos += length(hex) / 2
    hex = ""
}
|
||||
# Resolve an operand to a number.
#   name - a numeric literal or a symbol name; names starting with "." are
#          local labels and get the current parent label (plabel) prepended
# Returns the literal's integer value, else the symbol's value from the
# previous pass (prevsym), else from this pass (sym), else 0.
function getsym(name) {
    # anything with a leading "0" or a non-zero integer value is a literal
    if (substr(name, 1, 1) == "0") return int(name)
    if (int(name)) return int(name)

    if (substr(name, 1, 1) == ".") {
        name = plabel name
    }

    if (name in prevsym) return prevsym[name]
    if (name in sym) return sym[name]

    # unknown symbol
    return 0
}
|
||||
# Append one byte to the global "hex" string as two uppercase hex digits.
#   val - value to emit; reduced modulo 256, so negative values come out in
#         two's complement form and oversized values are truncated
# Always emitting exactly two digits keeps "hex" well-formed, which submit()
# relies on when it derives the byte count from the string length.
# Out-of-range values are not reported as errors here because unresolved
# forward references can produce them transiently during an earlier pass.
function pushb(val) {
    val = int(val) % 256
    if (val < 0) val += 256
    hex = hex sprintf("%02X", val)
}
|
||||
# Append one 16-bit word to the global "hex" string, low byte first.
#   val - value to emit; reduced modulo 65536, so negative values come out
#         in two's complement form and oversized values are truncated
# "digits" is a local variable, not a parameter for callers.
function pushw(val,    digits) {
    val = int(val) % 65536
    if (val < 0) val += 65536
    digits = sprintf("%04X", val)
    # swap the two bytes: low byte goes out first
    hex = hex substr(digits, 3, 2) substr(digits, 1, 2)
}
|
||||
# Emit the ModR/M byte, and a displacement if needed, for one operand.
#   str   - operand text: a register name found in rs, or a bracketed memory
#           reference such as "[bx+si]", "[bp+4]" or "[label]"
#   spare - value for the middle three bits of the byte (a register number
#           or an opcode extension)
#   rs    - register table (r8 or r16) used to recognise a register operand
# The parameters after rs are local variables; callers pass three arguments.
# Memory references are split on "+": parts naming 16-bit registers select
# the addressing form via modstr, all other parts are resolved with getsym()
# and summed into a 16-bit displacement.
function modrm(str, spare, rs,    mod, rm, disp, regs, parts, k) {
    mod = 3
    rm = 0
    disp = 0
    if (str in rs) {
        # register operand
        rm = rs[str]
    } else if (substr(str, 1, 1) == "[") {
        # memory operand: no displacement unless a non-register part shows up
        mod = 0
        regs = ""
        gsub(/^\[|\]$/, "", str)
        split(str, parts, "+")
        for (k in parts) {
            if (parts[k] in r16) {
                regs = regs parts[k]
            } else {
                disp = disp + getsym(parts[k])
                mod = 2
            }
        }
        if (!regs) {
            # direct address: mod=0, rm=6 followed by a 16-bit address
            mod = 0
            rm = 6
        } else if (regs in modstr) {
            rm = modstr[regs]
        } else if (regs == "bp") {
            # [bp] has no displacement-free encoding (mod=0, rm=6 means a
            # direct address), so always emit it with a 16-bit displacement
            mod = 2
            rm = 6
        } else {
            err("Bad modR/M")
        }
    } else {
        # neither a register of the expected size nor a memory reference
        err("Bad modR/M")
    }
    pushb(mod * 64 + spare * 8 + rm)
    if (mod == 1) {
        pushb(disp)
    } else if (mod == 2 || (mod == 0 && rm == 6)) {
        pushw(disp)
    }
}
|
||||
# Don't assemble empty lines or comment-only lines. In listing mode they are
# echoed after 16 blanks, the width of the address/bytes columns that
# submit() prints.
/^( |\t)*;/ || /^( |\t)*$/ {
    if (of == "lst") {
        printf "%16s%s\n", "", $0
    }
    next
}
|
||||
# Load symbols from a previous pass. Such lines have the form
# "name sym value", which is what submit() prints in "sym" output mode.
# They are stored in prevsym and not assembled.
$2 == "sym" {
    prevsym[$1] = int($3)
    next
}
|
||||
# Start parsing the line and set up the per-line variables.
# The first field is assumed to be a label (a trailing ":" is dropped) and
# the opcode is assumed to be field 2; the next rule corrects this for
# lines without a label.
{
    label = $1
    sub(/:$/, "", label)
    opn = 2
    split("", b, ":")               # empty the array b
    byteop = wordop = dwordop = 1   # all sizes allowed until a specifier says otherwise
}
|
||||
# Fix-up for lines that start with a space or tab: they carry no label, so
# the opcode is the first field.
/^ / || /^\t/ {
    opn = 1
    label = ""
}
|
||||
# Finish parsing the line: resolve local labels, pick up the opcode and an
# optional size specifier, and collect the operands.
# Results: op (opcode name), a[1..c] (operands), op1/op2 (first two
# operands), rm1/rm2 (whether op1/op2 can be encoded as a ModR/M operand).
{
    # implement local labels: ".name" is prefixed with the most recent
    # non-local label, which is remembered in plabel
    if (substr(label,1,1)==".") {
        label=plabel label
    } else if (label) {
        plabel=label
    }

    # take note if we got an instruction size specifier
    op=$(opn)
    if ($(opn+1)=="byte") {
        wordop=0
        dwordop=0
        opn++
    } else if ($(opn+1)=="word") {
        byteop=0
        dwordop=0
        opn++
    } else if ($(opn+1)=="dword") {
        byteop=0
        wordop=0
        opn++
    }

    # collect operands up to the end of the line or a ";" comment
    split("", a, ":")
    c=0
    for (i=opn+1;i<=NF;i++) {
        if (substr($(i),1,1)==";") break
        a[++c]=$(i)
        if (substr($(i),1,1)=="\"") {
            # String literal. Look for the closing quote in this same field
            # first, so that a one-word string like "hi" (optionally followed
            # by a comma) does not swallow the fields after it.
            j=index(substr($(i),2), "\"")
            if (j) {
                a[c]=substr($(i), 1, j+1)
            } else {
                # The string contains blanks and was split across fields:
                # glue fields back together with single spaces until the one
                # holding the closing quote, never reading past the last field.
                while (i<NF) {
                    i++
                    j=index($(i), "\"")
                    if (j) {
                        a[c]=a[c] " " substr($(i), 1, j)
                        break
                    }
                    a[c]=a[c] " " $(i)
                }
            }
        } else {
            # plain operand: drop the separating comma
            gsub(/,$/, "", a[c])
        }
    }
    op1=a[1]
    op2=a[2]
    rm1=(op1 in r16) || (op1 in r8) || substr(op1,1,1)=="["
    rm2=(op2 in r16) || (op2 in r8) || substr(op2,1,1)=="["
}
|
||||
# the source line is parsed by here:
|
||||
# - op: opcode name
|
||||
# - a: array of operands, starting with 1
|
||||
# - c: number of operands
|
||||
# - byteop, wordop, dwordop: test before encoding, all 1 per default
|
||||
# pseudo-opcodes
|
||||
# "cpu" pseudo-op: select the target CPU named by the first operand.
# Recognised names are 8086, 8088, 8080 and z80; any other name clears all
# three flags.
op == "cpu" {
    z80 = (a[1] == "z80")
    x80 = (a[1] == "8080" || a[1] == "z80")
    x86 = (a[1] == "8088" || a[1] == "8086")
    submit(pos)
    next
}
|
||||
# "org" pseudo-op: continue assembling at the address given by the first
# operand. A label on this line receives the new address.
op == "org" {
    pos = getsym(a[1])
    submit(pos)
    next
}
|
||||
# "equ" pseudo-op: give the line's label the value of the first operand
# instead of the current position. No bytes are emitted.
op == "equ" {
    val = getsym(a[1])
    submit(val)
    next
}
|
||||
# "db" pseudo-op: emit one byte per numeric/symbol operand and one byte per
# character of each double-quoted string operand.
op == "db" {
    for (i = 1; i <= c; i++) {
        if (substr(a[i], 1, 1) != "\"") {
            pushb(getsym(a[i]))
            continue
        }
        # string: every character between the first and the last one
        # (the quotes), translated through the _ord_ table
        for (j = 2; j < length(a[i]); j++) {
            pushb(_ord_[substr(a[i], j, 1)])
        }
    }
}
|
||||
# "dw" pseudo-op: emit one 16-bit word per operand.
op == "dw" {
    i = 1
    while (i <= c) {
        pushw(getsym(a[i]))
        i++
    }
}
|
||||
# x86 opcodes in approx. encoding order
|
||||
# Arithmetic and logic: ADD, OR, ADC, SBB, AND, SUB, XOR, CMP.
# Register/memory forms use opcode alu[op]*8 plus 0..3 to select operand
# size and direction. Immediate forms use 80 (byte) or 81 (word), with the
# operation number in the ModR/M spare field.
x86 && op in alu {
    if (rm1 && op2 in r8) {
        # r/m8, r8
        pushb(0+alu[op]*8)
        modrm(op1, r8[op2], r8)
    } else if (rm1 && op2 in r16) {
        # r/m16, r16
        pushb(1+alu[op]*8)
        modrm(op1, r16[op2], r16)
    } else if (op1 in r8 && rm2) {
        # r8, r/m8
        pushb(2+alu[op]*8)
        modrm(op2, r8[op1], r8)
    } else if (op1 in r16 && rm2) {
        # r16, r/m16
        pushb(3+alu[op]*8)
        modrm(op2, r16[op1], r16)
    } else if (op1 in r16) {
        # r16, imm16
        pushb(129) # 81
        modrm(op1, alu[op], r16)
        pushw(getsym(op2))
    } else if (op1 in r8) {
        # r8, imm8: opcode 80 is followed by a single immediate byte
        pushb(128) # 80
        modrm(op1, alu[op], r8)
        pushb(getsym(op2))
    }
}
|
||||
# MOV variants: register/memory <-> register, register/memory <-> segment
# register, and immediate -> register. The first matching form wins.
x86 && op == "mov" {
    if (rm1 && (op2 in r8)) {
        # 88: r/m8 <- r8
        pushb(136)
        modrm(op1, r8[op2], r8)
    } else if (rm1 && (op2 in r16)) {
        # 89: r/m16 <- r16
        pushb(137)
        modrm(op1, r16[op2], r16)
    } else if ((op1 in r8) && rm2) {
        # 8A: r8 <- r/m8
        pushb(138)
        modrm(op2, r8[op1], r8)
    } else if ((op1 in r16) && rm2) {
        # 8B: r16 <- r/m16
        pushb(139)
        modrm(op2, r16[op1], r16)
    } else if (rm1 && (op2 in sreg)) {
        # 8C: r/m16 <- sreg
        pushb(140)
        modrm(op1, sreg[op2], r16)
    } else if (rm2 && (op1 in sreg)) {
        # 8E: sreg <- r/m16
        pushb(142)
        modrm(op2, sreg[op1], r16)
    } else if (op1 in r8) {
        # B0+r: r8 <- imm8
        pushb(176 + r8[op1])
        pushb(getsym(op2))
    } else if (op1 in r16) {
        # B8+r: r16 <- imm16
        pushb(184 + r16[op1])
        pushw(getsym(op2))
    }
}
|
||||
# PUSH r16: single byte, 80 (hex 50) plus the register number
x86 && op == "push" && op1 in r16 {
    pushb(r16[op1] + 80)
}
|
||||
# POP r16: single byte, 88 (hex 58) plus the register number
x86 && op == "pop" && op1 in r16 {
    pushb(r16[op1] + 88)
}
|
||||
# TEST with an 8-bit register as second operand: opcode 132 (hex 84)
# followed by ModR/M for the first operand
x86 && op == "test" && op2 in r8 {
    pushb(132)
    modrm(op1, r8[op2], r8)
}
|
||||
# INT n: opcode 205 (hex CD) followed by the interrupt number as one byte
x86 && op == "int" {
    pushb(205)
    pushb(getsym(op1))
}
|
||||
# JMP: opcode 233 (hex E9) with a 16-bit displacement measured from the end
# of this 3-byte instruction
x86 && op == "jmp" {
    pushb(233)
    v = getsym(op1) - (3 + pos)
    pushw(v)
}
|
||||
# CALL: opcode 232 (hex E8) with a 16-bit displacement measured from the
# end of this 3-byte instruction
x86 && op == "call" {
    pushb(232)
    v = getsym(op1) - (3 + pos)
    pushw(v)
}
|
||||
# Opcodes with rel8 encoding (the conditional jumps in ops_rel8): opcode
# byte, then a one-byte displacement measured from the end of this 2-byte
# instruction
x86 && op in ops_rel8 {
    pushb(ops_rel8[op])
    v = getsym(op1) - (2 + pos)
    pushb(v)
}
|
||||
# Opcodes without arguments: a single byte looked up in ops_sb
x86 && (op in ops_sb) {
    pushb(ops_sb[op])
}
|
||||
# Finish the line: complain if none of the rules above produced any bytes,
# then hand the result to submit().
# A line without an opcode (a label on its own, or a label followed only by
# a ";" comment) legitimately produces no bytes and is not an error.
{
    if (!hex && op != "" && substr(op, 1, 1) != ";") err("no encoding found")
    submit(pos)
}
|
||||
# End of input: terminate the hex dump with a newline and signal failure
# through the exit status if any error was reported.
END {
    if (of == "hex") {
        printf "\n"
    }
    if (errors) {
        exit 1
    }
}
|
Loading…
Reference in New Issue
Block a user