#!/usr/bin/awk -f
#
# Core of a small multi-pass 8086 assembler written in awk.
#
# Variable "of" selects the output format:
#   "lst"  listing with offset, hex bytes and the source line (default)
#   "hex"  the assembled bytes as one hex string
#   "sym"  one "<label>\tsym\t<offset>" record per label
# Records whose second field is "sym" are read back as the symbol values
# of a previous pass (see the $2=="sym" rule below).

BEGIN {
    if (!of) of = "lst"
    pos = 0
    errors = 0
    x86 = 1
    x80 = 0
    z80 = 0

    # lookup table for ord()
    for (i = 32; i <= 128; i++) {
        t = sprintf("%c", i)
        _ord_[t] = i
    }

    # 8-bit general purpose registers
    r8["al"] = 0; r8["cl"] = 1; r8["dl"] = 2; r8["bl"] = 3
    r8["ah"] = 4; r8["ch"] = 5; r8["dh"] = 6; r8["bh"] = 7

    # 16-bit general purpose registers
    r16["ax"] = 0; r16["cx"] = 1; r16["dx"] = 2; r16["bx"] = 3
    r16["sp"] = 4; r16["bp"] = 5; r16["si"] = 6; r16["di"] = 7

    # segment registers
    sreg["es"] = 0; sreg["cs"] = 1; sreg["ss"] = 2; sreg["ds"] = 3

    # indirect access register combinations (both orders are listed because
    # modrm() concatenates the registers in unspecified array order)
    modstr["bxsi"] = 0; modstr["sibx"] = 0
    modstr["bxdi"] = 1; modstr["dibx"] = 1
    modstr["bpsi"] = 2; modstr["sibp"] = 2
    modstr["bpdi"] = 3; modstr["dibp"] = 3
    modstr["si"] = 4
    modstr["di"] = 5
    # rm=6 is [bp] only together with a displacement; modrm() forces one
    modstr["bp"] = 6
    modstr["bx"] = 7

    # ALU operations
    alu["add"] = 0; alu["or"]  = 1; alu["adc"] = 2; alu["sbb"] = 3
    alu["and"] = 4; alu["sub"] = 5; alu["xor"] = 6; alu["cmp"] = 7

    # near conditional jumps (opcode = 0x70 + condition code)
    ops_rel8["jo"]   = 112 + 0
    ops_rel8["jno"]  = 112 + 1
    ops_rel8["jb"]   = 112 + 2;  ops_rel8["jc"]   = 112 + 2;  ops_rel8["jnae"] = 112 + 2
    ops_rel8["jae"]  = 112 + 3;  ops_rel8["jnb"]  = 112 + 3;  ops_rel8["jnc"]  = 112 + 3
    ops_rel8["je"]   = 112 + 4;  ops_rel8["jz"]   = 112 + 4
    ops_rel8["jne"]  = 112 + 5;  ops_rel8["jnz"]  = 112 + 5
    ops_rel8["jbe"]  = 112 + 6;  ops_rel8["jna"]  = 112 + 6
    ops_rel8["ja"]   = 112 + 7;  ops_rel8["jnbe"] = 112 + 7
    ops_rel8["js"]   = 112 + 8
    ops_rel8["jns"]  = 112 + 9
    ops_rel8["jp"]   = 112 + 10; ops_rel8["jpe"]  = 112 + 10
    ops_rel8["jnp"]  = 112 + 11; ops_rel8["jpo"]  = 112 + 11
    ops_rel8["jl"]   = 112 + 12; ops_rel8["jnge"] = 112 + 12
    ops_rel8["jge"]  = 112 + 13; ops_rel8["jnl"]  = 112 + 13
    # "jbl" was a misspelling of "jnl"; kept so existing sources still assemble
    ops_rel8["jbl"]  = 112 + 13
    ops_rel8["jle"]  = 112 + 14; ops_rel8["jng"]  = 112 + 14
    ops_rel8["jg"]   = 112 + 15; ops_rel8["jnle"] = 112 + 15

    # single-byte opcodes without operands
    ops_sb["stosb"] = 170
    ops_sb["stosw"] = 171
    ops_sb["lodsb"] = 172
    ops_sb["lodsw"] = 173
    ops_sb["ret"]   = 195
    ops_sb["retf"]  = 203
}

# Count an error and, in listing mode, insert the message into the listing.
function err(str) {
    errors++
    if (of == "lst") printf("**** %s:\n", str)
}

# Submit the assembled bytes of the current line (global "hex") to the output.
# off: value assigned to the label of the current line, if there is one.
# Advances "pos" by the number of bytes emitted and clears "hex".
function submit(off) {
    if (of == "lst") printf("%04X %-18s %s\n", off, hex, $0)
    if (of == "hex" && hex) printf("%s", hex)
    if (label) {
        if (of == "sym") printf("%s\tsym\t%d\n", label, off)
        # a label that moved since the previous pass means stale addresses
        if ((label in prevsym) && prevsym[label] != off) {
            err(label " different during second pass")
        }
        sym[label] = off
    }
    pos = pos + length(hex) / 2
    hex = ""
}

# Return the value of a symbol or numeric literal.
# Names starting with "." are local labels and get the last global label
# (plabel) prepended. Values from the previous pass take precedence over
# values collected in this pass; unknown names evaluate to 0.
function getsym(name) {
    if (substr(name, 1, 1) == "0" || int(name)) {
        return int(name)
    }
    if (substr(name, 1, 1) == ".") name = plabel name
    if (name in prevsym) {
        return prevsym[name]
    } else if (name in sym) {
        return sym[name]
    }
    return 0
}

# Append one byte to "hex". The value is wrapped into 0..255 so that
# negative and oversized values always produce exactly two hex digits.
function pushb(val) {
    val = val % 256
    if (val < 0) val += 256
    hex = hex sprintf("%02X", val)
}

# Append one 16-bit word to "hex", low byte first. The value is wrapped
# into 0..65535 so that exactly four hex digits are produced.
function pushw(val,    w) {
    val = val % 65536
    if (val < 0) val += 65536
    w = sprintf("%04X", val)
    hex = hex substr(w, 3) substr(w, 1, 2)
}

# Append a ModR/M byte plus displacement to "hex".
# str:   operand text, either a register name or "[reg+reg+disp]"
# spare: value for the reg/opcode-extension field (bits 3..5)
# rs:    register table (r8 or r16) used when str is a plain register
# Leaves the chosen values in the globals mod, rm and disp.
function modrm(str, spare, rs,    parts, k) {
    mod = 3
    rm = 0
    disp = 0
    if (str in rs) {
        rm = rs[str]
    } else if (substr(str, 1, 1) == "[") {
        # memory operand: start without displacement
        mod = 0
        rmap = ""
        gsub(/^\[|\]$/, "", str)
        split(str, parts, "+")
        for (k in parts) {
            if (parts[k] in r16) {
                rmap = rmap parts[k]
            } else {
                # anything that is not a register adds to the displacement
                disp = disp + getsym(parts[k])
                mod = 2
            }
        }
        if (!rmap) {
            # direct address: mod=0, rm=6, followed by a 16-bit address
            mod = 0
            rm = 6
        } else if (rmap in modstr) {
            rm = modstr[rmap]
            # mod=0/rm=6 would mean "direct address", so plain [bp]
            # has to be encoded with an explicit zero displacement
            if (mod == 0 && rm == 6) mod = 2
        } else {
            err("Bad modR/M")
        }
    } else {
        # neither a register of the requested class nor a memory reference
        err("Bad modR/M")
    }
    #print("modR/M:", mod, spare, rm)
    pushb(mod * 64 + spare * 8 + rm)
    if (mod == 1) {
        pushb(disp)
    } else if (mod == 2 || (mod == 0 && rm == 6)) {
        pushw(disp)
    }
}

# don't process empty lines or comment lines
/^( |\t)*(;|%)/ || /^( |\t)*$/ {
    if (of == "lst") printf("%24s%s\n", "", $0)
    next
}

# load symbols from previous pass
$2 == "sym" {
    prevsym[$1] = int($3)
    #printf("; %s (%s=%X)\n", $0,$1,prevsym[$1])
    next
}

# Start parsing the line
# and set up per-line vars
{
    label = $1
    gsub(/:$/, "", label)
    opn = 2
    split("", b, ":")
    byteop = 1
    wordop = 1
    dwordop = 1
}

# no label on line!
# fixup: a source line that begins with a blank or tab has no label field
/^ / || /^\t/ {
    label = ""
    opn = 1
}

# Split the rest of the line into opcode, size specifier and operands.
{
    # implement local labels
    if (substr(label, 1, 1) == ".") {
        label = plabel label
    } else if (label) {
        plabel = label
    }

    # take note if we got an instruction size specifier
    op = $(opn)
    if ($(opn + 1) == "byte") {
        wordop = 0
        dwordop = 0
        opn++
    } else if ($(opn + 1) == "word") {
        byteop = 0
        dwordop = 0
        opn++
    } else if ($(opn + 1) == "dword") {
        byteop = 0
        wordop = 0
        opn++
    }

    # collect operands; a trailing comma is dropped, a ";" field starts a comment
    split("", a, ":")
    c = 0
    for (i = opn + 1; i <= NF; i++) {
        if (substr($(i), 1, 1) == ";") break
        a[++c] = $(i)
        if (substr($(i), 1, 1) == "\"") {
            # quoted string: keep everything up to and including the closing
            # quote (runs of blanks inside the string collapse to one blank
            # because the line is already split into fields)
            q = index(substr($(i), 2), "\"")
            if (q) {
                # string opens and closes inside this one field
                a[c] = substr($(i), 1, q + 1)
            } else {
                # bounded by NF so that a field "0" does not end the string
                while (i < NF) {
                    i++
                    q = index($(i), "\"")
                    if (q) {
                        a[c] = a[c] " " substr($(i), 1, q)
                        break
                    }
                    a[c] = a[c] " " $(i)
                }
            }
        } else {
            gsub(/,$/, "", a[c])
        }
    }
    op1 = a[1]
    op2 = a[2]
    rm1 = (op1 in r16) || (op1 in r8) || substr(op1, 1, 1) == "["
    rm2 = (op2 in r16) || (op2 in r8) || substr(op2, 1, 1) == "["
}

# the source line is parsed by here:
# - op: opcode name
# - a: array of operands, starting with 1
# - c: number of operands
# - byteop, wordop, dwordop: test before encoding, all 1 per default

# pseudo-opcodes
op == "cpu" {
    x86 = (a[1] == "8088" || a[1] == "8086")
    x80 = (a[1] == "8080" || a[1] == "z80")
    z80 = (a[1] == "z80")
    submit(pos); next
}

op == "org" {
    pos = getsym(a[1])
    submit(pos); next
}

op == "equ" {
    val = getsym(a[1])
    submit(val); next
}

# NOTE(review): the source this was restored from was truncated between
# "for(j=2;j" in the db rule and the "modrm <-> reg" comment of the mov rule.
# The end of db and the head of mov below are reconstructed; any other rule
# that lived in that span (a "dw" pseudo-op, for instance) is NOT restored
# here and has to be recovered from version control.
op == "db" {
    for (i = 1; i <= c; i++) {
        if (substr(a[i], 1, 1) == "\"") {
            # emit the characters between the surrounding quotes
            for (j = 2; j < length(a[i]); j++) {
                pushb(_ord_[substr(a[i], j, 1)])
            }
        } else {
            pushb(getsym(a[i]))
        }
    }
    submit(pos); next
}

x86 && op == "mov" {
    # modrm <-> reg
    if (rm1 && op2 in r8) {
        pushb(136) # 88
        modrm(op1, r8[op2], r8)
    } else if (rm1 && op2 in r16) {
        pushb(137) # 89
        modrm(op1, r16[op2], r16)
    } else if (op1 in r8 && rm2) {
        pushb(138)
        modrm(op2, r8[op1], r8)
    } else if (op1 in r16 && rm2) {
        pushb(139)
        modrm(op2, r16[op1], r16)
    # modrm <-> sreg
    } else if (rm1 && op2 in sreg) {
        pushb(140)
        modrm(op1, sreg[op2], r16)
    } else if (rm2 && op1 in sreg) {
        pushb(142)
        modrm(op2, sreg[op1], r16)
    # reg <- imm
    } else if (op1 in r8) {
        pushb(176 + r8[op1])
        pushb(getsym(op2))
    } else if (op1 in r16) {
        pushb(184 + r16[op1])
        pushw(getsym(op2))
    # modrm <- imm (byte size wins when no size specifier was given)
    } else if (byteop && rm1) {
        pushb(198)
        modrm(op1, 0, r16)
        pushb(getsym(op2))
    } else if (wordop && rm1) {
        pushb(199)
        modrm(op1, 0, r16)
        pushw(getsym(op2))
    }
}

x86 && op == "push" && op1 in r16 {
    pushb(80 + r16[op1])
}

x86 && op == "pop" && op1 in r16 {
    pushb(88 + r16[op1])
}

x86 && op == "test" && op2 in r8 { # 84
    pushb(132)
    modrm(op1, r8[op2], r8)
}

x86 && op == "int" { # CD
    pushb(205)
    pushb(getsym(op1))
}

# near jmp/call: 16-bit displacement relative to the end of the 3-byte instruction
x86 && op == "jmp" {
    pushb(233)
    pushw(getsym(op1) - (pos + 3))
}

x86 && op == "call" {
    pushb(232)
    pushw(getsym(op1) - (pos + 3))
}

# opcodes with rel8 encoding (displacement relative to the end of the 2-byte instruction)
x86 && op in ops_rel8 {
    pushb(ops_rel8[op])
    pushb(getsym(op1) - (pos + 2))
}

# opcodes without arguments
x86 && op in ops_sb {
    pushb(ops_sb[op])
}

# every remaining line must have produced some bytes by now
{
    if (!hex) err("no encoding found")
    submit(pos)
}

END {
    if (of == "hex") printf("\n")
    if (errors) exit(1)
}