#!/usr/bin/awk -f
# asm.awk - small multi-pass assembler written in awk.
#
# Provenance: added as a new file (mode 100644, blob 74b86c8) by
#   commit e408356e4ce0e77f5fadc9c2231fa54614722845
#   Author: Nero <41307858+nero@users.noreply.github.com>
#   Date:   Fri Jan 7 21:36:57 2022 +0000
#   import of working prototype
#
# The variable "of" selects the output format:
#   lst  listing: offset, encoded bytes and the source line (default)
#   hex  only the encoded bytes, as one hex string
#   sym  one "<label>\tsym\t<offset>" line per label
# Input lines whose second field is "sym" are not assembled; they are loaded
# as symbol values from a previous pass (see prevsym[]).
BEGIN {
	if (!of) of="lst"
	pos=0
	errors=0
	x86=1
	x80=0
	z80=0
	# lookup table for ord()
	for (i = 32; i <= 128; i++) {
		t=sprintf("%c", i)
		_ord_[t]=i
	}
	# 8-bit general purpose registers
	r8["al"]=0
	r8["cl"]=1
	r8["dl"]=2
	r8["bl"]=3
	r8["ah"]=4
	r8["ch"]=5
	r8["dh"]=6
	r8["bh"]=7
	# 16-bit general purpose registers
	r16["ax"]=0
	r16["cx"]=1
	r16["dx"]=2
	r16["bx"]=3
	r16["sp"]=4
	r16["bp"]=5
	r16["si"]=6
	r16["di"]=7
	# segment registers
	sreg["es"]=0
	sreg["cs"]=1
	sreg["ss"]=2
	sreg["ds"]=3
	# indirect access register combinations
	# (both orderings are listed so "[bx+si]" and "[si+bx]" both resolve)
	modstr["bxsi"]=0
	modstr["sibx"]=0
	modstr["bxdi"]=1
	modstr["dibx"]=1
	modstr["bpsi"]=2
	modstr["sibp"]=2
	modstr["bpdi"]=3
	modstr["dibp"]=3
	modstr["si"]=4
	modstr["di"]=5
	modstr["bx"]=7
	# ALU operations
	alu["add"]=0
	alu["or"]=1
	alu["adc"]=2
	alu["sbb"]=3
	alu["and"]=4
	alu["sub"]=5
	alu["xor"]=6
	alu["cmp"]=7
	# near conditional jumps (opcode 70h + condition code)
	ops_rel8["jo"]=112+0
	ops_rel8["jno"]=112+1
	ops_rel8["jb"]=112+2
	ops_rel8["jc"]=112+2
	ops_rel8["jnae"]=112+2
	ops_rel8["jae"]=112+3
	ops_rel8["jnb"]=112+3
	ops_rel8["jnc"]=112+3
	ops_rel8["je"]=112+4
	ops_rel8["jz"]=112+4
	ops_rel8["jne"]=112+5
	ops_rel8["jnz"]=112+5
	ops_rel8["jbe"]=112+6
	ops_rel8["jna"]=112+6
	ops_rel8["ja"]=112+7
	ops_rel8["jnbe"]=112+7
	ops_rel8["js"]=112+8
	ops_rel8["jns"]=112+9
	ops_rel8["jp"]=112+10
	ops_rel8["jpe"]=112+10
	ops_rel8["jnp"]=112+11
	ops_rel8["jpo"]=112+11
	ops_rel8["jl"]=112+12
	ops_rel8["jnge"]=112+12
	ops_rel8["jge"]=112+13
	# "jnl" is the standard alias of "jge"; the original table spelled it
	# "jbl", which is kept so existing sources still assemble.
	ops_rel8["jnl"]=112+13
	ops_rel8["jbl"]=112+13
	ops_rel8["jle"]=112+14
	ops_rel8["jng"]=112+14
	ops_rel8["jg"]=112+15
	ops_rel8["jnle"]=112+15
	# single-byte opcodes without operands
	ops_sb["stosb"]=170
	ops_sb["stosw"]=171
	ops_sb["lodsb"]=172
	ops_sb["lodsw"]=173
	ops_sb["ret"]=195
	ops_sb["retf"]=203
}
# error string to insert into listing
# also counts the error so END can exit with a non-zero status
function err(str) {
	errors++
	printf("ERR: %s:\n", str)
}
# submit an assembling result to output
# set label of current line to off
# consumes the global "hex" buffer and advances "pos" by its byte count
function submit(off) {
	if (of=="lst") printf("%04X %-10s %s\n", off, hex, $0)
	if (of=="hex" && hex) printf("%s", hex)
	if (label) {
		if (of=="sym") printf("%s\tsym\t%d\n", label, off)
		# a label that moved between passes means encodings changed size
		if (label in prevsym && prevsym[label]!=off) {
			err(label " different during second pass")
		}
		sym[label]=off
	}
	# two hex digits per emitted byte
	pos=pos+length(hex)/2
	hex=""
}
# gets a symbol's value
# numeric literals are returned as numbers; names starting with "." are local
# labels and get the last non-local label (plabel) prepended; values from the
# previous pass take precedence over those seen so far in this pass; unknown
# names evaluate to 0
function getsym(name) {
	if (substr(name,1,1)=="0" || int(name)) {
		return int(name)
	}
	if (substr(name,1,1)==".") name = plabel name
	if (name in prevsym) {
		return prevsym[name]
	} else if (name in sym) {
		return sym[name]
	}
	return 0
}
# append one byte to the hex buffer
# the value is wrapped into 0..255 so exactly two hex digits are emitted;
# without that, values above 255 would add extra digits and corrupt "pos"
function pushb(val) {
	val=int(val)%256
	if (val<0) val=val+256
	hex=hex sprintf("%02X",val)
}
# append one 16-bit word to the hex buffer, low byte first
# the value is wrapped into 0..65535 so exactly four hex digits are emitted
function pushw(val,    t) {
	val=int(val)%65536
	if (val<0) val=val+65536
	t=sprintf("%04X",val)
	hex=hex substr(t,3) substr(t,1,2)
}
# emit a modR/M byte plus displacement for operand "str"
#   str    operand text: a register name or a "[...]" memory reference
#   spare  value for the middle (reg) field of the modR/M byte
#   rs     register table (r8 or r16) used to resolve a register operand
# mod, rm, rmap, rmp, disp and i are globals, as in the original prototype
function modrm(str, spare, rs) {
	mod=3
	rm=0
	if (str in rs) {
		mod=3
		rm=rs[str]
	} else if (substr(str,1,1)=="[") {
		# memory operand: start without displacement. The original left
		# mod at 3 here, so "[bx]" was encoded as a register operand.
		mod=0
		rmap=""
		gsub(/^\[|\]$/, "", str)
		split(str,rmp,"+")
		disp=0
		for (i in rmp) {
			if (rmp[i] in r16) {
				rmap=rmap rmp[i]
			} else {
				# anything that is not a register adds to the displacement
				disp = disp + getsym(rmp[i])
				mod=2
			}
		}
		if (!rmap) {
			# no register at all: direct address, mod=0 rm=6 + 16-bit address
			mod=0
			rm=6
		} else if (rmap in modstr) {
			rm=modstr[rmap]
		} else {
			# e.g. "bp" on its own has no entry in modstr
			err("Bad modR/M")
		}
	}
	#print("modR/M:", mod, spare, rm)
	pushb(mod*64+spare*8+rm)
	if (mod == 1) {
		pushb(disp)
	} else if (mod == 2 || (mod == 0 && rm == 6)) {
		# the original wrote "rm=6" here, an assignment that is always true
		pushw(disp)
	}
}
# dont process empty lines or comment lines
/^( |\t)*;/ || /^( |\t)*$/ {
	if (of=="lst") printf("%16s%s\n","",$0)
	next
}
# load symbols from previous pass
$2=="sym" {
	prevsym[$1]=int($3)
	#printf("; %s (%s=%X)\n", $0,$1,prevsym[$1])
	next
}
# Start parsing the line
# and set up per-line vars
{
	label=$1
	gsub(/:$/, "",label)
	opn=2
	split("", b, ":")
	byteop=1
	wordop=1
	dwordop=1
}
# no label on line! fixup
/^ / || /^\t/ {
	label=""
	opn=1
}
{
	# implement local labels
	if (substr(label,1,1)==".") {
		label=plabel label
	} else if (label) {
		plabel=label
	}
	# take note if we got a instruction size specifier
	op=$(opn)
	if($(opn+1)=="byte") {
		wordop=0
		dwordop=0
		opn++
	} else if ($(opn+1)=="word") {
		byteop=0
		dwordop=0
		opn++
	} else if ($(opn+1)=="dword") {
		byteop=0
		wordop=0
		opn++
	}
	# collect operands into a[1..c], stopping at a trailing ";" comment
	split("", a, ":")
	c=0
	for (i=opn+1;i<=NF;i++) {
		if (substr($(i),1,1)==";") break
		a[++c]=$(i)
		if (substr($(i),1,1)=="\"") {
			# quoted string: may span several whitespace-separated fields,
			# which are re-joined with single spaces
			j=index(substr($(i),2), "\"")
			if (j) {
				# closing quote is in the same field (e.g. "hi",): keep the
				# text up to and including it. The original always went on
				# to the next field and swallowed the following operands.
				a[c]=substr($(i), 1, j+1)
			} else {
				# bounded by NF rather than by the truth value of the
				# field, so a field "0" inside a string does not end it
				while (i<NF) {
					i++
					j=index($(i), "\"")
					if (j) {
						a[c]=a[c] " " substr($(i), 1, j)
						break
					}
					a[c]=a[c] " " $(i)
				}
			}
		} else {
			gsub(/,$/, "", a[c])
		}
	}
	op1=a[1]
	op2=a[2]
	# rmN: operand N can be encoded through a modR/M byte
	rm1=(op1 in r16) || (op1 in r8) || substr(op1,1,1)=="["
	rm2=(op2 in r16) || (op2 in r8) || substr(op2,1,1)=="["
}
# the source line is parsed by here:
# - op: opcode name
# - a: array of operands, starting with 1
# - c: number of operands
# - byteop, wordop, dwordop: test before encoding, all 1 per default
# pseudo-opcodes
op=="cpu" {
	x86=(a[1]=="8088"||a[1]=="8086")
	x80=(a[1]=="8080"||a[1]=="z80")
	z80=(a[1]=="z80")
	submit(pos);next
}
op=="org" {
	pos=getsym(a[1])
	submit(pos);next
}
op=="equ" {
	val=getsym(a[1])
	submit(val);next
}
# NOTE(review): in the text this file was recovered from, everything between
# the header of the inner loop of the "db" rule ("for(j=2;j...") and the first
# comment of the following opcode rule was missing. The remainder of "db" and
# the pattern of the "mov" rule below are reconstructions from the surrounding
# code. Any rules that originally sat between them could not be recovered
# (the alu[] table, for instance, has no user in the visible code) -- confirm
# against the original asm.awk.
op=="db" {
	for(i=1;i<=c;i++) {
		if (substr(a[i],1,1)=="\"") {
			# emit the characters between the two quotes
			for(j=2;j<length(a[i]);j++) {
				pushb(_ord_[substr(a[i],j,1)])
			}
		} else {
			pushb(getsym(a[i]))
		}
	}
	submit(pos);next
}
# NOTE(review): pattern reconstructed; opcodes 88h-8Eh and B0h+r/B8h+r are
# the 8086 MOV encodings, so this rule is presumably "mov" -- confirm.
x86 && op=="mov" {
	# modrm <-> reg
	if (rm1 && op2 in r8) {
		pushb(136) # 88
		modrm(op1, r8[op2], r8)
	} else if (rm1 && op2 in r16) {
		pushb(137) # 89
		modrm(op1, r16[op2], r16)
	} else if (op1 in r8 && rm2) {
		pushb(138)
		modrm(op2, r8[op1], r8)
	} else if (op1 in r16 && rm2) {
		pushb(139)
		modrm(op2, r16[op1], r16)
	# modrm <-> sreg
	} else if (rm1 && op2 in sreg) {
		pushb(140)
		modrm(op1, sreg[op2], r16)
	} else if (rm2 && op1 in sreg) {
		pushb(142)
		modrm(op2, sreg[op1], r16)
	# reg <- imm
	} else if (op1 in r8) {
		pushb(176+r8[op1])
		pushb(getsym(op2))
	} else if (op1 in r16) {
		pushb(184+r16[op1])
		pushw(getsym(op2))
	}
}
x86 && op=="push" && op1 in r16 {
	pushb(80+r16[op1])
}
x86 && op=="pop" && op1 in r16 {
	pushb(88+r16[op1])
}
x86 && op=="test" && op2 in r8 { # 84
	pushb(132)
	modrm(op1, r8[op2], r8)
}
x86 && op=="int" { # CD
	pushb(205)
	pushb(getsym(op1))
}
# jmp/call: 16-bit displacement relative to the end of the 3-byte instruction
x86 && op=="jmp" {
	pushb(233)
	v=getsym(op1)-(pos+3)
	pushw(v)
}
x86 && op=="call" {
	pushb(232)
	v=getsym(op1)-(pos+3)
	pushw(v)
}
# opcodes with rel8 encoding
# displacement is relative to the end of the 2-byte instruction
x86 && op in ops_rel8 {
	pushb(ops_rel8[op])
	v=getsym(op1)-(pos+2)
	pushb(v)
}
# opcodes without arguments
x86 && op in ops_sb {
	pushb(ops_sb[op])
}
# every line that reaches this point must have produced some bytes
{
	if (!hex) err("no encoding found")
	submit(pos)
}
END{
	if (of=="hex") printf("\n")
	if (errors) exit(1)
}