Improve output format and add more encodings

This commit is contained in:
Nero 2022-01-07 22:06:03 +00:00
parent e408356e4c
commit 074c293a04

51
asm.awk
View File

@ -97,12 +97,12 @@ BEGIN {
# error string to insert into listing # error string to insert into listing
function err(str) { function err(str) {
errors++ errors++
printf("ERR: %s:\n", str) if (of=="lst") printf("**** %s:\n", str)
} }
# submit an assembling result to output # submit an assembling result to output
# set label of current line to off # set label of current line to off
function submit(off) { function submit(off) {
if (of=="lst") printf("%04X %-10s %s\n", off, hex, $0) if (of=="lst") printf("%04X %-18s %s\n", off, hex, $0)
if (of=="hex" && hex) printf("%s", hex) if (of=="hex" && hex) printf("%s", hex)
if (label) { if (label) {
if (of=="sym") printf("%s\tsym\t%d\n", label, off) if (of=="sym") printf("%s\tsym\t%d\n", label, off)
@ -174,8 +174,8 @@ function modrm(str, spare, rs) {
} }
# don't process empty lines or comment lines # don't process empty lines or comment lines
/^( |\t)*;/ || /^( |\t)*$/ { /^( |\t)*(;|%)/ || /^( |\t)*$/ {
if (of=="lst") printf("%16s%s\n","",$0) if (of=="lst") printf("%24s%s\n","",$0)
next next
} }
# load symbols from previous pass # load symbols from previous pass
@ -273,39 +273,38 @@ op=="db" {
for(j=2;j<length(a[i]);j++) { for(j=2;j<length(a[i]);j++) {
pushb(_ord_[substr(a[i],j,1)]) pushb(_ord_[substr(a[i],j,1)])
} }
} else { } else pushb(getsym(a[i]))
pushb(getsym(a[i]))
}
} }
} }
op=="dw" { op=="dw" {
for(i=1;i<=c;i++) { for(i=1;i<=c;i++) pushw(getsym(a[i]))
pushw(getsym(a[i]))
} }
} # x86 opcodes, ordered by my favourite first
# x86 opcodes in approx. encoding order
# arithmetics: ADD, SUB, XOR etc # arithmetics: ADD, SUB, XOR etc
x86 && op in alu { x86 && op in alu {
# modrm <- reg
if (rm1 && op2 in r8) { if (rm1 && op2 in r8) {
pushb(0+alu[op]*8) pushb(0+alu[op]*8)
modrm(op1, r8[op2], r8) modrm(op1, r8[op2], r8)
} else if (rm1 && op2 in r16) { } else if (rm1 && op2 in r16) {
pushb(1+alu[op]*8) pushb(1+alu[op]*8)
modrm(op1, r16[op2], r16) modrm(op1, r16[op2], r16)
# reg <- modrm
} else if (op1 in r8 && rm2) { } else if (op1 in r8 && rm2) {
pushb(2+alu[op]*8) pushb(2+alu[op]*8)
modrm(op2, r8[op1], r8) modrm(op2, r8[op1], r8)
} else if (op1 in r16 && rm2) { } else if (op1 in r16 && rm2) {
pushb(3+alu[op]*8) pushb(3+alu[op]*8)
modrm(op2, r16[op1], r16) modrm(op2, r16[op1], r16)
} else if (op1 in r16) { # modrm <- imm
pushb(129) # 81 } else if (byteop && rm1 || op1 in r8) {
modrm(op1, alu[op], r16)
pushw(getsym(op2))
} else if (op1 in r8) {
pushb(128) # 80 pushb(128) # 80
modrm(op1, alu[op], r8) modrm(op1, alu[op], r8)
pushw(getsym(op2)) pushw(getsym(op2))
} else if (wordop && rm1 || op1 in r16) {
pushb(129) # 81
modrm(op1, alu[op], r16)
pushw(getsym(op2))
} }
} }
# MOV variants # MOV variants
@ -337,6 +336,13 @@ x86 && op=="mov" {
} else if (op1 in r16) { } else if (op1 in r16) {
pushb(184+r16[op1]) pushb(184+r16[op1])
pushw(getsym(op2)) pushw(getsym(op2))
# modrm <- imm
} else if (byteop && rm1) {
pushb(198)
modrm(rm1, 0, r16)
} else if (wordop && rm1) {
pushb(199)
modrm(rm1, 0, r16)
} }
} }
x86 && op=="push" && op1 in r16 { x86 && op=="push" && op1 in r16 {
@ -355,24 +361,19 @@ x86 && op=="int" { # CD
} }
x86 && op=="jmp" { x86 && op=="jmp" {
pushb(233) pushb(233)
v=getsym(op1)-(pos+3) pushw(getsym(op1)-(pos+3))
pushw(v)
} }
x86 && op=="call" { x86 && op=="call" {
pushb(232) pushb(232)
v=getsym(op1)-(pos+3) pushw(getsym(op1)-(pos+3))
pushw(v)
} }
# opcodes with rel8 encoding # opcodes with rel8 encoding
x86 && op in ops_rel8 { x86 && op in ops_rel8 {
pushb(ops_rel8[op]) pushb(ops_rel8[op])
v=getsym(op1)-(pos+2) pushb(getsym(op1)-(pos+2))
pushb(v)
} }
# opcodes without arguments # opcodes without arguments
x86 && op in ops_sb { x86 && op in ops_sb { pushb(ops_sb[op]) }
pushb(ops_sb[op])
}
{ {
if (!hex) err("no encoding found") if (!hex) err("no encoding found")
submit(pos) submit(pos)