Improve output format and add more encodings

This commit is contained in:
Nero 2022-01-07 22:06:03 +00:00
parent e408356e4c
commit 074c293a04
1 changed files with 26 additions and 25 deletions

51
asm.awk
View File

@ -97,12 +97,12 @@ BEGIN {
# Report an assembly error: increments the `errors` counter and prints the
# message text `str`.
# NOTE(review): this listing is a commit diff shown without +/- markers, so
# the two printf lines below look like the before/after forms of the same
# statement - confirm which one is current.
function err(str) {
errors++
# unconditional report, prefixed with "ERR: "
printf("ERR: %s:\n", str)
# report only when the output format `of` is "lst", prefixed with "**** "
if (of=="lst") printf("**** %s:\n", str)
}
# submit an assembled result to the output
# set label of current line to off
function submit(off) {
if (of=="lst") printf("%04X %-10s %s\n", off, hex, $0)
if (of=="lst") printf("%04X %-18s %s\n", off, hex, $0)
if (of=="hex" && hex) printf("%s", hex)
if (label) {
if (of=="sym") printf("%s\tsym\t%d\n", label, off)
@ -174,8 +174,8 @@ function modrm(str, spare, rs) {
}
# Don't process empty lines or comment lines: when the output format `of` is
# "lst" the source line is echoed behind a blank left margin, then `next`
# skips all remaining rules for this line.
# NOTE(review): two pattern lines and two printf lines are present; they look
# like the before/after pair of this diff hunk - confirm which one is current.
# Variant 1: first non-blank character is ';', or the line is only spaces/tabs.
/^( |\t)*;/ || /^( |\t)*$/ {
# margin of 16 blanks
if (of=="lst") printf("%16s%s\n","",$0)
# Variant 2: additionally accepts '%' as the first non-blank character.
/^( |\t)*(;|%)/ || /^( |\t)*$/ {
# margin of 24 blanks
if (of=="lst") printf("%24s%s\n","",$0)
next
}
# load symbols from previous pass
@ -273,39 +273,38 @@ op=="db" {
for(j=2;j<length(a[i]);j++) {
pushb(_ord_[substr(a[i],j,1)])
}
} else {
pushb(getsym(a[i]))
}
} else pushb(getsym(a[i]))
}
}
# "dw" directive: for every operand a[1]..a[c], resolve it with getsym() and
# pass the value to pushw() (presumably appends one 16-bit word to the output;
# pushw is defined outside this view - confirm).
# NOTE(review): the multi-line loop and the one-line loop are the same
# statement in two layouts; they look like the before/after pair of this diff
# hunk - only one of them should be live.
op=="dw" {
for(i=1;i<=c;i++) {
pushw(getsym(a[i]))
}
for(i=1;i<=c;i++) pushw(getsym(a[i]))
}
# x86 opcodes in approx. encoding order
# x86 opcodes, ordered by my favourite first
# arithmetics: ADD, SUB, XOR etc
# Runs when `x86` is set and the mnemonic `op` is a key of `alu`.
# Register forms: the opcode byte is N + alu[op]*8, where N is chosen below
#   0: op1 is r/m, op2 is an 8-bit register
#   1: op1 is r/m, op2 is a 16-bit register
#   2: op1 is an 8-bit register, op2 is r/m
#   3: op1 is a 16-bit register, op2 is r/m
# Immediate forms: opcode byte 128 or 129, with alu[op] passed as the second
# ("spare") argument of modrm(), followed by the immediate from getsym(op2).
x86 && op in alu {
# modrm <- reg
if (rm1 && op2 in r8) {
pushb(0+alu[op]*8)
modrm(op1, r8[op2], r8)
} else if (rm1 && op2 in r16) {
pushb(1+alu[op]*8)
modrm(op1, r16[op2], r16)
# reg <- modrm
} else if (op1 in r8 && rm2) {
pushb(2+alu[op]*8)
modrm(op2, r8[op1], r8)
} else if (op1 in r16 && rm2) {
pushb(3+alu[op]*8)
modrm(op2, r16[op1], r16)
# NOTE(review): the next five lines (a full r16 branch plus an r8 branch
# header) are repeated in extended form further down; this listing is a diff
# without +/- markers, so they are most likely the removed lines - confirm.
} else if (op1 in r16) {
pushb(129) # 81
modrm(op1, alu[op], r16)
pushw(getsym(op2))
} else if (op1 in r8) {
# modrm <- imm
# `in` binds tighter than && and ||, so the condition reads
# (byteop && rm1) || (op1 in r8)
} else if (byteop && rm1 || op1 in r8) {
pushb(128) # 80
modrm(op1, alu[op], r8)
# NOTE(review): opcode 0x80 is the 8-bit form and takes an imm8 in the x86
# encoding, but a word is pushed here - pushb looks intended; confirm.
pushw(getsym(op2))
# likewise parsed as (wordop && rm1) || (op1 in r16)
} else if (wordop && rm1 || op1 in r16) {
pushb(129) # 81
modrm(op1, alu[op], r16)
pushw(getsym(op2))
}
}
# MOV variants
@ -337,6 +336,13 @@ x86 && op=="mov" {
} else if (op1 in r16) {
pushb(184+r16[op1])
pushw(getsym(op2))
# modrm <- imm
} else if (byteop && rm1) {
pushb(198)
modrm(rm1, 0, r16)
} else if (wordop && rm1) {
pushb(199)
modrm(rm1, 0, r16)
}
}
x86 && op=="push" && op1 in r16 {
@ -355,24 +361,19 @@ x86 && op=="int" { # CD
}
# JMP: emits opcode byte 233 (0xE9) followed by a word holding
# getsym(op1)-(pos+3). The +3 matches the emitted length (1 opcode byte +
# 2 displacement bytes), so the value is presumably the target relative to the
# end of this instruction - assumes `pos` is this line's offset; confirm.
# NOTE(review): the `v=`/`pushw(v)` pair and the single pushw line compute the
# same value; they look like the before/after forms of this diff hunk.
x86 && op=="jmp" {
pushb(233)
v=getsym(op1)-(pos+3)
pushw(v)
pushw(getsym(op1)-(pos+3))
}
# CALL: emits opcode byte 232 (0xE8) followed by a word holding
# getsym(op1)-(pos+3). The +3 matches the emitted length (1 opcode byte +
# 2 displacement bytes), so the value is presumably the target relative to the
# end of this instruction - assumes `pos` is this line's offset; confirm.
# NOTE(review): the `v=`/`pushw(v)` pair and the single pushw line compute the
# same value; they look like the before/after forms of this diff hunk.
x86 && op=="call" {
pushb(232)
v=getsym(op1)-(pos+3)
pushw(v)
pushw(getsym(op1)-(pos+3))
}
# opcodes with rel8 encoding
# Emits the opcode byte looked up in ops_rel8[op], then one byte holding
# getsym(op1)-(pos+2). The +2 matches the emitted length (opcode + 1 byte).
# NOTE(review): nothing here checks that the displacement fits in one byte;
# whether pushb() range-checks or truncates is not visible from here.
# NOTE(review): the `v=`/`pushb(v)` pair and the single pushb line compute the
# same value; they look like the before/after forms of this diff hunk.
x86 && op in ops_rel8 {
pushb(ops_rel8[op])
v=getsym(op1)-(pos+2)
pushb(v)
pushb(getsym(op1)-(pos+2))
}
# opcodes without arguments
# Emits the single byte stored in ops_sb[op] for the mnemonic `op`.
# NOTE(review): the three-line rule and the one-line rule are identical in
# effect; they look like the before/after forms of this diff hunk - only one
# of them should be live, otherwise the byte would be emitted twice.
x86 && op in ops_sb {
pushb(ops_sb[op])
}
x86 && op in ops_sb { pushb(ops_sb[op]) }
{
if (!hex) err("no encoding found")
submit(pos)