Implement addition expressions, critical expressions, proper modrm

This commit is contained in:
Nero 2023-01-02 01:37:05 +00:00
parent 6f8a24927a
commit 61d3688581

View File

@ -42,6 +42,7 @@ BEGIN {
modstr["dibp"]=3 modstr["dibp"]=3
modstr["si"]=4 modstr["si"]=4
modstr["di"]=5 modstr["di"]=5
modstr["bp"]=6
modstr["bx"]=7 modstr["bx"]=7
# ALU operations # ALU operations
alu["add"]=0 alu["add"]=0
@ -119,6 +120,7 @@ function err(str) {
} }
# submit an assembled result to output # submit an assembled result to output
# set label of current line to off # set label of current line to off
# this outputs a listing line
function submit(off) { function submit(off) {
if (of=="lst") printf("%04X %-18s %s\n", off, hex, $0) if (of=="lst") printf("%04X %-18s %s\n", off, hex, $0)
if (of=="hex" && hex) printf("%s", hex) if (of=="hex" && hex) printf("%s", hex)
@ -132,23 +134,61 @@ function submit(off) {
pos=pos+length(hex)/2 pos=pos+length(hex)/2
hex="" hex=""
} }
# gets a symbol's values # evaluate an expression
function getsym(name) { # globals set:
if (substr(name,1,1)=="0" || int(name)) { # ecrit: value known after first pass, 1=yes, 0=no
return int(name) # eregs: concatenated list of registers that add to this expr (for modrm)
function expr(str) {
val=0
sign=1
ecrit=1
eregs=""
gsub(/-/, "+-", str)
split(str, ep,"+")
for (k in ep) {
if (substr(ep[k],1,1)=="-") {
gsub(/^-/, "", ep[k])
sign = -1
} else {
sign = 1
} }
if (name in r8 || name in r16 || name in sreg) { if (substr(ep[k],1,1)==".") ep[k] = plabel ep[k]
err("Register name " name " is not a valid immediate") if (ep[k] in r8 || ep[k] in r16 || ep[k] in sreg) {
return 0 if (sign > 0) {
eregs=eregs ep[k]
} else {
err("Registers cannot be subtractive in expressions")
} }
if (substr(name,1,1)==".") name = plabel name } else if (match(ep[k], /^[0-9]/)) {
if (name in prevsym) { if (match(ep[k], /h$/)) {
return prevsym[name] ep[i]="0x" ep[k]
} else if (name in sym) {
return sym[name]
} }
return 0 val = val + sign*int(ep[k])
} else if (ep[k] in sym) {
val = val + sign*sym[ep[k]]
} else if (ep[k] in prevsym) {
val = val + sign*prevsym[ep[k]]
ecrit=0
} else {
err("Undefined label " ep[k])
ecrit=0
}
}
return val
} }
# Evaluate str as a plain immediate value.
# Delegates to expr() and reports an error when the expression named any
# register (expr() leaves those names in the global eregs).
# The evaluated number is returned either way and is also left in the global val.
function imm(str) {
	val = expr(str)
	# fast path: no registers were part of the expression
	if (!eregs) return val
	err("Registers not allowed here")
	return val
}
# Evaluate str as a "critical" immediate: on top of the checks done by imm(),
# the expression must not have cleared the global ecrit flag, which expr()
# sets to 0 when a label is only found in prevsym or is not defined at all.
# The evaluated number is returned either way and is also left in the global val.
function crit(str) {
	val = imm(str)
	# fast path: every term was already known
	if (ecrit) return val
	err("Labels from below not allowed here")
	return val
}
function push_byte(val) { function push_byte(val) {
#print("; pushb " val) #print("; pushb " val)
if (val<0 || val>=256) err("Value " val " does not fit in byte") if (val<0 || val>=256) err("Value " val " does not fit in byte")
@ -165,29 +205,27 @@ function push_word(val) {
} }
# rs is the register set (r8, r16) that can show up in str # rs is the register set (r8, r16) that can show up in str
function push_modrm(str, spare, rs) { function push_modrm(str, spare, rs) {
mod=3 mod=0
rm=0 rm=0
if (str in rs) { if (str in rs) {
mod=3 mod=3
rm=rs[str] rm=rs[str]
} else if (substr(str,1,1)=="[") { } else if (substr(str,1,1)=="[") {
rmap=""
gsub(/^\[|\]$/, "", str) gsub(/^\[|\]$/, "", str)
split(str,rmp,"+") disp=expr(str)
disp=0 if (!ecrit || disp) {
for (i in rmp) {
if (rmp[i] in r16) {
rmap=rmap rmp[i]
} else {
disp = disp + getsym(rmp[i])
mod=2 mod=2
} }
} if (!eregs) {
if (!rmap) {
mod=0 mod=0
rm=6 rm=6
} else if (rmap in modstr) { } else if (eregs in modstr) {
rm=modstr[rmap] rm=modstr[eregs]
# [BP] on its own is unencodable: mod=0 with rm=6 is decoded as a
# direct address (displacement only), so we upgrade [BP] to [BP+00]
if (mod==0 && rm==6) {
mod=1
}
} else { } else {
err("Bad modR/M") err("Bad modR/M")
} }
@ -333,11 +371,11 @@ op=="cpu" {
next next
} }
op=="org" { op=="org" {
pos=getsym(a[1]) pos=crit(a[1])
submit(pos);next submit(pos);next
} }
op=="equ" { op=="equ" {
val=getsym(a[1]) val=crit(a[1])
submit(val);next submit(val);next
} }
op=="db" { op=="db" {
@ -346,11 +384,11 @@ op=="db" {
for(j=2;j<length(a[i]);j++) { for(j=2;j<length(a[i]);j++) {
push_byte(_ord_[substr(a[i],j,1)]) push_byte(_ord_[substr(a[i],j,1)])
} }
} else push_byte(getsym(a[i])) } else push_byte(imm(a[i]))
} }
} }
op=="dw" { op=="dw" {
for(i=1;i<=c;i++) push_word(getsym(a[i])) for(i=1;i<=c;i++) push_word(imm(a[i]))
} }
# arithmetics: ADD, SUB, XOR etc # arithmetics: ADD, SUB, XOR etc
op in alu { op in alu {
@ -358,9 +396,9 @@ op in alu {
if (!hex) { if (!hex) {
size=push_op_fixed_spare(0x80, alu[op]) size=push_op_fixed_spare(0x80, alu[op])
if (size==1) { if (size==1) {
push_byte(getsym(op2)) push_byte(imm(op2))
} else if (size==2) { } else if (size==2) {
push_word(getsym(op2)) push_word(imm(op2))
} }
} }
} }
@ -370,9 +408,9 @@ op=="test" {
if (!hex) { if (!hex) {
size=push_op_fixed_spare(246, 0) size=push_op_fixed_spare(246, 0)
if (size==1) { if (size==1) {
push_byte(getsym(op2)) push_byte(imm(op2))
} else if (size==2) { } else if (size==2) {
push_word(getsym(op2)) push_word(imm(op2))
} }
} }
} }
@ -407,10 +445,10 @@ op=="mov" {
# reg <- imm # reg <- imm
} else if (op1 in r8) { } else if (op1 in r8) {
push_byte(176+r8[op1]) push_byte(176+r8[op1])
push_byte(getsym(op2)) push_byte(imm(op2))
} else if (op1 in r16) { } else if (op1 in r16) {
push_byte(184+r16[op1]) push_byte(184+r16[op1])
push_word(getsym(op2)) push_word(imm(op2))
# modrm <- imm # modrm <- imm
} else if (byteop && rm1) { } else if (byteop && rm1) {
push_byte(198) push_byte(198)
@ -431,15 +469,22 @@ op=="int" && op1=="3" { # CC breakpoint
} }
op=="int" { # CD op=="int" { # CD
push_byte(205) push_byte(205)
push_byte(getsym(op1)) push_byte(imm(op1))
} }
op=="jmp" && wordop { op=="jmp" {
val=imm(op1)-(pos+2)
if (val>-127 && val<128 && ecrit) {
push_byte(235)
push_signed_byte(val)
} else {
push_byte(233) push_byte(233)
push_word(getsym(op1)-(pos+3)) push_word(val-1)
}
submit(pos);next
} }
op=="call" && wordop { op=="call" && wordop {
push_byte(232) push_byte(232)
push_word(getsym(op1)-(pos+3)) push_word(imm(op1)-(pos+3))
} }
op=="neg" { op=="neg" {
push_op_fixed_spare(246, 3) push_op_fixed_spare(246, 3)
@ -447,7 +492,7 @@ op=="neg" {
# opcodes with rel8 encoding # opcodes with rel8 encoding
op in ops_rel8 && byteop && c==1 { op in ops_rel8 && byteop && c==1 {
push_byte(ops_rel8[op]) push_byte(ops_rel8[op])
push_signed_byte(getsym(op1)-(pos+2)) push_signed_byte(imm(op1)-(pos+2))
} }
# opcodes without arguments # opcodes without arguments
op in ops_sb { push_byte(ops_sb[op]) } op in ops_sb { push_byte(ops_sb[op]) }