Improve assembler: can now properly assemble lain.com

This commit is contained in:
Nero 2023-01-01 20:44:43 +00:00
parent 77b1ad7cef
commit 5100d1d84e
5 changed files with 2170 additions and 91 deletions

View File

@ -23,7 +23,8 @@ default: fd$(FLOPPY).img
# COM programs # COM programs
%.com: src/%.asm src/*.inc %.com: src/%.asm src/*.inc
$(NASM) $(NASM_ARGS) -Isrc -l $(@:.com=.lst) -o $@ $< $(AS) $@ $<
# $(NASM) $(NASM_ARGS) -Isrc -l $(@:.com=.lst) -o $@ $<
# Bootloader images # Bootloader images
%.bs: boot/%.asm %.bs: boot/%.asm

View File

@ -1,4 +1,7 @@
#!/bin/sh #!/bin/sh
outfile="$1" outfile="$1"
infile="$2" infile="$2"
( awk -f host/asm.awk -v of=sym <"$infile"; cat "$infile") | awk -f host/asm.awk -v of=lst listing="${outfile%.*}.lst"
# Two awk runs over the same input: the first (of=sym) writes its output ahead
# of the source text, and the second (of=lst) reads both and writes the listing.
( awk -f host/asm.awk -v of=sym <"$infile"; cat "$infile") | awk -f host/asm.awk -v of=lst >"$listing"
# Print every listing line containing '****' (plus the line after it) and abort
# if any is found -- presumably the marker asm.awk's err() puts into the
# listing; confirm against asm.awk.
grep -A1 '\*\*\*\*' "$listing" && exit 1
# Take the second space-separated field of each listing line (expected to be
# the hex bytes) and convert the hex back to binary.
cut -d' ' -f2 "$listing" | xxd -r -p >"$outfile"

View File

@ -83,8 +83,16 @@ BEGIN {
ops_rel8["jng"]=112+14 ops_rel8["jng"]=112+14
ops_rel8["jg"]=112+15 ops_rel8["jg"]=112+15
ops_rel8["jnle"]=112+15 ops_rel8["jnle"]=112+15
ops_rel8["loopne"]=224
ops_rel8["loopnz"]=224
ops_rel8["loope"]=225
ops_rel8["loopz"]=225
ops_rel8["loop"]=226
ops_rel8["jmp"]=235
# single-byte opcodes without operands # single-byte opcodes without operands
ops_sb["nop"]=128+16 ops_sb["nop"]=128+16
ops_sb["movsb"]=164
ops_sb["movsw"]=165
ops_sb["stosb"]=170 ops_sb["stosb"]=170
ops_sb["stosw"]=171 ops_sb["stosw"]=171
ops_sb["lodsb"]=172 ops_sb["lodsb"]=172
@ -92,6 +100,17 @@ BEGIN {
ops_sb["ret"]=195 ops_sb["ret"]=195
ops_sb["retf"]=203 ops_sb["retf"]=203
ops_sb["hlt"]=244 ops_sb["hlt"]=244
# prefix instructions: mnemonic -> prefix byte emitted before the real opcode
# segment overrides
prefix["es"]=38 # 26
prefix["cs"]=46 # 2E
prefix["ss"]=54 # 36
prefix["ds"]=62 # 3E
prefix["lock"]=240 # F0
# string repeat prefixes: F3 is REP/REPE/REPZ, F2 is REPNE/REPNZ
prefix["rep"]=243
prefix["repe"]=243
prefix["repz"]=243
prefix["repne"]=242
prefix["repnz"]=242
} }
# error string to insert into listing # error string to insert into listing
function err(str) { function err(str) {
@ -118,6 +137,10 @@ function getsym(name) {
if (substr(name,1,1)=="0" || int(name)) { if (substr(name,1,1)=="0" || int(name)) {
return int(name) return int(name)
} }
# a register name has no numeric value; report it instead of treating it as an unknown symbol
if (name in r8 || name in r16 || name in sreg) {
err("Register name " name " is not a valid immediate")
return 0
}
if (substr(name,1,1)==".") name = plabel name if (substr(name,1,1)==".") name = plabel name
if (name in prevsym) { if (name in prevsym) {
return prevsym[name] return prevsym[name]
@ -126,25 +149,22 @@ function getsym(name) {
} }
return 0 return 0
} }
# encode a signed byte function push_byte(val) {
function pushb(val) {
#print("; pushb " val) #print("; pushb " val)
if (val<0 || val>=256) err("Value " val " does not fit in byte") if (val<0 || val>=256) err("Value " val " does not fit in byte")
hex=hex sprintf("%02X",val) hex=hex sprintf("%02X",val)
} }
# encode a signed byte (for rel8) function push_signed_byte(val) {
function pushsb(val) {
while (val < 0) val = val + 256 while (val < 0) val = val + 256
pushb(val) push_byte(val)
} }
# encode a word function push_word(val) {
function pushw(val) {
while (val<0) val=val+65536 while (val<0) val=val+65536
t=sprintf("%04X",val) t=sprintf("%04X",val)
hex=hex substr(t,3) substr(t,1,2) hex=hex substr(t,3) substr(t,1,2)
} }
# encode a modrm byte # rs is the register set (r8, r16) that can show up in str
function modrm(str, spare, rs) { function push_modrm(str, spare, rs) {
mod=3 mod=3
rm=0 rm=0
if (str in rs) { if (str in rs) {
@ -173,28 +193,43 @@ function modrm(str, spare, rs) {
} }
} }
#print("; modR/M:", mod, spare, rm) #print("; modR/M:", mod, spare, rm)
pushb(mod*64+spare*8+rm) push_byte(mod*64+spare*8+rm)
if (mod == 1) { if (mod == 1) {
pushb(disp) push_byte(disp)
} else if (mod == 2 || (mod == 0 && rm == 6)) { } else if (mod == 2 || (mod == 0 && rm == 6)) {
pushw(disp) push_word(disp)
} }
} }
# common encoding: two operands, one is modrm, other in spare field # common encoding: two operands, one is modrm, other is register via spare field
# last two bits of opcode specify width (byte/word) and whether modrm operand is first or second # last two bits of opcode specify width (byte/word) and whether modrm operand is first or second
function enc(opcode) { function push_op_modrm(opcode) {
# Pick the opcode variant from the operand combination:
#   opcode+0 / opcode+1: op1 is the modrm operand, op2 is a byte / word register
#   opcode+2 / opcode+3: op1 is a byte / word register, op2 is the modrm operand
# The register number goes into the spare field of the modrm byte.
# Nothing is emitted when no combination matches.
if (rm1 && byteop && op2 in r8) {
push_byte(opcode)
push_modrm(op1, r8[op2], r8)
} else if (rm1 && wordop && op2 in r16) {
push_byte(opcode+1)
push_modrm(op1, r16[op2], r16)
} else if (rm2 && byteop && op1 in r8) {
push_byte(opcode+2)
push_modrm(op2, r8[op1], r8)
} else if (rm2 && wordop && op1 in r16) {
push_byte(opcode+2+1)
push_modrm(op2, r16[op1], r16)
}
} }
# common encoding: one operand encoded as modrm with fixed spare field # common encoding: one operand encoded as modrm with fixed spare field
# operand can be byte or word, encoded in last bit of opcode # operand can be byte or word, encoded in last bit of opcode
function encfs(opcode, spare) { function push_op_fixed_spare(opcode, spare) {
if (wordop) { if (byteop) {
pushb(opcode) push_byte(opcode)
modrm(op1, spare, r16) push_modrm(op1, spare, r8)
} else if (byteop) { return 1
pushb(opcode) } else if (wordop) {
modrm(op1, spare, r8) push_byte(opcode+1)
push_modrm(op1, spare, r16)
return 2
} }
return 0
} }
# dont process empty lines or comment lines # dont process empty lines or comment lines
@ -233,6 +268,12 @@ $2=="sym" {
} }
# take note if we got a instruction size specifier # take note if we got a instruction size specifier
op=$(opn) op=$(opn)
if (op in prefix) {
push_byte(prefix[op])
opn=opn+1
op=$(opn)
}
if($(opn+1)=="byte" || $(opn+1)=="short") { if($(opn+1)=="byte" || $(opn+1)=="short") {
wordop=0 wordop=0
dwordop=0 dwordop=0
@ -303,115 +344,113 @@ op=="db" {
for(i=1;i<=c;i++) { for(i=1;i<=c;i++) {
if (substr(a[i],1,1)=="\"") { if (substr(a[i],1,1)=="\"") {
for(j=2;j<length(a[i]);j++) { for(j=2;j<length(a[i]);j++) {
pushb(_ord_[substr(a[i],j,1)]) push_byte(_ord_[substr(a[i],j,1)])
} }
} else pushb(getsym(a[i])) } else push_byte(getsym(a[i]))
} }
} }
op=="dw" { op=="dw" {
for(i=1;i<=c;i++) pushw(getsym(a[i])) for(i=1;i<=c;i++) push_word(getsym(a[i]))
} }
# arithmetics: ADD, SUB, XOR etc # arithmetics: ADD, SUB, XOR etc
op in alu { op in alu {
# modrm <- reg push_op_modrm(alu[op]*8)
if (rm1 && op2 in r8) { if (!hex) {
pushb(0+alu[op]*8) size=push_op_fixed_spare(0x80, alu[op])
modrm(op1, r8[op2], r8) if (size==1) {
} else if (rm1 && op2 in r16) { push_byte(getsym(op2))
pushb(1+alu[op]*8) } else if (size==2) {
modrm(op1, r16[op2], r16) push_word(getsym(op2))
# reg <- modrm
} else if (op1 in r8 && rm2) {
pushb(2+alu[op]*8)
modrm(op2, r8[op1], r8)
} else if (op1 in r16 && rm2) {
pushb(3+alu[op]*8)
modrm(op2, r16[op1], r16)
# modrm <- imm
} else if (byteop && rm1 || op1 in r8) {
pushb(128) # 80
modrm(op1, alu[op], r8)
pushb(getsym(op2))
} else if (wordop && rm1 || op1 in r16) {
pushb(129) # 81
modrm(op1, alu[op], r16)
pushw(getsym(op2))
} }
} }
}
# TEST: a bitwise AND that only sets the flags and discards the result
op=="test" {
  # 84/85: TEST r/m, reg. TEST has no direction bit (86/87 are XCHG), so the
  # generic push_op_modrm() helper would emit the wrong opcode for the
  # "reg, r/m" operand order. The operation is commutative, so that order is
  # encoded with the same opcode and the operands swapped.
  if (rm1 && byteop && op2 in r8) {
    push_byte(132) # 84
    push_modrm(op1, r8[op2], r8)
  } else if (rm1 && wordop && op2 in r16) {
    push_byte(133) # 85
    push_modrm(op1, r16[op2], r16)
  } else if (rm2 && byteop && op1 in r8) {
    push_byte(132) # 84
    push_modrm(op2, r8[op1], r8)
  } else if (rm2 && wordop && op1 in r16) {
    push_byte(133) # 85
    push_modrm(op2, r16[op1], r16)
  } else {
    # F6 /0 ib, F7 /0 iw: TEST r/m, imm (spare 7 would be IDIV)
    size=push_op_fixed_spare(246, 0)
    if (size==1) {
      push_byte(getsym(op2))
    } else if (size==2) {
      push_word(getsym(op2))
    }
  }
}
# SAR r/m, 1: arithmetic shift right by one bit.
# The group-2 shift opcodes D0/D1 select the operation via the spare field;
# /0 is ROL, SAR is /7.
op=="sar" && op2=="1" {
  push_op_fixed_spare(208, 7) # D0 /7 (byte), D1 /7 (word)
}
# INC on a modrm operand: FE /0 for byte operands; push_op_fixed_spare adds 1
# to the opcode (FF /0) when the operand is a word
op=="inc" && rm1 {
push_op_fixed_spare(254, 0) # FE /0
}
# MOV variants # MOV variants
op=="mov" { op=="mov" {
# modrm <-> reg # modrm <-> reg
if (rm1 && op2 in r8) { if (rm1 && op2 in r8) {
pushb(136) # 88 push_byte(136) # 88
modrm(op1, r8[op2], r8) push_modrm(op1, r8[op2], r8)
} else if (rm1 && op2 in r16) { } else if (rm1 && op2 in r16) {
pushb(137) # 89 push_byte(137) # 89
modrm(op1, r16[op2], r16) push_modrm(op1, r16[op2], r16)
} else if (op1 in r8 && rm2) { } else if (op1 in r8 && rm2) {
pushb(138) push_byte(138)
modrm(op2, r8[op1], r8) push_modrm(op2, r8[op1], r8)
} else if (op1 in r16 && rm2) { } else if (op1 in r16 && rm2) {
pushb(139) push_byte(139)
modrm(op2, r16[op1], r16) push_modrm(op2, r16[op1], r16)
# modrm <-> sreg # modrm <-> sreg
} else if (rm1 && op2 in sreg) { } else if (rm1 && op2 in sreg) {
pushb(140) push_byte(140)
modrm(op1, sreg[op2], r16) push_modrm(op1, sreg[op2], r16)
} else if (rm2 && op1 in sreg) { } else if (rm2 && op1 in sreg) {
pushb(142) push_byte(142)
modrm(op2, sreg[op1], r16) push_modrm(op2, sreg[op1], r16)
# reg <- imm # reg <- imm
} else if (op1 in r8) { } else if (op1 in r8) {
pushb(176+r8[op1]) push_byte(176+r8[op1])
pushb(getsym(op2)) push_byte(getsym(op2))
} else if (op1 in r16) { } else if (op1 in r16) {
pushb(184+r16[op1]) push_byte(184+r16[op1])
pushw(getsym(op2)) push_word(getsym(op2))
# modrm <- imm # modrm <- imm
} else if (byteop && rm1) { } else if (byteop && rm1) {
pushb(198) push_byte(198)
modrm(rm1, 0, r16) push_modrm(rm1, 0, r16)
} else if (wordop && rm1) { } else if (wordop && rm1) {
pushb(199) push_byte(199)
modrm(rm1, 0, r16) push_modrm(rm1, 0, r16)
} }
} }
op=="push" && op1 in r16 { op=="push" && op1 in r16 {
pushb(80+r16[op1]) push_byte(80+r16[op1])
} }
op=="pop" && op1 in r16 { op=="pop" && op1 in r16 {
pushb(88+r16[op1]) push_byte(88+r16[op1])
} }
op=="test" && op2 in r8 { # 84 op=="int" && op1=="3" { # CC breakpoint
pushb(132) push_byte(204)
modrm(op1, r8[op2], r8)
} }
op=="int" { # CD op=="int" { # CD
pushb(205) push_byte(205)
pushb(getsym(op1)) push_byte(getsym(op1))
}
op=="jmp" && byteop {
pushb(235)
pushsb(getsym(op1)-(pos+2))
} }
op=="jmp" && wordop { op=="jmp" && wordop {
pushb(233) push_byte(233)
pushw(getsym(op1)-(pos+3)) push_word(getsym(op1)-(pos+3))
} }
op=="call" { op=="call" && wordop {
pushb(232) push_byte(232)
pushw(getsym(op1)-(pos+3)) push_word(getsym(op1)-(pos+3))
} }
op=="neg" { op=="neg" {
encfs(246, 3) push_op_fixed_spare(246, 3)
} }
# opcodes with rel8 encoding # opcodes with rel8 encoding
op in ops_rel8 && byteop { op in ops_rel8 && byteop && c==1 {
pushb(ops_rel8[op]) push_byte(ops_rel8[op])
pushb(getsym(op1)-(pos+2)) push_signed_byte(getsym(op1)-(pos+2))
} }
# opcodes without arguments # opcodes without arguments
op in ops_sb { pushb(ops_sb[op]) } op in ops_sb { push_byte(ops_sb[op]) }
{ {
if (!hex) err("no encoding found") if (!hex) err("no encoding found")
submit(pos) submit(pos)

2
host/bin2db.sh Executable file
View File

@ -0,0 +1,2 @@
#!/bin/sh
# Print the bytes of the file "$1" as assembler "db" lines, eight bytes per line.
# xxd -p emits bare hex digits without offset, padding or ASCII column, so a
# final line with fewer than eight bytes becomes a shorter db line instead of
# producing empty fields.
xxd -p -c8 "$1"|sed 's/[0-9A-Fa-f]\{2\}/0x&, /g;s/, $//;s/^/ db /'

2034
src/lain.asm Normal file

File diff suppressed because it is too large Load Diff