Improve assembler: can now properly assemble lain.com
This commit is contained in:
parent
77b1ad7cef
commit
5100d1d84e
3
Makefile
3
Makefile
@ -23,7 +23,8 @@ default: fd$(FLOPPY).img
|
|||||||
|
|
||||||
# COM programs
|
# COM programs
|
||||||
%.com: src/%.asm src/*.inc
|
%.com: src/%.asm src/*.inc
|
||||||
$(NASM) $(NASM_ARGS) -Isrc -l $(@:.com=.lst) -o $@ $<
|
$(AS) $@ $<
|
||||||
|
# $(NASM) $(NASM_ARGS) -Isrc -l $(@:.com=.lst) -o $@ $<
|
||||||
|
|
||||||
# Bootloader images
|
# Bootloader images
|
||||||
%.bs: boot/%.asm
|
%.bs: boot/%.asm
|
||||||
|
5
host/as
5
host/as
@ -1,4 +1,7 @@
|
|||||||
#!/bin/sh
|
#!/bin/sh
|
||||||
outfile="$1"
|
outfile="$1"
|
||||||
infile="$2"
|
infile="$2"
|
||||||
( awk -f host/asm.awk -v of=sym <"$infile"; cat "$infile") | awk -f host/asm.awk -v of=lst
|
listing="${outfile%.*}.lst"
|
||||||
|
( awk -f host/asm.awk -v of=sym <"$infile"; cat "$infile") | awk -f host/asm.awk -v of=lst >"$listing"
|
||||||
|
grep -A1 '\*\*\*\*' "$listing" && exit 1
|
||||||
|
cut -d' ' -f2 "$listing" | xxd -r -p >"$outfile"
|
||||||
|
217
host/asm.awk
217
host/asm.awk
@ -83,8 +83,16 @@ BEGIN {
|
|||||||
ops_rel8["jng"]=112+14
|
ops_rel8["jng"]=112+14
|
||||||
ops_rel8["jg"]=112+15
|
ops_rel8["jg"]=112+15
|
||||||
ops_rel8["jnle"]=112+15
|
ops_rel8["jnle"]=112+15
|
||||||
|
ops_rel8["loopne"]=224
|
||||||
|
ops_rel8["loopnz"]=224
|
||||||
|
ops_rel8["loope"]=225
|
||||||
|
ops_rel8["loopz"]=225
|
||||||
|
ops_rel8["loop"]=226
|
||||||
|
ops_rel8["jmp"]=235
|
||||||
# single-byte opcodes without operands
|
# single-byte opcodes without operands
|
||||||
ops_sb["nop"]=128+16
|
ops_sb["nop"]=128+16
|
||||||
|
ops_sb["movsb"]=164
|
||||||
|
ops_sb["movsw"]=165
|
||||||
ops_sb["stosb"]=170
|
ops_sb["stosb"]=170
|
||||||
ops_sb["stosw"]=171
|
ops_sb["stosw"]=171
|
||||||
ops_sb["lodsb"]=172
|
ops_sb["lodsb"]=172
|
||||||
@ -92,6 +100,17 @@ BEGIN {
|
|||||||
ops_sb["ret"]=195
|
ops_sb["ret"]=195
|
||||||
ops_sb["retf"]=203
|
ops_sb["retf"]=203
|
||||||
ops_sb["hlt"]=244
|
ops_sb["hlt"]=244
|
||||||
|
# prefix instructions
|
||||||
|
prefix["es"]=38
|
||||||
|
prefix["cs"]=46
|
||||||
|
prefix["ss"]=54
|
||||||
|
prefix["ds"]=64
|
||||||
|
prefix["lock"]=240
|
||||||
|
prefix["rep"]=243
|
||||||
|
prefix["repe"]=242
|
||||||
|
prefix["repz"]=242
|
||||||
|
prefix["repne"]=243
|
||||||
|
prefix["repnz"]=243
|
||||||
}
|
}
|
||||||
# error string to insert into listing
|
# error string to insert into listing
|
||||||
function err(str) {
|
function err(str) {
|
||||||
@ -118,6 +137,10 @@ function getsym(name) {
|
|||||||
if (substr(name,1,1)=="0" || int(name)) {
|
if (substr(name,1,1)=="0" || int(name)) {
|
||||||
return int(name)
|
return int(name)
|
||||||
}
|
}
|
||||||
|
if (name in r8 || name in r16 || name in sreg) {
|
||||||
|
err("Register name " name "is not a valid immediate")
|
||||||
|
return 0
|
||||||
|
}
|
||||||
if (substr(name,1,1)==".") name = plabel name
|
if (substr(name,1,1)==".") name = plabel name
|
||||||
if (name in prevsym) {
|
if (name in prevsym) {
|
||||||
return prevsym[name]
|
return prevsym[name]
|
||||||
@ -126,25 +149,22 @@ function getsym(name) {
|
|||||||
}
|
}
|
||||||
return 0
|
return 0
|
||||||
}
|
}
|
||||||
# encode a signed byte
|
function push_byte(val) {
|
||||||
function pushb(val) {
|
|
||||||
#print("; pushb " val)
|
#print("; pushb " val)
|
||||||
if (val<0 || val>=256) err("Value " val " does not fit in byte")
|
if (val<0 || val>=256) err("Value " val " does not fit in byte")
|
||||||
hex=hex sprintf("%02X",val)
|
hex=hex sprintf("%02X",val)
|
||||||
}
|
}
|
||||||
# encode a signed byte (for rel8)
|
function push_signed_byte(val) {
|
||||||
function pushsb(val) {
|
|
||||||
while (val < 0) val = val + 256
|
while (val < 0) val = val + 256
|
||||||
pushb(val)
|
push_byte(val)
|
||||||
}
|
}
|
||||||
# encode a word
|
function push_word(val) {
|
||||||
function pushw(val) {
|
|
||||||
while (val<0) val=val+65536
|
while (val<0) val=val+65536
|
||||||
t=sprintf("%04X",val)
|
t=sprintf("%04X",val)
|
||||||
hex=hex substr(t,3) substr(t,1,2)
|
hex=hex substr(t,3) substr(t,1,2)
|
||||||
}
|
}
|
||||||
# encode a modrm byte
|
# rs is the register set (r8, r16) that can show up in str
|
||||||
function modrm(str, spare, rs) {
|
function push_modrm(str, spare, rs) {
|
||||||
mod=3
|
mod=3
|
||||||
rm=0
|
rm=0
|
||||||
if (str in rs) {
|
if (str in rs) {
|
||||||
@ -173,28 +193,43 @@ function modrm(str, spare, rs) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
#print("; modR/M:", mod, spare, rm)
|
#print("; modR/M:", mod, spare, rm)
|
||||||
pushb(mod*64+spare*8+rm)
|
push_byte(mod*64+spare*8+rm)
|
||||||
if (mod == 1) {
|
if (mod == 1) {
|
||||||
pushb(disp)
|
push_byte(disp)
|
||||||
} else if (mod == 2 || (mod == 0 && rm == 6)) {
|
} else if (mod == 2 || (mod == 0 && rm == 6)) {
|
||||||
pushw(disp)
|
push_word(disp)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
# common encoding: two operands, one is modrm, other in spare field
|
# common encoding: two operands, one is modrm, other is register via spare field
|
||||||
# last two bits of opcode specify width (byte/word) and whether modrm operand is first or second
|
# last two bits of opcode specify width (byte/word) and whether modrm operand is first or second
|
||||||
function enc(opcode) {
|
function push_op_modrm(opcode) {
|
||||||
|
if (rm1 && byteop && op2 in r8) {
|
||||||
|
push_byte(opcode)
|
||||||
|
push_modrm(op1, r8[op2], r8)
|
||||||
|
} else if (rm1 && wordop && op2 in r16) {
|
||||||
|
push_byte(opcode+1)
|
||||||
|
push_modrm(op1, r16[op2], r16)
|
||||||
|
} else if (rm2 && byteop && op1 in r8) {
|
||||||
|
push_byte(opcode+2)
|
||||||
|
push_modrm(op2, r8[op1], r8)
|
||||||
|
} else if (rm2 && wordop && op1 in r16) {
|
||||||
|
push_byte(opcode+2+1)
|
||||||
|
push_modrm(op2, r16[op1], r16)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
# common encoding: one operand encoded as modrm with fixed spare field
|
# common encoding: one operand encoded as modrm with fixed spare field
|
||||||
# operand can be byte or word, encoded in last bit of opcode
|
# operand can be byte or word, encoded in last bit of opcode
|
||||||
function encfs(opcode, spare) {
|
function push_op_fixed_spare(opcode, spare) {
|
||||||
if (wordop) {
|
if (byteop) {
|
||||||
pushb(opcode)
|
push_byte(opcode)
|
||||||
modrm(op1, spare, r16)
|
push_modrm(op1, spare, r8)
|
||||||
} else if (byteop) {
|
return 1
|
||||||
pushb(opcode)
|
} else if (wordop) {
|
||||||
modrm(op1, spare, r8)
|
push_byte(opcode+1)
|
||||||
|
push_modrm(op1, spare, r16)
|
||||||
|
return 2
|
||||||
}
|
}
|
||||||
|
return 0
|
||||||
}
|
}
|
||||||
|
|
||||||
# don't process empty lines or comment lines
|
# don't process empty lines or comment lines
|
||||||
@ -233,6 +268,12 @@ $2=="sym" {
|
|||||||
}
|
}
|
||||||
# take note if we got an instruction size specifier
|
# take note if we got an instruction size specifier
|
||||||
op=$(opn)
|
op=$(opn)
|
||||||
|
if (op in prefix) {
|
||||||
|
push_byte(prefix[op])
|
||||||
|
opn=opn+1
|
||||||
|
op=$(opn)
|
||||||
|
}
|
||||||
|
|
||||||
if($(opn+1)=="byte" || $(opn+1)=="short") {
|
if($(opn+1)=="byte" || $(opn+1)=="short") {
|
||||||
wordop=0
|
wordop=0
|
||||||
dwordop=0
|
dwordop=0
|
||||||
@ -303,115 +344,113 @@ op=="db" {
|
|||||||
for(i=1;i<=c;i++) {
|
for(i=1;i<=c;i++) {
|
||||||
if (substr(a[i],1,1)=="\"") {
|
if (substr(a[i],1,1)=="\"") {
|
||||||
for(j=2;j<length(a[i]);j++) {
|
for(j=2;j<length(a[i]);j++) {
|
||||||
pushb(_ord_[substr(a[i],j,1)])
|
push_byte(_ord_[substr(a[i],j,1)])
|
||||||
}
|
}
|
||||||
} else pushb(getsym(a[i]))
|
} else push_byte(getsym(a[i]))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
op=="dw" {
|
op=="dw" {
|
||||||
for(i=1;i<=c;i++) pushw(getsym(a[i]))
|
for(i=1;i<=c;i++) push_word(getsym(a[i]))
|
||||||
}
|
}
|
||||||
# arithmetics: ADD, SUB, XOR etc
|
# arithmetics: ADD, SUB, XOR etc
|
||||||
op in alu {
|
op in alu {
|
||||||
# modrm <- reg
|
push_op_modrm(alu[op]*8)
|
||||||
if (rm1 && op2 in r8) {
|
if (!hex) {
|
||||||
pushb(0+alu[op]*8)
|
size=push_op_fixed_spare(0x80, alu[op])
|
||||||
modrm(op1, r8[op2], r8)
|
if (size==1) {
|
||||||
} else if (rm1 && op2 in r16) {
|
push_byte(getsym(op2))
|
||||||
pushb(1+alu[op]*8)
|
} else if (size==2) {
|
||||||
modrm(op1, r16[op2], r16)
|
push_word(getsym(op2))
|
||||||
# reg <- modrm
|
}
|
||||||
} else if (op1 in r8 && rm2) {
|
|
||||||
pushb(2+alu[op]*8)
|
|
||||||
modrm(op2, r8[op1], r8)
|
|
||||||
} else if (op1 in r16 && rm2) {
|
|
||||||
pushb(3+alu[op]*8)
|
|
||||||
modrm(op2, r16[op1], r16)
|
|
||||||
# modrm <- imm
|
|
||||||
} else if (byteop && rm1 || op1 in r8) {
|
|
||||||
pushb(128) # 80
|
|
||||||
modrm(op1, alu[op], r8)
|
|
||||||
pushb(getsym(op2))
|
|
||||||
} else if (wordop && rm1 || op1 in r16) {
|
|
||||||
pushb(129) # 81
|
|
||||||
modrm(op1, alu[op], r16)
|
|
||||||
pushw(getsym(op2))
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
# no idea why this was made a separate instruction; it is an AND that does not store the result
|
||||||
|
op=="test" {
|
||||||
|
push_op_modrm(132) # 84
|
||||||
|
if (!hex) {
|
||||||
|
size=push_op_fixed_spare(246, 7)
|
||||||
|
if (size==1) {
|
||||||
|
push_byte(getsym(op2))
|
||||||
|
} else if (size==2) {
|
||||||
|
push_word(getsym(op2))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
op=="sar" && op2=="1" {
|
||||||
|
push_op_fixed_spare(208, 0) # D0 /0
|
||||||
|
}
|
||||||
|
op=="inc" && rm1 {
|
||||||
|
push_op_fixed_spare(254, 0) # FE /0
|
||||||
|
}
|
||||||
# MOV variants
|
# MOV variants
|
||||||
op=="mov" {
|
op=="mov" {
|
||||||
# modrm <-> reg
|
# modrm <-> reg
|
||||||
if (rm1 && op2 in r8) {
|
if (rm1 && op2 in r8) {
|
||||||
pushb(136) # 88
|
push_byte(136) # 88
|
||||||
modrm(op1, r8[op2], r8)
|
push_modrm(op1, r8[op2], r8)
|
||||||
} else if (rm1 && op2 in r16) {
|
} else if (rm1 && op2 in r16) {
|
||||||
pushb(137) # 89
|
push_byte(137) # 89
|
||||||
modrm(op1, r16[op2], r16)
|
push_modrm(op1, r16[op2], r16)
|
||||||
} else if (op1 in r8 && rm2) {
|
} else if (op1 in r8 && rm2) {
|
||||||
pushb(138)
|
push_byte(138)
|
||||||
modrm(op2, r8[op1], r8)
|
push_modrm(op2, r8[op1], r8)
|
||||||
} else if (op1 in r16 && rm2) {
|
} else if (op1 in r16 && rm2) {
|
||||||
pushb(139)
|
push_byte(139)
|
||||||
modrm(op2, r16[op1], r16)
|
push_modrm(op2, r16[op1], r16)
|
||||||
# modrm <-> sreg
|
# modrm <-> sreg
|
||||||
} else if (rm1 && op2 in sreg) {
|
} else if (rm1 && op2 in sreg) {
|
||||||
pushb(140)
|
push_byte(140)
|
||||||
modrm(op1, sreg[op2], r16)
|
push_modrm(op1, sreg[op2], r16)
|
||||||
} else if (rm2 && op1 in sreg) {
|
} else if (rm2 && op1 in sreg) {
|
||||||
pushb(142)
|
push_byte(142)
|
||||||
modrm(op2, sreg[op1], r16)
|
push_modrm(op2, sreg[op1], r16)
|
||||||
# reg <- imm
|
# reg <- imm
|
||||||
} else if (op1 in r8) {
|
} else if (op1 in r8) {
|
||||||
pushb(176+r8[op1])
|
push_byte(176+r8[op1])
|
||||||
pushb(getsym(op2))
|
push_byte(getsym(op2))
|
||||||
} else if (op1 in r16) {
|
} else if (op1 in r16) {
|
||||||
pushb(184+r16[op1])
|
push_byte(184+r16[op1])
|
||||||
pushw(getsym(op2))
|
push_word(getsym(op2))
|
||||||
# modrm <- imm
|
# modrm <- imm
|
||||||
} else if (byteop && rm1) {
|
} else if (byteop && rm1) {
|
||||||
pushb(198)
|
push_byte(198)
|
||||||
modrm(rm1, 0, r16)
|
push_modrm(rm1, 0, r16)
|
||||||
} else if (wordop && rm1) {
|
} else if (wordop && rm1) {
|
||||||
pushb(199)
|
push_byte(199)
|
||||||
modrm(rm1, 0, r16)
|
push_modrm(rm1, 0, r16)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
op=="push" && op1 in r16 {
|
op=="push" && op1 in r16 {
|
||||||
pushb(80+r16[op1])
|
push_byte(80+r16[op1])
|
||||||
}
|
}
|
||||||
op=="pop" && op1 in r16 {
|
op=="pop" && op1 in r16 {
|
||||||
pushb(88+r16[op1])
|
push_byte(88+r16[op1])
|
||||||
}
|
}
|
||||||
op=="test" && op2 in r8 { # 84
|
op=="int" && op1=="3" { # CC breakpoint
|
||||||
pushb(132)
|
push_byte(204)
|
||||||
modrm(op1, r8[op2], r8)
|
|
||||||
}
|
}
|
||||||
op=="int" { # CD
|
op=="int" { # CD
|
||||||
pushb(205)
|
push_byte(205)
|
||||||
pushb(getsym(op1))
|
push_byte(getsym(op1))
|
||||||
}
|
|
||||||
op=="jmp" && byteop {
|
|
||||||
pushb(235)
|
|
||||||
pushsb(getsym(op1)-(pos+2))
|
|
||||||
}
|
}
|
||||||
op=="jmp" && wordop {
|
op=="jmp" && wordop {
|
||||||
pushb(233)
|
push_byte(233)
|
||||||
pushw(getsym(op1)-(pos+3))
|
push_word(getsym(op1)-(pos+3))
|
||||||
}
|
}
|
||||||
op=="call" {
|
op=="call" && wordop {
|
||||||
pushb(232)
|
push_byte(232)
|
||||||
pushw(getsym(op1)-(pos+3))
|
push_word(getsym(op1)-(pos+3))
|
||||||
}
|
}
|
||||||
op=="neg" {
|
op=="neg" {
|
||||||
encfs(246, 3)
|
push_op_fixed_spare(246, 3)
|
||||||
}
|
}
|
||||||
# opcodes with rel8 encoding
|
# opcodes with rel8 encoding
|
||||||
op in ops_rel8 && byteop {
|
op in ops_rel8 && byteop && c==1 {
|
||||||
pushb(ops_rel8[op])
|
push_byte(ops_rel8[op])
|
||||||
pushb(getsym(op1)-(pos+2))
|
push_signed_byte(getsym(op1)-(pos+2))
|
||||||
}
|
}
|
||||||
# opcodes without arguments
|
# opcodes without arguments
|
||||||
op in ops_sb { pushb(ops_sb[op]) }
|
op in ops_sb { push_byte(ops_sb[op]) }
|
||||||
{
|
{
|
||||||
if (!hex) err("no encoding found")
|
if (!hex) err("no encoding found")
|
||||||
submit(pos)
|
submit(pos)
|
||||||
|
2
host/bin2db.sh
Executable file
2
host/bin2db.sh
Executable file
@ -0,0 +1,2 @@
|
|||||||
|
#!/bin/sh
|
||||||
|
xxd -c8 -g1 "$1"|cut -d' ' -f2-9|sed 's/[0-9A-Fa-f]\{2\}/0x&/g;s/ /, /g;s/^/ db /'
|
2034
src/lain.asm
Normal file
2034
src/lain.asm
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user