resume working; import awk-based assembler

This commit is contained in:
Nero 2023-01-01 19:04:31 +00:00
parent e48ae306ba
commit 77b1ad7cef
4 changed files with 451 additions and 55 deletions

View File

@ -1,80 +1,42 @@
PROGRAMS = hello.com sys.com
KERNEL = @rdos.com
DISTFILES = $(KERNEL) $(PROGRAMS)
VERSION = $(shell git log -1 --format=%cd --date=format:%Y%m%d)
PRODUCT = rdos
VERSION = $(shell git log -1 --format=%cd --date=format:%Y%m%d)
QEMU = qemu-system-i386
QEMU_ARGS += $(shell test -w /dev/kvm && echo --enable-kvm)
METRIC = stat -c '%8s %n'
PROGRAMS = hello.com sys.com
KERNEL = @$(PRODUCT).com
DISTFILES = $(KERNEL) $(PROGRAMS)
FLOPPY = 1440
# Programs used for building
NASM = nasm
NASM_ARGS = -s -Isrc --before "cpu 8086" -DPRODUCT=$(PRODUCT) -DVERSION=$(VERSION)
NASM_ARGS = -s --before "cpu 8086" -DPRODUCT=$(PRODUCT) -DVERSION=$(VERSION)
AS = host/as
SYS = ./sys.elf
ifndef DISPLAY
QEMU_ARGS += --display curses
endif
SYS = host/sys.elf
.PHONY: default clean qemu-floppy5 qemu-floppy3
.PHONY: default clean qemu
.PRECIOUS: %.com
default: fd1440.img
default: fd$(FLOPPY).img
# Host utils
host/%.elf: host/%.c
%.elf: host/%.c
$(CC) -o $@ $<
# COM programs
%.com: src/%.asm src/*.inc
$(NASM) $(NASM_ARGS) -l $(@:.com=.lst) -o $@ $<
$(METRIC) $@
$(NASM) $(NASM_ARGS) -Isrc -l $(@:.com=.lst) -o $@ $<
%.hex: src/%.asm src/*.inc
$(NASM) $(NASM_ARGS) -f ith -l $(@:.hex=.lst) -o $@ $<
# Bootloaders, first sector on partition
# Bootloader images
%.bs: boot/%.asm
$(NASM) $(NASM_ARGS) -o $@ $<
# Special case: variations of FAT vbr
fat1.bs: boot/fat.asm
$(NASM) $(NASM_ARGS) -DFAT12 -DCHS -o $@ $<
fat4.bs: boot/fat.asm
$(NASM) $(NASM_ARGS) -DFAT16 -DCHS -o $@ $<
fat6.bs: boot/fat.asm
$(NASM) $(NASM_ARGS) -DFAT16 -DCHS -DLARGE -o $@ $<
$(AS) $@ $<
# Build a floppy image named fd<size>.img; the pattern stem ($*) is handed to
# mformat's -f option, so "fd1440.img" requests -f 1440.
# Steps: create and format the image with a "$(PRODUCT) $(VERSION)" volume
# label, copy every file in $(DISTFILES) into its root directory, then run the
# host tool $(SYS) on the image together with $(KERNEL).
# NOTE(review): $(SYS) presumably installs the boot code for the kernel; its
# source is not visible here - confirm against host/sys.c.
fd%.img: $(DISTFILES) $(SYS)
mformat -C -i $@ -f $* -v "$(PRODUCT) $(VERSION)" ::
mcopy -i $@ $(DISTFILES) ::
$(SYS) $@ $(KERNEL)
hdd.img: mbr.bs $(KERNEL) fd1440.img
dd if=mbr.bs bs=512 count=1 of=$@
dd if=$(KERNEL) bs=512 seek=1 of=$@
dd if=fd1440.img bs=512 seek=63 of=$@
clean:
rm -f *.com *.bs *.0 *.lst *.img *.bin *.hex
rm -f host/*.elf
qemu-floppy3: fd1440.img
$(QEMU) $(QEMU_ARGS) -boot a -fda fd1440.img
qemu-floppy5: fd360.img
$(QEMU) $(QEMU_ARGS) -boot a -fda fd360.img
qemu-hdd: hdd.img
$(QEMU) $(QEMU_ARGS) -boot c -hda hdd.img
dosbox-%: %.com fd1440.img
dosbox -c "mount P $(CURDIR)" -c P: -c "IMGMOUNT A: FD1440.IMG -t floppy" -c "$< $(ARGS)" -c pause -c exit
emu2-%: %.com
emu2 $< $(ARGS)
# Boot the floppy image in QEMU.
# KVM is enabled only when /dev/kvm is writable.  When no X display is
# available the curses display is used instead.  The shell variable must be
# written as $$DISPLAY: a bare $DISPLAY is expanded by make as $(D) followed by
# the literal text "ISPLAY", which made the test always succeed and the curses
# fallback unreachable.
qemu: fd$(FLOPPY).img
	qemu-system-i386 $(shell test -w /dev/kvm && echo --enable-kvm) $(shell test -n "$$DISPLAY" || echo --display curses) -boot a -drive file=fd$(FLOPPY).img,format=raw,index=0,if=floppy

8
boot/hello.asm Normal file
View File

@ -0,0 +1,8 @@
; Minimal boot-sector test program for the awk-based assembler.
; Code is assembled for address 0x7C00, where a PC BIOS places a boot sector.
org 0x7C00
; jump over the two padding bytes that follow
jmp short main
nop
nop
; AX=0x0E37 puts 0x0E in AH and 0x37 (ASCII '7') in AL; with int 0x10 this is
; the BIOS teletype-output call, so a single '7' is printed
main mov ax, 0x0e37
int 0x10
; halt the CPU; if it wakes up again, go straight back to hlt
halt hlt
jmp short halt

4
host/as Executable file
View File

@ -0,0 +1,4 @@
#!/bin/sh
# Two-pass driver around host/asm.awk.
# usage: host/as <outfile> <infile>
#
# Pass 1 runs asm.awk with of=sym, which prints one "<label> sym <offset>"
# line (tab separated) per label.  Pass 2 reads those symbol lines followed by
# the source itself, so that forward references resolve, and prints the
# listing (of=lst) on standard output.
#
# NOTE(review): outfile is assigned but never used below - nothing is written
# to it, although the Makefile invokes this script as "$(AS) $@ $<".  Confirm
# which output step is intended.
# The awk script is referenced by a relative path, so this has to be run from
# the directory that contains host/.
outfile="$1"
infile="$2"
( awk -f host/asm.awk -v of=sym <"$infile"; cat "$infile") | awk -f host/asm.awk -v of=lst

422
host/asm.awk Normal file
View File

@ -0,0 +1,422 @@
#!/usr/bin/awk -f
# Initialise the assembler state and build the static lookup tables.
#   of        output format: "lst" (listing, default), "sym" (symbol table)
#             or "hex" (raw hex digits); normally set with -v of=...
#   pos       current location counter
#   errors    number of errors reported so far
# The tables map mnemonics / register names to their numeric encodings.
BEGIN {
	if (!of) of="lst"
	pos=0
	errors=0
	# lookup table for ord()
	for (i = 32; i <= 128; i++) {
		t=sprintf("%c", i)
		_ord_[t]=i
	}
	# 8-bit general purpose registers, listed in encoding order 0..7
	_n=split("al cl dl bl ah ch dh bh", _names, " ")
	for (i = 1; i <= _n; i++) r8[_names[i]]=i-1
	# 16-bit general purpose registers, listed in encoding order 0..7
	_n=split("ax cx dx bx sp bp si di", _names, " ")
	for (i = 1; i <= _n; i++) r16[_names[i]]=i-1
	# segment registers, listed in encoding order 0..3
	_n=split("es cs ss ds", _names, " ")
	for (i = 1; i <= _n; i++) sreg[_names[i]]=i-1
	# ALU operations, listed in encoding order 0..7
	_n=split("add or adc sbb and sub xor cmp", _names, " ")
	for (i = 1; i <= _n; i++) alu[_names[i]]=i-1
	# the scratch list is not needed any more
	split("", _names)
	# indirect access register combinations; both orders of a register pair
	# are listed because the operand parser does not sort them
	modstr["bxsi"]=0
	modstr["sibx"]=0
	modstr["bxdi"]=1
	modstr["dibx"]=1
	modstr["bpsi"]=2
	modstr["sibp"]=2
	modstr["bpdi"]=3
	modstr["dibp"]=3
	modstr["si"]=4
	modstr["di"]=5
	modstr["bx"]=7
	# near conditional jumps: opcode is 0x70 (112) plus the condition code
	ops_rel8["jo"]=112+0
	ops_rel8["jno"]=112+1
	ops_rel8["jb"]=112+2
	ops_rel8["jc"]=112+2
	ops_rel8["jnae"]=112+2
	ops_rel8["jae"]=112+3
	ops_rel8["jnb"]=112+3
	ops_rel8["jnc"]=112+3
	ops_rel8["je"]=112+4
	ops_rel8["jz"]=112+4
	ops_rel8["jne"]=112+5
	ops_rel8["jnz"]=112+5
	ops_rel8["jbe"]=112+6
	ops_rel8["jna"]=112+6
	ops_rel8["ja"]=112+7
	ops_rel8["jnbe"]=112+7
	ops_rel8["js"]=112+8
	ops_rel8["jns"]=112+9
	ops_rel8["jp"]=112+10
	ops_rel8["jpe"]=112+10
	ops_rel8["jnp"]=112+11
	ops_rel8["jpo"]=112+11
	ops_rel8["jl"]=112+12
	ops_rel8["jnge"]=112+12
	ops_rel8["jge"]=112+13
	# JNL is the standard alias of JGE; it used to be misspelled "jbl"
	ops_rel8["jnl"]=112+13
	# legacy misspelling, kept so that existing sources still assemble
	ops_rel8["jbl"]=112+13
	ops_rel8["jle"]=112+14
	ops_rel8["jng"]=112+14
	ops_rel8["jg"]=112+15
	ops_rel8["jnle"]=112+15
	# single-byte opcodes without operands
	ops_sb["nop"]=128+16
	ops_sb["stosb"]=170
	ops_sb["stosw"]=171
	ops_sb["lodsb"]=172
	ops_sb["lodsw"]=173
	ops_sb["ret"]=195
	ops_sb["retf"]=203
	ops_sb["hlt"]=244
}
# Record one more error; in listing mode the message is also written into the
# listing, prefixed with "****".
#   str  message text
function err(str) {
	errors = errors + 1
	if (of != "lst") return
	printf("**** %s\n", str)
}
# Hand the bytes collected in "hex" for the current line to the output and
# bind the line's label, if there is one, to off.
#   off  value printed in the listing's address column and stored for the label
# Afterwards the location counter is advanced by the number of bytes emitted
# and the byte buffer is cleared.
function submit(off) {
	if (of=="lst") {
		printf("%04X %-18s %s\n", off, hex, $0)
	} else if (of=="hex") {
		if (hex) printf("%s", hex)
	}
	if (label) {
		if (of=="sym") {
			printf("%s\tsym\t%d\n", label, off)
		}
		# a value that differs from the one loaded from the previous pass
		# is reported as an error
		if ((label in prevsym) && prevsym[label]!=off) {
			err("label " label " different during second pass, was " prevsym[label] ", now " off)
		}
		sym[label]=off
	}
	# two hex digits per byte
	pos += length(hex)/2
	hex=""
}
# Resolve an operand to a number.
#   name  a numeric literal (decimal, or hexadecimal with a 0x prefix), a label,
#         or a local label starting with "." (completed with the last
#         non-local label, plabel)
# Returns the literal's value, or the label's value from the previous pass
# (prevsym) or, failing that, from the current pass (sym).  Unknown names
# resolve to 0, which is what forward references yield during the first pass.
# The trailing parameters are local variables.
function getsym(name,    digits, k, val) {
	# Hexadecimal is decoded by hand: awk's own string-to-number conversion
	# is not required to understand a 0x prefix and several implementations
	# turn "0x7C00" into 0.
	if (name ~ /^0[xX][0-9A-Fa-f]+$/) {
		digits = tolower(substr(name, 3))
		val = 0
		for (k = 1; k <= length(digits); k++) {
			val = val * 16 + index("0123456789abcdef", substr(digits, k, 1)) - 1
		}
		return val
	}
	if (substr(name,1,1)=="0" || int(name)) {
		return int(name)
	}
	if (substr(name,1,1)==".") name = plabel name
	if (name in prevsym) {
		return prevsym[name]
	} else if (name in sym) {
		return sym[name]
	}
	return 0
}
# Append one unsigned byte (0..255) to the byte buffer "hex".
#   val  value to encode
# A value outside that range is reported as an error.  It is then reduced
# modulo 256 so that exactly two hex digits are still appended: anything else
# would make the buffer length odd or too long and throw off every address
# that follows.
function pushb(val) {
	#print("; pushb " val)
	if (val<0 || val>=256) {
		err("Value " val " does not fit in byte")
		val = int(val) % 256
		if (val < 0) val = val + 256
	}
	hex=hex sprintf("%02X",val)
}
# Append one signed byte (-128..127, used for rel8 displacements) to the byte
# buffer in two's complement form.
#   val  value to encode
# A value outside that range used to wrap around silently and produce a jump
# to the wrong place; it is now reported as an error.  One byte is emitted
# either way so that the addresses of the following lines stay stable.
function pushsb(val) {
	if (val < -128 || val > 127) {
		err("Value " val " does not fit in signed byte")
	}
	val = int(val) % 256
	if (val < 0) val = val + 256
	pushb(val)
}
# Append one 16-bit word to the byte buffer, low byte first.
#   val  value to encode; negative values are stored in two's complement form
# Values that cannot be represented in 16 bits even after one wrap-around are
# reported as an error.  The value is always reduced modulo 65536 so that
# exactly four hex digits are appended.  The trailing parameter is a local
# variable.
function pushw(val,    digits) {
	if (val < -65535 || val > 65535) {
		err("Value " val " does not fit in word")
	}
	val = int(val) % 65536
	if (val < 0) val = val + 65536
	digits = sprintf("%04X", val)
	# little endian: low byte goes out first
	hex = hex substr(digits, 3) substr(digits, 1, 2)
}
# Encode a ModR/M byte, plus displacement if needed, for one operand.
#   str    operand text: a register name, or a memory operand "[...]" whose
#          parts are joined with "+" (registers, literals, labels)
#   spare  value for the middle three bits (register number or opcode
#          extension)
#   rs     register table (r8 or r16) used when str is a plain register
# Memory operands start out with mod=0 (no displacement); any non-register
# part switches to mod=2 (16-bit displacement).  A lone bp has no mod=0 form,
# because mod=0 with rm=6 means "direct address", so it is encoded with mod=1
# and an 8-bit displacement of zero.
# The trailing parameters are local variables.
function modrm(str, spare, rs,    mod, rm, disp, regs, parts, k) {
	mod=3
	rm=0
	disp=0
	if (str in rs) {
		rm=rs[str]
	} else if (substr(str,1,1)=="[") {
		# memory operand: without this reset "[bx]" kept mod=3 and was
		# encoded as a register operand
		mod=0
		regs=""
		gsub(/^\[|\]$/, "", str)
		split(str, parts, "+")
		for (k in parts) {
			if (parts[k] in r16) {
				regs=regs parts[k]
			} else {
				disp = disp + getsym(parts[k])
				mod=2
			}
		}
		if (!regs) {
			# direct address: mod=0, rm=6, 16-bit displacement
			mod=0
			rm=6
		} else if (regs=="bp") {
			rm=6
			if (mod==0) mod=1
		} else if (regs in modstr) {
			rm=modstr[regs]
		} else {
			err("Bad modR/M")
		}
	}
	#print("; modR/M:", mod, spare, rm)
	pushb(mod*64+spare*8+rm)
	if (mod == 1) {
		pushb(disp)
	} else if (mod == 2 || (mod == 0 && rm == 6)) {
		pushw(disp)
	}
}
# common encoding: two operands, one is modrm, other in spare field
# last two bits of opcode specify width (byte/word) and whether modrm operand is first or second
# NOTE(review): the body is empty and nothing in this file calls enc(); it
# looks like a placeholder for the encoding described above.
function enc(opcode) {
}
# Common encoding: one operand (op1) encoded as ModR/M with a fixed value in
# the spare field.
#   opcode  opcode of the instruction; bit 0 selects the operand width
#           (0 = byte, 1 = word), so either form may be passed
#   spare   opcode extension placed in the spare field
# The word form is used when a word operation is possible, otherwise the byte
# form.  Previously the opcode was emitted unchanged in both cases, so a word
# operation such as "neg ax" came out with the byte opcode.
function encfs(opcode, spare) {
	# normalise to the byte form, then set bit 0 for word operations
	opcode = opcode - (opcode % 2)
	if (wordop) {
		pushb(opcode + 1)
		modrm(op1, spare, r16)
	} else if (byteop) {
		pushb(opcode)
		modrm(op1, spare, r8)
	}
}
# Blank lines, and lines whose first non-blank character is ';' or '%', carry
# no code: copy them into the listing behind an empty 24-column prefix and
# stop processing the line.
/^[ \t]*$/ || /^[ \t]*[;%]/ {
	if (of=="lst") {
		printf("%24s%s\n", "", $0)
	}
	next
}
# Symbol lines written by a previous pass have the form "<name> sym <value>":
# remember the value and do not treat the line as source.
($2 == "sym") {
	prevsym[$1] = int($3)
	next
}
# Begin parsing a source line and reset the per-line state.
# A line that starts with a blank or a tab has no label, so its mnemonic is
# field 1; otherwise field 1 is the label (a trailing ':' is dropped) and the
# mnemonic is field 2.
{
	if ($0 ~ /^[ \t]/) {
		label=""
		opn=1
	} else {
		label=$1
		sub(/:$/, "", label)
		opn=2
	}
	split("", b, ":")
	# every operand width is possible until the line says otherwise
	byteop=wordop=dwordop=1
}
# Parse the rest of the line: resolve local labels, pick up an optional size
# specifier, collect the operands into a[1..c] and narrow down the possible
# operand widths.
{
	# implement local labels: ".name" is completed with the last non-local
	# label, which is remembered in plabel
	if (substr(label,1,1)==".") {
		label=plabel label
	} else if (label) {
		plabel=label
	}
	# take note if we got a instruction size specifier
	op=$(opn)
	if($(opn+1)=="byte" || $(opn+1)=="short") {
		wordop=0
		dwordop=0
		opn++
	} else if ($(opn+1)=="word" || $(opn+1)=="near") {
		byteop=0
		dwordop=0
		opn++
	} else if ($(opn+1)=="dword" || $(opn+1)=="far") {
		byteop=0
		wordop=0
		opn++
	}
	# collect operands; a field starting with ';' begins a trailing comment
	split("", a, ":")
	c=0
	for (i=opn+1;i<=NF;i++) {
		if (substr($(i),1,1)==";") break
		a[++c]=$(i)
		if (substr($(i),1,1)=="\"") {
			j=index(substr($(i), 2), "\"")
			if (j) {
				# The string opens and closes inside this field, as
				# in: db "hi", 0 - keep it up to the closing quote.
				# Previously the following operands were swallowed
				# into the string.
				a[c]=substr($(i), 1, j+1)
			} else {
				# The string contains blanks: glue fields together
				# until the one holding the closing quote.  Runs of
				# blanks inside the string collapse to one blank
				# because the line was split into fields.
				while (i<NF) {
					i++
					j=index($(i), "\"")
					if (j) {
						a[c]=a[c] " " substr($(i), 1, j)
						break
					}
					a[c]=a[c] " " $(i)
				}
			}
		} else {
			gsub(/,$/, "", a[c])
		}
	}
	op1=a[1]
	op2=a[2]
	# pre-estimate if operand could be encoded as modrm
	rm1=(op1 in r16) || (op1 in r8) || substr(op1,1,1)=="["
	rm2=(op2 in r16) || (op2 in r8) || substr(op2,1,1)=="["
	# if byte register in operands, it cant be a word or dword operation
	if (op1 in r8 || op2 in r8) {
		wordop=0
		dwordop=0
	}
	# if word register in operands, it cant be a byte or dword operation
	if (op1 in r16 || op2 in r16 || op2 in sreg) {
		byteop=0
		dwordop=0
	}
}
# the source line is parsed by here:
# - op: opcode name
# - a: array of operands, starting with 1
# - c: number of operands
# - byteop, wordop, dwordop: test before encoding, all 1 per default
# pseudo-opcodes that emit no bytes
#   cpu  accepted and ignored
#   org  sets the location counter to the operand
#   equ  binds the line's label to the operand instead of the location counter
op=="cpu" || op=="org" || op=="equ" {
	if (op=="org") {
		pos=getsym(op1)
		submit(pos)
	} else if (op=="equ") {
		val=getsym(op1)
		submit(val)
	}
	next
}
# db: emit one byte per operand; a quoted string contributes one byte for each
# character between its quotes
op=="db" {
	for (i=1; i<=c; i++) {
		if (substr(a[i],1,1) != "\"") {
			pushb(getsym(a[i]))
			continue
		}
		# positions 2 .. length-1 skip the opening and closing quote
		for (j=2; j<length(a[i]); j++) {
			pushb(_ord_[substr(a[i], j, 1)])
		}
	}
}
# dw: emit one word per operand
op=="dw" {
	i=1
	while (i<=c) {
		pushw(getsym(a[i]))
		i++
	}
}
# arithmetics: ADD, SUB, XOR etc
# The opcode is the operation number times 8, plus 0..3 to select operand
# width and direction; immediates use opcode 80/81 (hex) with the operation
# number in the spare field.
op in alu {
	if (rm1 && (op2 in r8 || op2 in r16)) {
		# modrm <- reg
		if (op2 in r8) {
			pushb(alu[op]*8+0)
			modrm(op1, r8[op2], r8)
		} else {
			pushb(alu[op]*8+1)
			modrm(op1, r16[op2], r16)
		}
	} else if (rm2 && (op1 in r8 || op1 in r16)) {
		# reg <- modrm
		if (op1 in r8) {
			pushb(alu[op]*8+2)
			modrm(op2, r8[op1], r8)
		} else {
			pushb(alu[op]*8+3)
			modrm(op2, r16[op1], r16)
		}
	} else if ((byteop && rm1) || op1 in r8) {
		# modrm <- imm8
		pushb(128) # 80
		modrm(op1, alu[op], r8)
		pushb(getsym(op2))
	} else if ((wordop && rm1) || op1 in r16) {
		# modrm <- imm16
		pushb(129) # 81
		modrm(op1, alu[op], r16)
		pushw(getsym(op2))
	}
}
# MOV variants
# Tried in this order: register <-> modrm, segment register <-> modrm,
# register <- immediate, memory <- immediate.
op=="mov" {
	# modrm <-> reg
	if (rm1 && op2 in r8) {
		pushb(136) # 88
		modrm(op1, r8[op2], r8)
	} else if (rm1 && op2 in r16) {
		pushb(137) # 89
		modrm(op1, r16[op2], r16)
	} else if (op1 in r8 && rm2) {
		pushb(138) # 8A
		modrm(op2, r8[op1], r8)
	} else if (op1 in r16 && rm2) {
		pushb(139) # 8B
		modrm(op2, r16[op1], r16)
	# modrm <-> sreg
	} else if (rm1 && op2 in sreg) {
		pushb(140) # 8C
		modrm(op1, sreg[op2], r16)
	} else if (rm2 && op1 in sreg) {
		pushb(142) # 8E
		modrm(op2, sreg[op1], r16)
	# reg <- imm
	} else if (op1 in r8) {
		pushb(176+r8[op1]) # B0+r
		pushb(getsym(op2))
	} else if (op1 in r16) {
		pushb(184+r16[op1]) # B8+r
		pushw(getsym(op2))
	# modrm <- imm
	# The operand text (op1) has to be encoded, not the rm1 flag, and the
	# immediate has to follow the ModR/M bytes; both were missing before.
	} else if (byteop && rm1) {
		pushb(198) # C6
		modrm(op1, 0, r8)
		pushb(getsym(op2))
	} else if (wordop && rm1) {
		pushb(199) # C7
		modrm(op1, 0, r16)
		pushw(getsym(op2))
	}
}
# push / pop of a 16-bit register: opcode 80 (push) or 88 (pop) plus the
# register number
(op=="push" || op=="pop") && (op1 in r16) {
	pushb((op=="push" ? 80 : 88) + r16[op1])
}
# test with an 8-bit register as second operand
(op=="test") && (op2 in r8) {
	pushb(132) # 84
	modrm(op1, r8[op2], r8)
}
# software interrupt: opcode followed by the interrupt number
(op=="int") {
	pushb(205) # CD
	pushb(getsym(op1))
}
# jmp: the short form (EB rel8) is used only when it was requested with
# "short"/"byte"; otherwise the near form (E9 rel16) is used, which reaches any
# target.  Displacements are relative to the end of the instruction.
# Formerly a jmp without a size specifier matched both the short and the near
# rule and emitted both encodings back to back.
op=="jmp" {
	if (byteop && !wordop) {
		pushb(235) # EB
		pushsb(getsym(op1)-(pos+2))
	} else if (wordop) {
		pushb(233) # E9
		pushw(getsym(op1)-(pos+3))
	}
}
# call: opcode followed by a 16-bit displacement relative to the end of the
# three-byte instruction
(op=="call") {
	pushb(232) # E8
	pushw(getsym(op1) - (pos+3))
}
# neg: single modrm operand, opcode extension 3
(op=="neg") {
	encfs(246, 3) # F6
}
# opcodes with rel8 encoding
# The displacement is relative to the end of the two-byte instruction and is
# signed, so it goes through pushsb like "jmp short" does; with pushb every
# backward branch was rejected as "does not fit in byte".
op in ops_rel8 && byteop {
	pushb(ops_rel8[op])
	pushsb(getsym(op1)-(pos+2))
}
# opcodes without arguments
op in ops_sb { pushb(ops_sb[op]) }
# Last rule for every source line: complain when a mnemonic was given but no
# rule produced any bytes, then output the line.
# A line that only carries a label (optionally followed by a comment) has
# nothing to encode and is not an error; it still goes through submit() so
# that the label is recorded.
{
	if (!hex && op != "" && substr(op,1,1) != ";") err("no encoding found")
	submit(pos)
}
# Finish the output: hex mode writes all digits on one line, so terminate it
# here.  The exit status is 1 when any error was counted.
END {
	if (of == "hex") {
		printf("\n")
	}
	if (errors != 0) {
		exit(1)
	}
}