resume working; import awk-based assembler
This commit is contained in:
parent
e48ae306ba
commit
77b1ad7cef
72
Makefile
72
Makefile
@ -1,80 +1,42 @@
|
||||
PROGRAMS = hello.com sys.com
|
||||
KERNEL = @rdos.com
|
||||
DISTFILES = $(KERNEL) $(PROGRAMS)
|
||||
|
||||
VERSION = $(shell git log -1 --format=%cd --date=format:%Y%m%d)
|
||||
PRODUCT = rdos
|
||||
VERSION = $(shell git log -1 --format=%cd --date=format:%Y%m%d)
|
||||
|
||||
QEMU = qemu-system-i386
|
||||
QEMU_ARGS += $(shell test -w /dev/kvm && echo --enable-kvm)
|
||||
|
||||
METRIC = stat -c '%8s %n'
|
||||
PROGRAMS = hello.com sys.com
|
||||
KERNEL = @$(PRODUCT).com
|
||||
DISTFILES = $(KERNEL) $(PROGRAMS)
|
||||
FLOPPY = 1440
|
||||
|
||||
# Programs used for building
|
||||
NASM = nasm
|
||||
NASM_ARGS = -s -Isrc --before "cpu 8086" -DPRODUCT=$(PRODUCT) -DVERSION=$(VERSION)
|
||||
NASM_ARGS = -s --before "cpu 8086" -DPRODUCT=$(PRODUCT) -DVERSION=$(VERSION)
|
||||
AS = host/as
|
||||
SYS = ./sys.elf
|
||||
|
||||
ifndef DISPLAY
|
||||
QEMU_ARGS += --display curses
|
||||
endif
|
||||
|
||||
SYS = host/sys.elf
|
||||
|
||||
.PHONY: default clean qemu-floppy5 qemu-floppy3
|
||||
.PHONY: default clean qemu
|
||||
.PRECIOUS: %.com
|
||||
|
||||
default: fd1440.img
|
||||
default: fd$(FLOPPY).img
|
||||
|
||||
# Host utils
|
||||
host/%.elf: host/%.c
|
||||
%.elf: host/%.c
|
||||
$(CC) -o $@ $<
|
||||
|
||||
# COM programs
|
||||
%.com: src/%.asm src/*.inc
|
||||
$(NASM) $(NASM_ARGS) -l $(@:.com=.lst) -o $@ $<
|
||||
$(METRIC) $@
|
||||
$(NASM) $(NASM_ARGS) -Isrc -l $(@:.com=.lst) -o $@ $<
|
||||
|
||||
%.hex: src/%.asm src/*.inc
|
||||
$(NASM) $(NASM_ARGS) -f ith -l $(@:.hex=.lst) -o $@ $<
|
||||
|
||||
# Bootloaders, first sector on partition
|
||||
# Bootloader images
|
||||
%.bs: boot/%.asm
|
||||
$(NASM) $(NASM_ARGS) -o $@ $<
|
||||
|
||||
# Special case: variations of FAT vbr
|
||||
fat1.bs: boot/fat.asm
|
||||
$(NASM) $(NASM_ARGS) -DFAT12 -DCHS -o $@ $<
|
||||
|
||||
fat4.bs: boot/fat.asm
|
||||
$(NASM) $(NASM_ARGS) -DFAT16 -DCHS -o $@ $<
|
||||
|
||||
fat6.bs: boot/fat.asm
|
||||
$(NASM) $(NASM_ARGS) -DFAT16 -DCHS -DLARGE -o $@ $<
|
||||
$(AS) $@ $<
|
||||
|
||||
fd%.img: $(DISTFILES) $(SYS)
|
||||
mformat -C -i $@ -f $* -v "$(PRODUCT) $(VERSION)" ::
|
||||
mcopy -i $@ $(DISTFILES) ::
|
||||
$(SYS) $@ $(KERNEL)
|
||||
|
||||
hdd.img: mbr.bs $(KERNEL) fd1440.img
|
||||
dd if=mbr.bs bs=512 count=1 of=$@
|
||||
dd if=$(KERNEL) bs=512 seek=1 of=$@
|
||||
dd if=fd1440.img bs=512 seek=63 of=$@
|
||||
|
||||
clean:
|
||||
rm -f *.com *.bs *.0 *.lst *.img *.bin *.hex
|
||||
rm -f host/*.elf
|
||||
|
||||
qemu-floppy3: fd1440.img
|
||||
$(QEMU) $(QEMU_ARGS) -boot a -fda fd1440.img
|
||||
|
||||
qemu-floppy5: fd360.img
|
||||
$(QEMU) $(QEMU_ARGS) -boot a -fda fd360.img
|
||||
|
||||
qemu-hdd: hdd.img
|
||||
$(QEMU) $(QEMU_ARGS) -boot c -hda hdd.img
|
||||
|
||||
dosbox-%: %.com fd1440.img
|
||||
dosbox -c "mount P $(CURDIR)" -c P: -c "IMGMOUNT A: FD1440.IMG -t floppy" -c "$< $(ARGS)" -c pause -c exit
|
||||
|
||||
emu2-%: %.com
|
||||
emu2 $< $(ARGS)
|
||||
# Boot the floppy image selected by FLOPPY in QEMU.
# KVM is enabled only when /dev/kvm is writable. The curses display is
# selected when the DISPLAY environment variable is unset or empty; the
# shell variable must be written as $$DISPLAY, because make would expand
# a bare $DISPLAY as $(D) followed by the literal text "ISPLAY".
qemu: fd$(FLOPPY).img
	qemu-system-i386 $(shell test -w /dev/kvm && echo --enable-kvm) $(shell test -n "$$DISPLAY" || echo --display curses) -boot a -drive file=fd$(FLOPPY).img,format=raw,index=0,if=floppy
|
||||
|
8
boot/hello.asm
Normal file
8
boot/hello.asm
Normal file
@ -0,0 +1,8 @@
|
||||
; Minimal boot stub: prints one character through the BIOS, then halts forever.
; Assemble at origin 0x7C00 (presumably the conventional BIOS boot-sector
; load address -- confirm against the boot code that loads this image).
org 0x7C00
|
||||
; skip over the two filler bytes below
jmp short main
|
||||
nop
|
||||
nop
|
||||
; AX = 0x0E37: AH = 0x0E (BIOS teletype output), AL = 0x37 (ASCII '7')
main mov ax, 0x0e37
|
||||
; BIOS video service call
int 0x10
|
||||
; stop the CPU; if it wakes up again, loop straight back into hlt
halt hlt
|
||||
jmp short halt
|
4
host/as
Executable file
4
host/as
Executable file
@ -0,0 +1,4 @@
|
||||
#!/bin/sh
# Two-pass driver for host/asm.awk.
# usage: host/as OUTFILE INFILE
#
# Pass 1 runs the assembler in symbol mode (of=sym) over INFILE. Its symbol
# lines, followed by the source itself, are piped into pass 2, which runs in
# listing mode (of=lst) and prints the listing on stdout.
# NOTE(review): OUTFILE is accepted but nothing is written to it yet.
outfile="$1"
infile="$2"
{
	awk -f host/asm.awk -v of=sym <"$infile"
	cat "$infile"
} | awk -f host/asm.awk -v of=lst
|
422
host/asm.awk
Normal file
422
host/asm.awk
Normal file
@ -0,0 +1,422 @@
|
||||
#!/usr/bin/awk -f
|
||||
# One-time setup: output mode, position counter, error counter and the
# lookup tables used by the encoding rules.
BEGIN {
	# output format: "lst" (listing, the default), "sym" or "hex"
	if (!of) of="lst"
	pos=0
	errors=0
	# lookup table for ord(): character -> character code, codes 32..128
	for (i = 32; i <= 128; i++) {
		t=sprintf("%c", i)
		_ord_[t]=i
	}
	# 8-bit general purpose registers
	r8["al"]=0
	r8["cl"]=1
	r8["dl"]=2
	r8["bl"]=3
	r8["ah"]=4
	r8["ch"]=5
	r8["dh"]=6
	r8["bh"]=7
	# 16-bit general purpose registers
	r16["ax"]=0
	r16["cx"]=1
	r16["dx"]=2
	r16["bx"]=3
	r16["sp"]=4
	r16["bp"]=5
	r16["si"]=6
	r16["di"]=7
	# segment registers
	sreg["es"]=0
	sreg["cs"]=1
	sreg["ss"]=2
	sreg["ds"]=3
	# indirect access register combinations -> r/m field value
	# (both orders are listed because modrm() concatenates the register
	# names in array-iteration order)
	modstr["bxsi"]=0
	modstr["sibx"]=0
	modstr["bxdi"]=1
	modstr["dibx"]=1
	modstr["bpsi"]=2
	modstr["sibp"]=2
	modstr["bpdi"]=3
	modstr["dibp"]=3
	modstr["si"]=4
	modstr["di"]=5
	modstr["bx"]=7
	# ALU operations -> operation number used in the opcode / spare field
	alu["add"]=0
	alu["or"]=1
	alu["adc"]=2
	alu["sbb"]=3
	alu["and"]=4
	alu["sub"]=5
	alu["xor"]=6
	alu["cmp"]=7
	# conditional jumps with rel8 operand (opcodes 0x70..0x7F)
	ops_rel8["jo"]=112+0
	ops_rel8["jno"]=112+1
	ops_rel8["jb"]=112+2
	ops_rel8["jc"]=112+2
	ops_rel8["jnae"]=112+2
	ops_rel8["jae"]=112+3
	ops_rel8["jnb"]=112+3
	ops_rel8["jnc"]=112+3
	ops_rel8["je"]=112+4
	ops_rel8["jz"]=112+4
	ops_rel8["jne"]=112+5
	ops_rel8["jnz"]=112+5
	ops_rel8["jbe"]=112+6
	ops_rel8["jna"]=112+6
	ops_rel8["ja"]=112+7
	ops_rel8["jnbe"]=112+7
	ops_rel8["js"]=112+8
	ops_rel8["jns"]=112+9
	ops_rel8["jp"]=112+10
	ops_rel8["jpe"]=112+10
	ops_rel8["jnp"]=112+11
	ops_rel8["jpo"]=112+11
	ops_rel8["jl"]=112+12
	ops_rel8["jnge"]=112+12
	ops_rel8["jge"]=112+13
	# "jnl" is the alias of jge (this entry was previously misspelled "jbl")
	ops_rel8["jnl"]=112+13
	ops_rel8["jle"]=112+14
	ops_rel8["jng"]=112+14
	ops_rel8["jg"]=112+15
	ops_rel8["jnle"]=112+15
	# single-byte opcodes without operands
	ops_sb["nop"]=128+16
	ops_sb["stosb"]=170
	ops_sb["stosw"]=171
	ops_sb["lodsb"]=172
	ops_sb["lodsw"]=173
	ops_sb["ret"]=195
	ops_sb["retf"]=203
	ops_sb["hlt"]=244
}
|
||||
# Record an error: bump the global error counter and, in listing mode,
# insert the message into the listing.
function err(str) {
	errors = errors + 1
	if (of != "lst") return
	printf("**** %s\n", str)
}
|
||||
# Submit the assembled bytes of the current line (accumulated in `hex`)
# to the output and bind the line's label, if any, to `off`.
# Afterwards `pos` is advanced by the number of bytes and `hex` is cleared.
function submit(off) {
	if (of=="lst") {
		printf("%04X %-18s %s\n", off, hex, $0)
	} else if (of=="hex" && hex) {
		printf("%s", hex)
	}
	if (label) {
		if (of=="sym") {
			printf("%s\tsym\t%d\n", label, off)
		}
		# a label must resolve to the same value in both passes
		if ((label in prevsym) && prevsym[label]!=off) {
			err("label " label " different during second pass, was " prevsym[label] ", now " off)
		}
		sym[label]=off
	}
	# two hex digits per emitted byte
	pos += length(hex)/2
	hex=""
}
|
||||
# Resolve an operand to a number.
#   name - a decimal literal, a 0x-prefixed hex literal, or a symbol name;
#          a name starting with "." is local to the last global label
# Returns the literal's value, else the symbol's value from the previous
# pass (prevsym) or from the current pass (sym), else 0 if unknown.
# low, n, k are locals.
function getsym(name,    low, n, k) {
	# Parse hex literals by hand: converting a "0x..." string to a number
	# is not portable across awk implementations (POSIX awk yields 0).
	low = tolower(name)
	if (low ~ /^0x[0-9a-f]+$/) {
		n = 0
		for (k = 3; k <= length(low); k++) {
			n = n * 16 + index("0123456789abcdef", substr(low, k, 1)) - 1
		}
		return n
	}
	if (substr(name,1,1)=="0" || int(name)) {
		return int(name)
	}
	if (substr(name,1,1)==".") name = plabel name
	if (name in prevsym) {
		return prevsym[name]
	} else if (name in sym) {
		return sym[name]
	}
	return 0
}
|
||||
# Append one unsigned byte (0..255) to `hex` as two uppercase hex digits.
# A value outside that range is reported through err() and still formatted.
function pushb(val) {
	if (!(val>=0 && val<256)) {
		err("Value " val " does not fit in byte")
	}
	hex = sprintf("%s%02X", hex, val)
}
|
||||
# Append a signed byte (used for rel8 displacements): a negative value is
# wrapped by adding 256 until it is non-negative, then handed to pushb().
function pushsb(val) {
	while (val < 0) {
		val += 256
	}
	pushb(val)
}
|
||||
# Append a 16-bit word to `hex`, low byte first.
# A negative value is wrapped by adding 65536 until it is non-negative.
function pushw(val) {
	while (val < 0) {
		val += 65536
	}
	t = sprintf("%04X", val)
	# swap the two digit pairs: low byte is emitted before the high byte
	hex = sprintf("%s%s%s", hex, substr(t, 3), substr(t, 1, 2))
}
|
||||
# Encode a ModR/M byte, plus displacement if any, for operand `str`.
#   str   - operand text: a register name or a "[...]" memory reference
#           whose terms are separated by "+"
#   spare - value of the reg / opcode-extension field (bits 3..5)
#   rs    - register table (r8 or r16) used to look up a register operand
# Output is appended to `hex`; unsupported register combinations are
# reported through err().
function modrm(str, spare, rs) {
	mod=3
	rm=0
	disp=0
	if (str in rs) {
		# register operand: mod=3, r/m = register number
		rm=rs[str]
	} else if (substr(str,1,1)=="[") {
		# memory operand: starts without displacement (mod=0); without
		# this reset "[bx]" would keep mod=3 and encode a register
		mod=0
		rmap=""
		gsub(/^\[|\]$/, "", str)
		split(str,rmp,"+")
		for (i in rmp) {
			if (rmp[i] in r16) {
				# collect base/index register names
				rmap=rmap rmp[i]
			} else {
				# anything else contributes to a 16-bit displacement
				disp = disp + getsym(rmp[i])
				mod=2
			}
		}
		if (!rmap) {
			# no registers at all: direct address (mod=0, r/m=6, disp16)
			mod=0
			rm=6
		} else if (rmap in modstr) {
			rm=modstr[rmap]
		} else if (rmap=="bp") {
			# [bp] has no mod=0 form (that slot is the direct address),
			# so a bare [bp] is encoded as [bp+disp8] with disp 0
			rm=6
			if (mod==0) mod=1
		} else {
			err("Bad modR/M")
		}
	}
	pushb(mod*64+spare*8+rm)
	if (mod == 1) {
		pushb(disp)
	} else if (mod == 2 || (mod == 0 && rm == 6)) {
		pushw(disp)
	}
}
|
||||
# common encoding: two operands, one is modrm, other in spare field
# last two bits of opcode specify width (byte/word) and whether modrm
# operand is first or second
# The body is still empty, so a call emits nothing.
function enc(opcode) {
}
|
||||
# common encoding: one operand (op1) encoded as modrm with a fixed spare
# field. The operand can be byte or word; the width is carried in the
# last bit of the opcode (0 = byte, 1 = word).
#   opcode - opcode of the instruction (either width form)
#   spare  - opcode extension placed in the ModR/M spare field
# When no size can be determined from the operands, the word form is used.
function encfs(opcode, spare) {
	if (wordop) {
		# word form: force the width bit on (previously the byte
		# opcode was emitted unchanged, e.g. F6 instead of F7)
		pushb(opcode - opcode % 2 + 1)
		modrm(op1, spare, r16)
	} else if (byteop) {
		# byte form: width bit off
		pushb(opcode - opcode % 2)
		modrm(op1, spare, r8)
	}
}
|
||||
|
||||
# Blank lines and comment lines (first non-blank character ; or %) produce
# no code. In listing mode they are echoed behind an empty 24-column area.
/^[ \t]*$/ || /^[ \t]*[;%]/ {
	if (of=="lst") {
		printf("%24s%s\n","",$0)
	}
	next
}
# Symbol lines written by the previous pass ("name<TAB>sym<TAB>value"):
# remember the value and skip the rest of the processing.
$2=="sym" {
	prevsym[$1] = int($3)
	next
}
|
||||
# Start parsing the line: assume the first field is a label (a trailing
# colon is dropped) and the opcode is field 2, then reset per-line state.
{
	label = $1
	sub(/:$/, "", label)
	opn = 2
	split("", b, ":")
	# every operand width is possible until the operands say otherwise
	byteop = wordop = dwordop = 1
}
# A line that starts with a blank or a tab has no label: the opcode is
# the first field.
/^[ \t]/ {
	label = ""
	opn = 1
}
|
||||
# Split the line into opcode, optional size specifier and operands.
{
	# implement local labels: a label starting with "." is prefixed with
	# the most recent non-local label
	if (substr(label,1,1)==".") {
		label=plabel label
	} else if (label) {
		plabel=label
	}
	# take note if we got a instruction size specifier
	op=$(opn)
	if($(opn+1)=="byte" || $(opn+1)=="short") {
		wordop=0
		dwordop=0
		opn++
	} else if ($(opn+1)=="word" || $(opn+1)=="near") {
		byteop=0
		dwordop=0
		opn++
	} else if ($(opn+1)=="dword" || $(opn+1)=="far") {
		byteop=0
		wordop=0
		opn++
	}
	# collect operands into a[1..c], stopping at a trailing comment
	split("", a, ":")
	c=0
	for (i=opn+1;i<=NF;i++) {
		if (substr($(i),1,1)==";") break
		a[++c]=$(i)
		if (substr($(i),1,1)=="\"") {
			# quoted string: look for the closing quote after the opening one
			j=index(substr($(i),2), "\"")
			if (j) {
				# the string closes within this field; keep it up to and
				# including the closing quote (drops a trailing comma)
				a[c]=substr($(i), 1, j+1)
			} else {
				# the string contains blanks: re-join the following fields
				# up to the one holding the closing quote
				while (++i<=NF) {
					j=index($(i), "\"")
					if (j) {
						a[c]=a[c] " " substr($(i), 1, j)
						break
					}
					a[c]=a[c] " " $(i)
				}
			}
		} else {
			# plain operand: strip the separating comma
			gsub(/,$/, "", a[c])
		}
	}
	op1=a[1]
	op2=a[2]
	# pre-estimate if operand could be encoded as modrm
	rm1=(op1 in r16) || (op1 in r8) || substr(op1,1,1)=="["
	rm2=(op2 in r16) || (op2 in r8) || substr(op2,1,1)=="["
	# if byte register in operands, it cant be a word or dword operation
	if (op1 in r8 || op2 in r8) {
		wordop=0
		dwordop=0
	}
	# if word register in operands, it cant be a byte or dword operation
	if (op1 in r16 || op2 in r16 || op2 in sreg) {
		byteop=0
		dwordop=0
	}
}
|
||||
# the source line is parsed by here:
|
||||
# - op: opcode name
|
||||
# - a: array of operands, starting with 1
|
||||
# - c: number of operands
|
||||
# - byteop, wordop, dwordop: test before encoding, all 1 per default
|
||||
# pseudo-opcodes
|
||||
# cpu: accepted and ignored
op=="cpu" { next }
# org: set the current position to the operand's value
op=="org" {
	pos = getsym(op1)
	submit(pos)
	next
}
# equ: bind the line's label to the operand's value instead of the position
op=="equ" {
	val = getsym(op1)
	submit(val)
	next
}
|
||||
# db: each operand is either a byte value or a quoted string
op=="db" {
	for (i=1; i<=c; i++) {
		if (substr(a[i],1,1)!="\"") {
			pushb(getsym(a[i]))
			continue
		}
		# quoted string: one byte per character between the two quotes
		for (j=2; j<length(a[i]); j++) {
			pushb(_ord_[substr(a[i],j,1)])
		}
	}
}
# dw: each operand is one 16-bit word
op=="dw" {
	i=1
	while (i<=c) {
		pushw(getsym(a[i]))
		i++
	}
}
|
||||
# arithmetics: ADD, SUB, XOR etc
# The operation number from alu[] selects the opcode row (times 8) for the
# register forms and the spare field for the immediate forms.
op in alu {
	if (rm1 && (op2 in r8)) {
		# modrm <- reg, byte
		pushb(alu[op]*8)
		modrm(op1, r8[op2], r8)
	} else if (rm1 && (op2 in r16)) {
		# modrm <- reg, word
		pushb(alu[op]*8+1)
		modrm(op1, r16[op2], r16)
	} else if ((op1 in r8) && rm2) {
		# reg <- modrm, byte
		pushb(alu[op]*8+2)
		modrm(op2, r8[op1], r8)
	} else if ((op1 in r16) && rm2) {
		# reg <- modrm, word
		pushb(alu[op]*8+3)
		modrm(op2, r16[op1], r16)
	} else if ((byteop && rm1) || (op1 in r8)) {
		# modrm <- imm8
		pushb(128) # 80
		modrm(op1, alu[op], r8)
		pushb(getsym(op2))
	} else if ((wordop && rm1) || (op1 in r16)) {
		# modrm <- imm16
		pushb(129) # 81
		modrm(op1, alu[op], r16)
		pushw(getsym(op2))
	}
}
|
||||
# MOV variants
# Forms are tried in order: register/memory <-> register, register/memory
# <-> segment register, register <- immediate, memory <- immediate.
op=="mov" {
	# modrm <-> reg
	if (rm1 && op2 in r8) {
		pushb(136) # 88
		modrm(op1, r8[op2], r8)
	} else if (rm1 && op2 in r16) {
		pushb(137) # 89
		modrm(op1, r16[op2], r16)
	} else if (op1 in r8 && rm2) {
		pushb(138) # 8A
		modrm(op2, r8[op1], r8)
	} else if (op1 in r16 && rm2) {
		pushb(139) # 8B
		modrm(op2, r16[op1], r16)
	# modrm <-> sreg
	} else if (rm1 && op2 in sreg) {
		pushb(140) # 8C
		modrm(op1, sreg[op2], r16)
	} else if (rm2 && op1 in sreg) {
		pushb(142) # 8E
		modrm(op2, sreg[op1], r16)
	# reg <- imm
	} else if (op1 in r8) {
		pushb(176+r8[op1]) # B0+r
		pushb(getsym(op2))
	} else if (op1 in r16) {
		pushb(184+r16[op1]) # B8+r
		pushw(getsym(op2))
	# modrm <- imm: encode the destination operand itself (not the rm1
	# flag) and append the immediate after the ModR/M bytes
	} else if (byteop && rm1) {
		pushb(198) # C6
		modrm(op1, 0, r8)
		pushb(getsym(op2))
	} else if (wordop && rm1) {
		pushb(199) # C7
		modrm(op1, 0, r16)
		pushw(getsym(op2))
	}
}
|
||||
# push r16: 50+r
op=="push" && (op1 in r16) {
	pushb(r16[op1] + 80)
}
# pop r16: 58+r
op=="pop" && (op1 in r16) {
	pushb(r16[op1] + 88)
}
|
||||
# test modrm, r8: opcode 84, register in the spare field
op=="test" && (op2 in r8) {
	pushb(132)
	modrm(op1, r8[op2], r8)
}
# int imm8: opcode CD followed by the interrupt number
op=="int" {
	pushb(205)
	pushb(getsym(op1))
}
|
||||
# jmp short (EB rel8): only when the operand is explicitly sized
# "short"/"byte". Requiring !wordop keeps this rule from also firing for
# an unsized jmp, which used to emit both encodings for one instruction.
# The displacement is relative to the end of the 2-byte instruction.
op=="jmp" && byteop && !wordop {
	pushb(235)
	pushsb(getsym(op1)-(pos+2))
}
# jmp near (E9 rel16): explicit "near"/"word", and the form used when no
# size is given. The displacement is relative to the end of the 3-byte
# instruction.
op=="jmp" && wordop {
	pushb(233)
	pushw(getsym(op1)-(pos+3))
}
|
||||
# call rel16: opcode E8, displacement relative to the end of the
# 3-byte instruction
op=="call" {
	pushb(232)
	pushw(getsym(op1) - (pos + 3))
}
# neg modrm: opcode F6 with spare field 3, encoded through encfs()
op=="neg" { encfs(246, 3) }
|
||||
# opcodes with rel8 encoding (conditional jumps)
# The displacement is relative to the end of the 2-byte instruction and is
# negative for backward jumps, so it must go through pushsb() like the
# short jmp does; pushb() rejects negative values.
op in ops_rel8 && byteop {
	pushb(ops_rel8[op])
	pushsb(getsym(op1)-(pos+2))
}
|
||||
# opcodes without arguments
op in ops_sb {
	pushb(ops_sb[op])
}
# Last rule for every source line: if no rule above produced any bytes the
# line could not be encoded; either way hand the result to submit().
{
	if (length(hex) == 0) {
		err("no encoding found")
	}
	submit(pos)
}
|
||||
# Finish up: terminate the hex dump with a newline and report failure
# through the exit status if any error was counted.
END {
	if (of=="hex") {
		print ""
	}
	if (errors) {
		exit 1
	}
}
|
Loading…
Reference in New Issue
Block a user