diff --git a/Makefile b/Makefile index be6baf8..807c98b 100644 --- a/Makefile +++ b/Makefile @@ -1,80 +1,42 @@ -PROGRAMS = hello.com sys.com -KERNEL = @rdos.com -DISTFILES = $(KERNEL) $(PROGRAMS) - -VERSION = $(shell git log -1 --format=%cd --date=format:%Y%m%d) PRODUCT = rdos +VERSION := $(shell git log -1 --format=%cd --date=format:%Y%m%d) -QEMU = qemu-system-i386 -QEMU_ARGS += $(shell test -w /dev/kvm && echo --enable-kvm) - -METRIC = stat -c '%8s %n' +PROGRAMS = hello.com sys.com +KERNEL = @$(PRODUCT).com +DISTFILES = $(KERNEL) $(PROGRAMS) +FLOPPY = 1440 +# Programs used for building NASM = nasm -NASM_ARGS = -s -Isrc --before "cpu 8086" -DPRODUCT=$(PRODUCT) -DVERSION=$(VERSION) +NASM_ARGS = -s --before "cpu 8086" -DPRODUCT=$(PRODUCT) -DVERSION=$(VERSION) +AS = host/as +SYS = ./sys.elf -ifndef DISPLAY -QEMU_ARGS += --display curses -endif - -SYS = host/sys.elf - -.PHONY: default clean qemu-floppy5 qemu-floppy3 +.PHONY: default clean qemu .PRECIOUS: %.com -default: fd1440.img +default: fd$(FLOPPY).img # Host utils -host/%.elf: host/%.c +%.elf: host/%.c $(CC) -o $@ $< # COM programs %.com: src/%.asm src/*.inc - $(NASM) $(NASM_ARGS) -l $(@:.com=.lst) -o $@ $< - $(METRIC) $@ + $(NASM) $(NASM_ARGS) -Isrc -l $(@:.com=.lst) -o $@ $< -%.hex: src/%.asm src/*.inc - $(NASM) $(NASM_ARGS) -f ith -l $(@:.hex=.lst) -o $@ $< - -# Bootloaders, first sector on partition +# Bootloader images %.bs: boot/%.asm - $(NASM) $(NASM_ARGS) -o $@ $< - -# Special case: variations of FAT vbr -fat1.bs: boot/fat.asm - $(NASM) $(NASM_ARGS) -DFAT12 -DCHS -o $@ $< - -fat4.bs: boot/fat.asm - $(NASM) $(NASM_ARGS) -DFAT16 -DCHS -o $@ $< - -fat6.bs: boot/fat.asm - $(NASM) $(NASM_ARGS) -DFAT16 -DCHS -DLARGE -o $@ $< + $(AS) $@ $< fd%.img: $(DISTFILES) $(SYS) mformat -C -i $@ -f $* -v "$(PRODUCT) $(VERSION)" :: mcopy -i $@ $(DISTFILES) :: $(SYS) $@ $(KERNEL) -hdd.img: mbr.bs $(KERNEL) fd1440.img - dd if=mbr.bs bs=512 count=1 of=$@ - dd if=$(KERNEL) bs=512 seek=1 of=$@ - dd if=fd1440.img bs=512 seek=63 
of=$@ - clean: rm -f *.com *.bs *.0 *.lst *.img *.bin *.hex - rm -f host/*.elf + rm -f *.elf host/*.elf -qemu-floppy3: fd1440.img - $(QEMU) $(QEMU_ARGS) -boot a -fda fd1440.img - -qemu-floppy5: fd360.img - $(QEMU) $(QEMU_ARGS) -boot a -fda fd360.img - -qemu-hdd: hdd.img - $(QEMU) $(QEMU_ARGS) -boot c -hda hdd.img - -dosbox-%: %.com fd1440.img - dosbox -c "mount P $(CURDIR)" -c P: -c "IMGMOUNT A: FD1440.IMG -t floppy" -c "$< $(ARGS)" -c pause -c exit - -emu2-%: %.com - emu2 $< $(ARGS) +qemu: fd$(FLOPPY).img + qemu-system-i386 $(shell test -w /dev/kvm && echo --enable-kvm) $(shell test -n "$$DISPLAY" || echo --display curses) -boot a -drive file=fd$(FLOPPY).img,format=raw,index=0,if=floppy diff --git a/boot/hello.asm b/boot/hello.asm new file mode 100644 index 0000000..e163cb9 --- /dev/null +++ b/boot/hello.asm @@ -0,0 +1,8 @@ + org 0x7C00 + jmp short main + nop + nop +main mov ax, 0x0e37 + int 0x10 +halt hlt + jmp short halt diff --git a/host/as b/host/as new file mode 100755 index 0000000..60059ff --- /dev/null +++ b/host/as @@ -0,0 +1,4 @@ +#!/bin/sh +outfile="$1" +infile="$2" +( awk -f host/asm.awk -v of=sym <"$infile"; cat "$infile") | awk -f host/asm.awk -v of=lst diff --git a/host/asm.awk b/host/asm.awk new file mode 100644 index 0000000..dfa021d --- /dev/null +++ b/host/asm.awk @@ -0,0 +1,422 @@ +#!/usr/bin/awk -f +BEGIN { + if (!of) of="lst" + pos=0 + errors=0 + # lookup table for ord() + for (i = 32; i <= 128; i++) { + t=sprintf("%c", i) + _ord_[t]=i + } + # 8-bit general purpose registers + r8["al"]=0 + r8["cl"]=1 + r8["dl"]=2 + r8["bl"]=3 + r8["ah"]=4 + r8["ch"]=5 + r8["dh"]=6 + r8["bh"]=7 + # 16-bit general purpose registers + r16["ax"]=0 + r16["cx"]=1 + r16["dx"]=2 + r16["bx"]=3 + r16["sp"]=4 + r16["bp"]=5 + r16["si"]=6 + r16["di"]=7 + # segment registers + sreg["es"]=0 + sreg["cs"]=1 + sreg["ss"]=2 + sreg["ds"]=3 + # indirect access register combinations + modstr["bxsi"]=0 + modstr["sibx"]=0 + modstr["bxdi"]=1 + modstr["dibx"]=1 + modstr["bpsi"]=2 + modstr["sibp"]=2 + 
modstr["bpdi"]=3 + modstr["dibp"]=3 + modstr["si"]=4 + modstr["di"]=5 + modstr["bx"]=7 + # ALU operations + alu["add"]=0 + alu["or"]=1 + alu["adc"]=2 + alu["sbb"]=3 + alu["and"]=4 + alu["sub"]=5 + alu["xor"]=6 + alu["cmp"]=7 + # short conditional jumps (rel8), opcodes 0x70..0x7F + ops_rel8["jo"]=112+0 + ops_rel8["jno"]=112+1 + ops_rel8["jb"]=112+2 + ops_rel8["jc"]=112+2 + ops_rel8["jnae"]=112+2 + ops_rel8["jae"]=112+3 + ops_rel8["jnb"]=112+3 + ops_rel8["jnc"]=112+3 + ops_rel8["je"]=112+4 + ops_rel8["jz"]=112+4 + ops_rel8["jne"]=112+5 + ops_rel8["jnz"]=112+5 + ops_rel8["jbe"]=112+6 + ops_rel8["jna"]=112+6 + ops_rel8["ja"]=112+7 + ops_rel8["jnbe"]=112+7 + ops_rel8["js"]=112+8 + ops_rel8["jns"]=112+9 + ops_rel8["jp"]=112+10 + ops_rel8["jpe"]=112+10 + ops_rel8["jnp"]=112+11 + ops_rel8["jpo"]=112+11 + ops_rel8["jl"]=112+12 + ops_rel8["jnge"]=112+12 + ops_rel8["jge"]=112+13 + ops_rel8["jnl"]=112+13 + ops_rel8["jle"]=112+14 + ops_rel8["jng"]=112+14 + ops_rel8["jg"]=112+15 + ops_rel8["jnle"]=112+15 + # single-byte opcodes without operands + ops_sb["nop"]=128+16 + ops_sb["stosb"]=170 + ops_sb["stosw"]=171 + ops_sb["lodsb"]=172 + ops_sb["lodsw"]=173 + ops_sb["ret"]=195 + ops_sb["retf"]=203 + ops_sb["hlt"]=244 +} +# error string to insert into listing +function err(str) { + errors++ + if (of=="lst") printf("**** %s\n", str) +} +# submit an assembling result to output +# set label of current line to off +function submit(off) { + if (of=="lst") printf("%04X %-18s %s\n", off, hex, $0) + if (of=="hex" && hex) printf("%s", hex) + if (label) { + if (of=="sym") printf("%s\tsym\t%d\n", label, off) + if (label in prevsym && prevsym[label]!=off) { + err("label " label " different during second pass, was " prevsym[label] ", now " off) + } + sym[label]=off + } + pos=pos+length(hex)/2 + hex="" +} +# gets a symbol's value +function getsym(name) { + if (substr(name,1,1)=="0" || int(name)) { + return int(name) + } + if (substr(name,1,1)==".") name = plabel name + if (name in prevsym) { + return prevsym[name] + } else 
if (name in sym) { + return sym[name] + } + return 0 +} +# encode an unsigned byte (0..255), anything else is reported as an error +function pushb(val) { + #print("; pushb " val) + if (val<0 || val>=256) err("Value " val " does not fit in byte") + hex=hex sprintf("%02X",val) +} +# encode a signed byte (for rel8) +function pushsb(val) { + while (val < 0) val = val + 256 + pushb(val) +} +# encode a word +function pushw(val) { + while (val<0) val=val+65536 + t=sprintf("%04X",val) + hex=hex substr(t,3) substr(t,1,2) +} +# encode a modrm byte +function modrm(str, spare, rs) { + mod=3 + rm=0 + if (str in rs) { + mod=3 + rm=rs[str] + } else if (substr(str,1,1)=="[") { + rmap="" + gsub(/^\[|\]$/, "", str) + split(str,rmp,"+") + disp=0 + for (i in rmp) { + if (rmp[i] in r16) { + rmap=rmap rmp[i] + } else { + disp = disp + getsym(rmp[i]) + mod=2 + } + } + if (!rmap) { + mod=0 + rm=6 + } else if (rmap in modstr) { + rm=modstr[rmap] + } else { + err("Bad modR/M") + } + } + #print("; modR/M:", mod, spare, rm) + pushb(mod*64+spare*8+rm) + if (mod == 1) { + pushb(disp) + } else if (mod == 2 || (mod == 0 && rm == 6)) { + pushw(disp) + } +} +# common encoding: two operands, one is modrm, other in spare field +# last two bits of opcode specify width (byte/word) and whether modrm operand is first or second +function enc(opcode) { + +} +# common encoding: one operand encoded as modrm with fixed spare field +# operand can be byte or word, encoded in last bit of opcode +function encfs(opcode, spare) { + if (wordop) { + pushb(opcode+1) # word form: low opcode bit set + modrm(op1, spare, r16) + } else if (byteop) { + pushb(opcode) + modrm(op1, spare, r8) + } +} + +# don't process empty lines or comment lines +/^( |\t)*(;|%)/ || /^( |\t)*$/ { + if (of=="lst") printf("%24s%s\n","",$0) + next +} +# load symbols from previous pass +$2=="sym" { + prevsym[$1]=int($3) + #printf("; %s (%s=%X)\n", $0,$1,prevsym[$1]) + next +} +# Start parsing the line +# and set up per-line vars +{ + label=$1 + gsub(/:$/, "",label) + opn=2 + split("", b, ":") + byteop=1 + wordop=1 + dwordop=1 +} +# 
no label on line! fixup +/^ / || /^\t/ { + label="" + opn=1 +} +{ + # implement local labels + if (substr(label,1,1)==".") { + label=plabel label + } else if (label) { + plabel=label + } + # take note if we got an instruction size specifier + op=$(opn) + if($(opn+1)=="byte" || $(opn+1)=="short") { + wordop=0 + dwordop=0 + opn++ + } else if ($(opn+1)=="word" || $(opn+1)=="near") { + byteop=0 + dwordop=0 + opn++ + } else if ($(opn+1)=="dword" || $(opn+1)=="far") { + byteop=0 + wordop=0 + opn++ + } + split("", a, ":") + c=0 + for (i=opn+1;i<=NF;i++) { + if (substr($(i),1,1)==";") break + a[++c]=$(i) + if (substr($(i),1,1)=="\"") { + do { + i++ + j=index($(i), "\"") + if (j) { + a[c]=a[c] " " substr($(i), 1, j) + break + } else { + a[c]=a[c] " " $(i) + } + } while($(i)) + } else { + gsub(/,$/, "", a[c]) + } + } + op1=a[1] + op2=a[2] + # pre-estimate if operand could be encoded as modrm + rm1=(op1 in r16) || (op1 in r8) || substr(op1,1,1)=="[" + rm2=(op2 in r16) || (op2 in r8) || substr(op2,1,1)=="[" + # if byte register in operands, it cannot be a word or dword operation + if (op1 in r8 || op2 in r8) { + wordop=0 + dwordop=0 + } + # if word register in operands, it cannot be a byte or dword operation + if (op1 in r16 || op2 in r16 || op2 in sreg) { + byteop=0 + dwordop=0 + } +} +# the source line is parsed by here: +# - op: opcode name +# - a: array of operands, starting with 1 +# - c: number of operands +# - byteop, wordop, dwordop: test before encoding, all 1 per default +# pseudo-opcodes +op=="cpu" { + next +} +op=="org" { + pos=getsym(a[1]) + submit(pos);next +} +op=="equ" { + val=getsym(a[1]) + submit(val);next +} +op=="db" { + for(i=1;i<=c;i++) { + if (substr(a[i],1,1)=="\"") { + for(j=2;j reg + if (rm1 && op2 in r8) { + pushb(136) # 88 + modrm(op1, r8[op2], r8) + } else if (rm1 && op2 in r16) { + pushb(137) # 89 + modrm(op1, r16[op2], r16) + } else if (op1 in r8 && rm2) { + pushb(138) + modrm(op2, r8[op1], r8) + } else if (op1 in r16 && rm2) { + pushb(139) + 
modrm(op2, r16[op1], r16) + # modrm <-> sreg + } else if (rm1 && op2 in sreg) { + pushb(140) + modrm(op1, sreg[op2], r16) + } else if (rm2 && op1 in sreg) { + pushb(142) + modrm(op2, sreg[op1], r16) + # reg <- imm + } else if (op1 in r8) { + pushb(176+r8[op1]) + pushb(getsym(op2)) + } else if (op1 in r16) { + pushb(184+r16[op1]) + pushw(getsym(op2)) + # modrm <- imm (opcode, modrm with displacement, then the immediate) + } else if (byteop && rm1) { + pushb(198) + modrm(op1, 0, r16); pushb(getsym(op2)) + } else if (wordop && rm1) { + pushb(199) + modrm(op1, 0, r16); pushw(getsym(op2)) + } +} +op=="push" && op1 in r16 { + pushb(80+r16[op1]) +} +op=="pop" && op1 in r16 { + pushb(88+r16[op1]) +} +op=="test" && op2 in r8 { # 84 + pushb(132) + modrm(op1, r8[op2], r8) +} +op=="int" { # CD + pushb(205) + pushb(getsym(op1)) +} +op=="jmp" && byteop && !wordop { # EB, only with explicit short/byte + pushb(235) + pushsb(getsym(op1)-(pos+2)) +} +op=="jmp" && wordop { + pushb(233) + pushw(getsym(op1)-(pos+3)) +} +op=="call" { + pushb(232) + pushw(getsym(op1)-(pos+3)) +} +op=="neg" { + encfs(246, 3) +} +# opcodes with rel8 encoding +op in ops_rel8 && byteop { + pushb(ops_rel8[op]) + pushsb(getsym(op1)-(pos+2)) +} +# opcodes without arguments +op in ops_sb { pushb(ops_sb[op]) } +{ + if (!hex) err("no encoding found") + submit(pos) +} +END{ + if (of=="hex") printf("\n") + if (errors) exit(1) +}