/*
 * Minimal 8086 emulator for flat .COM programs.
 *
 * A single 64 KiB segment is emulated. The program image is loaded at offset
 * 0x100, the command tail is placed at 0x80, and execution continues until the
 * guest terminates through INT 20h, a DOS/CP/M exit call, or an unknown opcode.
 *
 * Host assumptions: the register union and the 16-bit memory accessors rely on
 * a little-endian host, and the layout of the flag bit-field is the one
 * produced by GCC/Clang on such hosts. `case A ... B` ranges are a GNU
 * extension already used throughout this file.
 */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// Size of the emulated address space (one real-mode segment).
#define MEM_SIZE (64 * 1024)

// Emulated memory. One guard byte follows the segment so that a 16-bit access
// at offset 0xFFFF stays inside the array instead of running off its end.
uint8_t mem[MEM_SIZE + 1];

union regset {
    uint8_t r8[8];   // AL, AH, CL, CH...
    uint16_t r16[8]; // AX, CX, DX, BX, SP, BP, SI, DI
} regset;

typedef struct flagbits {
    unsigned int c : 1;
    unsigned int u1 : 1;
    unsigned int p : 1;
    unsigned int u3 : 1;
    unsigned int a : 1;
    unsigned int u5 : 1;
    unsigned int z : 1;
    unsigned int s : 1;
    unsigned int t : 1;
    unsigned int i : 1;
    unsigned int d : 1;
    unsigned int o : 1;
    unsigned int u12 : 4;
} flagbits;

union flags {
    uint16_t reg;
    struct flagbits bit;
} flags;

uint16_t ip = 0x100;   // instruction pointer; .COM images start at 0x100
uint16_t scratch;      // general-purpose temporary shared by opcode handlers
uint16_t prefixes = 0; // pending REP prefixes for the next instruction
uint16_t reg;          // "reg" field of the last decoded ModR/M byte
void *rmptr;           // operand selected by the last decoded ModR/M byte

#define RM8 (*(uint8_t *)rmptr)
#define RM16 (*(uint16_t *)rmptr)

#define R8(x) regset.r8[fixr8ref(x)]
#define R16(x) regset.r16[(x)]

#define AL regset.r8[0]
#define AH regset.r8[1]
#define CL regset.r8[2]
#define DL regset.r8[4]

#define AX regset.r16[0]
#define CX regset.r16[1]
#define BX regset.r16[3]
#define SP regset.r16[4]
#define BP regset.r16[5]
#define SI regset.r16[6]
#define DI regset.r16[7]

#define FLAGMASK 0xF02A

#define ADD 0
#define OR 1
#define ADC 2
#define SBB 3
#define AND 4
#define SUB 5
#define XOR 6
#define CMP 7
#define TEST 8

#define PREFIX_REPE 1
#define PREFIX_REPNE 2

#define BIT8 8
#define BIT16 16

const char *const regnames[] = {
    "AX", "CX", "DX", "BX", "SP", "BP", "SI", "DI", 0
};

// Read a 16-bit word from emulated memory (host byte order, any alignment).
static uint16_t load16(uint16_t addr)
{
    uint16_t v;
    memcpy(&v, &mem[addr], sizeof v);
    return v;
}

// Write a 16-bit word to emulated memory (host byte order, any alignment).
static void store16(uint16_t addr, uint16_t v)
{
    memcpy(&mem[addr], &v, sizeof v);
}

// Dump registers, flags and the two bytes at IP to stdout.
void dump(void)
{
    int i;

    printf("\n");
    for (i = 0; i < 8; i++) {
        printf("%s=%04X ", regnames[i], regset.r16[i]);
    }
    printf("%c", flags.bit.o ? 'O' : '-');
    printf("%c", flags.bit.d ? 'D' : '-');
    printf("%c", flags.bit.i ? 'I' : '-');
    printf("%c", flags.bit.t ? 'T' : '-');
    printf("%c", flags.bit.s ? 'S' : '-');
    printf("%c", flags.bit.z ? 'Z' : '-');
    printf("%c", flags.bit.a ? 'A' : '-');
    printf("%c", flags.bit.p ? 'P' : '-');
    printf("%c", flags.bit.c ? 'C' : '-');
    printf("\n");
    // The second byte index wraps so IP=0xFFFF does not read past the segment.
    printf("IP=%04X : %02X %02X\n", ip, mem[ip], mem[(uint16_t)(ip + 1)]);
}

// Return 1 when the low byte of v contains an even number of set bits.
static unsigned even_parity(uint32_t v)
{
    v &= 0xFF;
    v ^= v >> 4;
    v ^= v >> 2;
    v ^= v >> 1;
    return !(v & 1u);
}

// Set the zero, sign and parity flags from a result of the given width.
static void set_szp(uint32_t res, int bits)
{
    const uint32_t mask = (1u << bits) - 1u;

    flags.bit.z = (res & mask) == 0;
    flags.bit.s = (res >> (bits - 1)) & 1u;
    flags.bit.p = even_parity(res);
}

/*
 * Core ALU shared by arith8() and arith16().
 *
 * op   - one of ADD..TEST
 * dst  - destination operand (only the low `bits` bits are used)
 * src  - source operand (only the low `bits` bits are used)
 * bits - operand width, 8 or 16
 *
 * Updates C, P, A, Z, S and O and returns the result truncated to `bits`.
 * All arithmetic is done on zero-extended operands in a wider unsigned type so
 * that bit `bits` of the raw result is exactly the carry/borrow out.
 */
static uint32_t alu(int op, uint32_t dst, uint32_t src, int bits)
{
    const uint32_t mask = (1u << bits) - 1u;
    const uint32_t sign = 1u << (bits - 1);
    const uint32_t cin = flags.bit.c;
    uint32_t res;
    int is_sub = 0;
    int is_logic = 0;

    dst &= mask;
    src &= mask;

    switch (op) {
    case ADD: res = dst + src; break;
    case ADC: res = dst + src + cin; break;
    case SUB:
    case CMP: res = dst - src; is_sub = 1; break;
    case SBB: res = dst - src - cin; is_sub = 1; break;
    case OR:  res = dst | src; is_logic = 1; break;
    case AND:
    case TEST: res = dst & src; is_logic = 1; break;
    case XOR: res = dst ^ src; is_logic = 1; break;
    default:  res = dst; is_logic = 1; break;
    }

    if (is_logic) {
        // Logical operations always clear carry and overflow.
        flags.bit.c = 0;
        flags.bit.o = 0;
        flags.bit.a = 0;
    } else {
        flags.bit.c = (res >> bits) & 1u;
        flags.bit.a = ((dst ^ src ^ res) >> 4) & 1u;
        if (is_sub) {
            // Signed overflow: operands differ in sign and result sign != dst.
            flags.bit.o = ((dst ^ src) & (dst ^ res) & sign) != 0;
        } else {
            // Signed overflow: operands share a sign that the result lacks.
            flags.bit.o = ((dst ^ res) & (src ^ res) & sign) != 0;
        }
    }
    set_szp(res, bits);

    return res & mask;
}

// Apply an 8-bit ALU operation to *dst. CMP and TEST only update flags.
void arith8(int op, uint8_t *dst, int src)
{
    uint32_t res = alu(op, *dst, (uint32_t)src, BIT8);

    if (op == CMP || op == TEST) {
        return;
    }
    *dst = (uint8_t)res;
}

// Apply a 16-bit ALU operation to *dst. CMP and TEST only update flags.
void arith16(int op, uint16_t *dst, int src)
{
    uint32_t res = alu(op, *dst, (uint32_t)src, BIT16);

    if (op == CMP || op == TEST) {
        return;
    }
    *dst = (uint16_t)res;
}

// Map an x86 8-bit register number (AL,CL,DL,BL,AH,CH,DH,BH) to its index in
// regset.r8, where the low and high halves of each word register are adjacent.
int fixr8ref(int r)
{
    return ((r << 1) & 0x6) | ((r >> 2) & 0x1);
}

// Fetch the byte at IP and advance IP past it.
uint8_t imm8(void)
{
    uint8_t r = mem[ip];

    ip++;
    return r;
}

// Fetch the word at IP and advance IP past it.
uint16_t imm16(void)
{
    uint16_t r = load16(ip);

    ip += 2;
    return r;
}

/*
 * Decode the ModR/M byte at IP (plus any displacement), advancing IP.
 *
 * size - BIT8 or BIT16; selects the register file used when mod == 3.
 *
 * On return the global `reg` holds the reg field and `rmptr` points at the
 * r/m operand, either a register or a byte inside `mem`. Effective addresses
 * are computed modulo 64 KiB so the pointer always lands inside `mem`.
 */
void modrm(int size)
{
    const uint8_t byte = mem[ip];
    const uint8_t mod = (byte >> 6) & 0x3; // highest 2 bits
    const uint8_t rm = byte & 0x7;
    uint16_t ea = 0;

    reg = (byte >> 3) & 0x7;
    ip++;

    if (mod == 3) {
        if (size == BIT16) {
            rmptr = &R16(rm);
        } else {
            rmptr = &R8(rm);
        }
        return;
    }

    if (mod == 0 && rm == 6) {
        // Direct 16-bit address; it is an unsigned offset into the segment.
        rmptr = &mem[imm16()];
        return;
    }

    switch (rm) {
    case 0: ea = (uint16_t)(BX + SI); break;
    case 1: ea = (uint16_t)(BX + DI); break;
    case 2: ea = (uint16_t)(BP + SI); break;
    case 3: ea = (uint16_t)(BP + DI); break;
    case 4: ea = SI; break;
    case 5: ea = DI; break;
    case 6: ea = BP; break;
    case 7: ea = BX; break;
    }

    if (mod == 1) {
        ea = (uint16_t)(ea + (int8_t)imm8());
    }
    if (mod == 2) {
        ea = (uint16_t)(ea + imm16());
    }
    rmptr = &mem[ea];
}

// Push a word onto the emulated stack.
void push(uint16_t v)
{
    SP -= 2;
    store16(SP, v);
}

// Pop a word from the emulated stack.
uint16_t pop(void)
{
    uint16_t v = load16(SP);

    SP += 2;
    return v;
}

// Handle a CP/M-style system call; unknown functions terminate the emulator.
void cpm_syscall(int number)
{
    switch (number) {
    case 0:
        exit(0);
        break;
    case 2:
        printf("%c", DL);
        break;
    default:
        fprintf(stderr, "Fatal: Unhandled CP/M syscall %Xh\n", number);
        exit(1);
    }
}

// Handle a DOS system call; unknown functions terminate the emulator.
void dos_syscall(int number)
{
    switch (number) {
    case 0:
        exit(0);
        break;
    default:
        fprintf(stderr, "Fatal: Unhandled DOS syscall %Xh\n", number);
        exit(1);
    }
}

// Dispatch a software interrupt to the matching system-call handler.
void handle_intr(int number)
{
    switch (number) {
    case 0x20:
        dos_syscall(0);
        break;
    case 0x21:
        dos_syscall(AH);
        break;
    case 0xE0:
        cpm_syscall(CL);
        break;
    default:
        fprintf(stderr, "Fatal: Unhandled interrupt %Xh\n", number);
        exit(1);
    }
}

// Execute a single instruction (or consume a single prefix byte).
void step(void)
{
    uint8_t opcode = mem[ip];

    ip++;
    switch (opcode) {
    case 0x00: // ARITH r/m8,reg8
    case 0x08:
    case 0x10:
    case 0x18:
    case 0x20:
    case 0x28:
    case 0x30:
    case 0x38:
        modrm(BIT8);
        arith8(opcode >> 3, &RM8, R8(reg));
        break;
    case 0x01: // ARITH r/m16,reg16
    case 0x09:
    case 0x11:
    case 0x19:
    case 0x21:
    case 0x29:
    case 0x31:
    case 0x39:
        modrm(BIT16);
        arith16(opcode >> 3, &RM16, R16(reg));
        break;
    case 0x40 ... 0x47: { // INC reg16 (carry flag is left untouched)
        uint16_t *r = &R16(opcode - 0x40);

        *r = (uint16_t)(*r + 1);
        flags.bit.o = (*r == 0x8000);
        flags.bit.a = ((*r & 0xF) == 0x0);
        set_szp(*r, BIT16);
        break;
    }
    case 0x48 ... 0x4F: { // DEC reg16 (carry flag is left untouched)
        uint16_t *r = &R16(opcode - 0x48);

        *r = (uint16_t)(*r - 1);
        flags.bit.o = (*r == 0x7FFF);
        flags.bit.a = ((*r & 0xF) == 0xF);
        set_szp(*r, BIT16);
        break;
    }
    case 0x50 ... 0x57: // PUSH reg16
        push(R16(opcode - 0x50));
        break;
    case 0x58 ... 0x5F: // POP reg16
        R16(opcode - 0x58) = pop();
        break;
    case 0x70 ... 0x7F: { // Jcc
        // Fetch the displacement first: the target is relative to the IP of
        // the next instruction.
        int8_t disp = (int8_t)imm8();
        uint16_t target = (uint16_t)(ip + disp);

        // Condition pairs in x86 encoding order; the low opcode bit negates.
        switch ((opcode - 0x70) >> 1) {
        case 0: scratch = flags.bit.o; break;                               // JO/JNO
        case 1: scratch = flags.bit.c; break;                               // JB/JNB
        case 2: scratch = flags.bit.z; break;                               // JZ/JNZ
        case 3: scratch = flags.bit.c || flags.bit.z; break;                // JBE/JA
        case 4: scratch = flags.bit.s; break;                               // JS/JNS
        case 5: scratch = flags.bit.p; break;                               // JP/JNP
        case 6: scratch = flags.bit.s != flags.bit.o; break;                // JL/JGE
        case 7: scratch = flags.bit.z || flags.bit.s != flags.bit.o; break; // JLE/JG
        }
        if (!!(opcode & 1) != scratch) {
            ip = target;
        }
        break;
    }
    case 0x83: // ARITH r/m16,imm8 (immediate is sign-extended to 16 bits)
        modrm(BIT16);
        arith16(reg, &RM16, (int8_t)imm8());
        break;
    case 0x84: // TEST r/m8,reg8
        modrm(BIT8);
        arith8(TEST, &RM8, R8(reg));
        break;
    case 0x88: // MOV r/m8,reg8
        modrm(BIT8);
        RM8 = R8(reg);
        break;
    case 0x89: // MOV r/m16,reg16
        modrm(BIT16);
        RM16 = R16(reg);
        break;
    case 0x8B: // MOV reg16,r/m16
        modrm(BIT16);
        R16(reg) = RM16;
        break;
    case 0x9C: // PUSHF
        push(flags.reg | FLAGMASK);
        break;
    case 0xA1: // MOV AX,memoffs16
        AX = load16(imm16());
        break;
    case 0xAA: // STOSB
        scratch = 1;
        if (prefixes & PREFIX_REPE) {
            scratch = CX;
            CX = 0;
        }
        for (; scratch > 0; scratch--) {
            mem[DI] = AL;
            DI += 1 - (flags.bit.d << 1);
        }
        break;
    case 0xAC: // LODSB
        scratch = 1;
        if (prefixes & PREFIX_REPE) {
            scratch = CX;
            CX = 0;
        }
        for (; scratch > 0; scratch--) {
            AL = mem[SI];
            SI += 1 - (flags.bit.d << 1);
        }
        break;
    case 0xB0 ... 0xB7: // MOV reg8,imm8
        reg = opcode - 0xB0;
        R8(reg) = imm8();
        break;
    case 0xB8 ... 0xBF: // MOV reg16,imm16
        reg = opcode - 0xB8;
        R16(reg) = imm16();
        break;
    case 0xC3: // RET
        ip = pop();
        break;
    case 0xCC: // INT3
        dump();
        break;
    case 0xCD: // INT imm8
        handle_intr(imm8());
        break;
    case 0xE8: { // CALL rw
        int16_t disp = (int16_t)imm16();
        uint16_t target = (uint16_t)(ip + disp);

        if (target == 5) {
            // A call to offset 5 is treated as a CP/M system call.
            cpm_syscall(CL);
        } else {
            push(ip);
            ip = target;
        }
        break;
    }
    case 0xE9: { // JMP rw
        int16_t disp = (int16_t)imm16();

        ip = (uint16_t)(ip + disp);
        break;
    }
    case 0xEB: { // JMP rb
        int8_t disp = (int8_t)imm8();

        ip = (uint16_t)(ip + disp);
        break;
    }
    case 0xF2:
        // Prefix: keep it pending for the next opcode, so skip the reset below.
        prefixes |= PREFIX_REPNE;
        return;
    case 0xF3:
        prefixes |= PREFIX_REPE;
        return;
    case 0xFA: // CLI
        flags.bit.i = 0;
        break;
    case 0xFB: // STI
        flags.bit.i = 1;
        break;
    default:
        ip--;
        dump();
        fprintf(stderr, "Invalid opcode at IP=%04X\n", ip);
        exit(1);
        break;
    }

    prefixes = 0;
    if (flags.bit.t) {
        dump();
    }
}

// Store the command tail at 0x80: a length byte, the text, then a 0x0D byte.
void copy_cmdline(char *str)
{
    size_t len = strlen(str);

    // Clip at max length
    if (len > 0x7E) {
        fprintf(stderr, "Command line too long, max is 126 bytes\n");
        exit(1);
    }

    memcpy(&mem[0x81], str, len);
    mem[0x81 + len] = 0x0D;
    mem[0x80] = (uint8_t)len;
}

/*
 * Usage: emulator [-t] program.com [cmdline]
 *
 * -t enables the trace flag, which dumps the registers after every
 * instruction.
 */
int main(int argc, char **argv)
{
    memset(mem, 0, sizeof(mem));
    memset(&regset, 0, sizeof(regset));

    // Prepare default exit into int 20h
    mem[0] = 0xCD;
    mem[1] = 0x20;
    push(0);

    argc--;
    argv++;

    while (argc && argv[0][0] == '-') {
        switch (argv[0][1]) {
        case 't':
            flags.bit.t = 1;
            break;
        default:
            fprintf(stderr, "Unknown option %s\n", argv[0]);
            exit(1);
            break;
        }
        argc--;
        argv++;
    }

    if (argc) {
        // Binary mode: a .COM image is raw machine code.
        FILE *fd = fopen(argv[0], "rb");

        if (!fd) {
            fprintf(stderr, "Cannot open COM file %s\n", argv[0]);
            exit(1);
        }
        fread(mem + ip, 1, MEM_SIZE - ip, fd);
        if (ferror(fd)) {
            fprintf(stderr, "Error reading COM file %s\n", argv[0]);
            fclose(fd);
            exit(1);
        }
        fclose(fd);
        argc--;
        argv++;
    } else {
        fprintf(stderr, "No COM file specified\n");
        exit(1);
    }

    if (argc) {
        copy_cmdline(argv[0]);
    } else {
        copy_cmdline("");
    }

    while (1) {
        step();
    }
}