diff --git a/src/browser/starter.js b/src/browser/starter.js index 2aa1c2d9..675ade00 100644 --- a/src/browser/starter.js +++ b/src/browser/starter.js @@ -172,25 +172,6 @@ function V86Starter(options) "_mmap_write16": function(addr, value) { return cpu.mmap_write16(addr, value); }, "_mmap_write32": function(addr, value) { return cpu.mmap_write32(addr, value); }, - "_fpu_op_D8_reg": function() { return cpu.fpu.op_D8_reg.apply(cpu.fpu, arguments); }, - "_fpu_op_D9_reg": function() { return cpu.fpu.op_D9_reg.apply(cpu.fpu, arguments); }, - "_fpu_op_DA_reg": function() { return cpu.fpu.op_DA_reg.apply(cpu.fpu, arguments); }, - "_fpu_op_DB_reg": function() { return cpu.fpu.op_DB_reg.apply(cpu.fpu, arguments); }, - "_fpu_op_DC_reg": function() { return cpu.fpu.op_DC_reg.apply(cpu.fpu, arguments); }, - "_fpu_op_DD_reg": function() { return cpu.fpu.op_DD_reg.apply(cpu.fpu, arguments); }, - "_fpu_op_DE_reg": function() { return cpu.fpu.op_DE_reg.apply(cpu.fpu, arguments); }, - "_fpu_op_DF_reg": function() { return cpu.fpu.op_DF_reg.apply(cpu.fpu, arguments); }, - - "_fpu_op_D8_mem": function() { return cpu.fpu.op_D8_mem.apply(cpu.fpu, arguments); }, - "_fpu_op_D9_mem": function() { return cpu.fpu.op_D9_mem.apply(cpu.fpu, arguments); }, - "_fpu_op_DA_mem": function() { return cpu.fpu.op_DA_mem.apply(cpu.fpu, arguments); }, - "_fpu_op_DB_mem": function() { return cpu.fpu.op_DB_mem.apply(cpu.fpu, arguments); }, - "_fpu_op_DC_mem": function() { return cpu.fpu.op_DC_mem.apply(cpu.fpu, arguments); }, - "_fpu_op_DD_mem": function() { return cpu.fpu.op_DD_mem.apply(cpu.fpu, arguments); }, - "_fpu_op_DE_mem": function() { return cpu.fpu.op_DE_mem.apply(cpu.fpu, arguments); }, - "_fpu_op_DF_mem": function() { return cpu.fpu.op_DF_mem.apply(cpu.fpu, arguments); }, - "_fwait": function() { return cpu.fpu.fwait(); }, - "_int_log2": function(val) { return v86util.int_log2(val); }, "_math_pow": function(x, y) { return Math.pow(x, y); }, @@ -244,6 +225,21 @@ function V86Starter(options) 
"_get_time": () => Date.now(), "_codegen_finalize": (cache_index, virt_start, start, end) => cpu.codegen_finalize(cache_index, virt_start, start, end), + + "_atan2": Math.atan2, + "_sin": Math.sin, + "_cos": Math.cos, + "_tan": Math.tan, + "_trunc": Math.trunc, + "_fmod": (x, y) => x % y, + "_llvm_exp2_f64": (x) => Math.pow(2, x), + "_log": Math.log, + "_round": Math.round, + }; + + const wasm_globals = { + "Infinity": Infinity, + "NaN": NaN, }; let wasm_file = DEBUG ? "v86-debug.wasm" : "v86.wasm"; @@ -259,7 +255,7 @@ function V86Starter(options) v86util.load_wasm( wasm_file, - { "env": wasm_shared_funcs }, + { "env": wasm_shared_funcs, "global" : wasm_globals }, options["memory_size"] + INTERNAL_MEM_SIZE, WASM_TABLE_SIZE, wm => { diff --git a/src/cpu.js b/src/cpu.js index 2b2f67c2..1234cdb8 100644 --- a/src/cpu.js +++ b/src/cpu.js @@ -192,6 +192,29 @@ function CPU(bus, wm, codegen) this.reg8s = new Int8Array(this.reg32s.buffer, 4, 32); this.reg8 = new Uint8Array(this.reg32s.buffer, 4, 32); + // Why no Float80Array :-( + this.fpu_st = new Float64Array(wm.memory.buffer, 968, 8); + + this.fpu_stack_empty = new Int32Array(wm.memory.buffer, 816, 1); + this.fpu_stack_empty[0] = 0xff; + this.fpu_stack_ptr = new Uint32Array(wm.memory.buffer, 1032, 1); + this.fpu_stack_ptr[0] = 0; + + this.fpu_control_word = new Int32Array(wm.memory.buffer, 1036, 1); + this.fpu_control_word[0] = 0x37F; + this.fpu_status_word = new Int32Array(wm.memory.buffer, 1040, 1); + this.fpu_status_word[0] = 0; + this.fpu_ip = new Int32Array(wm.memory.buffer, 1048, 1); + this.fpu_ip[0] = 0; + this.fpu_ip_selector = new Int32Array(wm.memory.buffer, 1052, 1); + this.fpu_ip_selector[0] = 0; + this.fpu_opcode = new Int32Array(wm.memory.buffer, 1044, 1); + this.fpu_opcode[0] = 0; + this.fpu_dp = new Int32Array(wm.memory.buffer, 1056, 1); + this.fpu_dp[0] = 0; + this.fpu_dp_selector = new Int32Array(wm.memory.buffer, 1060, 1); + this.fpu_dp_selector[0] = 0; + // mm0-mm7 split up into 32 bit pairs 
this.reg_mmxs = new Int32Array(wm.memory.buffer, 1064, 16); this.reg_mmx = new Uint32Array(this.reg_mmxs.buffer, 1064, 16); @@ -211,7 +234,6 @@ function CPU(bus, wm, codegen) this.fw_value = new Int32Array(wm.memory.buffer, 720, 1); this.io = undefined; - this.fpu = undefined; this.bus = bus; @@ -452,7 +474,6 @@ CPU.prototype.get_state = function() state[40] = this.sreg; state[41] = this.dreg; state[42] = this.mem8; - state[43] = this.fpu; state[45] = this.devices.virtio; state[46] = this.devices.apic; @@ -481,6 +502,16 @@ CPU.prototype.get_state = function() state[65] = this.reg_mmxs; state[66] = this.reg_xmm32s; + state[67] = this.fpu_st; + state[68] = this.fpu_stack_empty[0]; + state[69] = this.fpu_stack_ptr[0]; + state[70] = this.fpu_control_word[0]; + state[71] = this.fpu_ip[0]; + state[72] = this.fpu_ip_selector[0]; + state[73] = this.fpu_dp[0]; + state[74] = this.fpu_dp_selector[0]; + state[75] = this.fpu_opcode[0]; + return state; }; @@ -527,7 +558,6 @@ CPU.prototype.set_state = function(state) this.sreg.set(state[40]); this.dreg.set(state[41]); this.mem8.set(state[42]); - this.fpu = state[43]; this.devices.virtio = state[45]; this.devices.apic = state[46]; @@ -556,6 +586,16 @@ CPU.prototype.set_state = function(state) this.reg_mmxs.set(state[65]); this.reg_xmm32s.set(state[66]); + this.fpu_st.set(state[67]); + this.fpu_stack_empty[0] = state[68]; + this.fpu_stack_ptr[0] = state[69]; + this.fpu_control_word[0] = state[70]; + this.fpu_ip[0] = state[71]; + this.fpu_ip_selector[0] = state[72]; + this.fpu_dp[0] = state[73]; + this.fpu_dp_selector[0] = state[74]; + this.fpu_opcode[0] = state[75]; + this.full_clear_tlb(); // tsc_offset? 
@@ -849,8 +889,6 @@ CPU.prototype.init = function(settings, device_bus) this.devices.vga = new VGAScreen(this, device_bus, settings.vga_memory_size || 8 * 1024 * 1024); - this.fpu = new FPU(this); - this.devices.ps2 = new PS2(this, device_bus); this.devices.uart = new UART(this, 0x3F8, device_bus); @@ -1500,7 +1538,7 @@ CPU.prototype.set_cr0 = function(cr0) this.cr[0] = cr0; - if(!this.fpu) + if(false) { // if there's no FPU, keep emulation set this.cr[0] |= CR0_EM; @@ -3703,7 +3741,7 @@ CPU.prototype.cpuid = function() ecx = 1 << 23 | 1 << 30; // popcnt, rdrand var vme = 0 << 1; if(VMWARE_HYPERVISOR_PORT) ecx |= 1 << 31; // hypervisor - edx = (this.fpu ? 1 : 0) | // fpu + edx = (true /* have fpu */ ? 1 : 0) | // fpu vme | 1 << 3 | 1 << 4 | 1 << 5 | // vme, pse, tsc, msr 1 << 8 | 1 << 11 | 1 << 13 | 1 << 15 | // cx8, sep, pge, cmov 1 << 23 | 1 << 24 | 1 << 25 | 1 << 26; // mmx, fxsr, sse1, sse2 diff --git a/src/native/cpu.c b/src/native/cpu.c index 5615c9f4..8df56a03 100644 --- a/src/native/cpu.c +++ b/src/native/cpu.c @@ -827,6 +827,14 @@ void clear_tlb() memcpy_large(tlb_info, tlb_info_global, 0x100000); } +void task_switch_test() +{ + if(cr[0] & (CR0_EM | CR0_TS)) + { + trigger_nm(); + } +} + void task_switch_test_mmx() { if(*cr & (CR0_EM | CR0_TS)) diff --git a/src/native/cpu.h b/src/native/cpu.h index 6ad8c1e3..59c6759d 100644 --- a/src/native/cpu.h +++ b/src/native/cpu.h @@ -70,6 +70,7 @@ void write_xmm64(int32_t r, union reg64 data); void write_xmm128(int32_t r, int32_t i0, int32_t i1, int32_t i2, int32_t i3); void write_xmm_reg128(int32_t r, union reg128 data); void clear_tlb(void); +void task_switch_test(void); void task_switch_test_mmx(void); int32_t read_moffs(void); int32_t get_real_eip(void); diff --git a/src/native/fpu.c b/src/native/fpu.c index fb77f2fe..9441aa27 100644 --- a/src/native/fpu.c +++ b/src/native/fpu.c @@ -6,16 +6,52 @@ #include "global_pointers.h" #include "js_imports.h" #include "cpu.h" +#include "misc_instr.h" #include "log.h" 
#include "fpu.h" +const int32_t + FPU_C0 = 0x100, + FPU_C1 = 0x200, + FPU_C2 = 0x400, + FPU_C3 = 0x4000, + FPU_RESULT_FLAGS = FPU_C0 | FPU_C1 | FPU_C2 | FPU_C3, + FPU_STACK_TOP = 0x3800; -union double_int { +const int32_t + // precision, round & infinity control + FPU_PC = 3 << 8, + FPU_RC = 3 << 10, + FPU_IF = 1 << 12; + +// exception bits in the status word +const int32_t + FPU_EX_SF = 1 << 6, + FPU_EX_P = 1 << 5, + FPU_EX_U = 1 << 4, + FPU_EX_O = 1 << 3, + FPU_EX_Z = 1 << 2, + FPU_EX_D = 1 << 1, + FPU_EX_I = 1 << 0; + +const double_t + TWO_POW_63 = 0x8000000000000000; + +const double_t INDEFINITE_NAN = NAN; + + +union f64_int { uint8_t u8[8]; int32_t i32[2]; double_t f64; }; +union f32_int { + uint8_t u8[4]; + int32_t i32; + float_t f32; +}; + void fpu_set_tag_word(int32_t tag_word) { *fpu_stack_empty = 0; @@ -26,7 +62,7 @@ void fpu_set_tag_word(int32_t tag_word) } } -void fpu_fcomi(double_t y) +void fcomi(double_t y) { double_t x = fpu_st[*fpu_stack_ptr]; *flags_changed &= ~(1 | FLAG_PARITY | FLAG_ZERO); @@ -60,9 +96,15 @@ void fpu_set_status_word(int32_t sw) *fpu_stack_ptr = sw >> 11 & 7; } +// sign of a number on the stack +int32_t fpu_sign(int32_t i) +{ + return fpu_st8[(fpu_stack_ptr[0] + i & 7) << 3 | 7] >> 7; +} + void fpu_store_m80(uint32_t addr, double_t n) { - union double_int double_int_view = { .f64 = n }; + union f64_int double_int_view = { .f64 = n }; uint8_t sign = double_int_view.u8[7] & 0x80; int32_t exponent = (double_int_view.u8[7] & 0x7f) << 4 | double_int_view.u8[6] >> 4; @@ -93,7 +135,7 @@ void fpu_store_m80(uint32_t addr, double_t n) dbg_assert(exponent >= 0 && exponent < 0x8000); - writable_or_pagefault(addr, 10); + // writable_or_pagefault must have checked called by the caller! 
safe_write32(addr, low); safe_write32(addr + 4, high); safe_write16(addr + 8, sign << 8 | exponent); @@ -123,7 +165,7 @@ double_t fpu_load_m80(uint32_t addr) // TODO: NaN, Infinity //dbg_log("Load m80 TODO", LOG_FPU); - union double_int double_int_view; + union f64_int double_int_view; double_int_view.u8[7] = 0x7F | sign << 7; double_int_view.u8[6] = 0xF0 | high >> 30 << 3 & 0x08; @@ -151,3 +193,1419 @@ double_t fpu_load_m80(uint32_t addr) return mantissa * math_pow(2, exponent - 63); } +void fpu_stack_fault() +{ + // TODO: Interrupt + fpu_status_word[0] |= FPU_EX_SF | FPU_EX_I; +} + +void fpu_invalid_arithmatic() +{ + fpu_status_word[0] |= FPU_EX_I; +} + +double_t fpu_get_st0() +{ + if(fpu_stack_empty[0] >> fpu_stack_ptr[0] & 1) + { + fpu_status_word[0] &= ~FPU_C1; + fpu_stack_fault(); + return INDEFINITE_NAN; + } + else + { + return fpu_st[fpu_stack_ptr[0]]; + } +} + +void fcom(double_t y) +{ + double_t x = fpu_get_st0(); + + fpu_status_word[0] &= ~FPU_RESULT_FLAGS; + + if(x > y) + { + } + else if(y > x) + { + fpu_status_word[0] |= FPU_C0; + } + else if(x == y) + { + fpu_status_word[0] |= FPU_C3; + } + else + { + fpu_status_word[0] |= FPU_C0 | FPU_C2 | FPU_C3; + } +} + +void fucom(double_t y) +{ + // TODO + fcom(y); +} + + +void fucomi(double_t y) +{ + // TODO + fcomi(y); +} + +void ftst(double_t x) +{ + fpu_status_word[0] &= ~FPU_RESULT_FLAGS; + + if(isnan(x)) + { + fpu_status_word[0] |= FPU_C3 | FPU_C2 | FPU_C0; + } + else if(x == 0) + { + fpu_status_word[0] |= FPU_C3; + } + else if(x < 0) + { + fpu_status_word[0] |= FPU_C0; + } + + // TODO: unordered (x is nan, etc) +} + +void fxam(double_t x) +{ + fpu_status_word[0] &= ~FPU_RESULT_FLAGS; + fpu_status_word[0] |= fpu_sign(0) << 9; + + if(fpu_stack_empty[0] >> fpu_stack_ptr[0] & 1) + { + fpu_status_word[0] |= FPU_C3 | FPU_C0; + } + else if(isnan(x)) + { + fpu_status_word[0] |= FPU_C0; + } + else if(x == 0) + { + fpu_status_word[0] |= FPU_C3; + } + else if(x == INFINITY || x == -INFINITY) + { + fpu_status_word[0] 
|= FPU_C2 | FPU_C0; + } + else + { + fpu_status_word[0] |= FPU_C2; + } + // TODO: + // Unsupported, Denormal +} + +void finit() +{ + fpu_control_word[0] = 0x37F; + fpu_status_word[0] = 0; + fpu_ip[0] = 0; + fpu_dp[0] = 0; + fpu_opcode[0] = 0; + + fpu_stack_empty[0] = 0xFF; + fpu_stack_ptr[0] = 0; +} + + +int32_t fpu_load_tag_word() +{ + int32_t tag_word = 0; + + for(int32_t i = 0; i < 8; i++) + { + double_t value = fpu_st[i]; + + if(fpu_stack_empty[0] >> i & 1) + { + tag_word |= 3 << (i << 1); + } + else if(value == 0) + { + tag_word |= 1 << (i << 1); + } + else if(!isfinite(value)) + { + tag_word |= 2 << (i << 1); + } + } + + //dbg_log("load tw=" + h(tag_word) + " se=" + h(fpu_stack_empty[0]) + " sp=" + fpu_stack_ptr[0], LOG_FPU); + + return tag_word; +} + +void fpu_unimpl() +{ + if(DEBUG) + { + assert(false); + } + else + { + trigger_ud(); + } +} + +void fstenv(int32_t addr) +{ + if(is_osize_32()) + { + writable_or_pagefault(addr, 26); + + safe_write16(addr, fpu_control_word[0]); + + safe_write16(addr + 4, fpu_load_status_word()); + safe_write16(addr + 8, fpu_load_tag_word()); + + safe_write32(addr + 12, fpu_ip[0]); + safe_write16(addr + 16, fpu_ip_selector[0]); + safe_write16(addr + 18, fpu_opcode[0]); + safe_write32(addr + 20, fpu_dp[0]); + safe_write16(addr + 24, fpu_dp_selector[0]); + } + else + { + dbg_log("fstenv16"); + fpu_unimpl(); + } +} + +void fldenv(int32_t addr) +{ + if(is_osize_32()) + { + fpu_control_word[0] = safe_read16(addr); + + fpu_set_status_word(safe_read16(addr + 4)); + fpu_set_tag_word(safe_read16(addr + 8)); + + fpu_ip[0] = safe_read32s(addr + 12); + fpu_ip_selector[0] = safe_read16(addr + 16); + fpu_opcode[0] = safe_read16(addr + 18); + fpu_dp[0] = safe_read32s(addr + 20); + fpu_dp_selector[0] = safe_read16(addr + 24); + } + else + { + dbg_log("fldenv16"); + fpu_unimpl(); + } +} + +void fsave(int32_t addr) +{ + writable_or_pagefault(addr, 108); + + fstenv(addr); + addr += 28; + + for(int32_t i = 0; i < 8; i++) + { + fpu_store_m80(addr, 
fpu_st[fpu_stack_ptr[0] + i & 7]); + addr += 10; + } + + //dbg_log("save st=" + fpu_stack_ptr[0] + " " + [].slice.call(this.st), LOG_FPU); + + finit(); +} + +void frstor(int32_t addr) +{ + fldenv(addr); + addr += 28; + + for(int32_t i = 0; i < 8; i++) + { + fpu_st[(i + fpu_stack_ptr[0]) & 7] = fpu_load_m80(addr); + addr += 10; + } + + //dbg_log("rstor st=" + fpu_stack_ptr[0] + " " + [].slice.call(this.st), LOG_FPU); +} + +void fpu_push(double_t x) +{ + fpu_stack_ptr[0] = fpu_stack_ptr[0] - 1 & 7; + + if(fpu_stack_empty[0] >> fpu_stack_ptr[0] & 1) + { + fpu_status_word[0] &= ~FPU_C1; + fpu_stack_empty[0] &= ~(1 << fpu_stack_ptr[0]); + fpu_st[fpu_stack_ptr[0]] = x; + } + else + { + fpu_status_word[0] |= FPU_C1; + fpu_stack_fault(); + fpu_st[fpu_stack_ptr[0]] = INDEFINITE_NAN; + } +} + +void fpu_pop() +{ + fpu_stack_empty[0] |= 1 << fpu_stack_ptr[0]; + fpu_stack_ptr[0] = fpu_stack_ptr[0] + 1 & 7; +} + +double_t fpu_get_sti(int32_t i) +{ + dbg_assert(i >= 0 && i < 8); + + i = i + fpu_stack_ptr[0] & 7; + + if(fpu_stack_empty[0] >> i & 1) + { + fpu_status_word[0] &= ~FPU_C1; + fpu_stack_fault(); + return INDEFINITE_NAN; + } + else + { + return fpu_st[i]; + } +} + +void fxtract() +{ + union f64_int double_int_view = { .f64 = fpu_get_st0() }; + + double_t exponent = ((double_int_view.u8[7] & 0x7F) << 4 | double_int_view.u8[6] >> 4) - 0x3FF; + + double_int_view.u8[7] = 0x3F | (double_int_view.u8[7] & 0x80); + double_int_view.u8[6] |= 0xF0; + + fpu_st[fpu_stack_ptr[0]] = exponent; + fpu_push(double_int_view.f64); +} + +double_t fpu_integer_round(double_t f) +{ + int32_t rc = fpu_control_word[0] >> 10 & 3; + + // XXX: See https://en.wikipedia.org/wiki/C_mathematical_functions + + if(rc == 0) + { + // Round to nearest, or even if equidistant + double_t rounded = round(f); + + if(rounded - f == 0.5 && (fmod(rounded, 2))) + { + // Special case: Math.round rounds to positive infinity + // if equidistant + rounded--; + } + + return rounded; + } + // rc=3 is truncate -> floor for 
positive numbers + else if(rc == 1 || (rc == 3 && f > 0)) + { + return floor(f); + } + else + { + return ceil(f); + } +} + +double_t fpu_truncate(double_t x) +{ + return x > 0 ? floor(x) : ceil(x); +} + +double_t fpu_load_m64(int32_t addr) +{ + // XXX: Use safe_read64s + int32_t low = safe_read32s(addr); + int32_t high = safe_read32s(addr + 4); + + union f64_int v = { .i32 = { low, high } }; + + return v.f64; +} + +void fpu_store_m64(int32_t addr, int32_t i) +{ + // XXX: Use safe_write64 + writable_or_pagefault(addr, 8); + + union f64_int v = { .f64 = fpu_get_sti(i) }; + + safe_write32(addr, v.i32[0]); + safe_write32(addr + 4, v.i32[1]); +} + +double_t fpu_load_m32(int32_t addr) +{ + union f32_int v = { .i32 = safe_read32s(addr) }; + return v.f32; +} + +void fpu_store_m32(int32_t addr, double_t x) +{ + union f32_int v = { .f32 = x }; + safe_write32(addr, v.i32); +} + + +void dbg_log_fpu_op(int32_t op, int32_t imm8) +{ +#if 0 + if(!FPU_LOG_OP) + { + return; + } + + if(imm8 >= 0xC0) + { + dbg_log(h(op, 2) + " " + h(imm8, 2) + "/" + (imm8 >> 3 & 7) + "/" + (imm8 & 7) + + " @" + h(this.cpu.instruction_pointer[0], 8) + " sp=" + fpu_stack_ptr[0] + " st=" + h(fpu_stack_empty[0], 2), LOG_FPU); + } + else + { + dbg_log(h(op, 2) + " /" + imm8 + + " @" + h(this.cpu.instruction_pointer[0], 8) + " sp=" + fpu_stack_ptr[0] + " st=" + h(fpu_stack_empty[0], 2), LOG_FPU); + } +#endif +} + + +void fwait() +{ + // NOP unless FPU instructions run in parallel with CPU instructions +} + +void fpu_op_D8_reg(int32_t imm8) +{ + dbg_log_fpu_op(0xD8, imm8); + + int32_t mod = imm8 >> 3 & 7; + int32_t low = imm8 & 7; + double_t sti = fpu_get_sti(low); + double_t st0 = fpu_get_st0(); + + switch(mod) + { + case 0: + // fadd + fpu_st[fpu_stack_ptr[0]] = st0 + sti; + break; + case 1: + // fmul + fpu_st[fpu_stack_ptr[0]] = st0 * sti; + break; + case 2: + // fcom + fcom(sti); + break; + case 3: + // fcomp + fcom(sti); + fpu_pop(); + break; + case 4: + // fsub + fpu_st[fpu_stack_ptr[0]] = st0 - sti; + 
break; + case 5: + // fsubr + fpu_st[fpu_stack_ptr[0]] = sti - st0; + break; + case 6: + // fdiv + fpu_st[fpu_stack_ptr[0]] = st0 / sti; + break; + case 7: + // fdivr + fpu_st[fpu_stack_ptr[0]] = sti / st0; + break; + default: + dbg_assert(false); + } +} + +void fpu_op_D8_mem(int32_t mod, int32_t addr) +{ + dbg_log_fpu_op(0xD8, mod); + + double_t m32 = fpu_load_m32(addr); + double_t st0 = fpu_get_st0(); + + switch(mod) + { + case 0: + // fadd + fpu_st[fpu_stack_ptr[0]] = st0 + m32; + break; + case 1: + // fmul + fpu_st[fpu_stack_ptr[0]] = st0 * m32; + break; + case 2: + // fcom + fcom(m32); + break; + case 3: + // fcomp + fcom(m32); + fpu_pop(); + break; + case 4: + // fsub + fpu_st[fpu_stack_ptr[0]] = st0 - m32; + break; + case 5: + // fsubr + fpu_st[fpu_stack_ptr[0]] = m32 - st0; + break; + case 6: + // fdiv + fpu_st[fpu_stack_ptr[0]] = st0 / m32; + break; + case 7: + // fdivr + fpu_st[fpu_stack_ptr[0]] = m32 / st0; + break; + default: + dbg_assert(false); + } +} + +void fpu_op_D9_reg(int32_t imm8) +{ + dbg_log_fpu_op(0xD9, imm8); + + int32_t mod = imm8 >> 3 & 7; + int32_t low = imm8 & 7; + + switch(mod) + { + case 0: + // fld + { + double_t sti = fpu_get_sti(low); + fpu_push(sti); + } + break; + case 1: + // fxch + { + double_t sti = fpu_get_sti(low); + fpu_st[fpu_stack_ptr[0] + low & 7] = fpu_get_st0(); + fpu_st[fpu_stack_ptr[0]] = sti; + } + break; + case 2: + switch(low) + { + case 0: + // fnop + break; + default: + dbg_log("%x", low); + fpu_unimpl(); + } + break; + case 3: + // fstp1 + dbg_log("fstp1"); + fpu_unimpl(); + break; + case 4: + { + double_t st0 = fpu_get_st0(); + + switch(low) + { + case 0: + // fchs + fpu_st[fpu_stack_ptr[0]] = -st0; + break; + case 1: + // fabs + fpu_st[fpu_stack_ptr[0]] = fabs(st0); + break; + case 4: + ftst(st0); + break; + case 5: + fxam(st0); + break; + default: + dbg_log("%x", low); + fpu_unimpl(); + } + } + break; + case 5: + // fld1/fldl2t/fldl2e/fldpi/fldlg2/fldln2/fldz + switch(low) + { + case 0: fpu_push(1); break; + 
case 1: fpu_push(M_LN10 / M_LN2); break; + case 2: fpu_push(M_LOG2E); break; + case 3: fpu_push(M_PI); break; + case 4: fpu_push(M_LN2 / M_LN10); break; + case 5: fpu_push(M_LN2); break; + case 6: fpu_push(0); break; + case 7: dbg_log("d9/5/7"); fpu_unimpl(); break; + } + break; + case 6: + { + double_t st0 = fpu_get_st0(); + + switch(low) + { + case 0: + // f2xm1 + fpu_st[fpu_stack_ptr[0]] = pow(2, st0) - 1; + break; + case 1: + // fyl2x + fpu_st[fpu_stack_ptr[0] + 1 & 7] = fpu_get_sti(1) * log(st0) / M_LN2; + fpu_pop(); + break; + case 2: + // fptan + fpu_st[fpu_stack_ptr[0]] = tan(st0); + fpu_push(1); // no bug: push constant 1 + break; + case 3: + // fpatan + fpu_st[fpu_stack_ptr[0] + 1 & 7] = atan2(fpu_get_sti(1), st0); + fpu_pop(); + break; + case 4: + fxtract(); + break; + case 5: + // fprem1 + fpu_st[fpu_stack_ptr[0]] = fmod(st0, fpu_get_sti(1)); + break; + case 6: + // fdecstp + fpu_stack_ptr[0] = fpu_stack_ptr[0] - 1 & 7; + fpu_status_word[0] &= ~FPU_C1; + break; + case 7: + // fincstp + fpu_stack_ptr[0] = fpu_stack_ptr[0] + 1 & 7; + fpu_status_word[0] &= ~FPU_C1; + break; + default: + dbg_assert(false); + } + } + break; + case 7: + { + double_t st0 = fpu_get_st0(); + + switch(low) + { + case 0: + // fprem + { + double_t st1 = fpu_get_sti(1); + int32_t fprem_quotient = trunc(st0 / st1); + fpu_st[fpu_stack_ptr[0]] = fmod(st0, st1); + + fpu_status_word[0] &= ~(FPU_C0 | FPU_C1 | FPU_C3); + if (fprem_quotient & 1) { + fpu_status_word[0] |= FPU_C1; + } + if (fprem_quotient & (1 << 1)) { + fpu_status_word[0] |= FPU_C3; + } + if (fprem_quotient & (1 << 2)) { + fpu_status_word[0] |= FPU_C0; + } + + fpu_status_word[0] &= ~FPU_C2; + } + break; + case 1: + // fyl2xp1: y * log2(x+1) and pop + fpu_st[fpu_stack_ptr[0] + 1 & 7] = fpu_get_sti(1) * log(st0 + 1) / M_LN2; + fpu_pop(); + break; + case 2: + fpu_st[fpu_stack_ptr[0]] = sqrt(st0); + break; + case 3: + fpu_st[fpu_stack_ptr[0]] = sin(st0); + fpu_push(cos(st0)); + break; + case 4: + // frndint + 
fpu_st[fpu_stack_ptr[0]] = fpu_integer_round(st0); + break; + case 5: + // fscale + fpu_st[fpu_stack_ptr[0]] = st0 * pow(2, fpu_truncate(fpu_get_sti(1))); + break; + case 6: + fpu_st[fpu_stack_ptr[0]] = sin(st0); + break; + case 7: + fpu_st[fpu_stack_ptr[0]] = cos(st0); + break; + default: + dbg_assert(false); + } + } + break; + default: + dbg_assert(false); + } +} + +void fpu_op_D9_mem(int32_t mod, int32_t addr) +{ + dbg_log_fpu_op(0xD9, mod); + + switch(mod) + { + case 0: + // fld + { + double_t data = fpu_load_m32(addr); + fpu_push(data); + } + break; + case 1: + // not defined + dbg_log("d9/1"); + fpu_unimpl(); + break; + case 2: + // fst + fpu_store_m32(addr, fpu_get_st0()); + break; + case 3: + // fstp + fpu_store_m32(addr, fpu_get_st0()); + fpu_pop(); + break; + case 4: + fldenv(addr); + break; + case 5: + // fldcw + { + int32_t word = safe_read16(addr); + fpu_control_word[0] = word; + } + break; + case 6: + fstenv(addr); + break; + case 7: + // fstcw + safe_write16(addr, fpu_control_word[0]); + break; + default: + dbg_assert(false); + } +} + +void fpu_op_DA_reg(int32_t imm8) +{ + dbg_log_fpu_op(0xDA, imm8); + + int32_t mod = imm8 >> 3 & 7; + int32_t low = imm8 & 7; + + switch(mod) + { + case 0: + // fcmovb + if(test_b()) + { + fpu_st[fpu_stack_ptr[0]] = fpu_get_sti(low); + fpu_stack_empty[0] &= ~(1 << fpu_stack_ptr[0]); + } + break; + case 1: + // fcmove + if(test_z()) + { + fpu_st[fpu_stack_ptr[0]] = fpu_get_sti(low); + fpu_stack_empty[0] &= ~(1 << fpu_stack_ptr[0]); + } + break; + case 2: + // fcmovbe + if(test_be()) + { + fpu_st[fpu_stack_ptr[0]] = fpu_get_sti(low); + fpu_stack_empty[0] &= ~(1 << fpu_stack_ptr[0]); + } + break; + case 3: + // fcmovu + if(test_p()) + { + fpu_st[fpu_stack_ptr[0]] = fpu_get_sti(low); + fpu_stack_empty[0] &= ~(1 << fpu_stack_ptr[0]); + } + break; + case 5: + if(low == 1) + { + // fucompp + fucom(fpu_get_sti(1)); + fpu_pop(); + fpu_pop(); + } + else + { + dbg_log("%x", mod); + fpu_unimpl(); + } + break; + default: + 
dbg_log("%x", mod); + fpu_unimpl(); + } +} + +void fpu_op_DA_mem(int32_t mod, int32_t addr) +{ + dbg_log_fpu_op(0xDA, mod); + + int32_t m32 = safe_read32s(addr); + double_t st0 = fpu_get_st0(); + + switch(mod) + { + case 0: + // fadd + fpu_st[fpu_stack_ptr[0]] = st0 + m32; + break; + case 1: + // fmul + fpu_st[fpu_stack_ptr[0]] = st0 * m32; + break; + case 2: + // fcom + fcom(m32); + break; + case 3: + // fcomp + fcom(m32); + fpu_pop(); + break; + case 4: + // fsub + fpu_st[fpu_stack_ptr[0]] = st0 - m32; + break; + case 5: + // fsubr + fpu_st[fpu_stack_ptr[0]] = m32 - st0; + break; + case 6: + // fdiv + fpu_st[fpu_stack_ptr[0]] = st0 / m32; + break; + case 7: + // fdivr + fpu_st[fpu_stack_ptr[0]] = m32 / st0; + break; + default: + dbg_assert(false); + } +} + +void fpu_op_DB_reg(int32_t imm8) +{ + dbg_log_fpu_op(0xDB, imm8); + + int32_t mod = imm8 >> 3 & 7; + int32_t low = imm8 & 7; + + switch(mod) + { + case 0: + // fcmovnb + if(!test_b()) + { + fpu_st[fpu_stack_ptr[0]] = fpu_get_sti(low); + fpu_stack_empty[0] &= ~(1 << fpu_stack_ptr[0]); + } + break; + case 1: + // fcmovne + if(!test_z()) + { + fpu_st[fpu_stack_ptr[0]] = fpu_get_sti(low); + fpu_stack_empty[0] &= ~(1 << fpu_stack_ptr[0]); + } + break; + case 2: + // fcmovnbe + if(!test_be()) + { + fpu_st[fpu_stack_ptr[0]] = fpu_get_sti(low); + fpu_stack_empty[0] &= ~(1 << fpu_stack_ptr[0]); + } + break; + case 3: + // fcmovnu + if(!test_p()) + { + fpu_st[fpu_stack_ptr[0]] = fpu_get_sti(low); + fpu_stack_empty[0] &= ~(1 << fpu_stack_ptr[0]); + } + break; + case 4: + if(imm8 == 0xE3) + { + finit(); + } + else if(imm8 == 0xE4) + { + // fsetpm + // treat as nop + } + else if(imm8 == 0xE1) + { + // fdisi + // also treat as nop + } + else if(imm8 == 0xE2) + { + // fclex + fpu_status_word[0] = 0; + } + else + { + dbg_log("%x", imm8); + fpu_unimpl(); + } + break; + case 5: + fucomi(fpu_get_sti(low)); + break; + case 6: + fcomi(fpu_get_sti(low)); + break; + default: + dbg_log("%x", mod); + fpu_unimpl(); + } +} + +int32_t 
int32_t convert_f64_to_i32(double_t);

// Memory forms of opcode DB:
//   0 fild  m32int     2 fist  m32int     3 fistp m32int
//   5 fld   m80        7 fstp  m80
// All other values of mod are reported as unimplemented.
void fpu_op_DB_mem(int32_t mod, int32_t addr)
{
    dbg_log_fpu_op(0xDB, mod);

    switch(mod)
    {
        case 0:
            // fild
            {
                int32_t data = safe_read32s(addr);
                fpu_push(data);
            }
            break;
        case 2:
            // fist
        case 3:
            // fistp
            {
                double_t st0 = fpu_integer_round(fpu_get_st0());
                int32_t i;

                // Decide on the rounded double whether it fits. Checking the
                // converted integer against 0x80000000 instead would flag
                // the representable value -2^31 as an error. NaN fails both
                // comparisons and ends up in the invalid branch.
                if(st0 >= -2147483648.0 && st0 <= 2147483647.0)
                {
                    i = convert_f64_to_i32(st0);
                }
                else
                {
                    // same error value as the 16-bit variant: sign bit only
                    i = (int32_t)0x80000000;
                    fpu_invalid_arithmatic();
                }

                safe_write32(addr, i);

                if(mod == 3)
                {
                    fpu_pop();
                }
            }
            break;
        case 5:
            // fld
            fpu_push(fpu_load_m80(addr));
            break;
        case 7:
            // fstp
            // fpu_store_m80 expects the caller to have checked writability
            writable_or_pagefault(addr, 10);
            fpu_store_m80(addr, fpu_get_st0());
            fpu_pop();
            break;
        default:
            dbg_log("%x", mod);
            fpu_unimpl();
    }
}
0: + // fadd + fpu_st[fpu_stack_ptr[0]] = st0 + m64; + break; + case 1: + // fmul + fpu_st[fpu_stack_ptr[0]] = st0 * m64; + break; + case 2: + // fcom + fcom(m64); + break; + case 3: + // fcomp + fcom(m64); + fpu_pop(); + break; + case 4: + // fsub + fpu_st[fpu_stack_ptr[0]] = st0 - m64; + break; + case 5: + // fsubr + fpu_st[fpu_stack_ptr[0]] = m64 - st0; + break; + case 6: + // fdiv + fpu_st[fpu_stack_ptr[0]] = st0 / m64; + break; + case 7: + // fdivr + fpu_st[fpu_stack_ptr[0]] = m64 / st0; + break; + default: + dbg_assert(false); + } +} + +void fpu_op_DD_reg(int32_t imm8) +{ + dbg_log_fpu_op(0xDD, imm8); + + int32_t mod = imm8 >> 3 & 7; + int32_t low = imm8 & 7; + + switch(mod) + { + case 0: + // ffree + fpu_stack_empty[0] |= 1 << (fpu_stack_ptr[0] + low & 7); + break; + case 2: + // fst + fpu_st[fpu_stack_ptr[0] + low & 7] = fpu_get_st0(); + break; + case 3: + // fstp + if(low == 0) + { + fpu_pop(); + } + else + { + fpu_st[fpu_stack_ptr[0] + low & 7] = fpu_get_st0(); + fpu_pop(); + } + break; + case 4: + fucom(fpu_get_sti(low)); + break; + case 5: + // fucomp + fucom(fpu_get_sti(low)); + fpu_pop(); + break; + default: + dbg_log("%x", mod); + fpu_unimpl(); + } +} + +void fpu_op_DD_mem(int32_t mod, int32_t addr) +{ + dbg_log_fpu_op(0xDD, mod); + + switch(mod) + { + case 0: + // fld + { + double_t data = fpu_load_m64(addr); + fpu_push(data); + } + break; + case 1: + // fisttp + dbg_log("dd/fisttp"); + fpu_unimpl(); + break; + case 2: + // fst + fpu_store_m64(addr, 0); + break; + case 3: + // fstp + fpu_store_m64(addr, 0); + fpu_pop(); + break; + case 4: + frstor(addr); + break; + case 5: + // nothing + dbg_log("dd/5"); + fpu_unimpl(); + break; + case 6: + // fsave + fsave(addr); + break; + case 7: + // fnstsw / store status word + safe_write16(addr, fpu_load_status_word()); + break; + default: + dbg_assert(false); + } +} + + +void fpu_op_DE_reg(int32_t imm8) +{ + dbg_log_fpu_op(0xDE, imm8); + + int32_t mod = imm8 >> 3 & 7; + int32_t low = imm8 & 7; + int32_t 
low_ptr = fpu_stack_ptr[0] + low & 7; + double_t sti = fpu_get_sti(low); + double_t st0 = fpu_get_st0(); + + switch(mod) + { + case 0: + // faddp + fpu_st[low_ptr] = sti + st0; + break; + case 1: + // fmulp + fpu_st[low_ptr] = sti * st0; + break; + case 2: + // fcomp + fcom(sti); + break; + case 3: + // fcompp + if(low == 1) + { + fcom(fpu_st[low_ptr]); + fpu_pop(); + } + else + { + // not a valid encoding + dbg_log("%x", mod); + fpu_unimpl(); + } + break; + case 4: + // fsubrp + fpu_st[low_ptr] = st0 - sti; + break; + case 5: + // fsubp + fpu_st[low_ptr] = sti - st0; + break; + case 6: + // fdivrp + fpu_st[low_ptr] = st0 / sti; + break; + case 7: + // fdivp + fpu_st[low_ptr] = sti / st0; + break; + default: + dbg_assert(false); + } + + fpu_pop(); +} + +void fpu_op_DE_mem(int32_t mod, int32_t addr) +{ + dbg_log_fpu_op(0xDE, mod); + + int32_t m16 = safe_read16(addr) << 16 >> 16; + double_t st0 = fpu_get_st0(); + + switch(mod) + { + case 0: + // fadd + fpu_st[fpu_stack_ptr[0]] = st0 + m16; + break; + case 1: + // fmul + fpu_st[fpu_stack_ptr[0]] = st0 * m16; + break; + case 2: + // fcom + fcom(m16); + break; + case 3: + // fcomp + fcom(m16); + fpu_pop(); + break; + case 4: + // fsub + fpu_st[fpu_stack_ptr[0]] = st0 - m16; + break; + case 5: + // fsubr + fpu_st[fpu_stack_ptr[0]] = m16 - st0; + break; + case 6: + // fdiv + fpu_st[fpu_stack_ptr[0]] = st0 / m16; + break; + case 7: + // fdivr + fpu_st[fpu_stack_ptr[0]] = m16 / st0; + break; + default: + dbg_assert(false); + } +} + +void fpu_op_DF_reg(int32_t imm8) +{ + dbg_log_fpu_op(0xDF, imm8); + + int32_t mod = imm8 >> 3 & 7; + int32_t low = imm8 & 7; + + switch(mod) + { + case 4: + if(imm8 == 0xE0) + { + // fnstsw + reg16[AX] = fpu_load_status_word(); + } + else + { + dbg_log("%x", imm8); + fpu_unimpl(); + } + break; + case 5: + // fucomip + fucomi(fpu_get_sti(low)); + fpu_pop(); + break; + case 6: + // fcomip + fcomi(fpu_get_sti(low)); + fpu_pop(); + break; + default: + dbg_log("%x", mod); + fpu_unimpl(); + } +} + 
+void fpu_op_DF_mem(int32_t mod, int32_t addr) +{ + dbg_log_fpu_op(0xDF, mod); + + switch(mod) + { + case 0: + { + int32_t m16 = safe_read16(addr) << 16 >> 16; + fpu_push(m16); + } + break; + case 1: + // fisttp + dbg_log("df/fisttp"); + fpu_unimpl(); + break; + case 2: + // fist + { + double_t st0 = fpu_integer_round(fpu_get_st0()); + if(st0 <= 0x7FFF && st0 >= -0x8000) + { + safe_write16(addr, st0); + } + else + { + fpu_invalid_arithmatic(); + safe_write16(addr, 0x8000); + } + } + break; + case 3: + // fistp + { + double_t st0 = fpu_integer_round(fpu_get_st0()); + if(st0 <= 0x7FFF && st0 >= -0x8000) + { + safe_write16(addr, st0); + } + else + { + fpu_invalid_arithmatic(); + safe_write16(addr, 0x8000); + } + fpu_pop(); + } + break; + case 4: + // fbld + dbg_log("fbld"); + fpu_unimpl(); + break; + case 5: + // fild + // XXX: Use safe_read64s + { + uint32_t low = safe_read32s(addr); + int32_t high = safe_read32s(addr + 4); + + double_t m64 = (double_t)low + 0x100000000 * (double_t)high; + + fpu_push(m64); + } + break; + case 6: + // fbstp + dbg_log("fbstp"); + fpu_unimpl(); + break; + case 7: + { + // fistp + writable_or_pagefault(addr, 8); + + double_t st0 = fpu_integer_round(fpu_get_st0()); + + //union f64_int v = { .f64 = st0 }; + //dbg_log("fistp %x %x", v.i32[0], v.i32[1]); + + int32_t st0_low; + int32_t st0_high; + + if(st0 < TWO_POW_63 && st0 >= -TWO_POW_63) + { + int64_t st0_int = st0; + st0_low = st0_int; + st0_high = st0_int >> 32; + } + else + { + // write 0x8000000000000000 + st0_low = 0; + st0_high = 0x80000000; + fpu_invalid_arithmatic(); + } + + // XXX: Use safe_write64 + safe_write32(addr, st0_low); + safe_write32(addr + 4, st0_high); + + fpu_pop(); + } + break; + default: + dbg_assert(false); + } +} diff --git a/src/native/fpu.h b/src/native/fpu.h index b3c68476..f9bd3f7b 100644 --- a/src/native/fpu.h +++ b/src/native/fpu.h @@ -9,3 +9,21 @@ int32_t fpu_load_status_word(void); void fpu_set_status_word(int32_t sw); void fpu_store_m80(uint32_t addr, 
double_t n); double_t fpu_load_m80(uint32_t addr); +void fwait(); + +void fpu_op_D8_mem(int32_t, int32_t); +void fpu_op_D8_reg(int32_t); +void fpu_op_D9_mem(int32_t, int32_t); +void fpu_op_D9_reg(int32_t); +void fpu_op_DA_mem(int32_t, int32_t); +void fpu_op_DA_reg(int32_t); +void fpu_op_DB_mem(int32_t, int32_t); +void fpu_op_DB_reg(int32_t); +void fpu_op_DC_mem(int32_t, int32_t); +void fpu_op_DC_reg(int32_t); +void fpu_op_DD_mem(int32_t, int32_t); +void fpu_op_DD_reg(int32_t); +void fpu_op_DE_mem(int32_t, int32_t); +void fpu_op_DE_reg(int32_t); +void fpu_op_DF_mem(int32_t, int32_t); +void fpu_op_DF_reg(int32_t); diff --git a/src/native/instructions.c b/src/native/instructions.c index 1d7e098b..f899f2b5 100644 --- a/src/native/instructions.c +++ b/src/native/instructions.c @@ -8,6 +8,7 @@ #include "log.h" #include "arith.h" #include "cpu.h" +#include "fpu.h" #include "shared.h" #include "misc_instr.h" #include "string.h" diff --git a/src/native/js_imports.h b/src/native/js_imports.h index 05573af8..c90705e3 100644 --- a/src/native/js_imports.h +++ b/src/native/js_imports.h @@ -33,24 +33,7 @@ extern void enter16(int32_t, int32_t); extern void enter32(int32_t, int32_t); extern void far_jump(int32_t, int32_t, int32_t); extern void far_return(int32_t, int32_t, int32_t); -extern void fpu_op_D8_mem(int32_t, int32_t); -extern void fpu_op_D8_reg(int32_t); -extern void fpu_op_D9_mem(int32_t, int32_t); -extern void fpu_op_D9_reg(int32_t); -extern void fpu_op_DA_mem(int32_t, int32_t); -extern void fpu_op_DA_reg(int32_t); -extern void fpu_op_DB_mem(int32_t, int32_t); -extern void fpu_op_DB_reg(int32_t); -extern void fpu_op_DC_mem(int32_t, int32_t); -extern void fpu_op_DC_reg(int32_t); -extern void fpu_op_DD_mem(int32_t, int32_t); -extern void fpu_op_DD_reg(int32_t); -extern void fpu_op_DE_mem(int32_t, int32_t); -extern void fpu_op_DE_reg(int32_t); -extern void fpu_op_DF_mem(int32_t, int32_t); -extern void fpu_op_DF_reg(int32_t); extern void full_clear_tlb(void); -extern void 
fwait(void); extern void handle_irqs(void); extern void hlt_op(void); extern void invlpg(int32_t); @@ -64,7 +47,6 @@ extern void mmap_write32(uint32_t, int32_t); extern void mmap_write8(uint32_t, int32_t); extern void popa16(void); extern void popa32(void); -extern void task_switch_test(void); extern void todo(void); extern void undefined_instruction(void); extern void unimplemented_sse(void);