Generate jit, integrate it with the code generator and main loop, and enable it

This commit is contained in:
Fabian 2017-12-21 12:09:09 -06:00
parent 0c34310c99
commit 2a2f898a2a
8 changed files with 14431 additions and 28 deletions

493
gen/generate_jit.js Executable file
View file

@ -0,0 +1,493 @@
#!/usr/bin/env node
"use strict";
const fs = require("fs");
const encodings = require("./x86_table");
const c_ast = require("./c_ast");
const { hex } = require("./util");
gen_table();
/**
 * Select the C expression that reads the immediate operand of an encoding.
 *
 * @param {Object} op - Encoding entry; the imm8, imm8s, immaddr, imm16,
 *     imm1632, imm32, os and opcode properties are inspected.
 * @param {number|undefined} size_variant - Operand size (16 or 32) the
 *     handler is being generated for.
 * @returns {string|undefined} The reader call (e.g. "read_imm8()"), or
 *     undefined when the encoding declares no immediate.
 */
function gen_read_imm_call(op, size_variant)
{
    // The requested size is only honoured for encodings flagged `os` or
    // having an odd opcode; everything else is treated as 8 bit wide
    const operand_size = (op.os || op.opcode % 2 === 1) ? size_variant : 8;

    if(op.imm8)
    {
        return "read_imm8()";
    }

    if(op.imm8s)
    {
        return "read_imm8s()";
    }

    if(op.immaddr)
    {
        // immaddr: depends on address size
        return "read_moffs()";
    }

    if(!(op.imm16 || op.imm1632 || op.imm32))
    {
        // No immediate operand at all
        return undefined;
    }

    const reads_16_bit = op.imm16 || (op.imm1632 && operand_size === 16);
    if(reads_16_bit)
    {
        return "read_imm16()";
    }

    console.assert(op.imm32 || (op.imm1632 && operand_size === 32));
    return "read_imm32s()";
}
/**
 * Render a C call statement: `name(arg0, arg1, ...);`.
 *
 * @param {string} name - Name of the function to call.
 * @param {Array=} args - Argument expressions; any falsy value means "no arguments".
 * @returns {string} The call statement, including the trailing semicolon.
 */
function gen_call(name, args)
{
    const argument_list = (args || []).join(", ");
    return `${name}(${argument_list});`;
}
/**
 * Render a call to one of the `gen_fnN` code generator functions, where N is
 * the number of arguments forwarded to the generated instruction call.
 *
 * The emitted call receives the quoted instruction name and its length
 * first, followed by the given arguments.
 *
 * @param {string} name - Name of the instruction function to be called by the generated code.
 * @param {Array=} args - Argument expressions; any falsy value means "no arguments".
 * @returns {string} The C call statement.
 */
function gen_codegen_call(name, args)
{
    const call_args = args || [];
    const generator = `gen_fn${call_args.length}`;
    const quoted_name = `"${name}"`;
    return gen_call(generator, [quoted_name, name.length].concat(call_args));
}
/**
 * Render a call to one of the `gen_modrm_fnN` / `gen_modrm_cb_fnN` code
 * generator functions.
 *
 * N counts the arguments after the first one (callers pass the modrm byte
 * first). The emitted call receives the quoted instruction name and its
 * length first, followed by all given arguments.
 *
 * @param {string} name - Name of the instruction function to be called by the generated code.
 * @param {Array=} args - Argument expressions, modrm byte first.
 * @param {boolean=} is_cb - Selects the `_cb` flavour of the generator.
 * @returns {string} The C call statement.
 */
function gen_codegen_call_modrm(name, args, is_cb)
{
    const call_args = (args || []).slice();
    const extra_arg_count = call_args.length - 1; // the modrm byte is not counted
    const callback_infix = is_cb ? "_cb" : "";
    const generator = `gen_modrm${callback_infix}_fn${extra_arg_count}`;
    return gen_call(generator, [`"${name}"`, name.length].concat(call_args));
}
/**
 * Build the "if-else" syntax tree node that dispatches on the modrm byte:
 * below 0xC0 the `${name}_mem` variant is generated, otherwise the
 * `${name}_reg` variant.
 *
 * When the last memory argument is a call expression (ends with "()"), the
 * "()" is stripped and the `_cb` generator flavour is requested, so that
 * the function name is passed on instead of its result.
 *
 * @param {string} name - Instruction name without the _mem/_reg suffix.
 * @param {string=} mem_prefix_call - Statement emitted before the memory variant, if any.
 * @param {string[]} mem_args - Arguments of the memory variant (not modified, must be non-empty).
 * @param {string[]} reg_args - Arguments of the register variant.
 * @returns {Object} Syntax tree node of type "if-else".
 */
function gen_modrm_mem_reg_split(name, mem_prefix_call, mem_args, reg_args)
{
    const last_index = mem_args.length - 1;
    const last_mem_arg = mem_args[last_index];
    const is_callback = last_mem_arg.endsWith("()");

    let effective_mem_args = mem_args;
    if(is_callback)
    {
        // Work on a copy, the caller's array is reused for other variants
        effective_mem_args = mem_args.slice();
        effective_mem_args[last_index] = last_mem_arg.replace("()", "");
    }

    const mem_body = [];
    if(mem_prefix_call)
    {
        mem_body.push(mem_prefix_call);
    }
    mem_body.push(gen_codegen_call_modrm(`${name}_mem`, effective_mem_args, is_callback));

    const reg_body = [gen_codegen_call(`${name}_reg`, reg_args)];

    return {
        type: "if-else",
        if_blocks: [
            { condition: "modrm_byte < 0xC0", body: mem_body },
        ],
        else_block: { body: reg_body },
    };
}
/*
* Current naming scheme:
* instr(16|32|)_((66|F2|F3)?0F)?[0-9a-f]{2}(_[0-7])?(_mem|_reg|)
*/
/**
 * Compose the name of the C function implementing an encoding.
 *
 * @param {Object} encoding - Encoding entry; os, opcode and fixed_g are inspected.
 * @param {number|undefined} size - Operand size, appended to "instr" only when encoding.os is set.
 * @param {number=} prefix_variant - Prefix byte (0x66, 0xF2 or 0xF3) of the variant, if any.
 * @returns {string} The instruction name, without any _mem/_reg suffix.
 */
function make_instruction_name(encoding, size, prefix_variant)
{
    const { opcode, fixed_g } = encoding;
    const low_byte_hex = hex(opcode & 0xFF, 2);

    let name = "instr";
    if(encoding.os)
    {
        name += String(size);
    }
    name += "_";

    if(prefix_variant !== undefined)
    {
        name += hex(prefix_variant, 2);
    }
    if((opcode & 0xFF00) === 0x0F00)
    {
        // Two-byte opcode
        name += "0F";
    }
    name += low_byte_hex;

    if(fixed_g !== undefined)
    {
        name += `_${fixed_g}`;
    }
    return name;
}
/**
 * Build the "if-else" node that selects between the 66/F2/F3 prefixed
 * variants of a modrm instruction and its unprefixed form, based on the
 * `prefixes_` variable of the generated code.
 *
 * @param {Object} encoding - Encoding the instruction names are derived from.
 * @param {number|undefined} size - Operand size forwarded to make_instruction_name.
 * @param {Array<{value: number, flag: string}>} prefix_variants - Prefixed
 *     variants to test for, in the order the conditions are emitted.
 * @param {string|undefined} modrm_resolve_prefix - Statement emitted before each memory variant.
 * @param {string[]} mem_args - Arguments of the memory variants.
 * @param {string[]} reg_args - Arguments of the register variants.
 * @returns {Object} Syntax tree node of type "if-else".
 */
function gen_prefix_dispatch(encoding, size, prefix_variants, modrm_resolve_prefix, mem_args, reg_args)
{
    const split_for = prefix_variant => gen_modrm_mem_reg_split(
        make_instruction_name(encoding, size, prefix_variant),
        modrm_resolve_prefix,
        mem_args,
        reg_args
    );

    return {
        type: "if-else",
        if_blocks: prefix_variants.map(variant => ({
            condition: `prefixes_ & ${variant.flag}`,
            body: [split_for(variant.value)],
        })),
        else_block: {
            // No matching prefix: the plain instruction
            body: [split_for(undefined)],
        },
    };
}

/**
 * Generate the body of one opcode case of the jit switch table.
 *
 * The shape of the body depends on the first encoding of the group:
 *   - fixed_g set: switch over the middle 3 bits of the modrm byte
 *   - 66/F2/F3 variants present: modrm byte plus dispatch on the prefixes
 *   - e set: modrm byte with a register in the middle 3 bits
 *   - prefix set: direct call of the `<name>_jit` handler
 *   - otherwise: plain call with the immediate operands, if any
 *
 * Note that the `encodings` parameter shadows the module-level table of the
 * same name.
 *
 * @param {Object[]} encodings - All encodings sharing this opcode (non-empty).
 * @param {number|undefined} size - Operand size (16 or 32) for size-dependent opcodes.
 * @returns {Array<string|Object>} Statements and syntax tree nodes for c_ast.
 */
function gen_instruction_body(encodings, size)
{
    const encoding = encodings[0];

    // The mandatory prefix, if any, is stored above the low 16 bits of the opcode
    const prefix_variants = [
        { value: 0x66, flag: "PREFIX_66" },
        { value: 0xF2, flag: "PREFIX_F2" },
        { value: 0xF3, flag: "PREFIX_F3" },
    ].filter(variant => encodings.some(e => (e.opcode >>> 16) === variant.value));
    const has_prefix_variants = prefix_variants.length > 0;

    if(has_prefix_variants)
    {
        console.assert((encoding.opcode & 0xFF00) === 0x0F00);
    }

    const instruction_postfix = encoding.jump ? ["jit_jump = true;"] : [];

    if(encoding.fixed_g !== undefined)
    {
        // instruction with modrm byte where the middle 3 bits encode the instruction

        // group by opcode without prefix plus middle bits of modrm byte;
        // a later encoding replaces an earlier one with the same key
        const cases_by_key = Object.create(null);
        for(const case_ of encodings)
        {
            console.assert(typeof case_.fixed_g === "number");
            cases_by_key[(case_.opcode & 0xFFFF) | (case_.fixed_g << 16)] = case_;
        }
        const cases = Object.values(cases_by_key).sort((e1, e2) => e1.fixed_g - e2.fixed_g);

        return [
            "int32_t modrm_byte = read_imm8();",
            {
                type: "switch",
                condition: "modrm_byte >> 3 & 7",
                cases: cases.map(case_ => {
                    const instruction_name = make_instruction_name(case_, size, undefined);
                    const case_postfix = case_.jump ? ["jit_jump = true;"] : [];
                    const modrm_resolve_prefix = case_.requires_prefix_call ?
                        gen_codegen_call(instruction_name + "_mem_pre") :
                        undefined;

                    const mem_args = ["modrm_byte"];
                    const reg_args = ["modrm_byte & 7"];
                    const imm_read = gen_read_imm_call(case_, size);
                    if(imm_read)
                    {
                        mem_args.push(imm_read);
                        reg_args.push(imm_read);
                    }

                    const body = has_prefix_variants ?
                        [
                            "int32_t prefixes_ = *prefixes;",
                            gen_prefix_dispatch(case_, size, prefix_variants, modrm_resolve_prefix, mem_args, reg_args),
                        ] :
                        [
                            gen_modrm_mem_reg_split(instruction_name, modrm_resolve_prefix, mem_args, reg_args),
                        ];

                    return {
                        conditions: [case_.fixed_g],
                        body: body.concat(case_postfix),
                    };
                }),
                default_case: {
                    body: [
                        "assert(false);",
                        "trigger_ud();",
                    ],
                },
            },
            // The postfix of the first encoding is additionally emitted after the switch
        ].concat(instruction_postfix);
    }

    if(has_prefix_variants)
    {
        // instruction with modrm byte and with 66/F2/F3 prefix variants
        console.assert(encoding.e);
        console.assert(!encoding.ignore_mod);
        console.assert(!encoding.requires_prefix_call, "Unexpected instruction (66/f2/f3 with prefix call)");

        const mem_args = ["modrm_byte", "modrm_byte >> 3 & 7"];
        const reg_args = ["modrm_byte & 7", "modrm_byte >> 3 & 7"];
        const imm_read = gen_read_imm_call(encoding, size);
        if(imm_read)
        {
            mem_args.push(imm_read);
            reg_args.push(imm_read);
        }

        return [
            "int32_t modrm_byte = read_imm8();",
            "int32_t prefixes_ = *prefixes;",
            gen_prefix_dispatch(encoding, size, prefix_variants, undefined, mem_args, reg_args),
        ].concat(instruction_postfix);
    }

    if(encoding.e)
    {
        // instruction with modrm byte where the middle 3 bits encode a register
        console.assert(encodings.length === 1);

        const instruction_name = make_instruction_name(encoding, size);
        const imm_read = gen_read_imm_call(encoding, size);

        if(encoding.ignore_mod)
        {
            console.assert(!imm_read, "Unexpected instruction (ignore mod with immediate value)");

            // Has modrm byte, but the 2 mod bits are ignored and both
            // operands are always registers (0f20-0f24)
            return [
                "int32_t modrm_byte = read_imm8();",
                gen_codegen_call(instruction_name, ["modrm_byte & 7", "modrm_byte >> 3 & 7"]),
            ].concat(instruction_postfix);
        }

        const modrm_resolve_prefix = encoding.requires_prefix_call ?
            gen_codegen_call(instruction_name + "_mem_pre") :
            undefined;

        const mem_args = ["modrm_byte", "modrm_byte >> 3 & 7"];
        const reg_args = ["modrm_byte & 7", "modrm_byte >> 3 & 7"];
        if(imm_read)
        {
            mem_args.push(imm_read);
            reg_args.push(imm_read);
        }

        return [
            "int32_t modrm_byte = read_imm8();",
            gen_modrm_mem_reg_split(instruction_name, modrm_resolve_prefix, mem_args, reg_args),
        ].concat(instruction_postfix);
    }

    if(encoding.prefix)
    {
        // prefix byte: the dedicated *_jit handler is called directly while
        // generating, instead of emitting a call into the generated code
        const instruction_name = make_instruction_name(encoding, size) + "_jit";
        console.assert(instruction_postfix.length === 0);
        return [gen_call(instruction_name)];
    }

    // instruction without modrm byte or prefix
    const imm_read = gen_read_imm_call(encoding, size);
    const instruction_name = make_instruction_name(encoding, size);

    const args = [];
    if(imm_read)
    {
        args.push(imm_read);
    }

    if(encoding.extra_imm16)
    {
        console.assert(imm_read);
        args.push("read_imm16()");
    }
    else if(encoding.extra_imm8)
    {
        console.assert(imm_read);
        args.push("read_imm8()");
    }

    return [gen_codegen_call(instruction_name, args)].concat(instruction_postfix);
}
/**
 * Generate the jit opcode switch tables and write them to /tmp/jit (one byte
 * opcodes), /tmp/jit0f_16 and /tmp/jit0f_32 (0F-prefixed opcodes, for 16 and
 * 32 bit operand size).
 *
 * Every one byte opcode is expected to be present in the encoding table;
 * missing 0F opcodes are filled in with a bare placeholder encoding.
 */
function gen_table()
{
    const by_opcode = Object.create(null);
    const by_opcode0f = Object.create(null);

    // Group the encodings by opcode byte; encodings above 0xFF that are not
    // part of the 0F map are left out
    for(const o of encodings)
    {
        const full_opcode = o.opcode;

        if(full_opcode >= 0x100)
        {
            if((full_opcode & 0xFF00) === 0x0F00)
            {
                const low_byte = full_opcode & 0xFF;
                by_opcode0f[low_byte] = by_opcode0f[low_byte] || [];
                by_opcode0f[low_byte].push(o);
            }
        }
        else
        {
            by_opcode[full_opcode] = by_opcode[full_opcode] || [];
            by_opcode[full_opcode].push(o);
        }
    }

    const make_switch = cases => ({
        type: "switch",
        condition: "opcode",
        cases,
        default_case: {
            body: ["assert(false);"]
        },
    });

    const write_table = (path, table) =>
    {
        fs.writeFileSync(path, c_ast.print_syntax_tree([table]).join("\n") + "\n");
    };

    // One byte opcodes: bit 8 of the switch value selects 32 bit operand size
    const cases = [];

    for(let opcode = 0; opcode < 0x100; opcode++)
    {
        const group = by_opcode[opcode];
        console.assert(group && group.length);

        const label = `0x${hex(opcode, 2)}`;
        const label32 = `${label}|0x100`;

        if(group[0].os)
        {
            cases.push({
                conditions: [label],
                body: gen_instruction_body(group, 16),
            });
            cases.push({
                conditions: [label32],
                body: gen_instruction_body(group, 32),
            });
        }
        else
        {
            cases.push({
                conditions: [label, label32],
                body: gen_instruction_body(group, undefined),
            });
        }
    }

    write_table("/tmp/jit", make_switch(cases));

    // 0F-prefixed opcodes: one table per operand size
    const cases0f_16 = [];
    const cases0f_32 = [];

    for(let opcode = 0; opcode < 0x100; opcode++)
    {
        const group = by_opcode0f[opcode] || [
            {
                opcode: 0x0F00 | opcode,
            },
        ];
        console.assert(group && group.length);

        const conditions = [`0x${hex(opcode, 2)}`];

        if(group[0].os)
        {
            cases0f_16.push({
                conditions,
                body: gen_instruction_body(group, 16),
            });
            cases0f_32.push({
                conditions: conditions.slice(),
                body: gen_instruction_body(group, 32),
            });
        }
        else
        {
            // Size independent: the same case object is shared by both tables
            const shared_case = {
                conditions,
                body: gen_instruction_body(group, undefined),
            };
            cases0f_16.push(shared_case);
            cases0f_32.push(shared_case);
        }
    }

    const table0f_16 = make_switch(cases0f_16);
    const table0f_32 = make_switch(cases0f_32);

    write_table("/tmp/jit0f_16", table0f_16);
    write_table("/tmp/jit0f_32", table0f_32);
}

View file

@ -244,6 +244,9 @@ function V86Starter(options)
return f | 0;
},
"_get_time": () => Date.now(),
"_codegen_finalize": (virt_start, start, end) => cpu.codegen_finalize(virt_start, start, end),
"_codegen_call_cache": (start) => cpu.codegen_call_cache(start),
};
let wasm_file = DEBUG ? "v86-debug.wasm" : "v86.wasm";

View file

@ -20,6 +20,32 @@ function CPU(bus, wm, codegen)
this.memory_size = new Uint32Array(wm.memory.buffer, 812, 1);
{
const imports = {
"e": {
"m": this.wm.mem,
},
};
const exports = this.wm.instance.exports;
for(let name of Object.keys(exports))
{
if(name[0] !== "_")
{
continue;
}
imports["e"][name.slice(1)] = exports[name];
}
this.jit_imports = imports;
}
// XXX: Replace with wasm table
// XXX: Not garbage collected currently
this.instr_cache = Object.create(null);
// Note: Currently unused (degrades performance and not required by any OS
// that we support)
this.a20_enabled = new Int32Array(wm.memory.buffer, 552, 1);
@ -1379,6 +1405,78 @@ CPU.prototype.run_instruction_0f = function()
}
};
var seen_code = {};
/**
 * Run a previously compiled code block.
 *
 * Looks up the function stored in this.instr_cache under `start` (entries
 * are added by codegen_finalize) and calls it without arguments. Throws a
 * TypeError if no function is stored under that key.
 *
 * @param {number} start Key of the cached block; presumably the physical
 *     start address of the block (verify against the wasm-side caller)
 */
CPU.prototype.codegen_call_cache = function(start)
{
// Tracing of the instruction pointer before the call, disabled
//const before = this.instruction_pointer[0];
//dbg_log("calling cached generated code at " + h(before));
this.instr_cache[start]();
// Tracing of the instruction pointer after the call, disabled
//const after = this.instruction_pointer[0];
//dbg_log("cached code block from " + h(before) + " to " + h(after));
};
/**
 * Compile the module produced by the code generator, store its exported
 * function "f" in this.instr_cache under `start` and run it once.
 *
 * In DEBUG builds the wasm module and the source bytes in [start, end] are
 * dumped the first time a given `start` is seen. A failure to compile the
 * module is logged and then rethrown, so that the caller sees the original
 * compile error instead of a follow-up error from instantiating `undefined`.
 *
 * @param {number} virtual_start Value the instruction pointer is set to
 *     before the generated code is run
 * @param {number} start Key of the cache entry and first address of the
 *     dumped code
 * @param {number} end Last address (inclusive) of the dumped code
 */
CPU.prototype.codegen_finalize = function(virtual_start, start, end)
{
    dbg_log("finalize");
    const code = this.codegen.get_module_code();

    let module;

    if(DEBUG)
    {
        if(!seen_code[start])
        {
            this.debug.dump_wasm(code);
            seen_code[start] = true;

            // Guard against end < start, which would make the length negative
            const length = Math.max(end - start + 1, 0);
            const buffer = new Uint8Array(length);

            for(let i = start; i < end + 1; i++)
            {
                buffer[i - start] = this.read8(i);
            }

            this.debug.dump_code(this.is_32[0] ? 1 : 0, buffer, start);
        }

        try
        {
            module = new WebAssembly.Module(code);
        }
        catch(e)
        {
            console.log(e);
            debugger;
            // Without a module there is nothing to instantiate; surface the real error
            throw e;
        }
    }
    else
    {
        module = new WebAssembly.Module(code);
    }

    const instance = new WebAssembly.Instance(module, this.jit_imports);
    const f = instance.exports["f"];

    this.instr_cache[start] = f;

    // The generated code expects to start at the beginning of the block
    this.instruction_pointer[0] = virtual_start;

    const before = this.instruction_pointer[0];
    dbg_log("calling generated code at " + h(before >>> 0));

    f();

    const after = this.instruction_pointer[0];
    dbg_log("code block from " + h(before >>> 0) + " to " + h(after >>> 0));
};
CPU.prototype.dbg_log = function()
{
dbg_log("from wasm: " + [].join.call(arguments));

View file

@ -20,4 +20,3 @@ extern double_t math_pow(double_t, double_t);
#include "instructions_0f.c"
#include "string.c"
#include "sse_instr.c"

View file

@ -173,5 +173,5 @@
#define MAX_INSTR_LEN 15
#define MAX_BLOCK_LENGTH ((1 << DIRTY_ARR_SHIFT) - MAX_INSTR_LEN)
#define ENABLE_JIT 0
#define ENABLE_JIT 1
#define ENABLE_PROFILER 0

View file

@ -7,6 +7,7 @@
#include "const.h"
#include "global_pointers.h"
#include "profiler.h"
#include "codegen/codegen.h"
// like memcpy, but only efficient for large (approximately 10k) sizes
// See memcpy in https://github.com/kripken/emscripten/blob/master/src/library.js
@ -55,7 +56,8 @@ void after_jump()
jit_jump = 1;
}
void diverged() {
void diverged()
{
after_jump();
}
@ -117,6 +119,8 @@ int32_t translate_address_write(int32_t address)
}
}
bool jit_in_progress = false; // XXX: For debugging
int32_t read_imm8()
{
int32_t eip = *instruction_pointer;
@ -129,6 +133,7 @@ int32_t read_imm8()
assert(!in_mapped_range(*eip_phys ^ eip));
int32_t data8 = mem8[*eip_phys ^ eip];
if(jit_in_progress) dbg_log("%x/8/%x", eip, data8);
*instruction_pointer = eip + 1;
return data8;
@ -150,6 +155,7 @@ int32_t read_imm16()
}
int32_t data16 = read16(*eip_phys ^ *instruction_pointer);
if(jit_in_progress) dbg_log("%x/16/%x", *instruction_pointer, data16);
*instruction_pointer = *instruction_pointer + 2;
return data16;
@ -164,6 +170,7 @@ int32_t read_imm32s()
}
int32_t data32 = read32s(*eip_phys ^ *instruction_pointer);
if(jit_in_progress) dbg_log("%x/32/%x", *instruction_pointer, data32);
*instruction_pointer = *instruction_pointer + 4;
return data32;
@ -222,7 +229,7 @@ int32_t get_seg_prefix_ds(int32_t offset) { return get_seg_prefix(DS) + offset;
int32_t get_seg_prefix_ss(int32_t offset) { return get_seg_prefix(SS) + offset; }
int32_t get_seg_prefix_cs(int32_t offset) { return get_seg_prefix(CS) + offset; }
static void run_instruction(int32_t);
void run_instruction(int32_t);
static int32_t resolve_modrm16(int32_t);
static int32_t resolve_modrm32(int32_t);
@ -243,6 +250,28 @@ uint32_t jit_hot_hash(uint32_t addr)
return addr % HASH_PRIME;
}
static void jit_instruction(int32_t);
void codegen_finalize(int32_t, int32_t, int32_t);
void codegen_call_cache(int32_t);
// Generate code for a single instruction instead of executing it.
//
// opcode: the opcode byte, with bit 8 set for 32 bit operand size (as passed
// by cycle_internal). The opcode byte has already been consumed by the
// caller, so *instruction_pointer points behind it on entry.
void generate_instruction(int32_t opcode)
{
gen_set_previous_eip();
// Emitted with a length of 0 here; the real length is filled in by
// gen_patch_increment_instruction_pointer below, once it is known
// NOTE(review): placeholder/patch relationship inferred from the names — confirm in codegen
gen_increment_instruction_pointer(0);
// Address of the opcode byte (one byte before the current position)
int32_t start_eip = *instruction_pointer - 1;
// Decodes the remaining bytes of the instruction, advancing *instruction_pointer
jit_instruction(opcode);
int32_t end_eip = *instruction_pointer;
int32_t instruction_length = end_eip - start_eip;
// MAX_INSTR_LEN is 15
assert(instruction_length >= 0 && instruction_length < 16);
dbg_log("instruction_length=%d", instruction_length);
gen_patch_increment_instruction_pointer(instruction_length);
}
void cycle_internal()
{
#if ENABLE_JIT
@ -265,13 +294,12 @@ void cycle_internal()
bool cached = entry->start_addr == phys_addr && entry->is_32 == *is_32;
bool clean = entry->group_status == group_dirtiness[phys_addr >> DIRTY_ARR_SHIFT];
if(cached && !clean)
{
// Remove the cached entry from the Table
jit_clear_func(addr_index);
}
const bool JIT_ALWAYS = false;
const bool JIT_DONT_USE_CACHE = false;
if(cached && clean)
if(!JIT_DONT_USE_CACHE &&
entry->group_status == group_dirtiness[phys_addr >> DIRTY_ARR_SHIFT] &&
entry->start_addr == phys_addr)
{
// XXX: With the code-generation, we need to figure out how we
// would call the function from the other module here; likely
@ -279,6 +307,7 @@ void cycle_internal()
// Confirm that cache is not dirtied (through page-writes,
// mode switch, or just cache eviction)
/*
for(int32_t i = 0; i < entry->len; i++)
{
*previous_ip = *instruction_pointer;
@ -287,7 +316,10 @@ void cycle_internal()
assert(opcode == entry->opcode[i]);
run_instruction(entry->opcode[i] | !!*is_32 << 8);
(*timestamp_counter)++;
}
}*/
codegen_call_cache(phys_addr);
// XXX: Try to find an assert to detect self-modifying code
// JIT compiled self-modifying basic blocks may trigger this assert
// assert(entry->group_status != group_dirtiness[entry->start_addr >> DIRTY_ARR_SHIFT]);
@ -295,10 +327,14 @@ void cycle_internal()
}
// A jump just occurred, indicating the start of a basic block + the
// address is hot; let's JIT compile it
else if(jit_jump == 1 && ++hot_code_addresses[jit_hot_hash(phys_addr)] > JIT_THRESHOLD)
else if(JIT_ALWAYS || jit_jump == 1 && ++hot_code_addresses[jit_hot_hash(phys_addr)] > JIT_THRESHOLD)
{
int32_t start_addr = *instruction_pointer;
jit_in_progress = false;
// Minimize collision based thrashing
hot_code_addresses[jit_hot_hash(phys_addr)] = 0;
jit_jump = 0;
entry->len = 0;
entry->start_addr = phys_addr;
@ -308,6 +344,8 @@ void cycle_internal()
*cache_compile = *cache_compile + 1;
gen_reset();
// XXX: Artificial limit allows jit_dirty_cache to be
// simplified by only dirtying 2 entries based on a mask
// (instead of all possible entries)
@ -316,23 +354,30 @@ void cycle_internal()
{
*previous_ip = *instruction_pointer;
int32_t opcode = read_imm8();
// XXX: Currently only includes opcode of final jmp, not operands
entry->end_addr = *eip_phys ^ *instruction_pointer;
entry->opcode[entry->len] = opcode;
entry->len++;
// XXX: Generate the instruction instead of running it
// XXX: If it's a jmp instruction, make sure
// generate_instruction sets jit_jump=1 and end_addr is set correctly
run_instruction(opcode | !!*is_32 << 8);
(*timestamp_counter)++;
generate_instruction(opcode | !!*is_32 << 8);
entry->end_addr = *eip_phys ^ *instruction_pointer;
}
jit_jump = 0;
gen_finish();
jit_in_progress = false;
codegen_finalize(start_addr, entry->start_addr, entry->end_addr);
assert(*prefixes == 0);
// When the hot instruction is a jmp (backwards),
// leave its group_status unupdated, thereby invalidating it
if (entry->end_addr > entry->start_addr)
{
entry->group_status = group_dirtiness[phys_addr >> DIRTY_ARR_SHIFT];
}
//if (entry->end_addr > entry->start_addr)
//{
entry->group_status = group_dirtiness[phys_addr >> DIRTY_ARR_SHIFT];
//}
}
// Regular un-hot code execution
else
@ -364,6 +409,12 @@ static void run_prefix_instruction()
run_instruction(read_imm8() | is_osize_32() << 8);
}
// Counterpart of run_prefix_instruction for code generation: reads the byte
// following a prefix and passes it to jit_instruction, with bit 8 set
// according to is_osize_32().
static void jit_prefix_instruction()
{
dbg_log("jit_prefix_instruction is32=%d", is_osize_32());
jit_instruction(read_imm8() | is_osize_32() << 8);
}
void clear_prefixes()
{
*prefixes = 0;
@ -377,6 +428,14 @@ void segment_prefix_op(int32_t seg)
*prefixes = 0;
}
// Code generation for a segment override prefix: generates code that adds
// the prefix bits, generates the prefixed instruction, then generates code
// that clears the prefixes again.
//
// seg: segment register index, at most 5
void segment_prefix_op_jit(int32_t seg)
{
assert(seg <= 5);
// NOTE(review): the segment is presumably stored as seg + 1 so that 0 means
// "no segment prefix" — confirm against the prefix bit layout
gen_add_prefix_bits(seg + 1);
jit_prefix_instruction();
gen_clear_prefixes();
}
void do_many_cycles_unsafe()
{
for(int32_t k = 0; k < LOOP_COUNTER; k++)

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff