Generate the JIT, integrate it with the code generator and main loop, and enable it

2017-12-21 12:09:09 -06:00 · 2017-12-21 12:09:09 -06:00 · 2a2f898a2a
parent 0c34310c99
commit 2a2f898a2a
8 changed files with 14431 additions and 28 deletions
--- a/gen/generate_jit.js
+++ b/gen/generate_jit.js
@ -0,0 +1,493 @@
+#!/usr/bin/env node
+"use strict";
+
+const fs = require("fs");
+const encodings = require("./x86_table");
+const c_ast = require("./c_ast");
+const { hex } = require("./util");
+
+gen_table();
+
+/**
+ * Choose the C expression that reads an encoding's immediate operand.
+ *
+ * @param {Object} op - Encoding entry. The fields os, opcode, imm8, imm8s,
+ *     imm16, imm1632, imm32 and immaddr are consulted.
+ * @param {number|undefined} size_variant - Operand size being generated;
+ *     the callers in this file pass 16, 32 or undefined.
+ * @returns {string|undefined} "read_imm8()", "read_imm8s()", "read_moffs()",
+ *     "read_imm16()" or "read_imm32s()"; undefined when none of the imm*
+ *     flags is set.
+ */
+function gen_read_imm_call(op, size_variant)
+{
+    let size = (op.os || op.opcode % 2 === 1) ? size_variant : 8; // only os-flagged or odd-opcode encodings use the requested size; everything else counts as 8
+
+    if(op.imm8 || op.imm8s || op.imm16 || op.imm1632 || op.imm32 || op.immaddr)
+    {
+        if(op.imm8)
+        {
+            return "read_imm8()";
+        }
+        else if(op.imm8s)
+        {
+            return "read_imm8s()";
+        }
+        else
+        {
+            if(op.immaddr)
+            {
+                // immaddr: depends on address size
+                return "read_moffs()";
+            }
+            else
+            {
+                console.assert(op.imm1632 || op.imm16 || op.imm32);
+
+                if(op.imm1632 && size === 16 || op.imm16)
+                {
+                    return "read_imm16()";
+                }
+                else
+                {
+                    console.assert(op.imm1632 && size === 32 || op.imm32); // console.assert only reports a failure; read_imm32s() is returned either way
+                    return "read_imm32s()";
+                }
+            }
+        }
+    }
+    else
+    {
+        return undefined;
+    }
+}
+/**
+ * Render a call statement of the form `name(arg1, arg2, ...);`.
+ *
+ * @param {string} name - Name of the function to call.
+ * @param {string[]} [args] - Argument expressions; a falsy value is treated
+ *     as an empty argument list.
+ * @returns {string} The call text, terminated by a semicolon.
+ */
+function gen_call(name, args)
+{
+    args = args || [];
+    return `${name}(${args.join(", ")});`;
+}
+/**
+ * Render a call to the code generator helper gen_fn<N>, where N is the
+ * number of entries in `args`. The emitted argument list starts with the
+ * quoted function name and its length, followed by `args`.
+ *
+ * @param {string} name - Name of the function the generated code should call.
+ * @param {string[]} [args] - Argument expressions; falsy means none.
+ * @returns {string} The call statement produced by gen_call.
+ */
+function gen_codegen_call(name, args)
+{
+    args = args || [];
+    const args_count = args.length;
+    args = [].concat([`"${name}"`, name.length], args); // new array; the caller's list is not modified
+    return gen_call(`gen_fn${args_count}`, args);
+}
+/**
+ * Render a call to gen_modrm_fn<N> (or gen_modrm_cb_fn<N> when `is_cb` is
+ * truthy). N is the number of entries in `args` minus one, because the
+ * modrm byte entry is not counted. The emitted argument list starts with
+ * the quoted function name and its length, followed by `args`.
+ *
+ * @param {string} name - Name of the function the generated code should call.
+ * @param {string[]} [args] - Argument expressions including the modrm byte.
+ * @param {boolean} [is_cb] - Selects the "_cb" helper variant.
+ * @returns {string} The call statement produced by gen_call.
+ */
+function gen_codegen_call_modrm(name, args, is_cb)
+{
+    args = (args || []).slice();
+    const args_count = args.length - 1; // minus 1 for the modrm_byte
+    args = [].concat([`"${name}"`, name.length], args);
+    return gen_call(`gen_modrm${is_cb ? "_cb" : ""}_fn${args_count}`, args);
+}
+/**
+ * Build an "if-else" node that dispatches on the modrm byte: values below
+ * 0xC0 take the memory form `<name>_mem` (via gen_codegen_call_modrm),
+ * everything else takes the register form `<name>_reg` (via
+ * gen_codegen_call).
+ *
+ * If the last memory argument ends with "()", the first "()" in it is
+ * removed and the "_cb" helper variant is requested instead.
+ *
+ * @param {string} name - Instruction name without the _mem/_reg suffix.
+ * @param {string|undefined} mem_prefix_call - Optional statement placed
+ *     before the memory-form call.
+ * @param {string[]} mem_args - Arguments for the memory form; must be
+ *     non-empty. The caller's array is not modified.
+ * @param {string[]} reg_args - Arguments for the register form.
+ * @returns {Object} Node with type, if_blocks and else_block fields.
+ */
+function gen_modrm_mem_reg_split(name, mem_prefix_call, mem_args, reg_args)
+{
+    let cb = false;
+
+    if(mem_args[mem_args.length-1].endsWith("()"))
+    {
+        cb = true;
+        mem_args = mem_args.slice(); // copy before editing the last entry
+        mem_args[mem_args.length-1] = mem_args[mem_args.length-1].replace("()", "");
+    }
+
+    return {
+        type: "if-else",
+        if_blocks: [{
+            condition: "modrm_byte < 0xC0",
+            body: (mem_prefix_call ? [mem_prefix_call] : []).concat([gen_codegen_call_modrm(`${name}_mem`, mem_args, cb)]),
+        }],
+        else_block: {
+            body: [
+                gen_codegen_call(`${name}_reg`, reg_args)
+            ],
+        },
+    };
+}
+
+/*
+ * Current naming scheme:
+ * instr(16|32|)_((66|F2|F3)?0F)?[0-9a-f]{2}(_[0-7])?(_mem|_reg|)
+ */
+/**
+ * Compose the base instruction name (without any _mem/_reg suffix).
+ *
+ * @param {Object} encoding - Encoding entry; os, opcode and fixed_g are read.
+ * @param {number|undefined} size - Appended after "instr" only when
+ *     encoding.os is set.
+ * @param {number} [prefix_variant] - Prefix byte (callers pass 0x66, 0xF2
+ *     or 0xF3) rendered through hex(); omitted when undefined.
+ * @returns {string} Name assembled from the parts below.
+ */
+function make_instruction_name(encoding, size, prefix_variant)
+{
+    const suffix = encoding.os ? String(size) : "";
+    const opcode_hex = hex(encoding.opcode & 0xFF, 2); // low opcode byte only; presumably hex(x, 2) yields two hex digits -- confirm in ./util
+    const prefix_0f = (encoding.opcode & 0xFF00) === 0x0F00 ? "0F" : "";
+    const prefix = prefix_variant === undefined ? "" : hex(prefix_variant, 2);
+    const fixed_g_suffix = encoding.fixed_g === undefined ? "" : `_${encoding.fixed_g}`;
+
+    return `instr${suffix}_${prefix}${prefix_0f}${opcode_hex}${fixed_g_suffix}`;
+}
+/**
+ * Build the body of one opcode case for the switch assembled by gen_table.
+ *
+ * The shape of the result depends on the first encoding in the list:
+ *  - fixed_g set: reads the modrm byte and switches on its middle 3 bits,
+ *    one case per fixed_g value
+ *  - some encoding carries a 66/F2/F3 prefix: reads the modrm byte and
+ *    picks the prefixed or unprefixed variant by testing the prefixes value
+ *  - e set: modrm instruction split into _mem and _reg calls (or a single
+ *    register-only call when ignore_mod is set)
+ *  - prefix set: plain call to <name>_jit()
+ *  - otherwise: gen_fn call taking the immediate reads as arguments
+ *
+ * @param {Object[]} encodings - Encoding entries that gen_table grouped
+ *     under one opcode byte (shadows the module-level table of that name).
+ * @param {number|undefined} size - 16, 32 or undefined, as passed by gen_table.
+ * @returns {Array} Statement strings and "switch"/"if-else" node objects.
+ */
+function gen_instruction_body(encodings, size)
+{
+    const encoding = encodings[0]; // representative entry; its flags select the branch taken below
+
+    let has_66 = false;
+    let has_F2 = false;
+    let has_F3 = false;
+
+    for(let e of encodings) // a prefixed variant keeps 0x66/0xF2/0xF3 in the opcode bits above the low 16
+    {
+        if((e.opcode >>> 16) === 0x66) has_66 = true;
+        if((e.opcode >>> 16) === 0xF2) has_F2 = true;
+        if((e.opcode >>> 16) === 0xF3) has_F3 = true;
+    }
+
+    if(has_66 || has_F2 || has_F3)
+    {
+        console.assert((encoding.opcode & 0xFF00) === 0x0F00);
+    }
+
+    const instruction_postfix = encoding.jump ? ["jit_jump = true;"] : [];
+
+    if(encoding.fixed_g !== undefined)
+    {
+        // instruction with modrm byte where the middle 3 bits encode the instruction
+
+        // group by opcode without prefix plus middle bits of modrm byte
+        let cases = encodings.reduce((cases_by_opcode, case_) => {
+            console.assert(typeof case_.fixed_g === "number");
+            cases_by_opcode[case_.opcode & 0xFFFF | case_.fixed_g << 16] = case_; // a later entry with the same key replaces an earlier one
+            return cases_by_opcode;
+        }, Object.create(null));
+        cases = Object.values(cases).sort((e1, e2) => e1.fixed_g - e2.fixed_g);
+
+        return [
+            "int32_t modrm_byte = read_imm8();",
+            {
+                type: "switch",
+                condition: "modrm_byte >> 3 & 7",
+                cases: cases.map(case_ => {
+                    const fixed_g = case_.fixed_g;
+                    const instruction_name = make_instruction_name(case_, size, undefined);
+                    const instruction_postfix = case_.jump ? ["jit_jump = true;"] : []; // per-case value; shadows the outer instruction_postfix
+
+                    let modrm_resolve_prefix = undefined;
+
+                    if(case_.requires_prefix_call)
+                    {
+                        modrm_resolve_prefix = gen_codegen_call(instruction_name + "_mem_pre");
+                    }
+
+                    const mem_args = ["modrm_byte"];
+                    const reg_args = ["modrm_byte & 7"];
+
+                    const imm_read = gen_read_imm_call(case_, size);
+                    if(imm_read)
+                    {
+                        mem_args.push(imm_read);
+                        reg_args.push(imm_read);
+                    }
+
+                    if(has_66 || has_F2 || has_F3)
+                    {
+                        const if_blocks = [];
+
+                        if(has_66) {
+                            const name = make_instruction_name(case_, size, 0x66);
+                            const body = [gen_modrm_mem_reg_split(name, modrm_resolve_prefix, mem_args, reg_args)];
+                            if_blocks.push({ condition: "prefixes_ & PREFIX_66", body, });
+                        }
+                        if(has_F2) {
+                            const name = make_instruction_name(case_, size, 0xF2);
+                            const body = [gen_modrm_mem_reg_split(name, modrm_resolve_prefix, mem_args, reg_args)];
+                            if_blocks.push({ condition: "prefixes_ & PREFIX_F2", body, });
+                        }
+                        if(has_F3) {
+                            const name = make_instruction_name(case_, size, 0xF3);
+                            const body = [gen_modrm_mem_reg_split(name, modrm_resolve_prefix, mem_args, reg_args)];
+                            if_blocks.push({ condition: "prefixes_ & PREFIX_F3", body, });
+                        }
+
+                        const else_block = {
+                            body: [gen_modrm_mem_reg_split(instruction_name, modrm_resolve_prefix, mem_args, reg_args)],
+                        };
+
+                        return {
+                            conditions: [fixed_g],
+                            body: [
+                                "int32_t prefixes_ = *prefixes;",
+                                {
+                                    type: "if-else",
+                                    if_blocks,
+                                    else_block,
+                                },
+                            ].concat(instruction_postfix),
+                        };
+                    }
+                    else
+                    {
+                        const body = [
+                            gen_modrm_mem_reg_split(instruction_name, modrm_resolve_prefix, mem_args, reg_args)
+                        ].concat(instruction_postfix);
+
+                        return {
+                            conditions: [fixed_g],
+                            body,
+                        };
+                    }
+                }),
+
+                default_case: {
+                    body: [
+                        "assert(false);",
+                        "trigger_ud();",
+                    ],
+                }
+            },
+        ].concat(instruction_postfix); // outer postfix (taken from encodings[0]) follows the switch, in addition to any per-case postfix
+    }
+    else if(has_66 || has_F2 || has_F3)
+    {
+        // instruction with modrm byte (middle 3 bits encode a register) that has 66/F2/F3-prefixed variants
+
+        console.assert(encoding.e);
+        console.assert(!encoding.ignore_mod);
+        console.assert(!encoding.requires_prefix_call, "Unexpected instruction (66/f2/f3 with prefix call)");
+
+        const imm_read = gen_read_imm_call(encoding, size);
+        const modrm_resolve_prefix = undefined;
+
+        const mem_args = ["modrm_byte", "modrm_byte >> 3 & 7"];
+        const reg_args = ["modrm_byte & 7", "modrm_byte >> 3 & 7"];
+
+        if(imm_read)
+        {
+            mem_args.push(imm_read);
+            reg_args.push(imm_read);
+        }
+
+        const if_blocks = [];
+
+        if(has_66) {
+            const name = make_instruction_name(encoding, size, 0x66);
+            const body = [gen_modrm_mem_reg_split(name, modrm_resolve_prefix, mem_args, reg_args)];
+            if_blocks.push({ condition: "prefixes_ & PREFIX_66", body, });
+        }
+        if(has_F2) {
+            const name = make_instruction_name(encoding, size, 0xF2);
+            const body = [gen_modrm_mem_reg_split(name, modrm_resolve_prefix, mem_args, reg_args)];
+            if_blocks.push({ condition: "prefixes_ & PREFIX_F2", body, });
+        }
+        if(has_F3) {
+            const name = make_instruction_name(encoding, size, 0xF3);
+            const body = [gen_modrm_mem_reg_split(name, modrm_resolve_prefix, mem_args, reg_args)];
+            if_blocks.push({ condition: "prefixes_ & PREFIX_F3", body, });
+        }
+
+        const else_block = {
+            body: [gen_modrm_mem_reg_split(make_instruction_name(encoding, size), modrm_resolve_prefix, mem_args, reg_args)],
+        };
+
+        return [
+            "int32_t modrm_byte = read_imm8();",
+            "int32_t prefixes_ = *prefixes;",
+            {
+                type: "if-else",
+                if_blocks,
+                else_block,
+            }
+        ].concat(instruction_postfix);
+    }
+    else if(encoding.fixed_g === undefined && encoding.e)
+    {
+        // instruction with modrm byte where the middle 3 bits encode a register
+
+        console.assert(encodings.length === 1);
+
+        const instruction_name = make_instruction_name(encoding, size);
+        const imm_read = gen_read_imm_call(encoding, size);
+
+        if(encoding.ignore_mod)
+        {
+            console.assert(!imm_read, "Unexpected instruction (ignore mod with immediate value)");
+
+            // Has modrm byte, but the 2 mod bits are ignored and both
+            // operands are always registers (0f20-0f24)
+
+            return [
+                "int32_t modrm_byte = read_imm8();",
+                gen_codegen_call(instruction_name, ["modrm_byte & 7", "modrm_byte >> 3 & 7"]),
+            ].concat(instruction_postfix);
+        }
+        else
+        {
+            let modrm_resolve_prefix = undefined;
+
+            if(encoding.requires_prefix_call)
+            {
+                modrm_resolve_prefix = gen_codegen_call(instruction_name + "_mem_pre");
+            }
+
+            const mem_args = ["modrm_byte", "modrm_byte >> 3 & 7"];
+            const reg_args = ["modrm_byte & 7", "modrm_byte >> 3 & 7"];
+
+            if(imm_read)
+            {
+                mem_args.push(imm_read);
+                reg_args.push(imm_read);
+            }
+
+            return [
+                "int32_t modrm_byte = read_imm8();",
+                gen_modrm_mem_reg_split(instruction_name, modrm_resolve_prefix, mem_args, reg_args),
+            ].concat(instruction_postfix);
+        }
+    }
+    else if(encoding.prefix)
+    {
+        const instruction_name = make_instruction_name(encoding, size) + "_jit";
+        const args = []; // unused; only the commented-out call below refers to it
+        console.assert(instruction_postfix.length === 0);
+
+        return [gen_call(instruction_name)]; // direct call to <name>_jit(), not routed through gen_fn
+        //return [gen_codegen_call(instruction_name, args)].concat(instruction_postfix);
+    }
+    else
+    {
+        // instruction without modrm byte or prefix
+
+        const imm_read = gen_read_imm_call(encoding, size);
+        const instruction_name = make_instruction_name(encoding, size);
+
+        const args = [];
+
+        if(imm_read)
+        {
+            args.push(imm_read);
+        }
+
+        if(encoding.extra_imm16)
+        {
+            console.assert(imm_read);
+            args.push("read_imm16()");
+        }
+        else if(encoding.extra_imm8)
+        {
+            console.assert(imm_read);
+            args.push("read_imm8()");
+        }
+
+        return [gen_codegen_call(instruction_name, args)].concat(instruction_postfix);
+    }
+}
+/**
+ * Generate the three opcode dispatch switches and write them to disk.
+ *
+ * Encodings from ./x86_table are grouped by opcode byte: opcodes below
+ * 0x100 go into the main table, opcodes whose bits 8-15 equal 0x0F go into
+ * the 0F table keyed by their low byte. The main table is written to
+ * /tmp/jit; the 0F table is written once per operand size to /tmp/jit0f_16
+ * and /tmp/jit0f_32.
+ *
+ * Takes no arguments and returns nothing; the only outputs are the files.
+ */
+function gen_table()
+{
+    let by_opcode = Object.create(null);
+    let by_opcode0f = Object.create(null);
+
+    for(let o of encodings)
+    {
+        let opcode = o.opcode;
+
+        if(opcode >= 0x100)
+        {
+            if((opcode & 0xFF00) === 0x0F00) // any other multi-byte opcode is skipped without a message
+            {
+                opcode &= 0xFF;
+                by_opcode0f[opcode] = by_opcode0f[opcode] || [];
+                by_opcode0f[opcode].push(o);
+            }
+        }
+        else
+        {
+            by_opcode[opcode] = by_opcode[opcode] || [];
+            by_opcode[opcode].push(o);
+        }
+    }
+
+    let cases = [];
+    for(let opcode = 0; opcode < 0x100; opcode++)
+    {
+        let encoding = by_opcode[opcode];
+        console.assert(encoding && encoding.length); // every one-byte opcode is expected to be present in the table
+
+        let opcode_hex = hex(opcode, 2);
+
+        if(encoding[0].os)
+        {
+            cases.push({
+                conditions: [`0x${opcode_hex}`],
+                body: gen_instruction_body(encoding, 16),
+            });
+            cases.push({
+                conditions: [`0x${opcode_hex}|0x100`], // the 0x100 bit marks the 32-bit variant of the case
+                body: gen_instruction_body(encoding, 32),
+            });
+        }
+        else
+        {
+            cases.push({
+                conditions: [`0x${opcode_hex}`, `0x${opcode_hex}|0x100`],
+                body: gen_instruction_body(encoding, undefined),
+            });
+        }
+    }
+    const table = {
+        type: "switch",
+        condition: "opcode",
+        cases,
+        default_case: {
+            body: ["assert(false);"]
+        },
+    };
+    fs.writeFileSync("/tmp/jit", c_ast.print_syntax_tree([table]).join("\n") + "\n");
+
+    const cases0f_16 = [];
+    const cases0f_32 = [];
+    for(let opcode = 0; opcode < 0x100; opcode++)
+    {
+        let encoding = by_opcode0f[opcode];
+
+        if(!encoding)
+        {
+            encoding = [ // placeholder entry for a 0F opcode that the table does not list
+                {
+                    opcode: 0x0F00 | opcode,
+                },
+            ];
+        }
+
+        console.assert(encoding && encoding.length);
+
+        let opcode_hex = hex(opcode, 2);
+
+        if(encoding[0].os)
+        {
+            cases0f_16.push({
+                conditions: [`0x${opcode_hex}`],
+                body: gen_instruction_body(encoding, 16),
+            });
+            cases0f_32.push({
+                conditions: [`0x${opcode_hex}`],
+                body: gen_instruction_body(encoding, 32),
+            });
+        }
+        else
+        {
+            let block = { // the same object is shared by the 16-bit and 32-bit tables
+                conditions: [`0x${opcode_hex}`],
+                body: gen_instruction_body(encoding, undefined),
+            };
+            cases0f_16.push(block);
+            cases0f_32.push(block);
+        }
+    }
+
+    const table0f_16 = {
+        type: "switch",
+        condition: "opcode",
+        cases: cases0f_16,
+        default_case: {
+            body: ["assert(false);"]
+        },
+    };
+    const table0f_32 = {
+        type: "switch",
+        condition: "opcode",
+        cases: cases0f_32,
+        default_case: {
+            body: ["assert(false);"]
+        },
+    };
+    fs.writeFileSync("/tmp/jit0f_16", c_ast.print_syntax_tree([table0f_16]).join("\n") + "\n");
+    fs.writeFileSync("/tmp/jit0f_32", c_ast.print_syntax_tree([table0f_32]).join("\n") + "\n");
+}
--- a/src/browser/starter.js
+++ b/src/browser/starter.js
@ -244,6 +244,9 @@ function V86Starter(options)
            return f | 0;
        },
        "_get_time": () => Date.now(),
+
+        "_codegen_finalize": (virt_start, start, end) => cpu.codegen_finalize(virt_start, start, end),
+        "_codegen_call_cache": (start) => cpu.codegen_call_cache(start),
    };

    let wasm_file = DEBUG ? "v86-debug.wasm" : "v86.wasm";
--- a/src/cpu.js
+++ b/src/cpu.js
@ -20,6 +20,32 @@ function CPU(bus, wm, codegen)

    this.memory_size = new Uint32Array(wm.memory.buffer, 812, 1);

+    {
+        const imports = {
+            "e": {
+                "m": this.wm.mem,
+            },
+        };
+
+        const exports = this.wm.instance.exports;
+
+        for(let name of Object.keys(exports))
+        {
+            if(name[0] !== "_")
+            {
+                continue;
+            }
+
+            imports["e"][name.slice(1)] = exports[name];
+        }
+
+        this.jit_imports = imports;
+    }
+
+    // XXX: Replace with wasm table
+    // XXX: Not garbage collected currently
+    this.instr_cache = Object.create(null);
+
    // Note: Currently unused (degrades performance and not required by any OS
    //       that we support)
    this.a20_enabled = new Int32Array(wm.memory.buffer, 552, 1);
@ -1379,6 +1405,78 @@ CPU.prototype.run_instruction_0f = function()
    }
 };

+var seen_code = {}; // DEBUG bookkeeping: `start` values whose code codegen_finalize has already dumped
+/**
+ * Run a previously compiled code block.
+ *
+ * @param {number} start - Key under which codegen_finalize stored the
+ *     block's exported function in this.instr_cache. No check is made that
+ *     an entry exists for it.
+ */
+CPU.prototype.codegen_call_cache = function(start)
+{
+    //const before = this.instruction_pointer[0];
+    //dbg_log("calling cached generated code at " + h(before));
+    this.instr_cache[start]();
+    //const after = this.instruction_pointer[0];
+    //dbg_log("cached code block from " + h(before) + " to " + h(after));
+};
+/**
+ * Compile the module produced by the code generator, cache its exported
+ * function and execute it once.
+ *
+ * The bytes come from this.codegen.get_module_code(). They are compiled
+ * synchronously, instantiated with this.jit_imports, and the export "f" is
+ * stored in this.instr_cache under `start`. The instruction pointer is then
+ * set to `virtual_start` and f is called.
+ *
+ * In DEBUG builds the wasm and the source bytes from `start` to `end`
+ * (inclusive) are dumped the first time a given `start` is seen. A compile
+ * error is logged and caught there, which leaves `module` undefined.
+ * NOTE(review): the instantiation below would then throw -- confirm that
+ * this is the intended failure mode.
+ *
+ * @param {number} virtual_start - Value written to instruction_pointer[0]
+ *     before the block runs.
+ * @param {number} start - Cache key and first address of the dumped range.
+ * @param {number} end - Last address (inclusive) of the dumped range.
+ */
+CPU.prototype.codegen_finalize = function(virtual_start, start, end)
+{
+    dbg_log("finalize");
+    const code = this.codegen.get_module_code();
+
+    //this.debug.dump_wasm(code);
+
+    let module;
+
+    if(DEBUG)
+    {
+        if(true && !seen_code[start]) // `true &&` looks like a manual on/off switch for the dump
+        {
+            this.debug.dump_wasm(code);
+
+            seen_code[start] = true;
+
+            const buffer = new Uint8Array(end - start + 1); // +1 because `end` is included
+
+            for(let i = start; i < end + 1; i++)
+            {
+                buffer[i - start] = this.read8(i);
+            }
+
+            this.debug.dump_code(this.is_32[0] ? 1 : 0, buffer, start);
+        }
+
+        try
+        {
+            module = new WebAssembly.Module(code);
+        }
+        catch(e)
+        {
+            //debugger;
+
+            console.log(e);
+            debugger;
+
+            //dump_file(code);
+        }
+    }
+    else
+    {
+        module = new WebAssembly.Module(code);
+    }
+
+    const instance = new WebAssembly.Instance(module, this.jit_imports);
+    const f = instance.exports["f"];
+
+    this.instr_cache[start] = f; // replaces any function cached earlier under the same key
+
+    this.instruction_pointer[0] = virtual_start;
+
+    const before = this.instruction_pointer[0];
+    dbg_log("calling generated code at " + h(before >>> 0));
+    //debugger;
+    f();
+    const after = this.instruction_pointer[0];
+    dbg_log("code block from " + h(before >>> 0) + " to " + h(after >>> 0));
+};
+
 CPU.prototype.dbg_log = function()
 {
    dbg_log("from wasm: " + [].join.call(arguments));
--- a/src/native/all.c
+++ b/src/native/all.c
@ -20,4 +20,3 @@ extern double_t math_pow(double_t, double_t);
 #include "instructions_0f.c"
 #include "string.c"
 #include "sse_instr.c"
-
--- a/src/native/const.h
+++ b/src/native/const.h
@ -173,5 +173,5 @@
 #define MAX_INSTR_LEN 15
 #define MAX_BLOCK_LENGTH ((1 << DIRTY_ARR_SHIFT) - MAX_INSTR_LEN)

-#define ENABLE_JIT 0
+#define ENABLE_JIT 1
 #define ENABLE_PROFILER 0
--- a/src/native/cpu.c
+++ b/src/native/cpu.c
@ -7,6 +7,7 @@
 #include "const.h"
 #include "global_pointers.h"
 #include "profiler.h"
+#include "codegen/codegen.h"

 // like memcpy, but only efficient for large (approximately 10k) sizes
 // See memcpy in https://github.com/kripken/emscripten/blob/master/src/library.js
@ -55,7 +56,8 @@ void after_jump()
    jit_jump = 1;
 }

-void diverged() {
+void diverged()
+{
    after_jump();
 }

@ -117,6 +119,8 @@ int32_t translate_address_write(int32_t address)
    }
 }

+bool jit_in_progress = false; // XXX: For debugging
+
 int32_t read_imm8()
 {
    int32_t eip = *instruction_pointer;
@ -129,6 +133,7 @@ int32_t read_imm8()

    assert(!in_mapped_range(*eip_phys ^ eip));
    int32_t data8 = mem8[*eip_phys ^ eip];
+    if(jit_in_progress) dbg_log("%x/8/%x", eip, data8);
    *instruction_pointer = eip + 1;

    return data8;
@ -150,6 +155,7 @@ int32_t read_imm16()
    }

    int32_t data16 = read16(*eip_phys ^ *instruction_pointer);
+    if(jit_in_progress) dbg_log("%x/16/%x", *instruction_pointer, data16);
    *instruction_pointer = *instruction_pointer + 2;

    return data16;
@ -164,6 +170,7 @@ int32_t read_imm32s()
    }

    int32_t data32 = read32s(*eip_phys ^ *instruction_pointer);
+    if(jit_in_progress) dbg_log("%x/32/%x", *instruction_pointer, data32);
    *instruction_pointer = *instruction_pointer + 4;

    return data32;
@ -222,7 +229,7 @@ int32_t get_seg_prefix_ds(int32_t offset) { return get_seg_prefix(DS) + offset;
 int32_t get_seg_prefix_ss(int32_t offset) { return get_seg_prefix(SS) + offset; }
 int32_t get_seg_prefix_cs(int32_t offset) { return get_seg_prefix(CS) + offset; }

-static void run_instruction(int32_t);
+void run_instruction(int32_t);
 static int32_t resolve_modrm16(int32_t);
 static int32_t resolve_modrm32(int32_t);

@ -243,6 +250,28 @@ uint32_t jit_hot_hash(uint32_t addr)
    return addr % HASH_PRIME;
 }

+static void jit_instruction(int32_t);
+void codegen_finalize(int32_t, int32_t, int32_t);
+void codegen_call_cache(int32_t);
+/*
+ * Emit generated code for a single instruction.
+ *
+ * opcode: the opcode byte, which the caller in cycle_internal has already
+ * consumed with read_imm8(), combined with the is_32 flag in bit 8.
+ *
+ * jit_instruction() advances *instruction_pointer while it reads the rest
+ * of the instruction, so the instruction length is the distance travelled
+ * plus the opcode byte that was read beforehand.
+ */
+void generate_instruction(int32_t opcode)
+{
+    gen_set_previous_eip();
+    gen_increment_instruction_pointer(0); // length not known yet; the real value is supplied by the patch call at the end
+
+    int32_t start_eip = *instruction_pointer - 1; // -1 accounts for the opcode byte read by the caller
+
+    jit_instruction(opcode);
+
+    int32_t end_eip = *instruction_pointer;
+    int32_t instruction_length = end_eip - start_eip;
+
+    assert(instruction_length >= 0 && instruction_length < 16);
+    dbg_log("instruction_length=%d", instruction_length);
+
+    gen_patch_increment_instruction_pointer(instruction_length); // presumably rewrites the 0 emitted above -- confirm in codegen
+}
+
 void cycle_internal()
 {
 #if ENABLE_JIT
@ -265,13 +294,12 @@ void cycle_internal()
    bool cached = entry->start_addr == phys_addr && entry->is_32 == *is_32;
    bool clean = entry->group_status == group_dirtiness[phys_addr >> DIRTY_ARR_SHIFT];

-    if(cached && !clean)
-    {
-        // Remove the cached entry from the Table
-        jit_clear_func(addr_index);
-    }
+    const bool JIT_ALWAYS = false;
+    const bool JIT_DONT_USE_CACHE = false;

-    if(cached && clean)
+    if(!JIT_DONT_USE_CACHE &&
+       entry->group_status == group_dirtiness[phys_addr >> DIRTY_ARR_SHIFT] &&
+       entry->start_addr == phys_addr)
    {
        // XXX: With the code-generation, we need to figure out how we
        // would call the function from the other module here; likely
@ -279,6 +307,7 @@ void cycle_internal()

        // Confirm that cache is not dirtied (through page-writes,
        // mode switch, or just cache eviction)
+        /*
        for(int32_t i = 0; i < entry->len; i++)
        {
            *previous_ip = *instruction_pointer;
@ -287,7 +316,10 @@ void cycle_internal()
            assert(opcode == entry->opcode[i]);
            run_instruction(entry->opcode[i] | !!*is_32 << 8);
            (*timestamp_counter)++;
-        }
+        }*/
+
+        codegen_call_cache(phys_addr);
+
        // XXX: Try to find an assert to detect self-modifying code
        // JIT compiled self-modifying basic blocks may trigger this assert
        // assert(entry->group_status != group_dirtiness[entry->start_addr >> DIRTY_ARR_SHIFT]);
@ -295,10 +327,14 @@ void cycle_internal()
    }
    // A jump just occured indicating the start of a basic block + the
    // address is hot; let's JIT compile it
-    else if(jit_jump == 1 && ++hot_code_addresses[jit_hot_hash(phys_addr)] > JIT_THRESHOLD)
+    else if(JIT_ALWAYS || jit_jump == 1 && ++hot_code_addresses[jit_hot_hash(phys_addr)] > JIT_THRESHOLD)
    {
+        int32_t start_addr = *instruction_pointer;
+        jit_in_progress = false;
+
        // Minimize collision based thrashing
        hot_code_addresses[jit_hot_hash(phys_addr)] = 0;
+
        jit_jump = 0;
        entry->len = 0;
        entry->start_addr = phys_addr;
@ -308,6 +344,8 @@ void cycle_internal()

        *cache_compile = *cache_compile + 1;

+        gen_reset();
+
        // XXX: Artificial limit allows jit_dirty_cache to be
        // simplified by only dirtying 2 entries based on a mask
        // (instead of all possible entries)
@ -316,23 +354,30 @@ void cycle_internal()
        {
            *previous_ip = *instruction_pointer;
            int32_t opcode = read_imm8();
-            // XXX: Currently only includes opcode of final jmp, not operands
-            entry->end_addr = *eip_phys ^ *instruction_pointer;
+
            entry->opcode[entry->len] = opcode;
            entry->len++;

-            // XXX: Generate the instruction instead of running it
-            // XXX: If it's a jmp instruction, make sure
-            // generate_instruction sets jit_jump=1 and end_addr is set correctly
-            run_instruction(opcode | !!*is_32 << 8);
-            (*timestamp_counter)++;
+            generate_instruction(opcode | !!*is_32 << 8);
+
+            entry->end_addr = *eip_phys ^ *instruction_pointer;
        }
+
+        jit_jump = 0;
+
+        gen_finish();
+        jit_in_progress = false;
+
+        codegen_finalize(start_addr, entry->start_addr, entry->end_addr);
+
+        assert(*prefixes == 0);
+
        // When the hot instruction is a jmp (backwards),
        // leave its group_status unupdated, thereby invalidating it
-        if (entry->end_addr > entry->start_addr)
-        {
-            entry->group_status = group_dirtiness[phys_addr >> DIRTY_ARR_SHIFT];
-        }
+        //if (entry->end_addr > entry->start_addr)
+        //{
+        entry->group_status = group_dirtiness[phys_addr >> DIRTY_ARR_SHIFT];
+        //}
    }
    // Regular un-hot code execution
    else
@ -364,6 +409,12 @@ static void run_prefix_instruction()
    run_instruction(read_imm8() | is_osize_32() << 8);
 }

+static void jit_prefix_instruction() // same shape as run_prefix_instruction, but hands the next opcode to jit_instruction
+{
+    dbg_log("jit_prefix_instruction is32=%d", is_osize_32());
+    jit_instruction(read_imm8() | is_osize_32() << 8); // next opcode byte, with the operand-size flag in bit 8
+}
+
 void clear_prefixes()
 {
    *prefixes = 0;
@ -377,6 +428,14 @@ void segment_prefix_op(int32_t seg)
    *prefixes = 0;
 }

+void segment_prefix_op_jit(int32_t seg) // code-generating variant of segment_prefix_op
+{
+    assert(seg <= 5); // NOTE(review): presumably seg is a segment register index -- confirm valid range
+    gen_add_prefix_bits(seg + 1); // presumably emits code that records the segment override -- confirm in codegen
+    jit_prefix_instruction(); // generate the instruction that the prefix applies to
+    gen_clear_prefixes();
+}
+
 void do_many_cycles_unsafe()
 {
    for(int32_t k = 0; k < LOOP_COUNTER; k++)
--- a/src/native/instructions.c
+++ b/src/native/instructions.c
--- a/src/native/instructions_0f.c
+++ b/src/native/instructions_0f.c