v86/gen/generate_analyzer.js

573 lines
17 KiB
JavaScript
Executable file

#!/usr/bin/env node
"use strict";
const fs = require("fs");
const path = require("path");
const encodings = require("./x86_table");
const c_ast = require("./c_ast");
const { hex, mkdirpSync, get_switch_value, get_switch_exist, finalize_table } = require("./util");
// C statement emitted into the generated code to set the nonfaulting flag
// on the analysis result.
const APPEND_NONFAULTING_FLAG = "analysis.flags |= JIT_INSTR_NONFAULTING_FLAG;";

// Output directory: --output-dir when given, otherwise ../build relative to
// this script. It is created up front.
const OUT_DIR = get_switch_value("--output-dir") || path.join(__dirname, "..", "build");
mkdirpSync(OUT_DIR);

const table_arg = get_switch_value("--table");
const gen_all = get_switch_exist("--all");

// A table is selected when --all is passed or when --table names it.
const is_table_requested = table_name => gen_all || table_arg === table_name;

const to_generate = {
    analyzer: is_table_requested("analyzer"),
    analyzer0f_16: is_table_requested("analyzer0f_16"),
    analyzer0f_32: is_table_requested("analyzer0f_32"),
};

console.assert(
    Object.values(to_generate).some(Boolean),
    "Pass --table [analyzer|analyzer0f_16|analyzer0f_32] or --all to pick which tables to generate"
);

// Function declarations are hoisted, so gen_table is callable here.
gen_table();
/**
 * Picks the C expression that reads the immediate operand of an encoding.
 *
 * @param {Object} op - Encoding entry; the imm8, imm8s, imm16, imm1632,
 *     imm32, immaddr, os and opcode properties are inspected.
 * @param {number|undefined} size_variant - Operand size (16 or 32) used when
 *     the encoding is operand-size dependent or has an odd opcode.
 * @returns {string|undefined} A reader call such as "read_imm8()", or
 *     undefined when the encoding declares no immediate.
 */
function gen_read_imm_call(op, size_variant)
{
    const has_immediate =
        op.imm8 || op.imm8s || op.imm16 || op.imm1632 || op.imm32 || op.immaddr;

    if(!has_immediate)
    {
        return undefined;
    }
    if(op.imm8)
    {
        return "read_imm8()";
    }
    if(op.imm8s)
    {
        return "read_imm8s()";
    }
    if(op.immaddr)
    {
        // immaddr: depends on address size
        return "read_moffs()";
    }

    console.assert(op.imm1632 || op.imm16 || op.imm32);

    // Encodings that are neither operand-size dependent nor odd-numbered are
    // treated as 8 bit.
    const uses_size_variant = op.os || op.opcode % 2 === 1;
    const size = uses_size_variant ? size_variant : 8;

    const reads_16_bit = (op.imm1632 && size === 16) || op.imm16;
    if(reads_16_bit)
    {
        return "read_imm16()";
    }

    console.assert((op.imm1632 && size === 32) || op.imm32);
    return "read_imm32s()";
}
/**
 * Formats a C call statement.
 *
 * @param {string} name - Name of the function to call.
 * @param {string[]} [args] - Argument expressions; a missing or falsy value
 *     is treated as an empty argument list.
 * @returns {string} The statement "name(arg1, arg2);".
 */
function gen_call(name, args)
{
    const argument_list = (args || []).join(", ");
    return name + "(" + argument_list + ");";
}
/**
 * Turns a list of C expressions into a list of C statements.
 *
 * @param {string[]} args - Expressions without a trailing semicolon.
 * @returns {string[]} A new array with ";" appended to every expression.
 */
function gen_codegen_call(args)
{
    const statements = [];
    for(const expression of args)
    {
        statements.push(expression + ";");
    }
    return statements;
}
/**
 * Like gen_codegen_call, but the resulting statement list starts with a
 * "modrm_skip(modrm_byte);" call.
 *
 * @param {string[]} args - Expressions without a trailing semicolon.
 * @returns {string[]} The modrm_skip statement followed by one statement per
 *     expression.
 */
function gen_codegen_call_modrm(args)
{
    const statements = args.map(expression => expression + ";");
    const skip_modrm = gen_call("modrm_skip", ["modrm_byte"]);
    return [skip_modrm, ...statements];
}
/**
 * Builds an if-else node that branches on whether the modrm byte is below
 * 0xC0 (the memory branch) or not (the register branch).
 *
 * @param {string[]} mem_args - Expressions emitted in the memory branch,
 *     after the modrm_skip call.
 * @param {string[]} reg_args - Expressions emitted in the register branch.
 * @param {Object} [postfixes] - Optional mem_postfix / reg_postfix statement
 *     arrays appended to the respective branch.
 * @returns {Object} An "if-else" syntax tree node.
 */
function gen_modrm_mem_reg_split(mem_args, reg_args, postfixes={})
{
    const mem_postfix = postfixes.mem_postfix === undefined ? [] : postfixes.mem_postfix;
    const reg_postfix = postfixes.reg_postfix === undefined ? [] : postfixes.reg_postfix;

    const mem_body = gen_codegen_call_modrm(mem_args).concat(mem_postfix);
    const reg_body = gen_codegen_call(reg_args).concat(reg_postfix);

    const mem_branch = {
        condition: "modrm_byte < 0xC0",
        body: mem_body,
    };

    return {
        type: "if-else",
        if_blocks: [mem_branch],
        else_block: {
            body: reg_body,
        },
    };
}
/*
 * Current naming scheme:
 * instr(16|32|)_((66|F2|F3)?0F)?[0-9a-f]{2}(_[0-7])?(_mem|_reg|)
 */

/**
 * Composes the instruction name for an encoding. This function produces
 * everything up to and including the optional fixed_g part; it does not
 * append a _mem or _reg suffix.
 *
 * @param {Object} encoding - Encoding entry; os, opcode and fixed_g are read.
 * @param {number|undefined} size - Operand size, only used when encoding.os
 *     is set.
 * @param {number} [prefix_variant] - Prefix byte (callers in this file pass
 *     0x66, 0xF2 or 0xF3) to embed in the name; omitted for none.
 * @returns {string} The instruction name.
 */
function make_instruction_name(encoding, size, prefix_variant)
{
    const opcode_hex = hex(encoding.opcode & 0xFF, 2);
    const is_0f_opcode = (encoding.opcode & 0xFF00) === 0x0F00;

    let name = "instr";
    if(encoding.os)
    {
        name += String(size);
    }
    name += "_";
    if(prefix_variant !== undefined)
    {
        name += hex(prefix_variant, 2);
    }
    if(is_0f_opcode)
    {
        name += "0F";
    }
    name += opcode_hex;
    if(encoding.fixed_g !== undefined)
    {
        name += "_" + encoding.fixed_g;
    }
    return name;
}
/**
 * Decides which modrm branch of a nonfaulting encoding gets the nonfaulting
 * flag statement: the memory branch for opcode 0x8D (the LEA special case),
 * the register branch for every other opcode.
 *
 * @param {Object} encoding - Encoding entry; nonfaulting and opcode are read.
 * @returns {{mem_postfix: string[], reg_postfix: string[]}} Postfix
 *     statements per branch; both are empty unless encoding.nonfaulting is
 *     truthy.
 */
function get_nonfaulting_mem_reg_postfix(encoding)
{
    const postfixes = {
        mem_postfix: [],
        reg_postfix: [],
    };

    if(encoding.nonfaulting)
    {
        const is_lea = encoding.opcode === 0x8D;
        const flagged_branch = is_lea ? postfixes.mem_postfix : postfixes.reg_postfix;
        flagged_branch.push(APPEND_NONFAULTING_FLAG);
    }

    return postfixes;
}
/**
 * Collects the flag statements that follow an instruction's generated body.
 *
 * @param {Object} encoding - Encoding entry; block_boundary and
 *     no_next_instruction are read.
 * @returns {string[]} Zero, one or two C statements; the block-boundary
 *     flag comes before the no-next-instruction flag.
 */
function create_instruction_postfix(encoding)
{
    const postfix = [];
    if(encoding.block_boundary)
    {
        postfix.push("analysis.flags |= JIT_INSTR_BLOCK_BOUNDARY_FLAG;");
    }
    if(encoding.no_next_instruction)
    {
        postfix.push("analysis.flags |= JIT_INSTR_NO_NEXT_INSTRUCTION_FLAG;");
    }
    return postfix;
}
/**
 * Builds the if-else node that selects between the prefixed variants of an
 * instruction at analysis time. One if-block is emitted per prefix that is
 * present (66, F2, F3, in that order) and the else-block covers the
 * unprefixed case. Every block contains a fresh mem/reg split without
 * postfixes.
 *
 * @param {boolean} has_66 - Whether a 66-prefixed variant exists.
 * @param {boolean} has_F2 - Whether an F2-prefixed variant exists.
 * @param {boolean} has_F3 - Whether an F3-prefixed variant exists.
 * @param {string[]} mem_args - Expressions for the memory branch.
 * @param {string[]} reg_args - Expressions for the register branch.
 * @returns {Object} An "if-else" syntax tree node; the generated code
 *     expects a local named prefixes_ to be in scope.
 */
function gen_prefix_dispatch(has_66, has_F2, has_F3, mem_args, reg_args)
{
    const variants = [
        [has_66, "prefixes_ & PREFIX_66"],
        [has_F2, "prefixes_ & PREFIX_F2"],
        [has_F3, "prefixes_ & PREFIX_F3"],
    ];

    const if_blocks = [];
    for(const [present, condition] of variants)
    {
        if(present)
        {
            const body = [gen_modrm_mem_reg_split(mem_args, reg_args, {})];
            if_blocks.push({ condition, body, });
        }
    }

    const else_block = {
        body: [gen_modrm_mem_reg_split(mem_args, reg_args, {})],
    };

    return {
        type: "if-else",
        if_blocks,
        else_block,
    };
}

/**
 * Generates the analyzer body (a list of C statements and syntax tree
 * nodes) for all encodings that share one opcode byte.
 *
 * The shape of the body is chosen from the first encoding:
 *  1. fixed_g set: a switch on the middle three bits of the modrm byte
 *  2. any 66/F2/F3-prefixed variant: modrm read plus a prefix dispatch
 *  3. e set: modrm read with a mem/reg split (or no split for ignore_mod)
 *  4. prefix set: a "return <name>_analyze(...)" call
 *  5. otherwise: immediate reads and flag statements only
 *
 * @param {Object[]} encodings - Non-empty list of encodings for one opcode.
 * @param {number|undefined} size - Operand size variant (16, 32) or
 *     undefined when the opcode is not operand-size dependent.
 * @returns {Array<string|Object>} Statements and nodes for the case body.
 */
function gen_instruction_body(encodings, size)
{
    const encoding = encodings[0];

    // The byte above the 0F escape holds the mandatory prefix, if any
    const prefix_bytes = new Set(encodings.map(e => e.opcode >>> 16));
    const has_66 = prefix_bytes.has(0x66);
    const has_F2 = prefix_bytes.has(0xF2);
    const has_F3 = prefix_bytes.has(0xF3);
    const has_prefix_variant = has_66 || has_F2 || has_F3;

    if(has_prefix_variant)
    {
        console.assert((encoding.opcode & 0xFF00) === 0x0F00);
    }

    if(encoding.fixed_g !== undefined)
    {
        // instruction with modrm byte where the middle 3 bits encode the instruction

        // group by opcode without prefix plus middle bits of modrm byte;
        // when several encodings share a key, the last one wins
        const cases_by_key = Object.create(null);
        for(const case_ of encodings)
        {
            console.assert(typeof case_.fixed_g === "number");
            cases_by_key[(case_.opcode & 0xFFFF) | (case_.fixed_g << 16)] = case_;
        }
        const cases = Object.values(cases_by_key).sort((e1, e2) => e1.fixed_g - e2.fixed_g);

        return [
            "int32_t modrm_byte = read_imm8();",
            {
                type: "switch",
                condition: "modrm_byte >> 3 & 7",
                cases: cases.map(case_ => {
                    const fixed_g = case_.fixed_g;
                    const instruction_postfix = create_instruction_postfix(case_);
                    const imm_read = gen_read_imm_call(case_, size);
                    const mem_args = imm_read ? [imm_read] : [];
                    const reg_args = imm_read ? [imm_read] : [];

                    if(has_prefix_variant)
                    {
                        // Note: the prefixed paths do not add the nonfaulting flag
                        return {
                            conditions: [fixed_g],
                            body: [
                                "int32_t prefixes_ = *prefixes;",
                                gen_prefix_dispatch(has_66, has_F2, has_F3, mem_args, reg_args),
                            ].concat(instruction_postfix),
                        };
                    }

                    return {
                        conditions: [fixed_g],
                        body: [
                            gen_modrm_mem_reg_split(
                                mem_args,
                                reg_args,
                                get_nonfaulting_mem_reg_postfix(case_)
                            )
                        ].concat(instruction_postfix),
                    };
                }),
                // modrm values without a matching encoding end the block
                default_case: {
                    body: [
                        "analysis.flags |= JIT_INSTR_BLOCK_BOUNDARY_FLAG;",
                        "analysis.flags |= JIT_INSTR_NO_NEXT_INSTRUCTION_FLAG;",
                    ],
                }
            },
        ];
    }

    if(has_prefix_variant)
    {
        // instruction with modrm byte (no fixed_g) that has prefixed variants
        console.assert(encoding.e);
        console.assert(!encoding.ignore_mod);

        const instruction_postfix = create_instruction_postfix(encoding);
        const imm_read = gen_read_imm_call(encoding, size);
        const mem_args = imm_read ? [imm_read] : [];
        const reg_args = imm_read ? [imm_read] : [];

        return [
            "int32_t modrm_byte = read_imm8();",
            "int32_t prefixes_ = *prefixes;",
            gen_prefix_dispatch(has_66, has_F2, has_F3, mem_args, reg_args),
        ].concat(instruction_postfix);
    }

    if(encoding.e)
    {
        // instruction with modrm byte where the middle 3 bits encode a register
        console.assert(encodings.length === 1);

        const instruction_postfix = create_instruction_postfix(encoding);
        const imm_read = gen_read_imm_call(encoding, size);

        if(encoding.ignore_mod)
        {
            console.assert(!imm_read, "Unexpected instruction (ignore mod with immediate value)");

            // Has modrm byte, but the 2 mod bits are ignored and both
            // operands are always registers (0f20-0f24)
            if(encoding.nonfaulting)
            {
                instruction_postfix.push(APPEND_NONFAULTING_FLAG);
            }

            return ["int32_t modrm_byte = read_imm8();"].concat(instruction_postfix);
        }

        const mem_args = imm_read ? [imm_read] : [];
        const reg_args = imm_read ? [imm_read] : [];

        return [
            "int32_t modrm_byte = read_imm8();",
            gen_modrm_mem_reg_split(
                mem_args,
                reg_args,
                get_nonfaulting_mem_reg_postfix(encoding)
            ),
        ].concat(instruction_postfix);
    }

    if(encoding.prefix)
    {
        console.assert(!encoding.nonfaulting, "Prefix/custom instructions cannot be marked as nonfaulting.");

        const instruction_postfix = create_instruction_postfix(encoding);
        const instruction_name = make_instruction_name(encoding, size) + "_analyze";
        const imm_read = gen_read_imm_call(encoding, size);
        const args = imm_read ? [imm_read] : [];

        // Prefix calls can add to the return flags
        return ["return " + gen_call(instruction_name, args)].concat(instruction_postfix);
    }

    // instruction without modrm byte or prefix
    const instruction_postfix = create_instruction_postfix(encoding);
    const imm_read = gen_read_imm_call(encoding, size);
    const statements = [];

    if(imm_read)
    {
        if(encoding.jump_offset_imm)
        {
            // Record the immediate as the jump offset of the instruction
            statements.push("int32_t jump_offset = " + imm_read + ";");
            statements.push("analysis.jump_offset = jump_offset;");
            statements.push("analysis.flags |= is_osize_32() ? JIT_INSTR_IMM_JUMP32_FLAG : JIT_INSTR_IMM_JUMP16_FLAG;");
        }
        else
        {
            statements.push(imm_read + ";");
        }
    }

    if(encoding.extra_imm16)
    {
        console.assert(imm_read);
        statements.push(gen_call("read_imm16"));
    }
    else if(encoding.extra_imm8)
    {
        console.assert(imm_read);
        statements.push(gen_call("read_imm8"));
    }

    if(encoding.nonfaulting)
    {
        instruction_postfix.push(APPEND_NONFAULTING_FLAG);
    }

    if(encoding.conditional_jump)
    {
        // The low nibble of the opcode is used as the condition index
        console.assert((encoding.opcode & ~0xF) === 0x70 || (encoding.opcode & ~0xF) === 0x0F80);
        instruction_postfix.push("analysis.condition_index = " + (encoding.opcode & 0xF) + ";");
    }

    return statements.concat(instruction_postfix);
}
/**
 * Groups the encoding table by opcode byte, builds the C switch statements
 * for the one-byte opcode table and the 16/32-bit 0F opcode tables, and
 * writes the tables selected in to_generate via finalize_table.
 */
function gen_table()
{
    const by_opcode = Object.create(null);
    const by_opcode0f = Object.create(null);

    for(const o of encodings)
    {
        const is_multi_byte = o.opcode >= 0x100;
        if(is_multi_byte && (o.opcode & 0xFF00) !== 0x0F00)
        {
            // Multi-byte opcodes outside of the 0F map are not grouped
            continue;
        }

        // Opcodes in the 0F map are keyed by their low byte only, so
        // prefixed variants land in the same group
        const group = is_multi_byte ? by_opcode0f : by_opcode;
        const key = is_multi_byte ? o.opcode & 0xFF : o.opcode;
        group[key] = group[key] || [];
        group[key].push(o);
    }

    // Wraps a list of cases into a switch over the opcode
    const make_opcode_switch = switch_cases => ({
        type: "switch",
        condition: "opcode",
        cases: switch_cases,
        default_case: {
            body: ["assert(false);"]
        },
    });

    // Prints a table and hands the text to finalize_table
    const write_table = (name, table) => finalize_table(
        OUT_DIR,
        name,
        c_ast.print_syntax_tree([table]).join("\n") + "\n"
    );

    const cases = [];
    for(let opcode = 0; opcode < 0x100; opcode++)
    {
        const encoding = by_opcode[opcode];
        console.assert(encoding && encoding.length);

        const label_16 = `0x${hex(opcode, 2)}`;
        const label_32 = `${label_16}|0x100`;

        if(encoding[0].os)
        {
            // Operand-size dependent: separate bodies for 16 and 32 bit
            cases.push({
                conditions: [label_16],
                body: gen_instruction_body(encoding, 16),
            });
            cases.push({
                conditions: [label_32],
                body: gen_instruction_body(encoding, 32),
            });
        }
        else
        {
            cases.push({
                conditions: [label_16, label_32],
                body: gen_instruction_body(encoding, undefined),
            });
        }
    }

    const table = make_opcode_switch(cases);
    if(to_generate.analyzer)
    {
        write_table("analyzer", table);
    }

    const cases0f_16 = [];
    const cases0f_32 = [];
    for(let opcode = 0; opcode < 0x100; opcode++)
    {
        const encoding = by_opcode0f[opcode];
        console.assert(encoding && encoding.length);

        const label = `0x${hex(opcode, 2)}`;

        if(encoding[0].os)
        {
            cases0f_16.push({
                conditions: [label],
                body: gen_instruction_body(encoding, 16),
            });
            cases0f_32.push({
                conditions: [label],
                body: gen_instruction_body(encoding, 32),
            });
        }
        else
        {
            // Size-independent: the same case object is shared by both tables
            const shared_case = {
                conditions: [label],
                body: gen_instruction_body(encoding, undefined),
            };
            cases0f_16.push(shared_case);
            cases0f_32.push(shared_case);
        }
    }

    const table0f_16 = make_opcode_switch(cases0f_16);
    const table0f_32 = make_opcode_switch(cases0f_32);

    if(to_generate.analyzer0f_16)
    {
        write_table("analyzer0f_16", table0f_16);
    }
    if(to_generate.analyzer0f_32)
    {
        write_table("analyzer0f_32", table0f_32);
    }
}