From 37c3d1f83cc5919ebd1c49cbb407e92bcfaa971e Mon Sep 17 00:00:00 2001 From: Fabian Date: Thu, 31 Dec 2020 19:14:33 -0600 Subject: [PATCH] Generate direct control flow, using wasm blocks and loops --- gen/generate_analyzer.js | 4 + gen/x86_table.js | 8 +- src/browser/print_stats.js | 28 +- src/rust/analysis.rs | 2 + src/rust/codegen.rs | 27 +- src/rust/control_flow.rs | 407 ++++++++++++ src/rust/jit.rs | 1043 ++++++++++++++++++++++-------- src/rust/jit_instructions.rs | 72 ++- src/rust/lib.rs | 1 + src/rust/opstats.rs | 14 +- src/rust/profiler.rs | 29 +- src/rust/wasmgen/wasm_builder.rs | 29 +- 12 files changed, 1298 insertions(+), 366 deletions(-) create mode 100644 src/rust/control_flow.rs diff --git a/gen/generate_analyzer.js b/gen/generate_analyzer.js index 28387ba4..15d87577 100755 --- a/gen/generate_analyzer.js +++ b/gen/generate_analyzer.js @@ -232,6 +232,10 @@ function gen_instruction_body_after_fixed_g(encoding, size) { instruction_postfix.push("analysis.no_next_instruction = true;"); } + if(encoding.absolute_jump) + { + instruction_postfix.push("analysis.absolute_jump = true;"); + } if(encoding.prefix) { diff --git a/gen/x86_table.js b/gen/x86_table.js index 43d3ea02..09e5c2a1 100644 --- a/gen/x86_table.js +++ b/gen/x86_table.js @@ -244,8 +244,8 @@ const encodings = [ { opcode: 0xF2AF, block_boundary: 1, custom: 1, is_string: 1, os: 1, }, { opcode: 0xF3AF, block_boundary: 1, custom: 1, is_string: 1, os: 1, }, - { opcode: 0xC2, custom: 1, block_boundary: 1, no_next_instruction: 1, os: 1, imm16: 1, skip: 1, }, // ret - { opcode: 0xC3, custom: 1, block_boundary: 1, no_next_instruction: 1, os: 1, skip: 1, }, + { opcode: 0xC2, custom: 1, block_boundary: 1, no_next_instruction: 1, os: 1, absolute_jump: 1, imm16: 1, skip: 1, }, // ret + { opcode: 0xC3, custom: 1, block_boundary: 1, no_next_instruction: 1, os: 1, absolute_jump: 1, skip: 1, }, { opcode: 0xC4, block_boundary: 1, os: 1, e: 1, skip: 1, }, // les { opcode: 0xC5, block_boundary: 1, os: 1, e: 1, skip: 1, }, // lds @@ -401,9 +401,9 @@ const encodings = [ { opcode: 0xFE, e: 1, fixed_g: 1, custom: 1 }, { opcode: 0xFF, os: 1, e: 1, fixed_g: 0, custom: 1, }, { opcode: 0xFF, os: 1, e: 1, fixed_g: 1, custom: 1, }, - { opcode: 0xFF, os: 1, e: 1, fixed_g: 2, custom: 1, block_boundary: 1, skip: 1, }, + { opcode: 0xFF, os: 1, e: 1, fixed_g: 2, custom: 1, block_boundary: 1, absolute_jump: 1, skip: 1, }, { opcode: 0xFF, os: 1, e: 1, fixed_g: 3, block_boundary: 1, skip: 1, }, - { opcode: 0xFF, os: 1, e: 1, fixed_g: 4, custom: 1, block_boundary: 1, no_next_instruction: 1, skip: 1, }, + { opcode: 0xFF, os: 1, e: 1, fixed_g: 4, custom: 1, block_boundary: 1, absolute_jump: 1, no_next_instruction: 1, skip: 1, }, { opcode: 0xFF, os: 1, e: 1, fixed_g: 5, block_boundary: 1, no_next_instruction: 1, skip: 1, }, { opcode: 0xFF, custom: 1, os: 1, e: 1, fixed_g: 6, }, diff --git a/src/browser/print_stats.js b/src/browser/print_stats.js index 872d8c79..c3129261 100644 --- a/src/browser/print_stats.js +++ b/src/browser/print_stats.js @@ -19,9 +19,14 @@ const print_stats = { "COMPILE_WITH_LOOP_SAFETY", "COMPILE_PAGE", "COMPILE_BASIC_BLOCK", + "COMPILE_DUPLICATED_BASIC_BLOCK", + "COMPILE_WASM_BLOCK", + "COMPILE_WASM_LOOP", + "COMPILE_DISPATCHER", "COMPILE_ENTRY_POINT", "COMPILE_WASM_TOTAL_BYTES", - "CACHE_MISMATCH", + "JIT_CACHE_OVERRIDE", + "JIT_CACHE_OVERRIDE_DIFFERENT_STATE_FLAGS", "RUN_INTERPRETED", "RUN_INTERPRETED_PENDING", "RUN_INTERPRETED_NEAR_END_OF_PAGE", @@ -31,6 +36,24 @@ const print_stats = { "RUN_INTERPRETED_STEPS", "RUN_FROM_CACHE", "RUN_FROM_CACHE_STEPS", + "DIRECT_EXIT", + "INDIRECT_JUMP", + "INDIRECT_JUMP_NO_ENTRY", + "NORMAL_PAGE_CHANGE", + "NORMAL_FALLTHRU", + "NORMAL_FALLTHRU_WITH_TARGET_BLOCK", + "NORMAL_BRANCH", + "NORMAL_BRANCH_WITH_TARGET_BLOCK", + "CONDITIONAL_JUMP", + "CONDITIONAL_JUMP_PAGE_CHANGE", + "CONDITIONAL_JUMP_EXIT", + "CONDITIONAL_JUMP_FALLTHRU", + "CONDITIONAL_JUMP_FALLTHRU_WITH_TARGET_BLOCK", + "CONDITIONAL_JUMP_BRANCH", + "CONDITIONAL_JUMP_BRANCH_WITH_TARGET_BLOCK", + "DISPATCHER_SMALL", + "DISPATCHER_LARGE", + "LOOP", "FAILED_PAGE_CHANGE", "SAFE_READ_FAST", "SAFE_READ_SLOW_PAGE_CROSSED", @@ -93,9 +116,10 @@ const print_stats = { text += "TLB_ENTRIES=" + tlb_entries + " (" + global_tlb_entries + " global, " + nonglobal_tlb_entries + " non-global)\n"; text += "WASM_TABLE_FREE=" + cpu.wm.exports["jit_get_wasm_table_index_free_list_count"]() + "\n"; + text += "JIT_CACHE_SIZE=" + cpu.wm.exports["jit_get_cache_size"]() + "\n"; text += "FLAT_SEGMENTS=" + cpu.wm.exports["has_flat_segmentation"]() + "\n"; - text += "do_many_cycles avg: " + do_many_cycles_total / do_many_cycles_count + "\n"; + text += "do_many_cycles avg: " + (do_many_cycles_total / do_many_cycles_count || 0) + "\n"; text += "wasm memory size: " + (cpu.wasm_memory.buffer.byteLength >> 20) + "m\n"; return text; diff --git a/src/rust/analysis.rs b/src/rust/analysis.rs index f3a2c421..f52a6bc8 100644 --- a/src/rust/analysis.rs +++ b/src/rust/analysis.rs @@ -18,12 +18,14 @@ pub enum AnalysisType { pub struct Analysis { pub no_next_instruction: bool, + pub absolute_jump: bool, pub ty: AnalysisType, } pub fn analyze_step(mut cpu: &mut CpuContext) -> Analysis { let mut analysis = Analysis { no_next_instruction: false, + absolute_jump: false, ty: AnalysisType::Normal, }; cpu.prefixes = 0; diff --git a/src/rust/codegen.rs b/src/rust/codegen.rs index faf21c69..3548c709 100644 --- a/src/rust/codegen.rs +++ b/src/rust/codegen.rs @@ -17,7 +17,7 @@ pub fn gen_add_cs_offset(ctx: &mut JitContext) { ctx.builder.add_i32(); } -fn gen_get_eip(builder: &mut WasmBuilder) { +pub fn gen_get_eip(builder: &mut WasmBuilder) { builder.load_fixed_i32(global_pointers::instruction_pointer as u32); } @@ -89,36 +89,13 @@ pub fn gen_page_switch_check( ctx.builder.const_i32(next_block_addr as i32); ctx.builder.ne_i32(); ctx.builder.if_void(); + // TODO: br_if gen_profiler_stat_increment(ctx.builder, profiler::stat::FAILED_PAGE_CHANGE); gen_debug_track_jit_exit(ctx.builder, last_instruction_addr); ctx.builder.br(ctx.exit_label); ctx.builder.block_end(); } -pub fn gen_absolute_indirect_jump(ctx: &mut JitContext, new_eip: WasmLocal) { - ctx.builder - .const_i32(global_pointers::instruction_pointer as i32); - ctx.builder.get_local(&new_eip); - ctx.builder.store_aligned_i32(0); - - gen_get_phys_eip(ctx, &new_eip); - ctx.builder.free_local(new_eip); - - ctx.builder - .const_i32(ctx.our_wasm_table_index.to_u16() as i32); - ctx.builder.const_i32(ctx.state_flags.to_u32() as i32); - ctx.builder.call_fn3_ret("jit_find_cache_entry_in_page"); - let new_basic_block_index = ctx.builder.tee_new_local(); - ctx.builder.const_i32(0); - ctx.builder.ge_i32(); - ctx.builder.if_void(); - ctx.builder.get_local(&new_basic_block_index); - ctx.builder.set_local(ctx.basic_block_index_local); - ctx.builder.br(ctx.main_loop_label); - ctx.builder.block_end(); - ctx.builder.free_local(new_basic_block_index); -} - pub fn gen_increment_timestamp_counter(builder: &mut WasmBuilder, n: i32) { builder.increment_fixed_i32(global_pointers::timestamp_counter as u32, n) } diff --git a/src/rust/control_flow.rs b/src/rust/control_flow.rs new file mode 100644 index 00000000..6a46cf54 --- /dev/null +++ b/src/rust/control_flow.rs @@ -0,0 +1,407 @@ +use std::collections::{HashMap, HashSet}; +use std::iter; + +use jit::{BasicBlock, BasicBlockType}; +use profiler; + +const ENTRY_NODE_ID: u32 = 0xffff_ffff; + +type Graph = HashMap>; + +/// Reverse the direction of all edges in the graph +fn rev_graph_edges(nodes: &Graph) -> Graph { + let mut rev_nodes = Graph::new(); + for (from, tos) in nodes { + for to in tos { + rev_nodes + .entry(*to) + .or_insert_with(|| HashSet::new()) + .insert(*from); + } + } + rev_nodes +} + +pub fn make_graph(basic_blocks: &Vec) -> Graph { + let mut nodes = Graph::new(); + let mut entry_edges = HashSet::new(); + + for b in basic_blocks.iter() { + let mut edges = HashSet::new(); + + match &b.ty { + BasicBlockType::ConditionalJump { + next_block_addr, + next_block_branch_taken_addr, + .. + } => { + if let Some(next_block_addr) = next_block_addr { + edges.insert(*next_block_addr); + } + if let Some(next_block_branch_taken_addr) = next_block_branch_taken_addr { + edges.insert(*next_block_branch_taken_addr); + } + }, + BasicBlockType::Normal { next_block_addr } => { + edges.insert(*next_block_addr); + }, + BasicBlockType::Exit => {}, + BasicBlockType::AbsoluteEip => { + // Not necessary: We generate a loop around the outer brtable unconditionally + //edges.insert(ENTRY_NODE_ID); + }, + } + + nodes.insert(b.addr, edges); + + if b.is_entry_block { + entry_edges.insert(b.addr); + } + } + + // Entry node that represents the initial basic block of the generated function (must be + // able to reach all entry nodes) + nodes.insert(ENTRY_NODE_ID, entry_edges); + return nodes; +} + +pub enum WasmStructure { + BasicBlock(u32), + Dispatcher(Vec), + Loop(Vec), + Block(Vec), +} +impl WasmStructure { + pub fn print(&self, depth: usize) { + match self { + Self::BasicBlock(addr) => dbg_log!("{} 0x{:x}", " ".repeat(depth), addr), + Self::Dispatcher(entries) => { + dbg_log!("{} Dispatcher entries:", " ".repeat(depth)); + for e in entries { + dbg_log!("{} {:x}", " ".repeat(depth), e); + } + }, + Self::Loop(elements) => { + dbg_log!("{} loop_void({})", " ".repeat(depth), elements.len()); + for e in elements { + e.print(depth + 1) + } + dbg_log!("{} loop_end({})", " ".repeat(depth), elements.len()); + }, + Self::Block(elements) => { + dbg_log!("{} block_void({})", " ".repeat(depth), elements.len()); + for e in elements { + e.print(depth + 1) + } + dbg_log!("{} block_end({})", " ".repeat(depth), elements.len()); + }, + } + } + + fn branches(&self, edges: &Graph) -> HashSet { + fn handle(block: &WasmStructure, edges: &Graph, result: &mut HashSet) { + match block { + WasmStructure::BasicBlock(addr) => result.extend(edges.get(&addr).unwrap()), + WasmStructure::Dispatcher(entries) => result.extend(entries), + WasmStructure::Loop(children) | WasmStructure::Block(children) => { + for c in children.iter() { + handle(c, edges, result); + } + }, + } + }; + + let mut result = HashSet::new(); + handle(self, edges, &mut result); + result + } + + pub fn head(&self) -> Box + '_> { + match self { + Self::BasicBlock(addr) => Box::new(iter::once(*addr)), + Self::Dispatcher(entries) => Box::new(entries.iter().copied()), + Self::Loop(children) => children.first().unwrap().head(), + Self::Block(elements) => elements.first().unwrap().head(), + } + } +} + +/// Check: +/// - Dispatcher appears at the beginning of a loop +/// - No two nested blocks at the end +/// - No two nested loops at the beginning +/// - No empty blocks or loops +/// - The entry node block is not present +pub fn assert_invariants(blocks: &Vec) { + fn check(node: &WasmStructure, in_tail_block: bool, in_head_loop: bool, is_first: bool) { + match node { + WasmStructure::Block(children) => { + dbg_assert!(!in_tail_block); + dbg_assert!(!children.is_empty()); + for (i, c) in children.iter().enumerate() { + let is_first = i == 0; + let is_last = i == children.len() - 1; + check(c, is_last, in_head_loop && is_first, is_first); + } + }, + WasmStructure::Loop(children) => { + dbg_assert!(!in_head_loop); + dbg_assert!(!children.is_empty()); + for (i, c) in children.iter().enumerate() { + let is_first = i == 0; + let is_last = i == children.len() - 1; + check(c, in_tail_block && is_last, is_first, is_first); + } + }, + &WasmStructure::BasicBlock(addr) => dbg_assert!(addr != ENTRY_NODE_ID), + WasmStructure::Dispatcher(_) => { + dbg_assert!(is_first); + //dbg_assert!(in_head_loop); // fails for module dispatcher + }, + } + } + + for (i, b) in blocks.iter().enumerate() { + check(b, false, false, i == 0); + } +} + +/// Strongly connected components via Kosaraju's algorithm +fn scc(edges: &Graph, rev_edges: &Graph) -> Vec> { + fn visit( + node: u32, + edges: &Graph, + rev_edges: &Graph, + visited: &mut HashSet, + l: &mut Vec, + ) { + if visited.contains(&node) { + return; + } + visited.insert(node); + for &next in edges.get(&node).unwrap() { + visit(next, edges, rev_edges, visited, l); + } + l.push(node); + } + + let mut l = Vec::new(); + let mut visited = HashSet::new(); + for &node in edges.keys() { + visit(node, edges, rev_edges, &mut visited, &mut l); + } + + fn assign( + node: u32, + edges: &Graph, + rev_edges: &Graph, + assigned: &mut HashSet, + group: &mut Vec, + ) { + if assigned.contains(&node) { + return; + } + assigned.insert(node); + group.push(node); + if let Some(nexts) = rev_edges.get(&node) { + for &next in nexts { + assign(next, edges, rev_edges, assigned, group); + } + } + } + let mut assigned = HashSet::new(); + let mut assignment = Vec::new(); + for &node in l.iter().rev() { + let mut group = Vec::new(); + assign(node, edges, rev_edges, &mut assigned, &mut group); + if !group.is_empty() { + assignment.push(group); + } + } + + assignment +} + +pub fn loopify(nodes: &Graph) -> Vec { + let rev_nodes = rev_graph_edges(nodes); + let groups = scc(nodes, &rev_nodes); + + return groups + .iter() + .flat_map(|group| { + dbg_assert!(!group.is_empty()); + if group.len() == 1 { + let addr = group[0]; + if addr == ENTRY_NODE_ID { + let entries = nodes.get(&ENTRY_NODE_ID).unwrap().iter().copied().collect(); + return vec![WasmStructure::Dispatcher(entries)].into_iter(); + } + let block = WasmStructure::BasicBlock(addr); + // self-loops + if nodes.get(&group[0]).unwrap().contains(&group[0]) { + return vec![WasmStructure::Loop(vec![block])].into_iter(); + } + else { + return vec![block].into_iter(); + } + } + + let entries_to_group: Vec = group + .iter() + .filter(|addr| { + // reachable from outside of the group + rev_nodes.get(addr).map_or(false, |x| { + x.iter().any(|incoming| !group.contains(incoming)) + }) + }) + .copied() + .collect(); + + if entries_to_group.len() != 1 { + dbg_log!( + "Compiling multi-entry loop with {} entries and {} basic blocks", + entries_to_group.len(), + group.len() + ); + } + + let max_extra_basic_blocks = 100; + + if entries_to_group.len() * group.len() > max_extra_basic_blocks { + let mut subgroup_edges: Graph = Graph::new(); + for elem in group { + subgroup_edges.insert( + *elem, + nodes + .get(&elem) + .unwrap() + .iter() + .filter(|dest| { + // XXX: This might remove forward edges to other loop entries + // Probably not an issue since it can go through the + // dispatcher + group.contains(dest) && !entries_to_group.contains(dest) + }) + .copied() + .collect(), + ); + } + + let mut loop_nodes = loopify(&subgroup_edges); + + if entries_to_group.len() > 1 { + loop_nodes.insert(0, WasmStructure::Dispatcher(entries_to_group)); + } + + return vec![WasmStructure::Loop(loop_nodes)].into_iter(); + } + else { + profiler::stat_increment_by( + profiler::stat::COMPILE_DUPLICATED_BASIC_BLOCK, + ((entries_to_group.len() - 1) * group.len()) as u64, + ); + + let nodes: Vec = entries_to_group + .iter() + .map(|&entry| { + let mut subgroup_edges: Graph = Graph::new(); + for &elem in group { + subgroup_edges.insert( + elem, + nodes + .get(&elem) + .unwrap() + .iter() + .copied() + .filter(|dest| group.contains(dest) && *dest != entry) + .collect(), + ); + } + let loop_nodes = loopify(&subgroup_edges); + WasmStructure::Loop(loop_nodes) + }) + .collect(); + + nodes.into_iter() + } + }) + .collect(); +} + +pub fn blockify(blocks: &mut Vec, edges: &Graph) { + let mut cached_branches: Vec> = Vec::new(); + for i in 0..blocks.len() { + cached_branches.push(blocks[i].branches(edges)); + } + + let mut i = 0; + while i < blocks.len() { + match &mut blocks[i] { + WasmStructure::BasicBlock(_) | WasmStructure::Dispatcher(_) => {}, + WasmStructure::Loop ( + blocks + ) + // TODO: Might be faster to do this *after* inserting blocks in this block + | WasmStructure::Block(blocks) => blockify(blocks, edges), + } + + let source = { + let mut source = None; + for j in 0..i { + if blocks[i].head().any(|bb| cached_branches[j].contains(&bb)) { + source = Some(j); + break; + } + } + match source { + Some(s) => s, + None => { + i += 1; + continue; + }, + } + }; + + // This is optional: Avoid putting a single basic block into a block + if source == i - 1 { + match &blocks[source] { + &WasmStructure::BasicBlock(_) => { + i += 1; + continue; + }, + _ => {}, + } + } + + let replacement = WasmStructure::Block(Vec::new()); + let children: Vec = + blocks.splice(source..i, iter::once(replacement)).collect(); + match &mut blocks[source] { + WasmStructure::Block(c) => c.extend(children), + _ => dbg_assert!(false), + } + match &blocks[source + 1] { + WasmStructure::BasicBlock(_) => + //dbg_assert!(*b == bbs.next().unwrap()) + {} + WasmStructure::Dispatcher(_) => {}, + WasmStructure::Loop(_blocks) | WasmStructure::Block(_blocks) => {}, //dbg_assert!(blocks[0].head() == bb), + } + + { + let replacement = HashSet::new(); + let children: Vec> = cached_branches + .splice(source..i, iter::once(replacement)) + .collect(); + dbg_assert!(cached_branches[source].len() == 0); + let mut iter = children.into_iter(); + cached_branches[source] = iter.next().unwrap(); + for c in iter { + cached_branches[source].extend(c); + } + } + + // skip the inserted block and this block + i = source + 2; + } +} diff --git a/src/rust/jit.rs b/src/rust/jit.rs index 39776eac..56267c51 100644 --- a/src/rust/jit.rs +++ b/src/rust/jit.rs @@ -1,10 +1,12 @@ -use std::collections::{BTreeMap, HashMap, HashSet}; +use std::collections::{BTreeMap, HashMap, HashSet, VecDeque}; use std::iter::FromIterator; use std::mem; use std::ptr::NonNull; use analysis::AnalysisType; use codegen; +use control_flow; +use control_flow::WasmStructure; use cpu::cpu; use cpu::global_pointers; use cpu::memory; @@ -53,13 +55,16 @@ pub fn jit_clear_func(wasm_table_index: WasmTableIndex) { unsafe { unsafe_jit::jit_clear_func(wasm_table_index) } } +// less branches will generate if-else, more will generate brtable +pub const BRTABLE_CUTOFF: usize = 10; + pub const WASM_TABLE_SIZE: u32 = 900; pub const HASH_PRIME: u32 = 6151; pub const CHECK_JIT_STATE_INVARIANTS: bool = false; -pub const JIT_MAX_ITERATIONS_PER_FUNCTION: u32 = 20011; +pub const JIT_MAX_ITERATIONS_PER_FUNCTION: u32 = 100003; pub const JIT_ALWAYS_USE_LOOP_SAFETY: bool = true; @@ -100,7 +105,7 @@ pub struct Entry { } enum PageState { - Compiling { basic_blocks: Vec }, + Compiling { entries: Vec<(u32, Entry)> }, CompilingWritten, } @@ -155,7 +160,7 @@ impl JitState { } #[derive(PartialEq, Eq)] -enum BasicBlockType { +pub enum BasicBlockType { Normal { next_block_addr: u32, }, @@ -166,18 +171,20 @@ enum BasicBlockType { jump_offset: i32, jump_offset_is_32: bool, }, + // Set eip to an absolute value (ret, jmp r/m, call r/m) + AbsoluteEip, Exit, } -struct BasicBlock { - addr: u32, - virt_addr: i32, - last_instruction_addr: u32, - end_addr: u32, - is_entry_block: bool, - ty: BasicBlockType, - has_sti: bool, - number_of_instructions: u32, +pub struct BasicBlock { + pub addr: u32, + pub virt_addr: i32, + pub last_instruction_addr: u32, + pub end_addr: u32, + pub is_entry_block: bool, + pub ty: BasicBlockType, + pub has_sti: bool, + pub number_of_instructions: u32, } #[derive(Copy, Clone, PartialEq)] @@ -198,12 +205,8 @@ pub struct JitContext<'a> { pub builder: &'a mut WasmBuilder, pub register_locals: &'a mut Vec, pub start_of_current_instruction: u32, - pub main_loop_label: Label, pub exit_with_fault_label: Label, pub exit_label: Label, - pub our_wasm_table_index: WasmTableIndex, - pub basic_block_index_local: &'a WasmLocal, - pub state_flags: CachedStateFlags, } pub const JIT_INSTR_BLOCK_BOUNDARY_FLAG: u32 = 1 << 0; @@ -245,6 +248,8 @@ pub fn jit_find_cache_entry_in_page( wasm_table_index: WasmTableIndex, state_flags: u32, ) -> i32 { + profiler::stat_increment(stat::INDIRECT_JUMP); + let state_flags = CachedStateFlags::of_u32(state_flags); let ctx = get_jit_state(); @@ -257,6 +262,8 @@ pub fn jit_find_cache_entry_in_page( None => {}, } + profiler::stat_increment(stat::INDIRECT_JUMP_NO_ENTRY); + return -1; } @@ -366,6 +373,7 @@ fn jit_find_basic_blocks( match analysis.ty { AnalysisType::Normal | AnalysisType::STI => { dbg_assert!(has_next_instruction); + dbg_assert!(!analysis.absolute_jump); if current_block.has_sti { // Convert next instruction after STI (i.e., the current instruction) into block boundary @@ -406,6 +414,7 @@ fn jit_find_basic_blocks( is_32, condition: Some(condition), } => { + dbg_assert!(!analysis.absolute_jump); // conditional jump: continue at next and continue at jump target let jump_target = if is_32 { @@ -464,6 +473,7 @@ fn jit_find_basic_blocks( is_32, condition: None, } => { + dbg_assert!(!analysis.absolute_jump); // non-conditional jump: continue at jump target let jump_target = if is_32 { @@ -517,6 +527,10 @@ fn jit_find_basic_blocks( to_visit_stack.push(current_virt_addr); } + if analysis.absolute_jump { + current_block.ty = BasicBlockType::AbsoluteEip; + } + current_block.last_instruction_addr = addr_before_instruction; current_block.end_addr = current_address; break; @@ -618,7 +632,6 @@ fn jit_analyze_and_generate( let entry_points = ctx.entry_points.remove(&page); if let Some(entry_points) = entry_points { - dbg_log!("Compile code for page at {:x}", page.to_address()); profiler::stat_increment(stat::COMPILE); let cpu = CpuContext { @@ -647,8 +660,79 @@ fn jit_analyze_and_generate( pages.insert(Page::page_of(b.addr)); } + let print = false; + + for b in basic_blocks.iter() { + if !print { + break; + } + let last_instruction_opcode = memory::read32s(b.last_instruction_addr); + let op = ::opstats::decode(last_instruction_opcode as u32); + dbg_log!( + "BB: 0x{:x} {}{:02x} {} {}", + b.addr, + if op.is_0f { "0f" } else { "" }, + op.opcode, + if b.is_entry_block { "entry" } else { "noentry" }, + match &b.ty { + BasicBlockType::ConditionalJump { + next_block_addr: Some(next_block_addr), + next_block_branch_taken_addr: Some(next_block_branch_taken_addr), + .. + } => format!( + "0x{:x} 0x{:x}", + next_block_addr, next_block_branch_taken_addr + ), + BasicBlockType::ConditionalJump { + next_block_addr: None, + next_block_branch_taken_addr: Some(next_block_branch_taken_addr), + .. + } => format!("0x{:x}", next_block_branch_taken_addr), + BasicBlockType::ConditionalJump { + next_block_addr: Some(next_block_addr), + next_block_branch_taken_addr: None, + .. + } => format!("0x{:x}", next_block_addr), + BasicBlockType::ConditionalJump { + next_block_addr: None, + next_block_branch_taken_addr: None, + .. + } => format!(""), + BasicBlockType::Normal { next_block_addr } => + format!("0x{:x}", next_block_addr), + BasicBlockType::Exit => format!(""), + BasicBlockType::AbsoluteEip => format!(""), + } + ); + } + + let graph = control_flow::make_graph(&basic_blocks); + let mut structure = control_flow::loopify(&graph); + + if print { + dbg_log!("before blockify:"); + for group in &structure { + dbg_log!("=> Group"); + group.print(0); + } + } + + control_flow::blockify(&mut structure, &graph); + + if cfg!(debug_assertions) { + control_flow::assert_invariants(&structure); + } + + if print { + dbg_log!("after blockify:"); + for group in &structure { + dbg_log!("=> Group"); + group.print(0); + } + } + if ctx.wasm_table_index_free_list.is_empty() { - dbg_log!("wasm_table_index_free_list empty, clearing cache",); + dbg_log!("wasm_table_index_free_list empty, clearing cache"); // When no free slots are available, delete all cached modules. We could increase the // size of the table, but this way the initial size acts as an upper bound for the @@ -681,13 +765,18 @@ fn jit_analyze_and_generate( .insert(wasm_table_index, pages.clone()); ctx.all_pages.extend(pages.clone()); - jit_generate_module( - &basic_blocks, + let basic_block_by_addr: HashMap = + basic_blocks.into_iter().map(|b| (b.addr, b)).collect(); + + let entries = jit_generate_module( + structure, + &basic_block_by_addr, cpu.clone(), &mut ctx.wasm_builder, wasm_table_index, state_flags, ); + dbg_assert!(!entries.is_empty()); profiler::stat_increment_by( stat::COMPILE_WASM_TOTAL_BYTES, @@ -700,7 +789,7 @@ fn jit_analyze_and_generate( } dbg_assert!(ctx.compiling.is_none()); - ctx.compiling = Some((wasm_table_index, PageState::Compiling { basic_blocks })); + ctx.compiling = Some((wasm_table_index, PageState::Compiling { entries })); let phys_addr = page.to_address(); @@ -738,7 +827,7 @@ pub fn codegen_finalize_finished( Page::page_of(phys_addr).to_address() ); - let basic_blocks = match mem::replace(&mut ctx.compiling, None) { + let entries = match mem::replace(&mut ctx.compiling, None) { None => { dbg_assert!(false); return; @@ -750,56 +839,28 @@ pub fn codegen_finalize_finished( free_wasm_table_index(ctx, wasm_table_index); return; }, - Some((in_progress_wasm_table_index, PageState::Compiling { basic_blocks })) => { + Some((in_progress_wasm_table_index, PageState::Compiling { entries })) => { dbg_assert!(wasm_table_index == in_progress_wasm_table_index); - basic_blocks + entries }, }; - // create entries for each basic block that is marked as an entry point - let mut entry_point_count = 0; - let mut check_for_unused_wasm_table_index = HashSet::new(); - for (i, block) in basic_blocks.iter().enumerate() { - profiler::stat_increment(stat::COMPILE_BASIC_BLOCK); + dbg_assert!(!entries.is_empty()); + for (addr, entry) in entries { + let maybe_old_entry = ctx.cache.insert(addr, entry); - dbg_assert!(block.addr < block.end_addr); - if block.is_entry_block { - let initial_state = i.safe_to_u16(); + if let Some(old_entry) = maybe_old_entry { + check_for_unused_wasm_table_index.insert(old_entry.wasm_table_index); - let entry = Entry { - wasm_table_index, - initial_state, - state_flags, - - #[cfg(any(debug_assertions, feature = "profiler"))] - len: block.end_addr - block.addr, - - #[cfg(debug_assertions)] - opcode: memory::read32s(block.addr) as u32, - }; - - let maybe_old_entry = ctx.cache.insert(block.addr, entry); - - if let Some(old_entry) = maybe_old_entry { - check_for_unused_wasm_table_index.insert(old_entry.wasm_table_index); - - if old_entry.state_flags == state_flags { - // TODO: stat - } - else { - // TODO: stat - } + profiler::stat_increment(stat::JIT_CACHE_OVERRIDE); + if old_entry.state_flags != state_flags { + profiler::stat_increment(stat::JIT_CACHE_OVERRIDE_DIFFERENT_STATE_FLAGS) } - - entry_point_count += 1; - profiler::stat_increment(stat::COMPILE_ENTRY_POINT); } } - dbg_assert!(entry_point_count > 0); - for index in check_for_unused_wasm_table_index { let pages = ctx.used_wasm_table_indices.get(&index).unwrap(); @@ -841,33 +902,15 @@ pub fn codegen_finalize_finished( } fn jit_generate_module( - basic_blocks: &Vec, + structure: Vec, + basic_blocks: &HashMap, mut cpu: CpuContext, builder: &mut WasmBuilder, wasm_table_index: WasmTableIndex, state_flags: CachedStateFlags, -) { +) -> Vec<(u32, Entry)> { builder.reset(); - let basic_block_indices: HashMap = basic_blocks - .iter() - .enumerate() - .map(|(index, block)| (block.addr, index as u32)) - .collect(); - - // set state local variable to the initial state passed as the first argument - builder.get_local(&builder.arg_local_initial_state.unsafe_clone()); - let gen_local_state = builder.set_new_local(); - - // initialise max_iterations - let gen_local_iteration_counter = if JIT_ALWAYS_USE_LOOP_SAFETY { - builder.const_i32(JIT_MAX_ITERATIONS_PER_FUNCTION as i32); - Some(builder.set_new_local()) - } - else { - None - }; - let mut register_locals = (0..8) .map(|i| { builder.load_fixed_i32(global_pointers::get_reg32_offset(i)); @@ -875,232 +918,596 @@ fn jit_generate_module( }) .collect(); + let loop_limit_local = if JIT_ALWAYS_USE_LOOP_SAFETY { + builder.const_i32(JIT_MAX_ITERATIONS_PER_FUNCTION as i32); + Some(builder.set_new_local()) + } + else { + None + }; + let exit_label = builder.block_void(); - - // main state machine loop + let exit_with_fault_label = builder.block_void(); let main_loop_label = builder.loop_void(); - - builder.block_void(); // for the default case - let exit_with_fault_label = builder.block_void(); // for the exit-with-fault case + if let Some(loop_limit_local) = loop_limit_local.as_ref() { + builder.get_local(loop_limit_local); + builder.const_i32(-1); + builder.add_i32(); + builder.tee_local(loop_limit_local); + builder.eqz_i32(); + if cfg!(feature = "profiler") { + builder.if_void(); + codegen::gen_debug_track_jit_exit(builder, 0); + builder.br(exit_label); + builder.block_end(); + } + else { + builder.br_if(exit_label); + } + } + let brtable_default = builder.block_void(); let ctx = &mut JitContext { cpu: &mut cpu, builder, register_locals: &mut register_locals, start_of_current_instruction: 0, - main_loop_label, exit_with_fault_label, exit_label, - our_wasm_table_index: wasm_table_index, - basic_block_index_local: &gen_local_state, - state_flags, }; - if let Some(gen_local_iteration_counter) = gen_local_iteration_counter.as_ref() { - profiler::stat_increment(stat::COMPILE_WITH_LOOP_SAFETY); - - // decrement max_iterations - ctx.builder.get_local(gen_local_iteration_counter); - ctx.builder.const_i32(-1); - ctx.builder.add_i32(); - ctx.builder.set_local(gen_local_iteration_counter); - - // if max_iterations == 0: return - ctx.builder.get_local(gen_local_iteration_counter); - ctx.builder.eqz_i32(); - ctx.builder.if_void(); - codegen::gen_debug_track_jit_exit(ctx.builder, 0); - ctx.builder.br(exit_label); - ctx.builder.block_end(); - } - - // generate the opening blocks for the cases - - for _ in 0..basic_blocks.len() { - ctx.builder.block_void(); - } - - ctx.builder.get_local(&gen_local_state); - ctx.builder.brtable_and_cases(basic_blocks.len() as u32); - - for (i, block) in basic_blocks.iter().enumerate() { - // Case [i] will jump after the [i]th block, so we first generate the - // block end opcode and then the code for that block - ctx.builder.block_end(); - - dbg_assert!(block.addr < block.end_addr); - - jit_generate_basic_block(ctx, block); - - let invalid_connection_to_next_block = block.end_addr != ctx.cpu.eip; - dbg_assert!(!invalid_connection_to_next_block); - - if block.has_sti { - match block.ty { - BasicBlockType::ConditionalJump { - condition, - jump_offset, - jump_offset_is_32, - .. - } => { - codegen::gen_condition_fn(ctx, condition); - ctx.builder.if_void(); - if jump_offset_is_32 { - codegen::gen_relative_jump(ctx.builder, jump_offset); - } - else { - codegen::gen_jmp_rel16(ctx.builder, jump_offset as u16); - } - ctx.builder.block_end(); + let entry_blocks = { + let mut nodes = &structure; + let result; + loop { + match &nodes[0] { + WasmStructure::Dispatcher(e) => { + result = e.clone(); + break; }, - BasicBlockType::Normal { .. } => {}, - BasicBlockType::Exit => {}, - }; - codegen::gen_debug_track_jit_exit(ctx.builder, block.last_instruction_addr); - codegen::gen_move_registers_from_locals_to_memory(ctx); - codegen::gen_fn0_const(ctx.builder, "handle_irqs"); - ctx.builder.return_(); - continue; + WasmStructure::Loop { .. } => dbg_assert!(false), + WasmStructure::BasicBlock(_) => dbg_assert!(false), + // Note: We could use these blocks as entry points, which will yield + // more entries for free, but it requires adding those to the dispatcher + // It's to be investigated if this yields a performance improvement + // See also the comment at the bottom of this function when creating entry + // points + WasmStructure::Block(children) => { + nodes = children; + }, + } } + result + }; - match &block.ty { - BasicBlockType::Exit => { - // Exit this function - codegen::gen_debug_track_jit_exit(ctx.builder, block.last_instruction_addr); - ctx.builder.br(exit_label); - }, - BasicBlockType::Normal { next_block_addr } => { - // Unconditional jump to next basic block - // - All instructions that don't change eip - // - Unconditional jump + let mut index_for_addr = HashMap::new(); + for (i, &addr) in entry_blocks.iter().enumerate() { + index_for_addr.insert(addr, i as i32); + } + for b in basic_blocks.values() { + if !index_for_addr.contains_key(&b.addr) { + let i = index_for_addr.len(); + index_for_addr.insert(b.addr, i as i32); + } + } - if Page::page_of(*next_block_addr) != Page::page_of(block.addr) { - codegen::gen_page_switch_check( - ctx, - *next_block_addr, - block.last_instruction_addr, - ); + let mut label_for_addr: HashMap)> = HashMap::new(); - #[cfg(debug_assertions)] - codegen::gen_fn2_const( - ctx.builder, - "check_page_switch", - block.addr, - *next_block_addr, - ); - } + enum Work { + WasmStructure(WasmStructure), + BlockEnd { + label: Label, + targets: Vec, + olds: HashMap)>, + }, + LoopEnd { + label: Label, + entries: Vec, + olds: HashMap)>, + }, + } + let mut work: VecDeque = structure + .into_iter() + .map(|x| Work::WasmStructure(x)) + .collect(); - let next_basic_block_index = *basic_block_indices - .get(&next_block_addr) - .expect("basic_block_indices.get (Normal)"); + while let Some(block) = work.pop_front() { + let next_addr: Option> = work.iter().find_map(|x| match x { + Work::WasmStructure(l) => Some(l.head().collect()), + _ => None, + }); + let target_block = &ctx.builder.arg_local_initial_state.unsafe_clone(); - if next_basic_block_index == (i as u32) + 1 { - // fallthru - } - else { - // set state variable to next basic block - ctx.builder.const_i32(next_basic_block_index as i32); - ctx.builder.set_local(&gen_local_state); + match block { + Work::WasmStructure(WasmStructure::BasicBlock(addr)) => { + let block = basic_blocks.get(&addr).unwrap(); + jit_generate_basic_block(ctx, &block); - ctx.builder.br(main_loop_label); - } - }, - &BasicBlockType::ConditionalJump { - next_block_addr, - next_block_branch_taken_addr, - condition, - jump_offset, - jump_offset_is_32, - } => { - // Conditional jump to next basic block - // - jnz, jc, loop, jcxz, etc. - - codegen::gen_condition_fn(ctx, condition); - ctx.builder.if_void(); - - // Branch taken - - if jump_offset_is_32 { - codegen::gen_relative_jump(ctx.builder, jump_offset); - } - else { - codegen::gen_jmp_rel16(ctx.builder, jump_offset as u16); - } - - if let Some(next_block_branch_taken_addr) = next_block_branch_taken_addr { - let next_basic_block_branch_taken_index = *basic_block_indices - .get(&next_block_branch_taken_addr) - .expect("basic_block_indices.get (branch taken)"); - - dbg_assert!( - (block.end_addr + jump_offset as u32) & 0xFFF - == next_block_branch_taken_addr & 0xFFF - ); - - if Page::page_of(next_block_branch_taken_addr) != Page::page_of(block.addr) { - codegen::gen_page_switch_check( - ctx, - next_block_branch_taken_addr, - block.last_instruction_addr, - ); - - #[cfg(debug_assertions)] - codegen::gen_fn2_const( - ctx.builder, - "check_page_switch", - block.addr, - next_block_branch_taken_addr, - ); - } - - ctx.builder - .const_i32(next_basic_block_branch_taken_index as i32); - ctx.builder.set_local(&gen_local_state); - - ctx.builder.br(main_loop_label); - } - else { - // Jump to different page + if block.has_sti { + match block.ty { + BasicBlockType::ConditionalJump { + condition, + jump_offset, + jump_offset_is_32, + .. + } => { + codegen::gen_condition_fn(ctx, condition); + ctx.builder.if_void(); + if jump_offset_is_32 { + codegen::gen_relative_jump(ctx.builder, jump_offset); + } + else { + codegen::gen_jmp_rel16(ctx.builder, jump_offset as u16); + } + ctx.builder.block_end(); + }, + BasicBlockType::Normal { .. } => {}, + BasicBlockType::Exit => {}, + BasicBlockType::AbsoluteEip => {}, + }; codegen::gen_debug_track_jit_exit(ctx.builder, block.last_instruction_addr); - ctx.builder.br(exit_label); + codegen::gen_move_registers_from_locals_to_memory(ctx); + codegen::gen_fn0_const(ctx.builder, "handle_irqs"); + ctx.builder.return_(); + continue; } - if let Some(next_block_addr) = next_block_addr { - dbg_assert!(Page::page_of(next_block_addr) == Page::page_of(block.addr)); - // Branch not taken + match &block.ty { + BasicBlockType::Exit => { + // Exit this function + codegen::gen_debug_track_jit_exit(ctx.builder, block.last_instruction_addr); + codegen::gen_profiler_stat_increment(ctx.builder, stat::DIRECT_EXIT); + ctx.builder.br(ctx.exit_label); + }, + BasicBlockType::AbsoluteEip => { + // Check if we can stay in this module, if not exit + codegen::gen_get_eip(ctx.builder); + let new_eip = ctx.builder.set_new_local(); + codegen::gen_get_phys_eip(ctx, &new_eip); + ctx.builder.free_local(new_eip); - let next_basic_block_index = *basic_block_indices - .get(&next_block_addr) - .expect("basic_block_indices.get (branch not taken)"); + ctx.builder.const_i32(wasm_table_index.to_u16() as i32); + ctx.builder.const_i32(state_flags.to_u32() as i32); + ctx.builder.call_fn3_ret("jit_find_cache_entry_in_page"); + ctx.builder.tee_local(target_block); + ctx.builder.const_i32(0); + ctx.builder.ge_i32(); + // TODO: Could make this unconditional by including exit_label in the main br_table + ctx.builder.br_if(main_loop_label); - if next_basic_block_index == (i as u32) + 1 { - // fallthru + codegen::gen_debug_track_jit_exit(ctx.builder, block.last_instruction_addr); + ctx.builder.br(ctx.exit_label); + }, + BasicBlockType::Normal { next_block_addr } => { + // Unconditional jump to next basic block + // - All instructions that don't change eip + // - Unconditional jumps + if Page::page_of(*next_block_addr) != Page::page_of(block.addr) { + codegen::gen_profiler_stat_increment( + ctx.builder, + stat::NORMAL_PAGE_CHANGE, + ); + + codegen::gen_page_switch_check( + ctx, + *next_block_addr, + block.last_instruction_addr, + ); + + #[cfg(debug_assertions)] + codegen::gen_fn2_const( + ctx.builder, + "check_page_switch", + block.addr, + *next_block_addr, + ); + } + + if next_addr + .as_ref() + .map_or(false, |n| n.contains(next_block_addr)) + { + // Blocks are consecutive + if next_addr.unwrap().len() > 1 { + let target_index = *index_for_addr.get(next_block_addr).unwrap(); + if cfg!(feature = "profiler") { + ctx.builder.const_i32(target_index); + ctx.builder.call_fn1("debug_set_dispatcher_target"); + } + ctx.builder.const_i32(target_index); + ctx.builder.set_local(target_block); + codegen::gen_profiler_stat_increment( + ctx.builder, + stat::NORMAL_FALLTHRU_WITH_TARGET_BLOCK, + ); + } + else { + codegen::gen_profiler_stat_increment( + ctx.builder, + stat::NORMAL_FALLTHRU, + ); + } + } + else { + let &(br, target_index) = label_for_addr.get(&next_block_addr).unwrap(); + if let Some(target_index) = target_index { + if cfg!(feature = "profiler") { + ctx.builder.const_i32(target_index); + ctx.builder.call_fn1("debug_set_dispatcher_target"); + } + ctx.builder.const_i32(target_index); + ctx.builder.set_local(target_block); + codegen::gen_profiler_stat_increment( + ctx.builder, + stat::NORMAL_BRANCH_WITH_TARGET_BLOCK, + ); + } + else { + codegen::gen_profiler_stat_increment( + ctx.builder, + stat::NORMAL_BRANCH, + ); + } + ctx.builder.br(br); + } + }, + &BasicBlockType::ConditionalJump { + next_block_addr, + next_block_branch_taken_addr, + condition, + jump_offset, + jump_offset_is_32, + } => { + // Conditional jump to next basic block + // - jnz, jc, loop, jcxz, etc. + + codegen::gen_profiler_stat_increment(ctx.builder, stat::CONDITIONAL_JUMP); + codegen::gen_condition_fn(ctx, condition); + ctx.builder.if_void(); + + // Branch taken + + if jump_offset_is_32 { + codegen::gen_relative_jump(ctx.builder, jump_offset); + } + else { + codegen::gen_jmp_rel16(ctx.builder, jump_offset as u16); + } + + if let Some(next_block_branch_taken_addr) = next_block_branch_taken_addr { + dbg_assert!( + (block.end_addr + jump_offset as u32) & 0xFFF + == next_block_branch_taken_addr & 0xFFF + ); + + if Page::page_of(next_block_branch_taken_addr) + != Page::page_of(block.addr) + { + codegen::gen_profiler_stat_increment( + ctx.builder, + stat::CONDITIONAL_JUMP_PAGE_CHANGE, + ); + codegen::gen_page_switch_check( + ctx, + next_block_branch_taken_addr, + block.last_instruction_addr, + ); + + #[cfg(debug_assertions)] + codegen::gen_fn2_const( + ctx.builder, + "check_page_switch", + block.addr, + next_block_branch_taken_addr, + ); + } + + if next_addr + .as_ref() + .map_or(false, |n| n.contains(&next_block_branch_taken_addr)) + { + // blocks are consecutive + if next_addr.as_ref().unwrap().len() > 1 { + let target_index = + *index_for_addr.get(&next_block_branch_taken_addr).unwrap(); + if cfg!(feature = "profiler") { + ctx.builder.const_i32(target_index); + ctx.builder.call_fn1("debug_set_dispatcher_target"); + } + ctx.builder.const_i32(target_index); + ctx.builder.set_local(target_block); + codegen::gen_profiler_stat_increment( + ctx.builder, + stat::CONDITIONAL_JUMP_FALLTHRU_WITH_TARGET_BLOCK, + ); + } + else { + codegen::gen_profiler_stat_increment( + ctx.builder, + stat::CONDITIONAL_JUMP_FALLTHRU, + ); + } + } + else { + let &(br, target_index) = + label_for_addr.get(&next_block_branch_taken_addr).unwrap(); + if let Some(target_index) = target_index { + if cfg!(feature = "profiler") { + ctx.builder.const_i32(target_index); + ctx.builder.call_fn1("debug_set_dispatcher_target"); + } + ctx.builder.const_i32(target_index); + ctx.builder.set_local(target_block); + codegen::gen_profiler_stat_increment( + ctx.builder, + stat::CONDITIONAL_JUMP_BRANCH_WITH_TARGET_BLOCK, + ); + } + else { + codegen::gen_profiler_stat_increment( + ctx.builder, + stat::CONDITIONAL_JUMP_BRANCH, + ); + } + ctx.builder.br(br); + } + } + else { + // Jump to different page + // TODO: Could generate br_if + codegen::gen_debug_track_jit_exit( + ctx.builder, + block.last_instruction_addr, + ); + codegen::gen_profiler_stat_increment( + ctx.builder, + stat::CONDITIONAL_JUMP_EXIT, + ); + ctx.builder.br(ctx.exit_label); + } + + if let Some(next_block_addr) = next_block_addr { + dbg_assert!( + Page::page_of(next_block_addr) == Page::page_of(block.addr) + ); + // Branch not taken + + if next_addr + .as_ref() + .map_or(false, |n| n.contains(&next_block_addr)) + { + // nothing to do: blocks are consecutive + ctx.builder.else_(); + if next_addr.unwrap().len() > 1 { + let target_index = + *index_for_addr.get(&next_block_addr).unwrap(); + if cfg!(feature = "profiler") { + ctx.builder.const_i32(target_index); + ctx.builder.call_fn1("debug_set_dispatcher_target"); + } + ctx.builder.const_i32(target_index); + ctx.builder.set_local(target_block); + codegen::gen_profiler_stat_increment( + ctx.builder, + stat::CONDITIONAL_JUMP_FALLTHRU_WITH_TARGET_BLOCK, + ); + } + else { + codegen::gen_profiler_stat_increment( + ctx.builder, + stat::CONDITIONAL_JUMP_FALLTHRU, + ); + } + ctx.builder.block_end(); + } + else { + ctx.builder.else_(); + let &(br, target_index) = + label_for_addr.get(&next_block_addr).unwrap(); + if let Some(target_index) = target_index { + if cfg!(feature = "profiler") { + ctx.builder.const_i32(target_index); + ctx.builder.call_fn1("debug_set_dispatcher_target"); + } + ctx.builder.const_i32(target_index); + ctx.builder.set_local(target_block); + codegen::gen_profiler_stat_increment( + ctx.builder, + stat::CONDITIONAL_JUMP_BRANCH_WITH_TARGET_BLOCK, + ); + } + else { + codegen::gen_profiler_stat_increment( + ctx.builder, + stat::CONDITIONAL_JUMP_BRANCH, + ); + } + ctx.builder.br(br); + ctx.builder.block_end(); + } + } + else { + ctx.builder.else_(); + + // End of this page + codegen::gen_debug_track_jit_exit( + ctx.builder, + block.last_instruction_addr, + ); + codegen::gen_profiler_stat_increment( + ctx.builder, + stat::CONDITIONAL_JUMP_EXIT, + ); + ctx.builder.br(ctx.exit_label); + + ctx.builder.block_end(); + } + }, + } + }, + Work::WasmStructure(WasmStructure::Dispatcher(entries)) => { + profiler::stat_increment(stat::COMPILE_DISPATCHER); + + if cfg!(feature = "profiler") { + ctx.builder.get_local(target_block); + ctx.builder.const_i32(index_for_addr.len() as i32); + ctx.builder.call_fn2("check_dispatcher_target"); + } + + if entries.len() > BRTABLE_CUTOFF { + // generate a brtable + codegen::gen_profiler_stat_increment(ctx.builder, stat::DISPATCHER_LARGE); + let mut cases = Vec::new(); + for &addr in &entries { + let &(label, target_index) = label_for_addr.get(&addr).unwrap(); + let &index = index_for_addr.get(&addr).unwrap(); + dbg_assert!(target_index.is_none() || target_index == Some(index)); + while index as usize >= cases.len() { + cases.push(brtable_default); + } + cases[index as usize] = label; + } + ctx.builder.get_local(target_block); + ctx.builder.brtable(brtable_default, &mut cases.iter()); + } + else { + // generate a if target == block.addr then br block.label ... + codegen::gen_profiler_stat_increment(ctx.builder, stat::DISPATCHER_SMALL); + let nexts: HashSet = next_addr + .as_ref() + .map_or(HashSet::new(), |nexts| nexts.iter().copied().collect()); + for &addr in &entries { + if nexts.contains(&addr) { + continue; + } + let index = *index_for_addr.get(&addr).unwrap(); + let &(label, _) = label_for_addr.get(&addr).unwrap(); + ctx.builder.get_local(target_block); + ctx.builder.const_i32(index); + ctx.builder.eq_i32(); + ctx.builder.br_if(label); + } + } + }, + Work::WasmStructure(WasmStructure::Loop(children)) => { + profiler::stat_increment(stat::COMPILE_WASM_LOOP); + codegen::gen_profiler_stat_increment(ctx.builder, stat::LOOP); + + let entries: Vec = children[0].head().collect(); + let label = ctx.builder.loop_void(); + + if let Some(loop_limit_local) = loop_limit_local.as_ref() { + ctx.builder.get_local(loop_limit_local); + ctx.builder.const_i32(-1); + ctx.builder.add_i32(); + ctx.builder.tee_local(loop_limit_local); + ctx.builder.eqz_i32(); + if cfg!(feature = "profiler") { + ctx.builder.if_void(); + codegen::gen_debug_track_jit_exit(ctx.builder, 0); + ctx.builder.br(ctx.exit_label); ctx.builder.block_end(); } else { - ctx.builder.else_(); - - ctx.builder.const_i32(next_basic_block_index as i32); - ctx.builder.set_local(&gen_local_state); - - ctx.builder.br(main_loop_label); - - ctx.builder.block_end(); + ctx.builder.br_if(ctx.exit_label); } } - else { - ctx.builder.else_(); - // End of this page - codegen::gen_debug_track_jit_exit(ctx.builder, block.last_instruction_addr); - ctx.builder.br(exit_label); - - ctx.builder.block_end(); + let mut olds = HashMap::new(); + for &target in entries.iter() { + let index = if entries.len() == 1 { + None + } + else { + Some(*index_for_addr.get(&target).unwrap()) + }; + let old = label_for_addr.insert(target, (label, index)); + if let Some(old) = old { + olds.insert(target, old); + } } + + work.push_front(Work::LoopEnd { + label, + entries, + olds, + }); + for c in children.into_iter().rev() { + work.push_front(Work::WasmStructure(c)); + } + }, + Work::LoopEnd { + label, + entries, + olds, + } => { + for target in entries { + let old = label_for_addr.remove(&target); + dbg_assert!(old.map(|(l, _)| l) == Some(label)); + } + for (target, old) in olds { + let old = label_for_addr.insert(target, old); + dbg_assert!(old.is_none()); + } + + ctx.builder.block_end(); + }, + Work::WasmStructure(WasmStructure::Block(children)) => { + profiler::stat_increment(stat::COMPILE_WASM_BLOCK); + + let targets = next_addr.clone().unwrap(); + let label = ctx.builder.block_void(); + let mut olds = HashMap::new(); + for &target in targets.iter() { + let index = if targets.len() == 1 { + None + } + else { + Some(*index_for_addr.get(&target).unwrap()) + }; + let old = label_for_addr.insert(target, (label, index)); + if let Some(old) = old { + olds.insert(target, old); + } + } + + work.push_front(Work::BlockEnd { + label, + targets, + olds, + }); + for c in children.into_iter().rev() { + work.push_front(Work::WasmStructure(c)); + } + }, + Work::BlockEnd { + label, + targets, + olds, + } => { + for target in targets { + let old = label_for_addr.remove(&target); + dbg_assert!(old.map(|(l, _)| l) == Some(label)); + } + for (target, old) in olds { + let old = label_for_addr.insert(target, old); + dbg_assert!(old.is_none()); + } + + ctx.builder.block_end(); }, } } + dbg_assert!(label_for_addr.is_empty()); + + { + ctx.builder.block_end(); // default case for the brtable + ctx.builder.unreachable(); + } + { + ctx.builder.block_end(); // main loop + } { // exit-with-fault case ctx.builder.block_end(); @@ -1108,28 +1515,61 @@ fn jit_generate_module( codegen::gen_fn0_const(ctx.builder, "trigger_fault_end_jit"); ctx.builder.return_(); } - - ctx.builder.block_end(); // default case - ctx.builder.unreachable(); - - ctx.builder.block_end(); // loop - - ctx.builder.block_end(); // exit - codegen::gen_move_registers_from_locals_to_memory(ctx); - - ctx.builder.free_local(gen_local_state.unsafe_clone()); - if let Some(local) = gen_local_iteration_counter { - ctx.builder.free_local(local); + { + // exit + ctx.builder.block_end(); + codegen::gen_move_registers_from_locals_to_memory(ctx); } + if let Some(local) = loop_limit_local { + ctx.builder.free_local(local); + } for local in ctx.register_locals.drain(..) { ctx.builder.free_local(local); } ctx.builder.finish(); + + let mut entries = Vec::new(); + + for &addr in entry_blocks.iter() { + let block = basic_blocks.get(&addr).unwrap(); + let index = *index_for_addr.get(&addr).unwrap(); + + profiler::stat_increment(stat::COMPILE_ENTRY_POINT); + + dbg_assert!(block.addr < block.end_addr); + // Note: We also insert blocks that weren't originally marked as entries here + // This doesn't have any downside, besides making the hash table slightly larger + + let initial_state = index.safe_to_u16(); + + let entry = Entry { + wasm_table_index, + initial_state, + state_flags, + + #[cfg(any(debug_assertions, feature = "profiler"))] + len: block.end_addr - block.addr, + + #[cfg(debug_assertions)] + opcode: memory::read32s(block.addr) as u32, + }; + entries.push((block.addr, entry)); + } + + for b in basic_blocks.values() { + if b.is_entry_block { + dbg_assert!(entries.iter().find(|(addr, _)| *addr == b.addr).is_some()); + } + } + + return entries; } fn jit_generate_basic_block(ctx: &mut JitContext, block: &BasicBlock) { + profiler::stat_increment(stat::COMPILE_BASIC_BLOCK); + let start_addr = block.addr; let last_instruction_addr = block.last_instruction_addr; let stop_addr = block.end_addr; @@ -1137,6 +1577,11 @@ fn jit_generate_basic_block(ctx: &mut JitContext, block: &BasicBlock) { // First iteration of do-while assumes the caller confirms this condition dbg_assert!(!is_near_end_of_page(start_addr)); + if cfg!(feature = "profiler") { + ctx.builder.const_i32(start_addr as i32); + ctx.builder.call_fn1("enter_basic_block"); + } + codegen::gen_increment_timestamp_counter(ctx.builder, block.number_of_instructions as i32); ctx.cpu.eip = start_addr; @@ -1429,6 +1874,10 @@ pub fn jit_get_wasm_table_index_free_list_count() -> u32 { 0 } } +#[no_mangle] +pub fn jit_get_cache_size() -> u32 { + if cfg!(feature = "profiler") { get_jit_state().cache.len() as u32 } else { 0 } +} #[cfg(feature = "profiler")] pub fn check_missed_entry_points(phys_address: u32, state_flags: CachedStateFlags) { @@ -1474,3 +1923,29 @@ pub fn check_missed_entry_points(phys_address: u32, state_flags: CachedStateFlag } } } + +#[no_mangle] +pub fn debug_set_dispatcher_target(_target_index: i32) { + //dbg_log!("About to call dispatcher target_index={}", target_index); +} + +#[no_mangle] +pub fn check_dispatcher_target(target_index: i32, max: i32) { + //dbg_log!("Dispatcher called target={}", target_index); + dbg_assert!(target_index >= 0); + dbg_assert!(target_index < max); +} + +#[no_mangle] +pub fn enter_basic_block(phys_eip: u32) { + let eip = + unsafe { cpu::translate_address_read(*global_pointers::instruction_pointer).unwrap() }; + if eip != phys_eip { + dbg_log!( + "enter basic block failed block=0x{:x} actual eip=0x{:x}", + phys_eip, + eip + ); + panic!(); + } +} diff --git a/src/rust/jit_instructions.rs b/src/rust/jit_instructions.rs index 619fd9d8..12beac29 100644 --- a/src/rust/jit_instructions.rs +++ b/src/rust/jit_instructions.rs @@ -2702,33 +2702,37 @@ pub fn instr32_E9_jit(ctx: &mut JitContext, imm: u32) { } pub fn instr16_C2_jit(ctx: &mut JitContext, imm16: u32) { + ctx.builder.const_i32(0); codegen::gen_pop16(ctx); codegen::gen_add_cs_offset(ctx); - let new_eip = ctx.builder.set_new_local(); + ctx.builder + .store_aligned_i32(global_pointers::instruction_pointer as u32); codegen::gen_adjust_stack_reg(ctx, imm16); - codegen::gen_absolute_indirect_jump(ctx, new_eip); } pub fn instr32_C2_jit(ctx: &mut JitContext, imm16: u32) { + ctx.builder.const_i32(0); codegen::gen_pop32s(ctx); codegen::gen_add_cs_offset(ctx); - let new_eip = ctx.builder.set_new_local(); + ctx.builder + .store_aligned_i32(global_pointers::instruction_pointer as u32); codegen::gen_adjust_stack_reg(ctx, imm16); - codegen::gen_absolute_indirect_jump(ctx, new_eip); } pub fn instr16_C3_jit(ctx: &mut JitContext) { + ctx.builder.const_i32(0); codegen::gen_pop16(ctx); codegen::gen_add_cs_offset(ctx); - let new_eip = ctx.builder.set_new_local(); - codegen::gen_absolute_indirect_jump(ctx, new_eip); + ctx.builder + .store_aligned_i32(global_pointers::instruction_pointer as u32); } pub fn instr32_C3_jit(ctx: &mut JitContext) { + ctx.builder.const_i32(0); codegen::gen_pop32s(ctx); codegen::gen_add_cs_offset(ctx); - let new_eip = ctx.builder.set_new_local(); - codegen::gen_absolute_indirect_jump(ctx, new_eip); + ctx.builder + .store_aligned_i32(global_pointers::instruction_pointer as u32); } pub fn instr16_C9_jit(ctx: &mut JitContext) { codegen::gen_leave(ctx, false); } @@ -3782,19 +3786,23 @@ pub fn instr16_FF_2_mem_jit(ctx: &mut JitContext, modrm_byte: ModrmByte) { codegen::gen_push16(ctx, &value_local); ctx.builder.free_local(value_local); - codegen::gen_absolute_indirect_jump(ctx, new_eip); + ctx.builder.const_i32(0); + ctx.builder.get_local(&new_eip); + ctx.builder + .store_aligned_i32(global_pointers::instruction_pointer as u32); + ctx.builder.free_local(new_eip); } pub fn instr16_FF_2_reg_jit(ctx: &mut JitContext, r: u32) { - codegen::gen_get_reg16(ctx, r); - codegen::gen_add_cs_offset(ctx); - let new_eip = ctx.builder.set_new_local(); - codegen::gen_get_real_eip(ctx); let value_local = ctx.builder.set_new_local(); codegen::gen_push16(ctx, &value_local); ctx.builder.free_local(value_local); - codegen::gen_absolute_indirect_jump(ctx, new_eip); + ctx.builder.const_i32(0); + codegen::gen_get_reg16(ctx, r); + codegen::gen_add_cs_offset(ctx); + ctx.builder + .store_aligned_i32(global_pointers::instruction_pointer as u32); } pub fn instr32_FF_2_mem_jit(ctx: &mut JitContext, modrm_byte: ModrmByte) { codegen::gen_modrm_resolve_safe_read32(ctx, modrm_byte); @@ -3806,44 +3814,52 @@ pub fn instr32_FF_2_mem_jit(ctx: &mut JitContext, modrm_byte: ModrmByte) { codegen::gen_push32(ctx, &value_local); ctx.builder.free_local(value_local); - codegen::gen_absolute_indirect_jump(ctx, new_eip); + ctx.builder.const_i32(0); + ctx.builder.get_local(&new_eip); + ctx.builder + .store_aligned_i32(global_pointers::instruction_pointer as u32); + ctx.builder.free_local(new_eip); } pub fn instr32_FF_2_reg_jit(ctx: &mut JitContext, r: u32) { - codegen::gen_get_reg32(ctx, r); - codegen::gen_add_cs_offset(ctx); - let new_eip = ctx.builder.set_new_local(); - codegen::gen_get_real_eip(ctx); let value_local = ctx.builder.set_new_local(); codegen::gen_push32(ctx, &value_local); ctx.builder.free_local(value_local); - codegen::gen_absolute_indirect_jump(ctx, new_eip); + ctx.builder.const_i32(0); + codegen::gen_get_reg32(ctx, r); + codegen::gen_add_cs_offset(ctx); + ctx.builder + .store_aligned_i32(global_pointers::instruction_pointer as u32); } pub fn instr16_FF_4_mem_jit(ctx: &mut JitContext, modrm_byte: ModrmByte) { + ctx.builder.const_i32(0); codegen::gen_modrm_resolve_safe_read16(ctx, modrm_byte); codegen::gen_add_cs_offset(ctx); - let new_eip = ctx.builder.set_new_local(); - codegen::gen_absolute_indirect_jump(ctx, new_eip); + ctx.builder + .store_aligned_i32(global_pointers::instruction_pointer as u32); } pub fn instr16_FF_4_reg_jit(ctx: &mut JitContext, r: u32) { + ctx.builder.const_i32(0); codegen::gen_get_reg16(ctx, r); codegen::gen_add_cs_offset(ctx); - let new_eip = ctx.builder.set_new_local(); - codegen::gen_absolute_indirect_jump(ctx, new_eip); + ctx.builder + .store_aligned_i32(global_pointers::instruction_pointer as u32); } pub fn instr32_FF_4_mem_jit(ctx: &mut JitContext, modrm_byte: ModrmByte) { + ctx.builder.const_i32(0); codegen::gen_modrm_resolve_safe_read32(ctx, modrm_byte); codegen::gen_add_cs_offset(ctx); - let new_eip = ctx.builder.set_new_local(); - codegen::gen_absolute_indirect_jump(ctx, new_eip); + ctx.builder + .store_aligned_i32(global_pointers::instruction_pointer as u32); } pub fn instr32_FF_4_reg_jit(ctx: &mut JitContext, r: u32) { + ctx.builder.const_i32(0); codegen::gen_get_reg32(ctx, r); codegen::gen_add_cs_offset(ctx); - let new_eip = ctx.builder.set_new_local(); - codegen::gen_absolute_indirect_jump(ctx, new_eip); + ctx.builder + .store_aligned_i32(global_pointers::instruction_pointer as u32); } pub fn instr16_FF_6_mem_jit(ctx: &mut JitContext, modrm_byte: ModrmByte) { diff --git a/src/rust/lib.rs b/src/rust/lib.rs index 172054b4..31cfc8e2 100644 --- a/src/rust/lib.rs +++ b/src/rust/lib.rs @@ -21,6 +21,7 @@ pub mod profiler; mod analysis; mod codegen; mod config; +mod control_flow; mod cpu_context; mod gen; mod jit; diff --git a/src/rust/opstats.rs b/src/rust/opstats.rs index 2f37db8c..c4a9b469 100644 --- a/src/rust/opstats.rs +++ b/src/rust/opstats.rs @@ -2,15 +2,15 @@ use cpu; use cpu::global_pointers; use wasmgen::wasm_builder::WasmBuilder; -struct Instruction { - prefixes: Vec, - opcode: u8, - fixed_g: u8, - is_mem: bool, - is_0f: bool, +pub struct Instruction { + pub prefixes: Vec, + pub opcode: u8, + pub fixed_g: u8, + pub is_mem: bool, + pub is_0f: bool, } -fn decode(mut instruction: u32) -> Instruction { +pub fn decode(mut instruction: u32) -> Instruction { let mut is_0f = false; let mut prefixes = vec![]; let mut final_opcode = 0; diff --git a/src/rust/profiler.rs b/src/rust/profiler.rs index 3559fc75..bd3c105b 100644 --- a/src/rust/profiler.rs +++ b/src/rust/profiler.rs @@ -1,4 +1,4 @@ -#[allow(dead_code, non_camel_case_types)] +#[allow(non_camel_case_types)] pub enum stat { COMPILE, COMPILE_SUCCESS, @@ -7,9 +7,15 @@ pub enum stat { COMPILE_WITH_LOOP_SAFETY, COMPILE_PAGE, COMPILE_BASIC_BLOCK, + COMPILE_DUPLICATED_BASIC_BLOCK, + COMPILE_WASM_BLOCK, + COMPILE_WASM_LOOP, + COMPILE_DISPATCHER, COMPILE_ENTRY_POINT, COMPILE_WASM_TOTAL_BYTES, - CACHE_MISMATCH, + + JIT_CACHE_OVERRIDE, + JIT_CACHE_OVERRIDE_DIFFERENT_STATE_FLAGS, RUN_INTERPRETED, RUN_INTERPRETED_PENDING, @@ -22,6 +28,25 @@ pub enum stat { RUN_FROM_CACHE, RUN_FROM_CACHE_STEPS, + DIRECT_EXIT, + INDIRECT_JUMP, + INDIRECT_JUMP_NO_ENTRY, + NORMAL_PAGE_CHANGE, + NORMAL_FALLTHRU, + NORMAL_FALLTHRU_WITH_TARGET_BLOCK, + NORMAL_BRANCH, + NORMAL_BRANCH_WITH_TARGET_BLOCK, + CONDITIONAL_JUMP, + CONDITIONAL_JUMP_PAGE_CHANGE, + CONDITIONAL_JUMP_EXIT, + CONDITIONAL_JUMP_FALLTHRU, + CONDITIONAL_JUMP_FALLTHRU_WITH_TARGET_BLOCK, + CONDITIONAL_JUMP_BRANCH, + CONDITIONAL_JUMP_BRANCH_WITH_TARGET_BLOCK, + DISPATCHER_SMALL, + DISPATCHER_LARGE, + LOOP, + FAILED_PAGE_CHANGE, SAFE_READ_FAST, diff --git a/src/rust/wasmgen/wasm_builder.rs b/src/rust/wasmgen/wasm_builder.rs index 0aa85128..46d3a3db 100644 --- a/src/rust/wasmgen/wasm_builder.rs +++ b/src/rust/wasmgen/wasm_builder.rs @@ -869,30 +869,31 @@ impl WasmBuilder { #[allow(dead_code)] pub fn drop_(&mut self) { self.instruction_body.push(op::OP_DROP); } - // Generate a br_table where an input of [i] will branch [i]th outer block, - // where [i] is passed on the wasm stack - pub fn brtable_and_cases(&mut self, cases_count: u32) { + pub fn brtable( + &mut self, + default_case: Label, + cases: &mut dyn std::iter::ExactSizeIterator, + ) { self.instruction_body.push(op::OP_BRTABLE); - write_leb_u32(&mut self.instruction_body, cases_count); - - for i in 0..(cases_count + 1) { - write_leb_u32(&mut self.instruction_body, i); + write_leb_u32(&mut self.instruction_body, cases.len() as u32); + for case in cases { + self.write_label(*case); } + self.write_label(default_case); } pub fn br(&mut self, label: Label) { - let depth = *self.label_to_depth.get(&label).unwrap(); - dbg_assert!(depth <= self.label_stack.len()); self.instruction_body.push(op::OP_BR); - write_leb_u32( - &mut self.instruction_body, - (self.label_stack.len() - depth) as u32, - ); + self.write_label(label); } pub fn br_if(&mut self, label: Label) { + self.instruction_body.push(op::OP_BRIF); + self.write_label(label); + } + + fn write_label(&mut self, label: Label) { let depth = *self.label_to_depth.get(&label).unwrap(); dbg_assert!(depth <= self.label_stack.len()); - self.instruction_body.push(op::OP_BRIF); write_leb_u32( &mut self.instruction_body, (self.label_stack.len() - depth) as u32,