use std::collections::{BTreeMap, HashMap, HashSet};
use std::iter::FromIterator;

use analysis::AnalysisType;
use codegen;
use cpu;
use cpu_context::CpuContext;
use global_pointers;
use jit_instructions;
use page::Page;
use profiler;
use profiler::stat;
use state_flags::CachedStateFlags;
use util::SafeToU16;
use wasmgen::module_init::{WasmBuilder, WasmLocal};
use wasmgen::wasm_util::WasmBuf;

pub const WASM_TABLE_SIZE: u32 = 900;

pub const HASH_PRIME: u32 = 6151;

pub const CHECK_JIT_CACHE_ARRAY_INVARIANTS: bool = false;

pub const JIT_MAX_ITERATIONS_PER_FUNCTION: u32 = 10000;

pub const JIT_ALWAYS_USE_LOOP_SAFETY: bool = true;

pub const JIT_THRESHOLD: u32 = 200 * 1000;

const CODE_CACHE_SEARCH_SIZE: u32 = 8;
const MAX_INSTRUCTION_LENGTH: u32 = 16;

mod jit_cache_array {
    use page::Page;
    use state_flags::CachedStateFlags;

    // Note: For performance reasons, this is global state. See jit_find_cache_entry

    const NO_NEXT_ENTRY: u32 = 0xffff_ffff;

    // When changing this, you also need to bump global-base
    pub const SIZE: u32 = 0x40000;
    pub const MASK: u32 = SIZE - 1;

    #[derive(Copy, Clone)]
    pub struct Entry {
        pub start_addr: u32,

        #[cfg(any(debug_assertions, feature = "profiler"))]
        pub len: u32,

        #[cfg(debug_assertions)]
        pub opcode: u32,

        // an index into jit_cache_array for the next code_cache entry within the same physical page
        next_index_same_page: u32,

        pub initial_state: u16,
        pub wasm_table_index: u16,
        pub state_flags: CachedStateFlags,
        pub pending: bool,
    }

    impl Entry {
        pub fn create(
            start_addr: u32,
            next_index_same_page: Option<u32>,
            wasm_table_index: u16,
            initial_state: u16,
            state_flags: CachedStateFlags,
            pending: bool,
        ) -> Entry {
            let next_index_same_page = next_index_same_page.unwrap_or(NO_NEXT_ENTRY);
            Entry {
                start_addr,
                next_index_same_page,
                wasm_table_index,
                initial_state,
                state_flags,
                pending,

                #[cfg(any(debug_assertions, feature = "profiler"))]
                len: 0,

                #[cfg(debug_assertions)]
                opcode: 0,
            }
        }

        pub fn next_index_same_page(&self) -> Option<u32> {
            if self.next_index_same_page == NO_NEXT_ENTRY {
                None
            }
            else {
                Some(self.next_index_same_page)
            }
        }

        pub fn set_next_index_same_page(&mut self, next_index: Option<u32>) {
            if let Some(i) = next_index {
                self.next_index_same_page = i
            }
            else {
                self.next_index_same_page = NO_NEXT_ENTRY
            }
        }
    }

    const DEFAULT_ENTRY: Entry = Entry {
        start_addr: 0,
        next_index_same_page: NO_NEXT_ENTRY,
        wasm_table_index: 0,
        initial_state: 0,
        state_flags: CachedStateFlags::EMPTY,
        pending: false,

        #[cfg(any(debug_assertions, feature = "profiler"))]
        len: 0,

        #[cfg(debug_assertions)]
        opcode: 0,
    };

    #[allow(non_upper_case_globals)]
    pub const jit_cache_array: *mut Entry = ::global_pointers::JIT_CACHE_ARRAY as *mut Entry;

    // Static size assertions on Entry. The target array sizes were lost in extraction and are
    // recomputed here from the field layout above, assuming CachedStateFlags is a u32 newtype.
    #[allow(unreachable_code)]
    #[cfg(debug_assertions)]
    unsafe fn _static_assert() { std::mem::transmute::<Entry, [u8; 28]>(panic!()); }

    #[allow(unreachable_code)]
    #[cfg(all(not(debug_assertions), not(feature = "profiler")))]
    unsafe fn _static_assert() { std::mem::transmute::<Entry, [u8; 20]>(panic!()); }

    // XXX: Probably doesn't need to be statically allocated
    #[allow(non_upper_case_globals)]
    pub const page_first_entry: *mut u32 = ::global_pointers::JIT_PAGE_FIRST_ENTRY as *mut u32;

    pub fn get_page_index(page: Page) -> Option<u32> {
        let index = unsafe { *page_first_entry.offset(page.to_u32() as isize) };
        if index == NO_NEXT_ENTRY { None } else { Some(index) }
    }

    pub fn set_page_index(page: Page, index: Option<u32>) {
        let index = index.unwrap_or(NO_NEXT_ENTRY);
        unsafe { *page_first_entry.offset(page.to_u32() as isize) = index }
    }

    pub fn get(i: u32) -> &'static Entry { unsafe { &*jit_cache_array.offset(i as isize) } }
    pub fn get_mut(i: u32) -> &'static mut Entry {
        unsafe { &mut *jit_cache_array.offset(i as isize) }
    }

    fn set(i: u32, entry: Entry) {
        unsafe { *jit_cache_array.offset(i as isize) = entry };
    }

    pub fn insert(index: u32, mut entry: Entry) {
        let page = Page::page_of(entry.start_addr);

        let previous_entry_index = get_page_index(page);

        if let Some(previous_entry_index) = previous_entry_index {
            let previous_entry = get(previous_entry_index);

            if previous_entry.start_addr != 0 {
                dbg_assert!(
                    Page::page_of(previous_entry.start_addr) == Page::page_of(entry.start_addr)
                );
            }
        }

        set_page_index(page, Some(index));
        entry.set_next_index_same_page(previous_entry_index);

        set(index, entry);
    }

    pub fn remove(index: u32) {
        let page = Page::page_of((get(index)).start_addr);

        let mut page_index = get_page_index(page);
        let mut did_remove = false;

        if page_index == Some(index) {
            set_page_index(page, (get(index)).next_index_same_page());
            did_remove = true;
        }
        else {
            while let Some(page_index_ok) = page_index {
                let next_index = (get(page_index_ok)).next_index_same_page();
                if next_index == Some(index) {
                    (get_mut(page_index_ok))
                        .set_next_index_same_page((get(index)).next_index_same_page());
                    did_remove = true;
                    break;
                }
                page_index = next_index;
            }
        }

        (get_mut(index)).set_next_index_same_page(None);

        dbg_assert!(did_remove);
    }

    pub fn clear() {
        unsafe {
            for i in 0..SIZE {
                *jit_cache_array.offset(i as isize) = DEFAULT_ENTRY;
            }

            for i in 0..0x100000 {
                *page_first_entry.offset(i) = NO_NEXT_ENTRY;
            }
        }
    }

    pub fn check_invariants() {
        if !::jit::CHECK_JIT_CACHE_ARRAY_INVARIANTS {
            return;
        }

        // there are no loops in the linked lists
        // https://en.wikipedia.org/wiki/Cycle_detection#Floyd's_Tortoise_and_Hare
        for i in 0..(1 << 20) {
            let mut slow = get_page_index(Page::page_of(i << 12));
            let mut fast = slow;

            while let Some(fast_ok) = fast {
                fast = (get(fast_ok)).next_index_same_page();
                slow = (get(slow.unwrap())).next_index_same_page();

                if let Some(fast_ok) = fast {
                    fast = (get(fast_ok)).next_index_same_page();
                }
                else {
                    break;
                }

                dbg_assert!(slow != fast);
            }
        }

        let mut wasm_table_index_to_jit_cache_index = [0; ::jit::WASM_TABLE_SIZE as usize];

        for i in 0..SIZE {
            let entry = get(i);
            dbg_assert!(entry.next_index_same_page().map_or(true, |i| i < SIZE));

            if entry.pending {
                dbg_assert!(entry.start_addr != 0);
                dbg_assert!(entry.wasm_table_index != 0);
            }
            else {
                // an invalid entry has both its start_addr and wasm_table_index set to 0
                // neither start_addr nor wasm_table_index are 0 for any valid entry
                dbg_assert!((entry.start_addr == 0) == (entry.wasm_table_index == 0));
            }

            // having a next entry implies validity
            dbg_assert!(entry.next_index_same_page() == None || entry.start_addr != 0);

            // any valid wasm_table_index can only be used within a single page
            if entry.wasm_table_index != 0 {
                let j = wasm_table_index_to_jit_cache_index[entry.wasm_table_index as usize];

                if j != 0 {
                    let other_entry = get(j);
                    dbg_assert!(other_entry.wasm_table_index == entry.wasm_table_index);
                    dbg_assert!(
                        Page::page_of(other_entry.start_addr) == Page::page_of(entry.start_addr)
                    );
                }
                else {
                    wasm_table_index_to_jit_cache_index[entry.wasm_table_index as usize] = i as u32;
                }
            }

            if entry.start_addr != 0 {
                // valid entries can be reached from page_first_entry
                let mut reached = false;

                let page = Page::page_of(entry.start_addr);
                let mut cache_array_index = get_page_index(page);

                while let Some(index) = cache_array_index {
                    let other_entry = get(index);

                    if i as u32 == index {
                        reached = true;
                        break;
                    }

                    cache_array_index = other_entry.next_index_same_page();
                }

                dbg_assert!(reached);
            }
        }
    }
}
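// Overview of the module above (orientation note derived from the code in jit_cache_array,
// not part of the original source): entries live in a fixed-size, statically addressed array
// that is probed like an open-addressing hash table keyed by physical address (see
// jit_find_cache_entry and create_cache_entry, which scan CODE_CACHE_SEARCH_SIZE consecutive
// slots). Independently, all entries belonging to one physical page are chained through
// next_index_same_page, with the list head stored in page_first_entry, so a write to a page
// can invalidate every entry of that page without scanning the whole array.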
pub struct JitState {
    // as an alternative to HashSet, we could use a bitmap of 4096 bits here
    // (faster, but uses much more memory)
    // or a compressed bitmap (likely faster)
    hot_code_addresses: [u32; HASH_PRIME as usize],
    wasm_table_index_free_list: Vec<u16>,
    wasm_table_index_pending_free: Vec<u16>,
    entry_points: HashMap<Page, HashSet<u16>>,
    wasm_builder: WasmBuilder,
}

impl JitState {
    pub fn create_and_initialise() -> JitState {
        let mut wasm_builder = WasmBuilder::new();

        jit_cache_array::clear();
        wasm_builder.init();

        // don't assign 0 (XXX: Check)
        let wasm_table_indices = 1..=(WASM_TABLE_SIZE - 1) as u16;

        let mut c = JitState {
            hot_code_addresses: [0; HASH_PRIME as usize],
            wasm_table_index_free_list: Vec::from_iter(wasm_table_indices),
            wasm_table_index_pending_free: vec![],
            entry_points: HashMap::new(),
            wasm_builder,
        };
        jit_empty_cache(&mut c);
        c
    }
}

#[derive(PartialEq, Eq)]
enum BasicBlockType {
    Normal {
        next_block_addr: u32,
    },
    ConditionalJump {
        next_block_addr: Option<u32>,
        next_block_branch_taken_addr: Option<u32>,
        condition: u8,
        jump_offset: i32,
        jump_offset_is_32: bool,
    },
    Exit,
}

struct BasicBlock {
    addr: u32,
    last_instruction_addr: u32,
    end_addr: u32,
    is_entry_block: bool,
    ty: BasicBlockType,
    number_of_instructions: u32,
}

#[repr(C)]
#[derive(Copy, Clone)]
pub struct cached_code {
    pub wasm_table_index: u16,
    pub initial_state: u16,
}

impl cached_code {
    const NONE: cached_code = cached_code {
        wasm_table_index: 0,
        initial_state: 0,
    };
}

pub struct JitContext<'a> {
    pub cpu: &'a mut CpuContext,
    pub builder: &'a mut WasmBuilder,
    pub register_locals: &'a mut Vec<WasmLocal>,
    pub start_of_current_instruction: u32,
    pub current_brtable_depth: u32,
}

pub const JIT_INSTR_BLOCK_BOUNDARY_FLAG: u32 = 1 << 0;

fn jit_hot_hash_page(page: Page) -> u32 { page.to_u32() % HASH_PRIME }

fn is_near_end_of_page(address: u32) -> bool {
    address & 0xFFF >= 0x1000 - MAX_INSTRUCTION_LENGTH
}

pub fn jit_find_cache_entry(phys_address: u32, state_flags: CachedStateFlags) -> cached_code {
    if is_near_end_of_page(phys_address) {
        profiler::stat_increment(stat::RUN_INTERPRETED_NEAR_END_OF_PAGE);
    }

    let mut run_interpreted_reason = None;

    for i in 0..CODE_CACHE_SEARCH_SIZE {
        let index = (phys_address + i) & jit_cache_array::MASK;
        let entry = jit_cache_array::get(index);

        if entry.start_addr == phys_address {
            if entry.pending {
                run_interpreted_reason = Some(stat::RUN_INTERPRETED_PENDING)
            }

            if entry.state_flags != state_flags {
                run_interpreted_reason = Some(stat::RUN_INTERPRETED_DIFFERENT_STATE)
            }
        }

        if is_near_end_of_page(phys_address) {
            dbg_assert!(entry.start_addr != phys_address);
        }

        if !entry.pending && entry.start_addr == phys_address && entry.state_flags == state_flags {
            #[cfg(debug_assertions)] // entry.opcode is not defined otherwise
            {
                dbg_assert!(cpu::read32(entry.start_addr) == entry.opcode);
            }

            return cached_code {
                wasm_table_index: entry.wasm_table_index,
                initial_state: entry.initial_state,
            };
        }
    }

    if let Some(reason) = run_interpreted_reason {
        profiler::stat_increment(reason);
    }

    cached_code::NONE
}

pub fn record_entry_point(ctx: &mut JitState, phys_address: u32) {
    if is_near_end_of_page(phys_address) {
        return;
    }
    let page = Page::page_of(phys_address);
    let offset_in_page = phys_address as u16 & 0xFFF;
    let mut is_new = false;
    ctx.entry_points
        .entry(page)
        .or_insert_with(|| {
            is_new = true;
            HashSet::new()
        })
        .insert(offset_in_page);

    if is_new {
        cpu::tlb_set_has_code(page, true);
    }
}
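// The helpers defined earlier in this file drive the interpret-vs-compile decision:
// is_near_end_of_page keeps the JIT away from instructions that might straddle a 4 KiB page,
// and jit_hot_hash_page buckets pages into HASH_PRIME hotness counters. A minimal,
// hypothetical sanity check (not part of the original test suite) illustrating both:
#[cfg(test)]
mod page_heuristic_tests {
    use super::*;

    #[test]
    fn near_end_of_page_boundary() {
        // Compilation is avoided within MAX_INSTRUCTION_LENGTH bytes of a page end
        assert!(!is_near_end_of_page(0x1000));
        assert!(!is_near_end_of_page(0x1fef));
        assert!(is_near_end_of_page(0x1ff0));
        assert!(is_near_end_of_page(0x1fff));
    }

    #[test]
    fn hot_hash_stays_in_range() {
        // Hotness counters are indexed modulo HASH_PRIME
        assert!(jit_hot_hash_page(Page::page_of(0x12345678)) < HASH_PRIME);
    }
}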
fn jit_find_basic_blocks(
    page: Page,
    entry_points: &HashSet<u16>,
    cpu: CpuContext,
) -> (Vec<BasicBlock>, bool) {
    let mut to_visit_stack: Vec<u16> = entry_points.iter().cloned().collect();
    let mut marked_as_entry: HashSet<u16> = entry_points.clone();
    let page_high_bits = page.to_address();
    let mut basic_blocks: BTreeMap<u32, BasicBlock> = BTreeMap::new();
    let mut requires_loop_limit = false;

    while let Some(to_visit_offset) = to_visit_stack.pop() {
        let to_visit = to_visit_offset as u32 | page_high_bits;
        if basic_blocks.contains_key(&to_visit) {
            continue;
        }
        if is_near_end_of_page(to_visit) {
            // Empty basic block, don't insert
            profiler::stat_increment(stat::COMPILE_CUT_OFF_AT_END_OF_PAGE);
            continue;
        }

        let mut current_address = to_visit;
        let mut current_block = BasicBlock {
            addr: current_address,
            last_instruction_addr: 0,
            end_addr: 0,
            ty: BasicBlockType::Exit,
            is_entry_block: false,
            number_of_instructions: 0,
        };
        loop {
            let addr_before_instruction = current_address;
            let mut ctx = &mut CpuContext {
                eip: current_address,
                ..cpu
            };
            let analysis = ::analysis::analyze_step(&mut ctx);
            current_block.number_of_instructions += 1;
            let has_next_instruction = !analysis.no_next_instruction;
            current_address = ctx.eip;

            match analysis.ty {
                AnalysisType::Normal => {
                    dbg_assert!(has_next_instruction);

                    if basic_blocks.contains_key(&current_address) {
                        current_block.last_instruction_addr = addr_before_instruction;
                        current_block.end_addr = current_address;
                        dbg_assert!(!is_near_end_of_page(current_address));
                        current_block.ty = BasicBlockType::Normal {
                            next_block_addr: current_address,
                        };
                        break;
                    }
                },
                AnalysisType::Jump {
                    offset,
                    is_32,
                    condition: Some(condition),
                } => {
                    // conditional jump: continue at next and continue at jump target
                    let jump_target = if is_32 {
                        current_address.wrapping_add(offset as u32)
                    }
                    else {
                        ctx.cs_offset.wrapping_add(
                            (current_address
                                .wrapping_sub(ctx.cs_offset)
                                .wrapping_add(offset as u32))
                                & 0xFFFF,
                        )
                    };

                    dbg_assert!(has_next_instruction);
                    to_visit_stack.push(current_address as u16 & 0xFFF);

                    let next_block_branch_taken_addr;

                    if Page::page_of(jump_target) == page && !is_near_end_of_page(jump_target) {
                        to_visit_stack.push(jump_target as u16 & 0xFFF);

                        next_block_branch_taken_addr = Some(jump_target);

                        // Very simple heuristic for "infinite loops": This
                        // detects Linux's "calibrating delay loop"
                        if jump_target == current_block.addr {
                            dbg_log!("Basic block looping back to front");
                            requires_loop_limit = true;
                        }
                    }
                    else {
                        next_block_branch_taken_addr = None;
                    }

                    let next_block_addr = if is_near_end_of_page(current_address) {
                        None
                    }
                    else {
                        Some(current_address)
                    };

                    current_block.ty = BasicBlockType::ConditionalJump {
                        next_block_addr,
                        next_block_branch_taken_addr,
                        condition,
                        jump_offset: offset,
                        jump_offset_is_32: is_32,
                    };

                    current_block.last_instruction_addr = addr_before_instruction;
                    current_block.end_addr = current_address;

                    break;
                },
                AnalysisType::Jump {
                    offset,
                    is_32,
                    condition: None,
                } => {
                    // non-conditional jump: continue at jump target
                    let jump_target = if is_32 {
                        current_address.wrapping_add(offset as u32)
                    }
                    else {
                        ctx.cs_offset.wrapping_add(
                            (current_address
                                .wrapping_sub(ctx.cs_offset)
                                .wrapping_add(offset as u32))
                                & 0xFFFF,
                        )
                    };

                    if has_next_instruction {
                        // Execution will eventually come back to the next instruction (CALL)
                        marked_as_entry.insert(current_address as u16 & 0xFFF);
                        to_visit_stack.push(current_address as u16 & 0xFFF);
                    }

                    if Page::page_of(jump_target) == page && !is_near_end_of_page(jump_target) {
                        current_block.ty = BasicBlockType::Normal {
                            next_block_addr: jump_target,
                        };
                        to_visit_stack.push(jump_target as u16 & 0xFFF);
                    }
                    else {
                        current_block.ty = BasicBlockType::Exit;
                    }

                    current_block.last_instruction_addr = addr_before_instruction;
                    current_block.end_addr = current_address;
                    break;
                },
                AnalysisType::BlockBoundary => {
                    // a block boundary but not a jump, get out

                    if has_next_instruction {
                        // block boundary, but execution will eventually come back
                        // to the next instruction. Create a new basic block
                        // starting at the next instruction and register it as an
                        // entry point
                        marked_as_entry.insert(current_address as u16 & 0xFFF);
                        to_visit_stack.push(current_address as u16 & 0xFFF);
                    }

                    current_block.last_instruction_addr = addr_before_instruction;
                    current_block.end_addr = current_address;
                    break;
                },
            }

            if is_near_end_of_page(current_address) {
                current_block.last_instruction_addr = addr_before_instruction;
                current_block.end_addr = current_address;
                profiler::stat_increment(stat::COMPILE_CUT_OFF_AT_END_OF_PAGE);
                break;
            }
        }

        let previous_block = basic_blocks
            .range(..current_block.addr)
            .next_back()
            .map(|(_, previous_block)| (previous_block.addr, previous_block.end_addr));

        if let Some((start_addr, end_addr)) = previous_block {
            if current_block.addr < end_addr {
                // If this block overlaps with the previous block, re-analyze the previous block
                let old_block = basic_blocks.remove(&start_addr);
                dbg_assert!(old_block.is_some());
                to_visit_stack.push(start_addr as u16 & 0xFFF);

                // Note that this does not ensure the invariant that two consecutive blocks don't
                // overlap. For that, we also need to check the following block.
            }
        }

        dbg_assert!(current_block.addr < current_block.end_addr);
        dbg_assert!(current_block.addr <= current_block.last_instruction_addr);
        dbg_assert!(current_block.last_instruction_addr < current_block.end_addr);

        basic_blocks.insert(current_block.addr, current_block);
    }

    for block in basic_blocks.values_mut() {
        if marked_as_entry.contains(&(block.addr as u16 & 0xFFF)) {
            block.is_entry_block = true;
        }
    }

    let basic_blocks: Vec<BasicBlock> = basic_blocks.into_iter().map(|(_, block)| block).collect();

    for i in 0..basic_blocks.len() - 1 {
        let next_block_addr = basic_blocks[i + 1].addr;
        let next_block_end_addr = basic_blocks[i + 1].end_addr;
        let next_block_is_entry = basic_blocks[i + 1].is_entry_block;
        let block = &basic_blocks[i];
        dbg_assert!(block.addr < next_block_addr);
        if next_block_addr < block.end_addr {
            dbg_log!(
                "Overlapping first=[from={:x} to={:x} is_entry={}] second=[from={:x} to={:x} is_entry={}]",
                block.addr,
                block.end_addr,
                block.is_entry_block as u8,
                next_block_addr,
                next_block_end_addr,
                next_block_is_entry as u8
            );
        }
    }

    (basic_blocks, requires_loop_limit)
}
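// Summary of jit_find_basic_blocks above (orientation note, not part of the original source):
// starting from the recorded entry points of the page, instructions are decoded with
// analysis::analyze_step until a jump, a block boundary, or the end of the page is reached.
// Jump targets inside the same page are pushed onto to_visit_stack and become blocks of their
// own; instructions that execution will return to (e.g. after a CALL) are also marked as entry
// points. Blocks are kept in a BTreeMap ordered by start address so an overlapping predecessor
// can be detected and re-analyzed, and the returned bool requests a loop-iteration limit when a
// conditional jump targets the start of its own block.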
fn create_cache_entry(ctx: &mut JitState, entry: jit_cache_array::Entry) {
    let mut found_entry_index = None;
    let phys_addr = entry.start_addr;

    for i in 0..CODE_CACHE_SEARCH_SIZE {
        let addr_index = (phys_addr + i) & jit_cache_array::MASK;
        let existing_entry = jit_cache_array::get(addr_index);

        if existing_entry.start_addr == entry.start_addr
            && existing_entry.state_flags == entry.state_flags
        {
            profiler::stat_increment(stat::COMPILE_DUPLICATE_ENTRY);
        }

        if existing_entry.start_addr == 0 {
            found_entry_index = Some(addr_index);
            break;
        }
    }

    let found_entry_index = match found_entry_index {
        Some(i) => i,
        None => {
            profiler::stat_increment(stat::CACHE_MISMATCH);

            // no free slots, overwrite the first one
            let found_entry_index = phys_addr & jit_cache_array::MASK;

            let old_entry = jit_cache_array::get_mut(found_entry_index);

            // if we're here, we expect to overwrite a valid index
            dbg_assert!(old_entry.start_addr != 0);
            dbg_assert!(old_entry.wasm_table_index != 0);

            if old_entry.wasm_table_index == entry.wasm_table_index {
                profiler::stat_increment(stat::INVALIDATE_SINGLE_ENTRY_CACHE_FULL);

                dbg_assert!(old_entry.pending);
                dbg_assert!(Page::page_of(old_entry.start_addr) == Page::page_of(phys_addr));

                // The old entry belongs to the same wasm table index as this entry.
                // *Don't* free the wasm table index, instead just delete the old entry
                // and use its slot for this entry.
                // TODO: Optimally, we should pick another slot instead of dropping
                // an entry that has just been created.
                jit_cache_array::remove(found_entry_index);

                dbg_assert!(old_entry.next_index_same_page() == None);
                old_entry.pending = false;
                old_entry.start_addr = 0;
            }
            else {
                profiler::stat_increment(stat::INVALIDATE_MODULE_CACHE_FULL);

                let old_wasm_table_index = old_entry.wasm_table_index;
                let old_page = Page::page_of(old_entry.start_addr);

                remove_jit_cache_wasm_index(ctx, old_page, old_wasm_table_index);

                //jit_cache_array::check_invariants();

                // old entry should be removed after calling remove_jit_cache_wasm_index
                dbg_assert!(!old_entry.pending);
                dbg_assert!(old_entry.start_addr == 0);
                dbg_assert!(old_entry.wasm_table_index == 0);
                dbg_assert!(old_entry.next_index_same_page() == None);
            }

            found_entry_index
        },
    };

    jit_cache_array::insert(found_entry_index, entry);
}

#[cfg(debug_assertions)]
pub fn jit_force_generate_unsafe(
    ctx: &mut JitState,
    phys_addr: u32,
    cs_offset: u32,
    state_flags: CachedStateFlags,
) {
    record_entry_point(ctx, phys_addr);
    jit_analyze_and_generate(ctx, Page::page_of(phys_addr), cs_offset, state_flags);
}

#[inline(never)]
fn jit_analyze_and_generate(
    ctx: &mut JitState,
    page: Page,
    cs_offset: u32,
    state_flags: CachedStateFlags,
) {
    profiler::stat_increment(stat::COMPILE);

    let entry_points = ctx.entry_points.remove(&page);

    let cpu = CpuContext {
        eip: 0,
        prefixes: 0,
        cs_offset,
        state_flags,
    };

    if let Some(entry_points) = entry_points {
        let (basic_blocks, requires_loop_limit) =
            jit_find_basic_blocks(page, &entry_points, cpu.clone());

        //for b in basic_blocks.iter() {
        //    dbg_log!(
        //        "> Basic block from {:x} to {:x}, is_entry={}",
        //        b.addr,
        //        b.end_addr,
        //        b.is_entry_block
        //    );
        //}

        if ctx.wasm_table_index_free_list.is_empty() {
            dbg_log!(
                "wasm_table_index_free_list empty ({} pending_free), clearing cache",
                ctx.wasm_table_index_pending_free.len(),
            );

            // When no free slots are available, delete all cached modules. We could increase the
            // size of the table, but this way the initial size acts as an upper bound for the
            // number of wasm modules that we generate, which we want anyway to avoid getting our
            // tab killed by browsers due to memory constraints.
            cpu::jit_clear_cache();
            profiler::stat_increment(stat::INVALIDATE_ALL_MODULES_NO_FREE_WASM_INDICES);

            dbg_log!(
                "after jit_clear_cache: {} pending_free {} free",
                ctx.wasm_table_index_pending_free.len(),
                ctx.wasm_table_index_free_list.len(),
            );

            // This assertion can fail if all entries are pending (not possible unless
            // WASM_TABLE_SIZE is set very low)
            dbg_assert!(!ctx.wasm_table_index_free_list.is_empty());
        }

        // allocate an index in the wasm table
        let wasm_table_index = ctx
            .wasm_table_index_free_list
            .pop()
            .expect("allocate wasm table index");
        dbg_assert!(wasm_table_index != 0);

        jit_generate_module(
            &basic_blocks,
            requires_loop_limit,
            cpu.clone(),
            &mut ctx.wasm_builder,
        );

        // create entries for each basic block that is marked as an entry point
        let mut entry_point_count = 0;

        for (i, block) in basic_blocks.iter().enumerate() {
            profiler::stat_increment(stat::COMPILE_BASIC_BLOCK);

            if block.is_entry_block && block.addr != block.end_addr {
                dbg_assert!(block.addr != 0);

                let initial_state = i.safe_to_u16();

                #[allow(unused_mut)]
                let mut entry = jit_cache_array::Entry::create(
                    block.addr,
                    None, // to be filled in by create_cache_entry
                    wasm_table_index,
                    initial_state,
                    state_flags,
                    true,
                );

                #[cfg(any(debug_assertions, feature = "profiler"))]
                {
                    entry.len = block.end_addr - block.addr;
                }

                #[cfg(debug_assertions)]
                {
                    entry.opcode = cpu::read32(block.addr);
                }

                create_cache_entry(ctx, entry);

                entry_point_count += 1;
                profiler::stat_increment(stat::COMPILE_ENTRY_POINT);
            }
        }

        profiler::stat_increment_by(
            stat::COMPILE_WASM_TOTAL_BYTES,
            ::c_api::jit_get_op_len() as u64,
        );

        dbg_assert!(entry_point_count > 0);

        cpu::tlb_set_has_code(page, true);

        jit_cache_array::check_invariants();
        cpu::check_tlb_invariants();

        let end_addr = 0;
        let first_opcode = 0;
        let phys_addr = page.to_address();

        // will call codegen_finalize_finished asynchronously when finished
        cpu::codegen_finalize(
            wasm_table_index,
            phys_addr,
            end_addr,
            first_opcode,
            state_flags,
        );

        profiler::stat_increment(stat::COMPILE_SUCCESS);
    }
    else {
        //dbg_log("No basic blocks, not generating code");
        // Nothing to do
    }
}

pub fn codegen_finalize_finished(
    ctx: &mut JitState,
    wasm_table_index: u16,
    phys_addr: u32,
    _end_addr: u32,
    _first_opcode: u32,
    _state_flags: CachedStateFlags,
) {
    dbg_assert!(wasm_table_index != 0);

    match ctx
        .wasm_table_index_pending_free
        .iter()
        .position(|i| *i == wasm_table_index)
    {
        Some(i) => {
            ctx.wasm_table_index_pending_free.swap_remove(i);
            free_wasm_table_index(ctx, wasm_table_index);
        },
        None => {
            let page = Page::page_of(phys_addr);
            let mut cache_array_index = jit_cache_array::get_page_index(page);

            while let Some(index) = cache_array_index {
                let mut entry = jit_cache_array::get_mut(index);

                if (*entry).wasm_table_index == wasm_table_index {
                    dbg_assert!((*entry).pending);
                    (*entry).pending = false;
                }

                cache_array_index = (*entry).next_index_same_page();
            }
        },
    }

    jit_cache_array::check_invariants();

    if CHECK_JIT_CACHE_ARRAY_INVARIANTS {
        // sanity check that the above iteration marked all entries as not pending
        for i in 0..jit_cache_array::SIZE {
            let entry = jit_cache_array::get(i);
            if entry.wasm_table_index == wasm_table_index {
                dbg_assert!(!entry.pending);
            }
        }
    }
}
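// Shape of the code emitted by jit_generate_module below (sketch added for orientation;
// reconstructed from the builder calls, not literal output). Each page compiles to one wasm
// function that dispatches on a "state" local holding the index of the next basic block:
//
//   loop
//     ;; optional loop-safety counter: decrement, write registers back and return on 0
//     block  ;; default case (unreachable)
//       block  ;; exit-with-pagefault case
//         block $bb_{n-1} ... block $bb_0
//           br_table $bb_0 .. $bb_{n-1} $pagefault (get_local $state)
//         end
//         ;; code for basic block 0, then 1, ...; an Exit block writes the register
//         ;; locals back to memory and returns; a jump to another block either falls
//         ;; through or sets $state and branches back to the loop
//   end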
fn jit_generate_module(
    basic_blocks: &Vec<BasicBlock>,
    requires_loop_limit: bool,
    mut cpu: CpuContext,
    builder: &mut WasmBuilder,
) {
    builder.reset();

    let basic_block_indices: HashMap<u32, u32> = basic_blocks
        .iter()
        .enumerate()
        .map(|(index, block)| (block.addr, index as u32))
        .collect();

    // set state local variable to the initial state passed as the first argument
    builder
        .instruction_body
        .get_local(&builder.arg_local_initial_state);
    let gen_local_state = builder.set_new_local();

    // initialise max_iterations
    let gen_local_iteration_counter = if JIT_ALWAYS_USE_LOOP_SAFETY || requires_loop_limit {
        builder
            .instruction_body
            .const_i32(JIT_MAX_ITERATIONS_PER_FUNCTION as i32);
        Some(builder.set_new_local())
    }
    else {
        None
    };

    let mut register_locals = (0..8)
        .map(|i| {
            builder
                .instruction_body
                .const_i32(global_pointers::get_reg32_offset(i) as i32);
            builder.instruction_body.load_aligned_i32_from_stack(0);
            let local = builder.set_new_local();
            local
        })
        .collect();

    let ctx = &mut JitContext {
        cpu: &mut cpu,
        builder,
        register_locals: &mut register_locals,
        start_of_current_instruction: 0,
        current_brtable_depth: 0,
    };

    // main state machine loop
    ctx.builder.instruction_body.loop_void();

    if let Some(gen_local_iteration_counter) = gen_local_iteration_counter.as_ref() {
        profiler::stat_increment(stat::COMPILE_WITH_LOOP_SAFETY);

        // decrement max_iterations
        ctx.builder
            .instruction_body
            .get_local(gen_local_iteration_counter);
        ctx.builder.instruction_body.const_i32(-1);
        ctx.builder.instruction_body.add_i32();
        ctx.builder
            .instruction_body
            .set_local(gen_local_iteration_counter);

        // if max_iterations == 0: return
        ctx.builder
            .instruction_body
            .get_local(gen_local_iteration_counter);
        ctx.builder.instruction_body.eqz_i32();

        ctx.builder.instruction_body.if_void();
        codegen::gen_debug_track_jit_exit(ctx.builder, 0);
        codegen::gen_move_registers_from_locals_to_memory(ctx);
        ctx.builder.instruction_body.return_();
        ctx.builder.instruction_body.block_end();
    }

    ctx.builder.instruction_body.block_void(); // for the default case

    ctx.builder.instruction_body.block_void(); // for the exit-with-pagefault case

    // generate the opening blocks for the cases
    for _ in 0..basic_blocks.len() {
        ctx.builder.instruction_body.block_void();
    }

    ctx.builder.instruction_body.get_local(&gen_local_state);
    ctx.builder
        .instruction_body
        .brtable_and_cases(basic_blocks.len() as u32 + 1); // plus one for the exit-with-pagefault case

    for (i, block) in basic_blocks.iter().enumerate() {
        // Case [i] will jump after the [i]th block, so we first generate the
        // block end opcode and then the code for that block
        ctx.builder.instruction_body.block_end();

        ctx.current_brtable_depth = basic_blocks.len() as u32 + 1 - i as u32;

        dbg_assert!(block.addr < block.end_addr);

        jit_generate_basic_block(ctx, block);

        let invalid_connection_to_next_block = block.end_addr != ctx.cpu.eip;
        dbg_assert!(!invalid_connection_to_next_block);

        match &block.ty {
            BasicBlockType::Exit => {
                // Exit this function
                codegen::gen_debug_track_jit_exit(ctx.builder, block.last_instruction_addr);
                codegen::gen_move_registers_from_locals_to_memory(ctx);
                ctx.builder.instruction_body.return_();
            },
            BasicBlockType::Normal { next_block_addr } => {
                // Unconditional jump to next basic block
                // - All instructions that don't change eip
                // - Unconditional jump

                let next_basic_block_index = *basic_block_indices
                    .get(&next_block_addr)
                    .expect("basic_block_indices.get (Normal)");

                if next_basic_block_index == (i as u32) + 1 {
                    // fallthru
                }
                else {
                    // set state variable to next basic block
                    ctx.builder
                        .instruction_body
                        .const_i32(next_basic_block_index as i32);
                    ctx.builder.instruction_body.set_local(&gen_local_state);

                    ctx.builder.instruction_body.br(ctx.current_brtable_depth); // to the loop
                }
            },
            &BasicBlockType::ConditionalJump {
                next_block_addr,
                next_block_branch_taken_addr,
                condition,
                jump_offset,
                jump_offset_is_32,
            } => {
                // Conditional jump to next basic block
                // - jnz, jc, loop, jcxz, etc.
                codegen::gen_condition_fn(ctx, condition);
                ctx.builder.instruction_body.if_void();

                // Branch taken

                if jump_offset_is_32 {
                    codegen::gen_relative_jump(ctx.builder, jump_offset);
                }
                else {
                    codegen::gen_jmp_rel16(ctx.builder, jump_offset as u16);
                }

                if let Some(next_block_branch_taken_addr) = next_block_branch_taken_addr {
                    let next_basic_block_branch_taken_index = *basic_block_indices
                        .get(&next_block_branch_taken_addr)
                        .expect("basic_block_indices.get (branch taken)");

                    ctx.builder
                        .instruction_body
                        .const_i32(next_basic_block_branch_taken_index as i32);
                    ctx.builder.instruction_body.set_local(&gen_local_state);

                    ctx.builder
                        .instruction_body
                        .br(basic_blocks.len() as u32 + 2 - i as u32); // to the loop
                }
                else {
                    // Jump to different page
                    codegen::gen_debug_track_jit_exit(ctx.builder, block.last_instruction_addr);
                    codegen::gen_move_registers_from_locals_to_memory(ctx);
                    ctx.builder.instruction_body.return_();
                }

                if let Some(next_block_addr) = next_block_addr {
                    // Branch not taken

                    let next_basic_block_index = *basic_block_indices
                        .get(&next_block_addr)
                        .expect("basic_block_indices.get (branch not taken)");

                    if next_basic_block_index == (i as u32) + 1 {
                        // fallthru
                        ctx.builder.instruction_body.block_end();
                    }
                    else {
                        ctx.builder.instruction_body.else_();

                        ctx.builder
                            .instruction_body
                            .const_i32(next_basic_block_index as i32);
                        ctx.builder.instruction_body.set_local(&gen_local_state);

                        ctx.builder
                            .instruction_body
                            .br(basic_blocks.len() as u32 + 2 - i as u32); // to the loop

                        ctx.builder.instruction_body.block_end();
                    }
                }
                else {
                    ctx.builder.instruction_body.else_();

                    // End of this page
                    codegen::gen_debug_track_jit_exit(ctx.builder, block.last_instruction_addr);
                    codegen::gen_move_registers_from_locals_to_memory(ctx);
                    ctx.builder.instruction_body.return_();

                    ctx.builder.instruction_body.block_end();
                }
            },
        }
    }

    {
        // exit-with-pagefault case
        ctx.builder.instruction_body.block_end();
        codegen::gen_move_registers_from_locals_to_memory(ctx);
        codegen::gen_fn0_const(ctx.builder, "trigger_pagefault_end_jit");
        codegen::gen_clear_prefixes(ctx);
        ctx.builder.instruction_body.return_();
    }

    ctx.builder.instruction_body.block_end(); // default case
    ctx.builder.instruction_body.unreachable();

    ctx.builder.instruction_body.block_end(); // loop

    ctx.builder.free_local(gen_local_state);
    if let Some(local) = gen_local_iteration_counter {
        ctx.builder.free_local(local);
    }
    for local in ctx.register_locals.drain(..) {
        ctx.builder.free_local(local);
    }

    ctx.builder.finish();
}
fn jit_generate_basic_block(ctx: &mut JitContext, block: &BasicBlock) {
    let start_addr = block.addr;
    let last_instruction_addr = block.last_instruction_addr;
    let stop_addr = block.end_addr;

    // First iteration of do-while assumes the caller confirms this condition
    dbg_assert!(!is_near_end_of_page(start_addr));

    codegen::gen_increment_timestamp_counter(ctx.builder, block.number_of_instructions as i32);

    ctx.cpu.eip = start_addr;

    loop {
        let mut instruction = 0;
        if cfg!(feature = "profiler") {
            instruction = cpu::read32(ctx.cpu.eip);
            ::opstats::gen_opstats(ctx.builder, instruction);
            ::opstats::record_opstat_compiled(instruction);
        }

        if ctx.cpu.eip == last_instruction_addr {
            // Before the last instruction:
            // - Set eip to *after* the instruction
            // - Set previous_eip to *before* the instruction
            codegen::gen_set_previous_eip_offset_from_eip(
                ctx.builder,
                last_instruction_addr - start_addr,
            );
            codegen::gen_increment_instruction_pointer(ctx.builder, stop_addr - start_addr);
        }

        let wasm_length_before = ctx.builder.instruction_body.len();

        ctx.start_of_current_instruction = ctx.cpu.eip;
        let start_eip = ctx.cpu.eip;
        let mut instruction_flags = 0;
        jit_instructions::jit_instruction(ctx, &mut instruction_flags);
        let end_eip = ctx.cpu.eip;

        let instruction_length = end_eip - start_eip;
        let was_block_boundary = instruction_flags & JIT_INSTR_BLOCK_BOUNDARY_FLAG != 0;

        let wasm_length = ctx.builder.instruction_body.len() - wasm_length_before;
        ::opstats::record_opstat_size_wasm(instruction, wasm_length as u32);

        dbg_assert!((end_eip == stop_addr) == (start_eip == last_instruction_addr));
        dbg_assert!(instruction_length < MAX_INSTRUCTION_LENGTH);

        let end_addr = ctx.cpu.eip;

        if end_addr == stop_addr {
            // no page was crossed
            dbg_assert!(Page::page_of(end_addr) == Page::page_of(start_addr));
            break;
        }

        if was_block_boundary || is_near_end_of_page(end_addr) || end_addr > stop_addr {
            dbg_log!(
                "Overlapping basic blocks start={:x} expected_end={:x} end={:x} was_block_boundary={} near_end_of_page={}",
                start_addr,
                stop_addr,
                end_addr,
                was_block_boundary,
                is_near_end_of_page(end_addr)
            );
            dbg_assert!(false);
            break;
        }
    }
}

pub fn jit_increase_hotness_and_maybe_compile(
    ctx: &mut JitState,
    phys_address: u32,
    cs_offset: u32,
    state_flags: CachedStateFlags,
    hotness: u32,
) {
    let page = Page::page_of(phys_address);
    let address_hash = jit_hot_hash_page(page) as usize;
    ctx.hot_code_addresses[address_hash] += hotness;
    if ctx.hot_code_addresses[address_hash] >= JIT_THRESHOLD {
        ctx.hot_code_addresses[address_hash] = 0;
        jit_analyze_and_generate(ctx, page, cs_offset, state_flags)
    };
}

fn free_wasm_table_index(ctx: &mut JitState, wasm_table_index: u16) {
    if CHECK_JIT_CACHE_ARRAY_INVARIANTS {
        dbg_assert!(!ctx.wasm_table_index_free_list.contains(&wasm_table_index));
    }
    ctx.wasm_table_index_free_list.push(wasm_table_index);

    // It is not strictly necessary to clear the function, but it will fail more predictably if we
    // accidentally use the function and may garbage collect unused modules earlier
    cpu::jit_clear_func(wasm_table_index);
}

/// Remove all entries with the given wasm_table_index in page
fn remove_jit_cache_wasm_index(ctx: &mut JitState, page: Page, wasm_table_index: u16) {
    let mut cache_array_index = jit_cache_array::get_page_index(page).unwrap();

    let mut pending = false;

    loop {
        let entry = jit_cache_array::get_mut(cache_array_index);
        let next_cache_array_index = entry.next_index_same_page();

        if entry.wasm_table_index == wasm_table_index {
            // if one entry is pending, all must be pending
            dbg_assert!(!pending || entry.pending);

            pending = entry.pending;

            jit_cache_array::remove(cache_array_index);

            dbg_assert!(entry.next_index_same_page() == None);
            entry.wasm_table_index = 0;
            entry.start_addr = 0;
            entry.pending = false;
        }

        if let Some(i) = next_cache_array_index {
            cache_array_index = i;
        }
        else {
            break;
        }
    }

    if pending {
        ctx.wasm_table_index_pending_free.push(wasm_table_index);
    }
    else {
        free_wasm_table_index(ctx, wasm_table_index);
    }

    if !jit_page_has_code(ctx, page) {
        cpu::tlb_set_has_code(page, false);
    }

    if CHECK_JIT_CACHE_ARRAY_INVARIANTS {
        // sanity check that the above iteration deleted all entries
        for i in 0..jit_cache_array::SIZE {
            let entry = jit_cache_array::get(i);
            dbg_assert!(entry.wasm_table_index != wasm_table_index);
        }
    }
}

/// Register a write in this page: Delete all present code
pub fn jit_dirty_page(ctx: &mut JitState, page: Page) {
    let mut did_have_code = false;

    if let Some(mut cache_array_index) = jit_cache_array::get_page_index(page) {
        did_have_code = true;

        let mut index_to_free = HashSet::new();
        let mut index_to_pending_free = HashSet::new();

        jit_cache_array::set_page_index(page, None);
        profiler::stat_increment(stat::INVALIDATE_PAGE);

        loop {
            profiler::stat_increment(stat::INVALIDATE_CACHE_ENTRY);
            let entry = jit_cache_array::get_mut(cache_array_index);
            let wasm_table_index = entry.wasm_table_index;

            dbg_assert!(page == Page::page_of(entry.start_addr));

            let next_cache_array_index = entry.next_index_same_page();

            entry.set_next_index_same_page(None);
            entry.start_addr = 0;
            entry.wasm_table_index = 0;

            if entry.pending {
                dbg_assert!(!index_to_free.contains(&wasm_table_index));

                entry.pending = false;

                index_to_pending_free.insert(wasm_table_index);
            }
            else {
                dbg_assert!(!index_to_pending_free.contains(&wasm_table_index));
                index_to_free.insert(wasm_table_index);
            }

            if let Some(i) = next_cache_array_index {
                cache_array_index = i;
            }
            else {
                break;
            }
        }

        profiler::stat_increment_by(
            stat::INVALIDATE_MODULE,
            index_to_pending_free.len() as u64 + index_to_free.len() as u64,
        );

        for index in index_to_free.iter().cloned() {
            free_wasm_table_index(ctx, index)
        }

        for index in index_to_pending_free {
            ctx.wasm_table_index_pending_free.push(index);
        }
    }

    match ctx.entry_points.remove(&page) {
        None => {},
        Some(_entry_points) => {
            did_have_code = true;

            // don't try to compile code in this page anymore until it's hot again
            ctx.hot_code_addresses[jit_hot_hash_page(page) as usize] = 0;
        },
    }

    if did_have_code {
        cpu::tlb_set_has_code(page, false);
    }
}

pub fn jit_dirty_cache(ctx: &mut JitState, start_addr: u32, end_addr: u32) {
    dbg_assert!(start_addr < end_addr);

    let start_page = Page::page_of(start_addr);
    let end_page = Page::page_of(end_addr - 1);

    for page in start_page.to_u32()..end_page.to_u32() + 1 {
        jit_dirty_page(ctx, Page::page_of(page << 12));
    }
}

pub fn jit_dirty_cache_small(ctx: &mut JitState, start_addr: u32, end_addr: u32) {
    dbg_assert!(start_addr < end_addr);

    let start_page = Page::page_of(start_addr);
    let end_page = Page::page_of(end_addr - 1);

    jit_dirty_page(ctx, start_page);

    // Note: This can't happen when paging is enabled, as writes across
    // boundaries are split up on two pages
    if start_page != end_page {
        dbg_assert!(start_page.to_u32() + 1 == end_page.to_u32());
        jit_dirty_page(ctx, end_page);
    }
}

pub fn jit_empty_cache(ctx: &mut JitState) {
    ctx.entry_points.clear();

    for page_index in 0..0x100000 {
        jit_dirty_page(ctx, Page::page_of(page_index << 12))
    }
}
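// Invalidation works at 4 KiB page granularity: jit_dirty_cache expands a written byte range
// to every page it touches (using the inclusive end address end_addr - 1) and dirties each one.
// A small, hypothetical check of that arithmetic (illustrative only, not part of the original
// test suite; it does not call jit_dirty_page, which needs global JIT state):
#[cfg(test)]
mod dirty_range_tests {
    use super::*;

    #[test]
    fn dirty_range_covers_all_touched_pages() {
        let start_addr = 0x1ffe;
        let end_addr = 0x2002; // exclusive
        let start_page = Page::page_of(start_addr);
        let end_page = Page::page_of(end_addr - 1);
        assert_eq!(start_page.to_u32(), 1);
        assert_eq!(end_page.to_u32(), 2);
        // both page 1 and page 2 would be passed to jit_dirty_page
        assert_eq!((start_page.to_u32()..end_page.to_u32() + 1).count(), 2);
    }
}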
pub fn jit_page_has_code(ctx: &JitState, page: Page) -> bool {
    jit_cache_array::get_page_index(page) != None || ctx.entry_points.contains_key(&page)
}

#[cfg(debug_assertions)]
pub fn jit_unused_cache_stat() -> u32 {
    let mut count = 0;

    for i in 0..jit_cache_array::SIZE {
        if (jit_cache_array::get(i)).start_addr == 0 {
            count += 1
        }
    }

    return count;
}

#[cfg(debug_assertions)]
pub fn jit_get_entry_length(i: u32) -> u32 { (jit_cache_array::get(i)).len }

#[cfg(debug_assertions)]
pub fn jit_get_entry_address(i: u32) -> u32 { (jit_cache_array::get(i)).start_addr }

#[cfg(debug_assertions)]
pub fn jit_get_entry_pending(i: u32) -> bool { (jit_cache_array::get(i)).pending }

#[cfg(debug_assertions)]
pub fn jit_get_wasm_table_index_free_list_count(ctx: &JitState) -> u32 {
    ctx.wasm_table_index_free_list.len() as u32
}

pub fn jit_get_op_len(ctx: &JitState) -> u32 { ctx.wasm_builder.get_op_len() }

pub fn jit_get_op_ptr(ctx: &JitState) -> *const u8 { ctx.wasm_builder.get_op_ptr() }

#[cfg(feature = "profiler")]
pub fn check_missed_entry_points(phys_address: u32, state_flags: CachedStateFlags) {
    let page = Page::page_of(phys_address);

    for i in page.to_address()..page.to_address() + 4096 {
        // No need to check [CODE_CACHE_SEARCH_SIZE] entries here as we look at consecutive
        // addresses anyway
        let index = i & jit_cache_array::MASK;
        let entry = jit_cache_array::get(index);

        if !entry.pending
            && entry.state_flags == state_flags
            && phys_address >= entry.start_addr
            && phys_address < entry.start_addr + entry.len
        {
            profiler::stat_increment(stat::RUN_INTERPRETED_MISSED_COMPILED_ENTRY_LOOKUP);

            let last_jump_type = unsafe { ::cpu2::cpu::debug_last_jump.name() };
            let last_jump_addr =
                unsafe { ::cpu2::cpu::debug_last_jump.phys_address() }.unwrap_or(0);
            let last_jump_opcode =
                if last_jump_addr != 0 { cpu::read32(last_jump_addr) } else { 0 };

            let opcode = cpu::read32(phys_address);
            dbg_log!(
                "Compiled exists, but no entry point, \
                 start={:x} end={:x} phys_addr={:x} opcode={:02x} {:02x} {:02x} {:02x}. \
                 Last jump at {:x} ({}) opcode={:02x} {:02x} {:02x} {:02x}",
                entry.start_addr,
                entry.start_addr + entry.len,
                phys_address,
                opcode & 0xFF,
                opcode >> 8 & 0xFF,
                opcode >> 16 & 0xFF,
                opcode >> 24 & 0xFF,
                last_jump_addr,
                last_jump_type,
                last_jump_opcode & 0xFF,
                last_jump_opcode >> 8 & 0xFF,
                last_jump_opcode >> 16 & 0xFF,
                last_jump_opcode >> 24 & 0xFF,
            );
        }
    }
}