From 0596b25f70a24c2d138f0ae15497f0e85577c50c Mon Sep 17 00:00:00 2001 From: Fabian Date: Sat, 5 Nov 2022 19:23:41 -0600 Subject: [PATCH] improve page heat calculation (combine with recording of entry points) --- src/browser/print_stats.js | 1 + src/rust/cpu/cpu.rs | 3 +- src/rust/jit.rs | 87 +++++++++++++++++--------------------- src/rust/profiler.rs | 1 + 4 files changed, 42 insertions(+), 50 deletions(-) diff --git a/src/browser/print_stats.js b/src/browser/print_stats.js index 55079841..db3e27d1 100644 --- a/src/browser/print_stats.js +++ b/src/browser/print_stats.js @@ -28,6 +28,7 @@ const print_stats = { "COMPILE_WASM_TOTAL_BYTES", "COMPILE_WASM_TOTAL_BYTES/COMPILE_PAGE", "RUN_INTERPRETED", + "RUN_INTERPRETED_NEW_PAGE", "RUN_INTERPRETED_PAGE_HAS_CODE", "RUN_INTERPRETED_PAGE_HAS_ENTRY_AFTER_PAGE_WALK", "RUN_INTERPRETED_NEAR_END_OF_PAGE", diff --git a/src/rust/cpu/cpu.rs b/src/rust/cpu/cpu.rs index 14251dde..cb7f0cc1 100644 --- a/src/rust/cpu/cpu.rs +++ b/src/rust/cpu/cpu.rs @@ -2802,6 +2802,8 @@ pub unsafe fn popa32() { write_reg32(EAX, pop32s().unwrap()); } +pub fn get_state_flags() -> CachedStateFlags { unsafe { *state_flags } } + #[no_mangle] pub fn get_seg_cs() -> i32 { unsafe { *segment_offsets.offset(CS as isize) } } @@ -2950,7 +2952,6 @@ pub unsafe fn cycle_internal() { else { *previous_ip = initial_eip; let phys_addr = return_on_pagefault!(get_phys_eip()); - jit::record_entry_point(phys_addr); match tlb_code[(initial_eip as u32 >> 12) as usize] { None => {}, diff --git a/src/rust/jit.rs b/src/rust/jit.rs index bfdb5e60..2b912e96 100644 --- a/src/rust/jit.rs +++ b/src/rust/jit.rs @@ -61,8 +61,6 @@ pub const BRTABLE_CUTOFF: usize = 10; pub const WASM_TABLE_SIZE: u32 = 900; -pub const HASH_PRIME: u32 = 6151; - pub const CHECK_JIT_STATE_INVARIANTS: bool = false; pub const JIT_USE_LOOP_SAFETY: bool = true; @@ -114,8 +112,7 @@ pub struct JitState { // (faster, but uses much more memory) // or a compressed bitmap (likely faster) // or HashSet rather than nested - entry_points: HashMap>, - hot_pages: [u32; HASH_PRIME as usize], + entry_points: HashMap)>, pages: HashMap, wasm_table_index_free_list: Vec, compiling: Option<(WasmTableIndex, CompilingPageState)>, @@ -181,7 +178,6 @@ impl JitState { wasm_builder: WasmBuilder::new(), entry_points: HashMap::new(), - hot_pages: [0; HASH_PRIME as usize], pages: HashMap::new(), wasm_table_index_free_list: Vec::from_iter(wasm_table_indices), @@ -302,8 +298,6 @@ impl<'a> JitContext<'a> { pub const JIT_INSTR_BLOCK_BOUNDARY_FLAG: u32 = 1 << 0; -fn jit_hot_hash_page(page: Page) -> u32 { page.to_u32() % HASH_PRIME } - pub fn is_near_end_of_page(address: u32) -> bool { address & 0xFFF >= 0x1000 - MAX_INSTRUCTION_LENGTH } @@ -369,27 +363,6 @@ pub fn jit_find_cache_entry_in_page( return -1; } -pub fn record_entry_point(phys_address: u32) { - let ctx = get_jit_state(); - if is_near_end_of_page(phys_address) { - return; - } - let page = Page::page_of(phys_address); - let offset_in_page = phys_address as u16 & 0xFFF; - let mut is_new = false; - ctx.entry_points - .entry(page) - .or_insert_with(|| { - is_new = true; - HashSet::new() - }) - .insert(offset_in_page); - - if is_new { - cpu::tlb_set_has_code(page, true); - } -} - // Maximum number of pages per wasm module. Necessary for the following reasons: // - There is an upper limit on the size of a single function in wasm (currently ~7MB in all browsers) // See https://github.com/WebAssembly/design/issues/1138 @@ -433,7 +406,7 @@ fn jit_find_basic_blocks( if !pages.contains(&phys_page) { // page seen for the first time, handle entry points - if let Some(entry_points) = ctx.entry_points.get(&phys_page) { + if let Some((hotness, entry_points)) = ctx.entry_points.get_mut(&phys_page) { let existing_entry_points = match ctx.pages.get(&phys_page) { Some(PageInfo { entry_points, .. }) => { HashSet::from_iter(entry_points.iter().map(|x| x.0)) @@ -456,10 +429,9 @@ fn jit_find_basic_blocks( // entry_points.union(&existing_entry_points).count() == entry_points.len() //); - let address_hash = jit_hot_hash_page(phys_page) as usize; - ctx.hot_pages[address_hash] = 0; + *hotness = 0; - for &addr_low in entry_points { + for &addr_low in entry_points.iter() { let addr = virt_target & !0xFFF | addr_low as i32; to_visit_stack.push(addr); marked_as_entry.insert(addr); @@ -766,13 +738,18 @@ fn jit_find_basic_blocks( #[no_mangle] #[cfg(debug_assertions)] pub fn jit_force_generate_unsafe(virt_addr: i32) { - let ctx = get_jit_state(); - let phys_addr = cpu::translate_address_read(virt_addr).unwrap(); - record_entry_point(phys_addr); - let cs_offset = cpu::get_seg_cs() as u32; - jit_analyze_and_generate(ctx, virt_addr, phys_addr, cs_offset, unsafe { - *global_pointers::state_flags - }); + dbg_assert!( + !is_near_end_of_page(virt_addr as u32), + "cannot force compile near end of page" + ); + jit_increase_hotness_and_maybe_compile( + virt_addr, + cpu::translate_address_read(virt_addr).unwrap(), + cpu::get_seg_cs() as u32, + cpu::get_state_flags(), + JIT_THRESHOLD, + ); + dbg_assert!(get_jit_state().compiling.is_some()); } #[inline(never)] @@ -787,7 +764,7 @@ fn jit_analyze_and_generate( dbg_assert!(ctx.compiling.is_none()); - let entry_points = match ctx.entry_points.get(&page) { + let (_, entry_points) = match ctx.entry_points.get(&page) { None => return, Some(entry_points) => entry_points, }; @@ -983,6 +960,12 @@ fn jit_analyze_and_generate( ); profiler::stat_increment_by(stat::COMPILE_PAGE, pages.len() as u64); + for &p in &pages { + ctx.entry_points + .entry(p) + .or_insert_with(|| (0, HashSet::new())); + } + cpu::tlb_set_has_code_multiple(&pages, true); dbg_assert!(ctx.compiling.is_none()); @@ -2059,19 +2042,28 @@ pub fn jit_increase_hotness_and_maybe_compile( phys_address: u32, cs_offset: u32, state_flags: CachedStateFlags, - hotness: u32, + heat: u32, ) { let ctx = get_jit_state(); let page = Page::page_of(phys_address); - let address_hash = jit_hot_hash_page(page) as usize; - ctx.hot_pages[address_hash] += hotness; - if ctx.hot_pages[address_hash] >= JIT_THRESHOLD { + let (hotness, entry_points) = ctx.entry_points.entry(page).or_insert_with(|| { + cpu::tlb_set_has_code(page, true); + profiler::stat_increment(stat::RUN_INTERPRETED_NEW_PAGE); + (0, HashSet::new()) + }); + + if !is_near_end_of_page(phys_address) { + entry_points.insert(phys_address as u16 & 0xFFF); + } + + *hotness += heat; + if *hotness >= JIT_THRESHOLD { if ctx.compiling.is_some() { return; } // only try generating if we're in the correct address space if cpu::translate_address_read_no_side_effects(virt_address) == Ok(phys_address) { - ctx.hot_pages[address_hash] = 0; + *hotness = 0; jit_analyze_and_generate(ctx, virt_address, phys_address, cs_offset, state_flags) } else { @@ -2191,13 +2183,10 @@ pub fn jit_dirty_page(ctx: &mut JitState, page: Page) { match ctx.entry_points.remove(&page) { None => {}, - Some(_entry_points) => { + Some(_) => { profiler::stat_increment(stat::INVALIDATE_PAGE_HAD_ENTRY_POINTS); did_have_code = true; - // don't try to compile code in this page anymore until it's hot again - ctx.hot_pages[jit_hot_hash_page(page) as usize] = 0; - match &ctx.compiling { Some((index, CompilingPageState::Compiling { pages })) => { if pages.contains_key(&page) { diff --git a/src/rust/profiler.rs b/src/rust/profiler.rs index f294c911..b0ece211 100644 --- a/src/rust/profiler.rs +++ b/src/rust/profiler.rs @@ -15,6 +15,7 @@ pub enum stat { COMPILE_WASM_TOTAL_BYTES, RUN_INTERPRETED, + RUN_INTERPRETED_NEW_PAGE, RUN_INTERPRETED_PAGE_HAS_CODE, RUN_INTERPRETED_PAGE_HAS_ENTRY_AFTER_PAGE_WALK, RUN_INTERPRETED_NEAR_END_OF_PAGE,