Multi-page wasm modules
This commit is contained in:
parent
8221936a11
commit
6352bfe3ed
|
@ -14,11 +14,12 @@ const print_stats = {
|
|||
const stat_names = [
|
||||
"COMPILE",
|
||||
"COMPILE_SUCCESS",
|
||||
"COMPILE_WRONG_ADDRESS_SPACE",
|
||||
"COMPILE_CUT_OFF_AT_END_OF_PAGE",
|
||||
"COMPILE_WITH_LOOP_SAFETY",
|
||||
"COMPILE_PAGE",
|
||||
"COMPILE_BASIC_BLOCK",
|
||||
"COMPILE_ENTRY_POINT",
|
||||
"COMPILE_DUPLICATE_ENTRY",
|
||||
"COMPILE_WASM_TOTAL_BYTES",
|
||||
"CACHE_MISMATCH",
|
||||
"RUN_INTERPRETED",
|
||||
|
@ -30,6 +31,7 @@ const print_stats = {
|
|||
"RUN_INTERPRETED_STEPS",
|
||||
"RUN_FROM_CACHE",
|
||||
"RUN_FROM_CACHE_STEPS",
|
||||
"FAILED_PAGE_CHANGE",
|
||||
"SAFE_READ_FAST",
|
||||
"SAFE_READ_SLOW_PAGE_CROSSED",
|
||||
"SAFE_READ_SLOW_NOT_VALID",
|
||||
|
@ -50,16 +52,19 @@ const print_stats = {
|
|||
"SAFE_READ_WRITE_SLOW_READ_ONLY",
|
||||
"SAFE_READ_WRITE_SLOW_HAS_CODE",
|
||||
"PAGE_FAULT",
|
||||
"TLB_MISS",
|
||||
"DO_RUN",
|
||||
"DO_MANY_CYCLES",
|
||||
"CYCLE_INTERNAL",
|
||||
"INVALIDATE_ALL_MODULES_NO_FREE_WASM_INDICES",
|
||||
"INVALIDATE_PAGE",
|
||||
"INVALIDATE_MODULE",
|
||||
"INVALIDATE_CACHE_ENTRY",
|
||||
"INVALIDATE_MODULE_CACHE_FULL",
|
||||
"INVALIDATE_SINGLE_ENTRY_CACHE_FULL",
|
||||
"INVALIDATE_MODULE_WRITTEN_WHILE_COMPILED",
|
||||
"INVALIDATE_MODULE_UNUSED_AFTER_OVERWRITE",
|
||||
"INVALIDATE_MODULE_DIRTY_PAGE",
|
||||
"INVALIDATE_PAGE_HAD_CODE",
|
||||
"INVALIDATE_PAGE_HAD_ENTRY_POINTS",
|
||||
"DIRTY_PAGE_DID_NOT_HAVE_CODE",
|
||||
"RUN_FROM_CACHE_EXIT_SAME_PAGE",
|
||||
"RUN_FROM_CACHE_EXIT_NEAR_END_OF_PAGE",
|
||||
"RUN_FROM_CACHE_EXIT_DIFFERENT_PAGE",
|
||||
"CLEAR_TLB",
|
||||
"FULL_CLEAR_TLB",
|
||||
|
|
|
@ -74,40 +74,49 @@ pub fn gen_relative_jump(builder: &mut WasmBuilder, n: i32) {
|
|||
builder.store_aligned_i32(0);
|
||||
}
|
||||
|
||||
// Emits a guard that re-translates the current eip and compares the result
// against `next_block_addr`.
//
// - `next_block_addr`: value the translated eip is compared against.
// - `last_instruction_addr`: passed to gen_debug_track_jit_exit on the
//   mismatch path.
//
// On mismatch the generated code increments FAILED_PAGE_CHANGE, writes the
// register locals back to memory and returns from the generated function.
pub fn gen_page_switch_check(
|
||||
ctx: &mut JitContext,
|
||||
next_block_addr: u32,
|
||||
last_instruction_addr: u32,
|
||||
) {
|
||||
// After switching a page while in jitted code, check if the page mapping still holds
|
||||
|
||||
// Put eip into a temporary local and translate it via gen_get_phys_eip
gen_get_eip(ctx.builder);
|
||||
let address_local = ctx.builder.set_new_local();
|
||||
gen_get_phys_eip(ctx, &address_local);
|
||||
ctx.builder.free_local(address_local);
|
||||
|
||||
// Enter the exit path only if the translated eip differs from next_block_addr
ctx.builder.const_i32(next_block_addr as i32);
|
||||
ctx.builder.ne_i32();
|
||||
ctx.builder.if_void();
|
||||
gen_profiler_stat_increment(ctx.builder, profiler::stat::FAILED_PAGE_CHANGE);
|
||||
gen_debug_track_jit_exit(ctx.builder, last_instruction_addr);
|
||||
gen_move_registers_from_locals_to_memory(ctx);
|
||||
ctx.builder.return_();
|
||||
ctx.builder.block_end();
|
||||
}
|
||||
|
||||
pub fn gen_absolute_indirect_jump(ctx: &mut JitContext, new_eip: WasmLocal) {
|
||||
ctx.builder
|
||||
.const_i32(global_pointers::instruction_pointer as i32);
|
||||
ctx.builder.get_local(&new_eip);
|
||||
ctx.builder.store_aligned_i32(0);
|
||||
|
||||
ctx.builder.get_local(&new_eip);
|
||||
ctx.builder
|
||||
.load_fixed_i32(global_pointers::previous_ip as u32);
|
||||
ctx.builder.xor_i32();
|
||||
ctx.builder.const_i32(!0xFFF);
|
||||
ctx.builder.and_i32();
|
||||
ctx.builder.eqz_i32();
|
||||
gen_get_phys_eip(ctx, &new_eip);
|
||||
ctx.builder.free_local(new_eip);
|
||||
|
||||
ctx.builder.const_i32(ctx.our_wasm_table_index as i32);
|
||||
ctx.builder.const_i32(ctx.state_flags.to_u32() as i32);
|
||||
ctx.builder.call_fn3_ret("jit_find_cache_entry_in_page");
|
||||
let new_basic_block_index = ctx.builder.tee_new_local();
|
||||
ctx.builder.const_i32(0);
|
||||
ctx.builder.ge_i32();
|
||||
ctx.builder.if_void();
|
||||
{
|
||||
// try staying in same page
|
||||
ctx.builder.get_local(&new_eip);
|
||||
ctx.builder.free_local(new_eip);
|
||||
ctx.builder
|
||||
.const_i32(ctx.start_of_current_instruction as i32);
|
||||
ctx.builder.const_i32(ctx.our_wasm_table_index as i32);
|
||||
ctx.builder.const_i32(ctx.state_flags.to_u32() as i32);
|
||||
ctx.builder.call_fn4_ret("jit_find_cache_entry_in_page");
|
||||
let new_basic_block_index = ctx.builder.tee_new_local();
|
||||
ctx.builder.const_i32(0);
|
||||
ctx.builder.ge_i32();
|
||||
ctx.builder.if_void();
|
||||
ctx.builder.get_local(&new_basic_block_index);
|
||||
ctx.builder.set_local(ctx.basic_block_index_local);
|
||||
ctx.builder.br(ctx.current_brtable_depth + 2); // to the loop
|
||||
ctx.builder.block_end();
|
||||
ctx.builder.free_local(new_basic_block_index);
|
||||
}
|
||||
ctx.builder.get_local(&new_basic_block_index);
|
||||
ctx.builder.set_local(ctx.basic_block_index_local);
|
||||
ctx.builder.br(ctx.current_brtable_depth + 1); // to the loop
|
||||
ctx.builder.block_end();
|
||||
ctx.builder.free_local(new_basic_block_index);
|
||||
}
|
||||
|
||||
pub fn gen_increment_timestamp_counter(builder: &mut WasmBuilder, n: i32) {
|
||||
|
@ -626,6 +635,76 @@ fn gen_safe_read(
|
|||
ctx.builder.free_local(entry_local);
|
||||
}
|
||||
|
||||
// Emits code that translates the virtual address held in `address_local`
// into a physical address.
//
// Fast path: the tlb_data entry for the address is loaded and accepted when
// its checked flag bits equal TLB_VALID.
// Slow path: get_phys_eip_slow_jit is called; if bit 0 of its result is set
// the generated code branches out (see the br_if depth comment below).
//
// On both successful paths the last emitted operations compute
// `entry & !0xFFF ^ address`.
pub fn gen_get_phys_eip(ctx: &mut JitContext, address_local: &WasmLocal) {
|
||||
// Similar to gen_safe_read, but return the physical eip rather than reading from memory
|
||||
// Does not (need to) handle mapped memory
|
||||
// XXX: Currently does not use ctx.start_of_current_instruction, but rather assumes that eip is
|
||||
// already correct (pointing at the current instruction)
|
||||
|
||||
ctx.builder.block_void();
|
||||
ctx.builder.get_local(&address_local);
|
||||
|
||||
// (address >> 12) << 2: offset of the i32 entry for this page, relative to &tlb_data[0]
ctx.builder.const_i32(12);
|
||||
ctx.builder.shr_u_i32();
|
||||
|
||||
ctx.builder.const_i32(2);
|
||||
ctx.builder.shl_i32();
|
||||
|
||||
ctx.builder
|
||||
.load_aligned_i32(unsafe { &tlb_data[0] as *const i32 as u32 });
|
||||
let entry_local = ctx.builder.tee_new_local();
|
||||
|
||||
// Flag bits to check: the low 12 bits except READONLY, GLOBAL, HAS_CODE and,
// when not compiling for cpl3, NO_USER
ctx.builder.const_i32(
|
||||
(0xFFF
|
||||
& !TLB_READONLY
|
||||
& !TLB_GLOBAL
|
||||
& !TLB_HAS_CODE
|
||||
& !(if ctx.cpu.cpl3() { 0 } else { TLB_NO_USER })) as i32,
|
||||
);
|
||||
ctx.builder.and_i32();
|
||||
|
||||
ctx.builder.const_i32(TLB_VALID as i32);
|
||||
ctx.builder.eq_i32();
|
||||
|
||||
// Fast path: leave the block opened above, skipping the slow path
ctx.builder.br_if(0);
|
||||
|
||||
if cfg!(feature = "profiler") {
|
||||
ctx.builder.get_local(&address_local);
|
||||
ctx.builder.get_local(&entry_local);
|
||||
ctx.builder.call_fn2("report_safe_read_jit_slow");
|
||||
}
|
||||
|
||||
// Slow path: store the helper's result in entry_local and test its bit 0
ctx.builder.get_local(&address_local);
|
||||
ctx.builder.call_fn1_ret("get_phys_eip_slow_jit");
|
||||
ctx.builder.tee_local(&entry_local);
|
||||
ctx.builder.const_i32(1);
|
||||
ctx.builder.and_i32();
|
||||
|
||||
if cfg!(feature = "profiler") {
|
||||
ctx.builder.if_void();
|
||||
gen_debug_track_jit_exit(ctx.builder, ctx.start_of_current_instruction); // XXX
|
||||
ctx.builder.block_end();
|
||||
|
||||
// The if above consumed the condition; recompute bit 0 for the br_if below
ctx.builder.get_local(&entry_local);
|
||||
ctx.builder.const_i32(1);
|
||||
ctx.builder.and_i32();
|
||||
}
|
||||
|
||||
// -2 for the exit-with-pagefault block, +1 for leaving the nested if from this function
|
||||
ctx.builder.br_if(ctx.current_brtable_depth - 2 + 1);
|
||||
|
||||
ctx.builder.block_end();
|
||||
|
||||
gen_profiler_stat_increment(ctx.builder, profiler::stat::SAFE_READ_FAST); // XXX: Both fast and slow
|
||||
|
||||
// Result: entry & !0xFFF ^ address
ctx.builder.get_local(&entry_local);
|
||||
ctx.builder.const_i32(!0xFFF);
|
||||
ctx.builder.and_i32();
|
||||
ctx.builder.get_local(&address_local);
|
||||
ctx.builder.xor_i32();
|
||||
|
||||
ctx.builder.free_local(entry_local);
|
||||
}
|
||||
|
||||
fn gen_safe_write(
|
||||
ctx: &mut JitContext,
|
||||
bits: BitSize,
|
||||
|
|
|
@ -17,7 +17,6 @@ extern "C" {
|
|||
pub fn io_port_write32(port: i32, value: i32);
|
||||
}
|
||||
|
||||
use ::jit;
|
||||
use cpu::fpu::fpu_set_tag_word;
|
||||
use cpu::global_pointers::*;
|
||||
use cpu::memory;
|
||||
|
@ -31,6 +30,8 @@ use cpu::misc_instr::{
|
|||
push16, push32,
|
||||
};
|
||||
use cpu::modrm::{resolve_modrm16, resolve_modrm32};
|
||||
use jit;
|
||||
use jit::is_near_end_of_page;
|
||||
use page::Page;
|
||||
use paging::OrPageFault;
|
||||
use profiler;
|
||||
|
@ -1493,14 +1494,28 @@ pub unsafe fn get_eflags() -> i32 {
|
|||
#[no_mangle]
|
||||
pub unsafe fn get_eflags_no_arith() -> i32 { return *flags; }
|
||||
|
||||
pub unsafe fn translate_address_read(address: i32) -> OrPageFault<u32> {
|
||||
let entry = tlb_data[(address as u32 >> 12) as usize];
|
||||
let user = *cpl == 3;
|
||||
// Translates a virtual address for reading and returns the result as an
// Option instead of triggering a page fault.
//
// Returns Some(physical address) on a usable TLB entry or a successful page
// walk, and None when the page walk reports a page fault. The page walk is
// called with side_effects = false.
pub fn translate_address_read_no_side_effects(address: i32) -> Option<u32> {
|
||||
let entry = unsafe { tlb_data[(address as u32 >> 12) as usize] };
|
||||
let user = unsafe { *cpl } == 3;
|
||||
// TLB hit: entry is valid and, for cpl 3, not marked TLB_NO_USER
if entry & (TLB_VALID | if user { TLB_NO_USER } else { 0 }) == TLB_VALID {
|
||||
Some((entry & !0xFFF ^ address) as u32)
|
||||
}
|
||||
else {
|
||||
match unsafe { do_page_walk(address, false, user, false) } {
|
||||
Ok(phys_addr_high) => Some((phys_addr_high | address & 0xFFF) as u32),
|
||||
Err(_pagefault) => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn translate_address_read(address: i32) -> OrPageFault<u32> {
|
||||
let entry = unsafe { tlb_data[(address as u32 >> 12) as usize] };
|
||||
let user = unsafe { *cpl == 3 };
|
||||
if entry & (TLB_VALID | if user { TLB_NO_USER } else { 0 }) == TLB_VALID {
|
||||
Ok((entry & !0xFFF ^ address) as u32)
|
||||
}
|
||||
else {
|
||||
Ok((do_page_translation(address, false, user)? | address & 0xFFF) as u32)
|
||||
Ok((unsafe { do_page_translation(address, false, user) }? | address & 0xFFF) as u32)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1511,7 +1526,7 @@ pub unsafe fn translate_address_read_jit(address: i32) -> OrPageFault<u32> {
|
|||
Ok((entry & !0xFFF ^ address) as u32)
|
||||
}
|
||||
else {
|
||||
match do_page_walk(address, false, user) {
|
||||
match do_page_walk(address, false, user, true) {
|
||||
Ok(phys_addr_high) => Ok((phys_addr_high | address & 0xFFF) as u32),
|
||||
Err(pagefault) => {
|
||||
trigger_pagefault_jit(pagefault);
|
||||
|
@ -1528,8 +1543,9 @@ pub struct PageFault {
|
|||
present: bool,
|
||||
}
|
||||
|
||||
#[inline(never)]
|
||||
pub unsafe fn do_page_translation(addr: i32, for_writing: bool, user: bool) -> OrPageFault<i32> {
|
||||
match do_page_walk(addr, for_writing, user) {
|
||||
match do_page_walk(addr, for_writing, user, true) {
|
||||
Ok(phys_addr) => Ok(phys_addr),
|
||||
Err(pagefault) => {
|
||||
trigger_pagefault(pagefault);
|
||||
|
@ -1538,7 +1554,12 @@ pub unsafe fn do_page_translation(addr: i32, for_writing: bool, user: bool) -> O
|
|||
}
|
||||
}
|
||||
|
||||
pub unsafe fn do_page_walk(addr: i32, for_writing: bool, user: bool) -> Result<i32, PageFault> {
|
||||
pub unsafe fn do_page_walk(
|
||||
addr: i32,
|
||||
for_writing: bool,
|
||||
user: bool,
|
||||
side_effects: bool,
|
||||
) -> Result<i32, PageFault> {
|
||||
let mut can_write: bool = true;
|
||||
let global;
|
||||
let mut allow_user: bool = true;
|
||||
|
@ -1550,6 +1571,8 @@ pub unsafe fn do_page_walk(addr: i32, for_writing: bool, user: bool) -> Result<i
|
|||
global = false
|
||||
}
|
||||
else {
|
||||
profiler::stat_increment(TLB_MISS);
|
||||
|
||||
let page_dir_addr = (*cr.offset(3) as u32 >> 2).wrapping_add((page >> 10) as u32) as i32;
|
||||
let page_dir_entry = read_aligned32(page_dir_addr as u32);
|
||||
// XXX
|
||||
|
@ -1598,7 +1621,7 @@ pub unsafe fn do_page_walk(addr: i32, for_writing: bool, user: bool) -> Result<i
|
|||
| PAGE_TABLE_ACCESSED_MASK
|
||||
| if for_writing { PAGE_TABLE_DIRTY_MASK } else { 0 };
|
||||
|
||||
if page_dir_entry != new_page_dir_entry {
|
||||
if side_effects && page_dir_entry != new_page_dir_entry {
|
||||
write_aligned32(page_dir_addr as u32, new_page_dir_entry);
|
||||
}
|
||||
|
||||
|
@ -1643,13 +1666,13 @@ pub unsafe fn do_page_walk(addr: i32, for_writing: bool, user: bool) -> Result<i
|
|||
// Set the accessed and dirty bits
|
||||
// Note: dirty bit is only set on the page table entry
|
||||
let new_page_dir_entry = page_dir_entry | PAGE_TABLE_ACCESSED_MASK;
|
||||
if new_page_dir_entry != page_dir_entry {
|
||||
if side_effects && new_page_dir_entry != page_dir_entry {
|
||||
write_aligned32(page_dir_addr as u32, new_page_dir_entry);
|
||||
}
|
||||
let new_page_table_entry = page_table_entry
|
||||
| PAGE_TABLE_ACCESSED_MASK
|
||||
| if for_writing { PAGE_TABLE_DIRTY_MASK } else { 0 };
|
||||
if page_table_entry != new_page_table_entry {
|
||||
if side_effects && page_table_entry != new_page_table_entry {
|
||||
write_aligned32(page_table_addr as u32, new_page_table_entry);
|
||||
}
|
||||
|
||||
|
@ -1657,7 +1680,7 @@ pub unsafe fn do_page_walk(addr: i32, for_writing: bool, user: bool) -> Result<i
|
|||
global = page_table_entry & PAGE_TABLE_GLOBAL_MASK == PAGE_TABLE_GLOBAL_MASK
|
||||
}
|
||||
}
|
||||
if tlb_data[page as usize] == 0 {
|
||||
if side_effects && tlb_data[page as usize] == 0 {
|
||||
if valid_tlb_entries_count == VALID_TLB_ENTRY_MAX {
|
||||
profiler::stat_increment(TLB_FULL);
|
||||
clear_tlb();
|
||||
|
@ -1693,7 +1716,9 @@ pub unsafe fn do_page_walk(addr: i32, for_writing: bool, user: bool) -> Result<i
|
|||
| if global && 0 != *cr.offset(4) & CR4_PGE { TLB_GLOBAL } else { 0 }
|
||||
| if has_code { TLB_HAS_CODE } else { 0 };
|
||||
dbg_assert!((high ^ page << 12) & 0xFFF == 0);
|
||||
tlb_data[page as usize] = high ^ page << 12 | info_bits;
|
||||
if side_effects {
|
||||
tlb_data[page as usize] = high ^ page << 12 | info_bits
|
||||
}
|
||||
return Ok(high);
|
||||
}
|
||||
|
||||
|
@ -1864,7 +1889,7 @@ pub unsafe fn translate_address_write_jit(address: i32) -> OrPageFault<u32> {
|
|||
Ok((entry & !0xFFF ^ address) as u32)
|
||||
}
|
||||
else {
|
||||
match do_page_walk(address, true, user) {
|
||||
match do_page_walk(address, true, user, true) {
|
||||
Ok(phys_addr_high) => Ok((phys_addr_high | address & 0xFFF) as u32),
|
||||
Err(pagefault) => {
|
||||
trigger_pagefault_jit(pagefault);
|
||||
|
@ -2330,8 +2355,9 @@ pub unsafe fn cycle_internal() {
|
|||
{
|
||||
in_jit = true;
|
||||
}
|
||||
let initial_eip = *instruction_pointer;
|
||||
call_indirect1(
|
||||
(wasm_table_index as u32).wrapping_add(WASM_TABLE_OFFSET as u32) as i32,
|
||||
wasm_table_index as i32 + WASM_TABLE_OFFSET as i32,
|
||||
initial_state,
|
||||
);
|
||||
#[cfg(debug_assertions)]
|
||||
|
@ -2357,10 +2383,16 @@ pub unsafe fn cycle_internal() {
|
|||
// Happens during exit due to loop iteration limit
|
||||
0
|
||||
};
|
||||
|
||||
::opstats::record_opstat_jit_exit(last_jump_opcode as u32);
|
||||
}
|
||||
|
||||
if Page::page_of(*previous_ip as u32) == Page::page_of(*instruction_pointer as u32) {
|
||||
if is_near_end_of_page(*instruction_pointer as u32) {
|
||||
profiler::stat_increment(RUN_FROM_CACHE_EXIT_NEAR_END_OF_PAGE);
|
||||
}
|
||||
else if Page::page_of(initial_eip as u32)
|
||||
== Page::page_of(*instruction_pointer as u32)
|
||||
{
|
||||
profiler::stat_increment(RUN_FROM_CACHE_EXIT_SAME_PAGE);
|
||||
}
|
||||
else {
|
||||
|
@ -2368,6 +2400,7 @@ pub unsafe fn cycle_internal() {
|
|||
}
|
||||
}
|
||||
else {
|
||||
let initial_eip = *instruction_pointer;
|
||||
jit::record_entry_point(phys_addr);
|
||||
|
||||
#[cfg(feature = "profiler")]
|
||||
|
@ -2389,6 +2422,7 @@ pub unsafe fn cycle_internal() {
|
|||
jit_run_interpreted(phys_addr as i32);
|
||||
|
||||
jit::jit_increase_hotness_and_maybe_compile(
|
||||
initial_eip,
|
||||
phys_addr,
|
||||
get_seg_cs() as u32,
|
||||
state_flags,
|
||||
|
@ -2847,6 +2881,20 @@ pub unsafe fn safe_read128s_slow_jit(addr: i32, eip: i32) -> i32 {
|
|||
safe_read_slow_jit(addr, 128, eip, false)
|
||||
}
|
||||
|
||||
// Slow path for the code emitted by gen_get_phys_eip (called there by name).
//
// Returns 1 when translate_address_read_jit fails, after copying
// instruction_pointer into previous_ip. Otherwise returns
// (physical ^ addr) & !0xFFF, whose low 12 bits are zero, so bit 0 tells the
// two cases apart.
#[no_mangle]
|
||||
pub unsafe fn get_phys_eip_slow_jit(addr: i32) -> i32 {
|
||||
match translate_address_read_jit(addr) {
|
||||
Err(()) => {
|
||||
*previous_ip = *instruction_pointer;
|
||||
1
|
||||
},
|
||||
Ok(addr_low) => {
|
||||
dbg_assert!(!in_mapped_range(addr_low as u32)); // same assumption as in read_imm8
|
||||
(addr_low as i32 ^ addr) & !0xFFF
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub unsafe fn safe_read_write8_slow_jit(addr: i32, eip: i32) -> i32 {
|
||||
safe_read_slow_jit(addr, 8, eip, true)
|
||||
|
@ -3508,3 +3556,21 @@ pub unsafe fn pic_call_irq(interrupt_nr: i32) {
|
|||
*previous_ip = *instruction_pointer; // XXX: What if called after instruction (port IO)
|
||||
call_interrupt_vector(interrupt_nr, false, None);
|
||||
}
|
||||
|
||||
// Debug check: translates the current instruction pointer with
// translate_address_read_jit and verifies it equals `next_block_addr`.
//
// On mismatch, logs `block_addr`, `next_block_addr`, previous_ip,
// instruction_pointer and the translated address (0 if translation failed)
// before the assertions run.
#[no_mangle]
|
||||
pub unsafe fn check_page_switch(block_addr: u32, next_block_addr: u32) {
|
||||
let x = translate_address_read_jit(*instruction_pointer);
|
||||
if x != Ok(next_block_addr) {
|
||||
dbg_log!(
|
||||
"page switch from={:x} to={:x} prev_eip={:x} eip={:x} phys_eip={:x}",
|
||||
block_addr,
|
||||
next_block_addr,
|
||||
*previous_ip,
|
||||
*instruction_pointer,
|
||||
x.unwrap_or(0),
|
||||
);
|
||||
}
|
||||
// The page offset of the expected address must match that of eip
dbg_assert!(next_block_addr & 0xFFF == *instruction_pointer as u32 & 0xFFF);
|
||||
dbg_assert!(x.is_ok());
|
||||
dbg_assert!(x == Ok(next_block_addr));
|
||||
}
|
||||
|
|
587
src/rust/jit.rs
587
src/rust/jit.rs
|
@ -17,8 +17,10 @@ use state_flags::CachedStateFlags;
|
|||
use util::SafeToU16;
|
||||
use wasmgen::wasm_builder::{WasmBuilder, WasmLocal};
|
||||
|
||||
type WasmTableIndex = u16;
|
||||
|
||||
mod unsafe_jit {
|
||||
use ::jit::CachedStateFlags;
|
||||
use jit::CachedStateFlags;
|
||||
|
||||
extern "C" {
|
||||
pub fn codegen_finalize(
|
||||
|
@ -50,9 +52,9 @@ pub const WASM_TABLE_SIZE: u32 = 900;
|
|||
|
||||
pub const HASH_PRIME: u32 = 6151;
|
||||
|
||||
pub const CHECK_JIT_CACHE_ARRAY_INVARIANTS: bool = false;
|
||||
pub const CHECK_JIT_STATE_INVARIANTS: bool = false;
|
||||
|
||||
pub const JIT_MAX_ITERATIONS_PER_FUNCTION: u32 = 10000;
|
||||
pub const JIT_MAX_ITERATIONS_PER_FUNCTION: u32 = 20011;
|
||||
|
||||
pub const JIT_ALWAYS_USE_LOOP_SAFETY: bool = true;
|
||||
|
||||
|
@ -98,16 +100,33 @@ enum PageState {
|
|||
}
|
||||
|
||||
pub struct JitState {
|
||||
wasm_builder: WasmBuilder,
|
||||
|
||||
// as an alternative to HashSet, we could use a bitmap of 4096 bits here
|
||||
// (faster, but uses much more memory)
|
||||
// or a compressed bitmap (likely faster)
|
||||
hot_pages: [u32; HASH_PRIME as usize],
|
||||
wasm_table_index_free_list: Vec<u16>,
|
||||
// or HashSet<u32> rather than nested
|
||||
entry_points: HashMap<Page, HashSet<u16>>,
|
||||
wasm_builder: WasmBuilder,
|
||||
hot_pages: [u32; HASH_PRIME as usize],
|
||||
|
||||
cache: BTreeMap<u32, Entry>,
|
||||
page_has_pending_code: HashMap<Page, (u16, PageState)>,
|
||||
wasm_table_index_free_list: Vec<WasmTableIndex>,
|
||||
used_wasm_table_indices: HashMap<WasmTableIndex, HashSet<Page>>,
|
||||
// All pages from used_wasm_table_indices
|
||||
// Used to improve the performance of jit_dirty_page and jit_page_has_code
|
||||
all_pages: HashSet<Page>,
|
||||
cache: HashMap<u32, Entry>,
|
||||
compiling: Option<(WasmTableIndex, PageState)>,
|
||||
}
|
||||
|
||||
// Consistency check for JitState, a no-op unless CHECK_JIT_STATE_INVARIANTS
// is true.
//
// Rebuilds the union of all page sets stored in used_wasm_table_indices and
// asserts that it equals ctx.all_pages.
pub fn check_jit_state_invariants(ctx: &mut JitState) {
|
||||
if !CHECK_JIT_STATE_INVARIANTS {
|
||||
return;
|
||||
}
|
||||
let mut all_pages = HashSet::new();
|
||||
for pages in ctx.used_wasm_table_indices.values() {
|
||||
all_pages.extend(pages);
|
||||
}
|
||||
dbg_assert!(ctx.all_pages == all_pages);
|
||||
}
|
||||
|
||||
impl JitState {
|
||||
|
@ -116,12 +135,16 @@ impl JitState {
|
|||
let wasm_table_indices = 1..=(WASM_TABLE_SIZE - 1) as u16;
|
||||
|
||||
JitState {
|
||||
hot_pages: [0; HASH_PRIME as usize],
|
||||
wasm_table_index_free_list: Vec::from_iter(wasm_table_indices),
|
||||
entry_points: HashMap::new(),
|
||||
wasm_builder: WasmBuilder::new(),
|
||||
cache: BTreeMap::new(),
|
||||
page_has_pending_code: HashMap::new(),
|
||||
|
||||
entry_points: HashMap::new(),
|
||||
hot_pages: [0; HASH_PRIME as usize],
|
||||
|
||||
wasm_table_index_free_list: Vec::from_iter(wasm_table_indices),
|
||||
used_wasm_table_indices: HashMap::new(),
|
||||
all_pages: HashSet::new(),
|
||||
cache: HashMap::new(),
|
||||
compiling: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -143,6 +166,7 @@ enum BasicBlockType {
|
|||
|
||||
struct BasicBlock {
|
||||
addr: u32,
|
||||
virt_addr: i32,
|
||||
last_instruction_addr: u32,
|
||||
end_addr: u32,
|
||||
is_entry_block: bool,
|
||||
|
@ -179,7 +203,9 @@ pub const JIT_INSTR_BLOCK_BOUNDARY_FLAG: u32 = 1 << 0;
|
|||
|
||||
fn jit_hot_hash_page(page: Page) -> u32 { page.to_u32() % HASH_PRIME }
|
||||
|
||||
fn is_near_end_of_page(address: u32) -> bool { address & 0xFFF >= 0x1000 - MAX_INSTRUCTION_LENGTH }
|
||||
// Returns true when the offset of `address` within its 4 KiB page
// (address & 0xFFF) is at least 0x1000 - MAX_INSTRUCTION_LENGTH.
pub fn is_near_end_of_page(address: u32) -> bool {
|
||||
address & 0xFFF >= 0x1000 - MAX_INSTRUCTION_LENGTH
|
||||
}
|
||||
|
||||
pub fn jit_find_cache_entry(phys_address: u32, state_flags: CachedStateFlags) -> CachedCode {
|
||||
if is_near_end_of_page(phys_address) {
|
||||
|
@ -197,9 +223,7 @@ pub fn jit_find_cache_entry(phys_address: u32, state_flags: CachedStateFlags) ->
|
|||
};
|
||||
}
|
||||
else {
|
||||
if entry.state_flags != state_flags {
|
||||
profiler::stat_increment(stat::RUN_INTERPRETED_DIFFERENT_STATE);
|
||||
}
|
||||
profiler::stat_increment(stat::RUN_INTERPRETED_DIFFERENT_STATE);
|
||||
}
|
||||
},
|
||||
None => {},
|
||||
|
@ -210,14 +234,11 @@ pub fn jit_find_cache_entry(phys_address: u32, state_flags: CachedStateFlags) ->
|
|||
|
||||
#[no_mangle]
|
||||
pub fn jit_find_cache_entry_in_page(
|
||||
virt_eip: i32,
|
||||
phys_eip: u32,
|
||||
phys_address: u32,
|
||||
wasm_table_index: u16,
|
||||
state_flags: u32,
|
||||
) -> i32 {
|
||||
let state_flags = CachedStateFlags::of_u32(state_flags);
|
||||
let phys_address = virt_eip as u32 & 0xFFF | phys_eip & !0xFFF;
|
||||
|
||||
let ctx = get_jit_state();
|
||||
|
||||
match ctx.cache.get(&phys_address) {
|
||||
|
@ -253,31 +274,67 @@ pub fn record_entry_point(phys_address: u32) {
|
|||
}
|
||||
}
|
||||
|
||||
fn jit_find_basic_blocks(
|
||||
page: Page,
|
||||
entry_points: &HashSet<u16>,
|
||||
cpu: CpuContext,
|
||||
) -> (Vec<BasicBlock>, bool) {
|
||||
let mut to_visit_stack: Vec<u16> = entry_points.iter().cloned().collect();
|
||||
let mut marked_as_entry: HashSet<u16> = entry_points.clone();
|
||||
let page_high_bits = page.to_address();
|
||||
let mut basic_blocks: BTreeMap<u32, BasicBlock> = BTreeMap::new();
|
||||
let mut requires_loop_limit = false;
|
||||
// Maximum number of pages per wasm module. Necessary for the following reasons:
|
||||
// - There is an upper limit on the size of a single function in wasm (currently ~7MB in all browsers)
|
||||
// See https://github.com/WebAssembly/design/issues/1138
|
||||
// - v8 poorly handles large br_table elements and OOMs on modules much smaller than the above limit
|
||||
// See https://bugs.chromium.org/p/v8/issues/detail?id=9697 and https://bugs.chromium.org/p/v8/issues/detail?id=9141
|
||||
// Will hopefully be fixed in the near future by generating direct control flow
|
||||
const MAX_PAGES: usize = 5;
|
||||
|
||||
while let Some(to_visit_offset) = to_visit_stack.pop() {
|
||||
let to_visit = to_visit_offset as u32 | page_high_bits;
|
||||
if basic_blocks.contains_key(&to_visit) {
|
||||
fn jit_find_basic_blocks(
|
||||
ctx: &mut JitState,
|
||||
entry_points: HashSet<i32>,
|
||||
cpu: CpuContext,
|
||||
) -> Vec<BasicBlock> {
|
||||
let mut to_visit_stack: Vec<i32> = entry_points.iter().map(|e| *e).collect();
|
||||
let mut marked_as_entry: HashSet<i32> = entry_points.clone();
|
||||
|
||||
let mut basic_blocks: BTreeMap<u32, BasicBlock> = BTreeMap::new();
|
||||
let mut pages: HashSet<Page> = HashSet::new();
|
||||
|
||||
// 16-bit does not work correctly, most likely due to instruction pointer wrap-around
|
||||
let max_pages = if cpu.state_flags.is_32() { MAX_PAGES } else { 1 };
|
||||
|
||||
while let Some(to_visit) = to_visit_stack.pop() {
|
||||
let phys_addr = match cpu::translate_address_read_no_side_effects(to_visit) {
|
||||
None => {
|
||||
dbg_log!("Not analysing {:x} (page not mapped)", to_visit);
|
||||
continue;
|
||||
},
|
||||
Some(phys_addr) => phys_addr,
|
||||
};
|
||||
|
||||
if basic_blocks.contains_key(&phys_addr) {
|
||||
continue;
|
||||
}
|
||||
if is_near_end_of_page(to_visit) {
|
||||
|
||||
pages.insert(Page::page_of(phys_addr));
|
||||
dbg_assert!(pages.len() <= max_pages);
|
||||
|
||||
let may_include_page = |page| pages.contains(&page) || pages.len() < max_pages;
|
||||
|
||||
if let Some(entry_points) = ctx.entry_points.remove(&Page::page_of(phys_addr)) {
|
||||
let address_hash = jit_hot_hash_page(Page::page_of(phys_addr)) as usize;
|
||||
ctx.hot_pages[address_hash] = 0;
|
||||
|
||||
for addr_low in entry_points {
|
||||
let addr = to_visit & !0xFFF | addr_low as i32;
|
||||
to_visit_stack.push(addr);
|
||||
marked_as_entry.insert(addr);
|
||||
}
|
||||
}
|
||||
|
||||
if is_near_end_of_page(phys_addr) {
|
||||
// Empty basic block, don't insert
|
||||
profiler::stat_increment(stat::COMPILE_CUT_OFF_AT_END_OF_PAGE);
|
||||
continue;
|
||||
}
|
||||
|
||||
let mut current_address = to_visit;
|
||||
let mut current_address = phys_addr;
|
||||
let mut current_block = BasicBlock {
|
||||
addr: current_address,
|
||||
virt_addr: to_visit,
|
||||
last_instruction_addr: 0,
|
||||
end_addr: 0,
|
||||
ty: BasicBlockType::Exit,
|
||||
|
@ -296,14 +353,18 @@ fn jit_find_basic_blocks(
|
|||
let has_next_instruction = !analysis.no_next_instruction;
|
||||
current_address = ctx.eip;
|
||||
|
||||
dbg_assert!(Page::page_of(current_address) == Page::page_of(addr_before_instruction));
|
||||
let current_virt_addr = to_visit & !0xFFF | current_address as i32 & 0xFFF;
|
||||
|
||||
match analysis.ty {
|
||||
AnalysisType::Normal | AnalysisType::STI => {
|
||||
dbg_assert!(has_next_instruction);
|
||||
|
||||
if current_block.has_sti {
|
||||
// Convert next instruction after STI (i.e., the current instruction) into block boundary
|
||||
marked_as_entry.insert(current_address as u16 & 0xFFF);
|
||||
to_visit_stack.push(current_address as u16 & 0xFFF);
|
||||
|
||||
marked_as_entry.insert(current_virt_addr);
|
||||
to_visit_stack.push(current_virt_addr);
|
||||
|
||||
current_block.last_instruction_addr = addr_before_instruction;
|
||||
current_block.end_addr = current_address;
|
||||
|
@ -341,32 +402,30 @@ fn jit_find_basic_blocks(
|
|||
// conditional jump: continue at next and continue at jump target
|
||||
|
||||
let jump_target = if is_32 {
|
||||
current_address.wrapping_add(offset as u32)
|
||||
current_virt_addr + offset
|
||||
}
|
||||
else {
|
||||
ctx.cs_offset.wrapping_add(
|
||||
(current_address
|
||||
.wrapping_sub(ctx.cs_offset)
|
||||
.wrapping_add(offset as u32))
|
||||
& 0xFFFF,
|
||||
)
|
||||
ctx.cs_offset as i32
|
||||
+ (current_virt_addr - ctx.cs_offset as i32 + offset & 0xFFFF)
|
||||
};
|
||||
|
||||
dbg_assert!(has_next_instruction);
|
||||
to_visit_stack.push(current_address as u16 & 0xFFF);
|
||||
to_visit_stack.push(current_virt_addr);
|
||||
|
||||
let next_block_branch_taken_addr;
|
||||
|
||||
if Page::page_of(jump_target) == page && !is_near_end_of_page(jump_target) {
|
||||
to_visit_stack.push(jump_target as u16 & 0xFFF);
|
||||
|
||||
next_block_branch_taken_addr = Some(jump_target);
|
||||
|
||||
// Very simple heuristic for "infinite loops": This
|
||||
// detects Linux's "calibrating delay loop"
|
||||
if jump_target == current_block.addr {
|
||||
dbg_log!("Basic block looping back to front");
|
||||
requires_loop_limit = true;
|
||||
if let Some(phys_jump_target) =
|
||||
cpu::translate_address_read_no_side_effects(jump_target as i32)
|
||||
{
|
||||
if !is_near_end_of_page(jump_target as u32)
|
||||
&& may_include_page(Page::page_of(phys_jump_target))
|
||||
{
|
||||
pages.insert(Page::page_of(phys_jump_target));
|
||||
to_visit_stack.push(jump_target);
|
||||
next_block_branch_taken_addr = Some(phys_jump_target);
|
||||
}
|
||||
else {
|
||||
next_block_branch_taken_addr = None;
|
||||
}
|
||||
}
|
||||
else {
|
||||
|
@ -401,28 +460,34 @@ fn jit_find_basic_blocks(
|
|||
// non-conditional jump: continue at jump target
|
||||
|
||||
let jump_target = if is_32 {
|
||||
current_address.wrapping_add(offset as u32)
|
||||
current_virt_addr + offset
|
||||
}
|
||||
else {
|
||||
ctx.cs_offset.wrapping_add(
|
||||
(current_address
|
||||
.wrapping_sub(ctx.cs_offset)
|
||||
.wrapping_add(offset as u32))
|
||||
& 0xFFFF,
|
||||
)
|
||||
ctx.cs_offset as i32
|
||||
+ (current_virt_addr - ctx.cs_offset as i32 + offset & 0xFFFF)
|
||||
};
|
||||
|
||||
if has_next_instruction {
|
||||
// Execution will eventually come back to the next instruction (CALL)
|
||||
marked_as_entry.insert(current_address as u16 & 0xFFF);
|
||||
to_visit_stack.push(current_address as u16 & 0xFFF);
|
||||
marked_as_entry.insert(current_virt_addr);
|
||||
to_visit_stack.push(current_virt_addr);
|
||||
}
|
||||
|
||||
if Page::page_of(jump_target) == page && !is_near_end_of_page(jump_target) {
|
||||
current_block.ty = BasicBlockType::Normal {
|
||||
next_block_addr: jump_target,
|
||||
};
|
||||
to_visit_stack.push(jump_target as u16 & 0xFFF);
|
||||
if let Some(phys_jump_target) =
|
||||
cpu::translate_address_read_no_side_effects(jump_target as i32)
|
||||
{
|
||||
if !is_near_end_of_page(jump_target as u32)
|
||||
&& may_include_page(Page::page_of(phys_jump_target))
|
||||
{
|
||||
pages.insert(Page::page_of(phys_jump_target));
|
||||
to_visit_stack.push(jump_target);
|
||||
current_block.ty = BasicBlockType::Normal {
|
||||
next_block_addr: phys_jump_target,
|
||||
};
|
||||
}
|
||||
else {
|
||||
current_block.ty = BasicBlockType::Exit;
|
||||
}
|
||||
}
|
||||
else {
|
||||
current_block.ty = BasicBlockType::Exit;
|
||||
|
@ -441,8 +506,8 @@ fn jit_find_basic_blocks(
|
|||
// to the next instruction. Create a new basic block
|
||||
// starting at the next instruction and register it as an
|
||||
// entry point
|
||||
marked_as_entry.insert(current_address as u16 & 0xFFF);
|
||||
to_visit_stack.push(current_address as u16 & 0xFFF);
|
||||
marked_as_entry.insert(current_virt_addr);
|
||||
to_visit_stack.push(current_virt_addr);
|
||||
}
|
||||
|
||||
current_block.last_instruction_addr = addr_before_instruction;
|
||||
|
@ -463,14 +528,16 @@ fn jit_find_basic_blocks(
|
|||
.range(..current_block.addr)
|
||||
.next_back()
|
||||
.filter(|(_, previous_block)| (!previous_block.has_sti))
|
||||
.map(|(_, previous_block)| (previous_block.addr, previous_block.end_addr));
|
||||
.map(|(_, previous_block)| previous_block.clone());
|
||||
|
||||
if let Some((start_addr, end_addr)) = previous_block {
|
||||
if current_block.addr < end_addr {
|
||||
if let Some(previous_block) = previous_block {
|
||||
if current_block.addr < previous_block.end_addr {
|
||||
// If this block overlaps with the previous block, re-analyze the previous block
|
||||
let old_block = basic_blocks.remove(&start_addr);
|
||||
to_visit_stack.push(previous_block.virt_addr);
|
||||
|
||||
let addr = previous_block.addr;
|
||||
let old_block = basic_blocks.remove(&addr);
|
||||
dbg_assert!(old_block.is_some());
|
||||
to_visit_stack.push(start_addr as u16 & 0xFFF);
|
||||
|
||||
// Note that this does not ensure the invariant that two consecutive blocks don't
|
||||
// overlap. For that, we also need to check the following block.
|
||||
|
@ -484,8 +551,10 @@ fn jit_find_basic_blocks(
|
|||
basic_blocks.insert(current_block.addr, current_block);
|
||||
}
|
||||
|
||||
dbg_assert!(pages.len() <= max_pages);
|
||||
|
||||
for block in basic_blocks.values_mut() {
|
||||
if marked_as_entry.contains(&(block.addr as u16 & 0xFFF)) {
|
||||
if marked_as_entry.contains(&block.virt_addr) {
|
||||
block.is_entry_block = true;
|
||||
}
|
||||
}
|
||||
|
@ -511,27 +580,31 @@ fn jit_find_basic_blocks(
|
|||
}
|
||||
}
|
||||
|
||||
(basic_blocks, requires_loop_limit)
|
||||
basic_blocks
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
#[cfg(debug_assertions)]
|
||||
pub fn jit_force_generate_unsafe(phys_addr: u32) {
|
||||
pub fn jit_force_generate_unsafe(virt_addr: i32) {
|
||||
let ctx = get_jit_state();
|
||||
let phys_addr = cpu::translate_address_read(virt_addr).unwrap();
|
||||
record_entry_point(phys_addr);
|
||||
let cs_offset = cpu::get_seg_cs() as u32;
|
||||
let state_flags = cpu::pack_current_state_flags();
|
||||
jit_analyze_and_generate(ctx, Page::page_of(phys_addr), cs_offset, state_flags);
|
||||
jit_analyze_and_generate(ctx, virt_addr, phys_addr, cs_offset, state_flags);
|
||||
}
|
||||
|
||||
#[inline(never)]
|
||||
fn jit_analyze_and_generate(
|
||||
ctx: &mut JitState,
|
||||
page: Page,
|
||||
virt_entry_point: i32,
|
||||
phys_entry_point: u32,
|
||||
cs_offset: u32,
|
||||
state_flags: CachedStateFlags,
|
||||
) {
|
||||
if ctx.page_has_pending_code.contains_key(&page) {
|
||||
let page = Page::page_of(phys_entry_point);
|
||||
|
||||
if ctx.compiling.is_some() {
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -548,17 +621,24 @@ fn jit_analyze_and_generate(
|
|||
state_flags,
|
||||
};
|
||||
|
||||
let (basic_blocks, requires_loop_limit) =
|
||||
jit_find_basic_blocks(page, &entry_points, cpu.clone());
|
||||
dbg_assert!(
|
||||
cpu::translate_address_read_no_side_effects(virt_entry_point).unwrap()
|
||||
== phys_entry_point
|
||||
);
|
||||
let virt_page = Page::page_of(virt_entry_point as u32);
|
||||
let entry_points: HashSet<i32> = entry_points
|
||||
.iter()
|
||||
.map(|e| virt_page.to_address() as i32 | *e as i32)
|
||||
.collect();
|
||||
let basic_blocks = jit_find_basic_blocks(ctx, entry_points, cpu.clone());
|
||||
|
||||
//for b in basic_blocks.iter() {
|
||||
// dbg_log!(
|
||||
// "> Basic block from {:x} to {:x}, is_entry={}",
|
||||
// b.addr,
|
||||
// b.end_addr,
|
||||
// b.is_entry_block
|
||||
// );
|
||||
//}
|
||||
let mut pages = HashSet::new();
|
||||
|
||||
for b in basic_blocks.iter() {
|
||||
// Remove this assertion once page-crossing jit is enabled
|
||||
dbg_assert!(Page::page_of(b.addr) == Page::page_of(b.end_addr));
|
||||
pages.insert(Page::page_of(b.addr));
|
||||
}
|
||||
|
||||
if ctx.wasm_table_index_free_list.is_empty() {
|
||||
dbg_log!("wasm_table_index_free_list empty, clearing cache",);
|
||||
|
@ -588,9 +668,14 @@ fn jit_analyze_and_generate(
|
|||
.expect("allocate wasm table index");
|
||||
dbg_assert!(wasm_table_index != 0);
|
||||
|
||||
dbg_assert!(!pages.is_empty());
|
||||
dbg_assert!(pages.len() <= MAX_PAGES);
|
||||
ctx.used_wasm_table_indices
|
||||
.insert(wasm_table_index, pages.clone());
|
||||
ctx.all_pages.extend(pages.clone());
|
||||
|
||||
jit_generate_module(
|
||||
&basic_blocks,
|
||||
requires_loop_limit,
|
||||
cpu.clone(),
|
||||
&mut ctx.wasm_builder,
|
||||
wasm_table_index,
|
||||
|
@ -601,14 +686,14 @@ fn jit_analyze_and_generate(
|
|||
stat::COMPILE_WASM_TOTAL_BYTES,
|
||||
ctx.wasm_builder.get_output_len() as u64,
|
||||
);
|
||||
profiler::stat_increment_by(stat::COMPILE_PAGE, pages.len() as u64);
|
||||
|
||||
cpu::tlb_set_has_code(page, true);
|
||||
for &p in &pages {
|
||||
cpu::tlb_set_has_code(p, true);
|
||||
}
|
||||
|
||||
let previous_state = ctx.page_has_pending_code.insert(
|
||||
page,
|
||||
(wasm_table_index, PageState::Compiling { basic_blocks }),
|
||||
);
|
||||
dbg_assert!(previous_state.is_none());
|
||||
dbg_assert!(ctx.compiling.is_none());
|
||||
ctx.compiling = Some((wasm_table_index, PageState::Compiling { basic_blocks }));
|
||||
|
||||
let phys_addr = page.to_address();
|
||||
|
||||
|
@ -624,9 +709,11 @@ fn jit_analyze_and_generate(
|
|||
profiler::stat_increment(stat::COMPILE_SUCCESS);
|
||||
}
|
||||
else {
|
||||
//dbg_log("No basic blocks, not generating code");
|
||||
//dbg_log!("No basic blocks, not generating code");
|
||||
// Nothing to do
|
||||
}
|
||||
|
||||
check_jit_state_invariants(ctx);
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
|
@ -639,15 +726,20 @@ pub fn codegen_finalize_finished(
|
|||
|
||||
dbg_assert!(wasm_table_index != 0);
|
||||
|
||||
let page = Page::page_of(phys_addr);
|
||||
dbg_log!(
|
||||
"Finished compiling for page at {:x}",
|
||||
Page::page_of(phys_addr).to_address()
|
||||
);
|
||||
|
||||
let basic_blocks = match ctx.page_has_pending_code.remove(&page) {
|
||||
let basic_blocks = match mem::replace(&mut ctx.compiling, None) {
|
||||
None => {
|
||||
dbg_assert!(false);
|
||||
return;
|
||||
},
|
||||
Some((in_progress_wasm_table_index, PageState::CompilingWritten)) => {
|
||||
dbg_assert!(wasm_table_index == in_progress_wasm_table_index);
|
||||
|
||||
profiler::stat_increment(stat::INVALIDATE_MODULE_WRITTEN_WHILE_COMPILED);
|
||||
free_wasm_table_index(ctx, wasm_table_index);
|
||||
return;
|
||||
},
|
||||
|
@ -665,9 +757,8 @@ pub fn codegen_finalize_finished(
|
|||
for (i, block) in basic_blocks.iter().enumerate() {
|
||||
profiler::stat_increment(stat::COMPILE_BASIC_BLOCK);
|
||||
|
||||
if block.is_entry_block && block.addr != block.end_addr {
|
||||
dbg_assert!(block.addr != 0);
|
||||
|
||||
dbg_assert!(block.addr < block.end_addr);
|
||||
if block.is_entry_block {
|
||||
let initial_state = i.safe_to_u16();
|
||||
|
||||
let entry = Entry {
|
||||
|
@ -682,10 +773,17 @@ pub fn codegen_finalize_finished(
|
|||
opcode: memory::read32s(block.addr) as u32,
|
||||
};
|
||||
|
||||
let old_entry = ctx.cache.insert(block.addr, entry);
|
||||
let maybe_old_entry = ctx.cache.insert(block.addr, entry);
|
||||
|
||||
if let Some(old_entry) = old_entry {
|
||||
if let Some(old_entry) = maybe_old_entry {
|
||||
check_for_unused_wasm_table_index.insert(old_entry.wasm_table_index);
|
||||
|
||||
if old_entry.state_flags == state_flags {
|
||||
// TODO: stat
|
||||
}
|
||||
else {
|
||||
// TODO: stat
|
||||
}
|
||||
}
|
||||
|
||||
entry_point_count += 1;
|
||||
|
@ -695,18 +793,48 @@ pub fn codegen_finalize_finished(
|
|||
|
||||
dbg_assert!(entry_point_count > 0);
|
||||
|
||||
for (_, entry) in ctx.cache.range(page.address_range()) {
|
||||
check_for_unused_wasm_table_index.remove(&entry.wasm_table_index);
|
||||
for index in check_for_unused_wasm_table_index {
|
||||
let pages = ctx.used_wasm_table_indices.get(&index).unwrap();
|
||||
|
||||
let mut is_used = false;
|
||||
'outer: for p in pages {
|
||||
for addr in p.address_range() {
|
||||
if let Some(entry) = ctx.cache.get(&addr) {
|
||||
if entry.wasm_table_index == index {
|
||||
is_used = true;
|
||||
break 'outer;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !is_used {
|
||||
profiler::stat_increment(stat::INVALIDATE_MODULE_UNUSED_AFTER_OVERWRITE);
|
||||
free_wasm_table_index(ctx, index);
|
||||
}
|
||||
|
||||
if !is_used {
|
||||
for (_, entry) in &ctx.cache {
|
||||
dbg_assert!(entry.wasm_table_index != index);
|
||||
}
|
||||
}
|
||||
else {
|
||||
let mut ok = false;
|
||||
for (_, entry) in &ctx.cache {
|
||||
if entry.wasm_table_index == index {
|
||||
ok = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
dbg_assert!(ok);
|
||||
}
|
||||
}
|
||||
|
||||
for index in check_for_unused_wasm_table_index {
|
||||
free_wasm_table_index(ctx, index);
|
||||
}
|
||||
check_jit_state_invariants(ctx);
|
||||
}
|
||||
|
||||
fn jit_generate_module(
|
||||
basic_blocks: &Vec<BasicBlock>,
|
||||
requires_loop_limit: bool,
|
||||
mut cpu: CpuContext,
|
||||
builder: &mut WasmBuilder,
|
||||
wasm_table_index: u16,
|
||||
|
@ -725,7 +853,7 @@ fn jit_generate_module(
|
|||
let gen_local_state = builder.set_new_local();
|
||||
|
||||
// initialise max_iterations
|
||||
let gen_local_iteration_counter = if JIT_ALWAYS_USE_LOOP_SAFETY || requires_loop_limit {
|
||||
let gen_local_iteration_counter = if JIT_ALWAYS_USE_LOOP_SAFETY {
|
||||
builder.const_i32(JIT_MAX_ITERATIONS_PER_FUNCTION as i32);
|
||||
Some(builder.set_new_local())
|
||||
}
|
||||
|
@ -840,6 +968,22 @@ fn jit_generate_module(
|
|||
// - All instructions that don't change eip
|
||||
// - Unconditional jump
|
||||
|
||||
if Page::page_of(*next_block_addr) != Page::page_of(block.addr) {
|
||||
codegen::gen_page_switch_check(
|
||||
ctx,
|
||||
*next_block_addr,
|
||||
block.last_instruction_addr,
|
||||
);
|
||||
|
||||
#[cfg(debug_assertions)]
|
||||
codegen::gen_fn2_const(
|
||||
ctx.builder,
|
||||
"check_page_switch",
|
||||
block.addr,
|
||||
*next_block_addr,
|
||||
);
|
||||
}
|
||||
|
||||
let next_basic_block_index = *basic_block_indices
|
||||
.get(&next_block_addr)
|
||||
.expect("basic_block_indices.get (Normal)");
|
||||
|
@ -882,6 +1026,29 @@ fn jit_generate_module(
|
|||
.get(&next_block_branch_taken_addr)
|
||||
.expect("basic_block_indices.get (branch taken)");
|
||||
|
||||
dbg_assert!(
|
||||
(block.end_addr + jump_offset as u32) & 0xFFF
|
||||
== next_block_branch_taken_addr & 0xFFF
|
||||
);
|
||||
|
||||
if Page::page_of(next_block_branch_taken_addr) != Page::page_of(block.addr) {
|
||||
ctx.current_brtable_depth += 1;
|
||||
codegen::gen_page_switch_check(
|
||||
ctx,
|
||||
next_block_branch_taken_addr,
|
||||
block.last_instruction_addr,
|
||||
);
|
||||
ctx.current_brtable_depth -= 1;
|
||||
|
||||
#[cfg(debug_assertions)]
|
||||
codegen::gen_fn2_const(
|
||||
ctx.builder,
|
||||
"check_page_switch",
|
||||
block.addr,
|
||||
next_block_branch_taken_addr,
|
||||
);
|
||||
}
|
||||
|
||||
ctx.builder
|
||||
.const_i32(next_basic_block_branch_taken_index as i32);
|
||||
ctx.builder.set_local(&gen_local_state);
|
||||
|
@ -896,6 +1063,7 @@ fn jit_generate_module(
|
|||
}
|
||||
|
||||
if let Some(next_block_addr) = next_block_addr {
|
||||
dbg_assert!(Page::page_of(next_block_addr) == Page::page_of(block.addr));
|
||||
// Branch not taken
|
||||
|
||||
let next_basic_block_index = *basic_block_indices
|
||||
|
@ -1029,6 +1197,7 @@ fn jit_generate_basic_block(ctx: &mut JitContext, block: &BasicBlock) {
|
|||
|
||||
#[no_mangle]
|
||||
pub fn jit_increase_hotness_and_maybe_compile(
|
||||
virt_address: i32,
|
||||
phys_address: u32,
|
||||
cs_offset: u32,
|
||||
state_flags: CachedStateFlags,
|
||||
|
@ -1039,47 +1208,129 @@ pub fn jit_increase_hotness_and_maybe_compile(
|
|||
let address_hash = jit_hot_hash_page(page) as usize;
|
||||
ctx.hot_pages[address_hash] += hotness;
|
||||
if ctx.hot_pages[address_hash] >= JIT_THRESHOLD {
|
||||
ctx.hot_pages[address_hash] = 0;
|
||||
jit_analyze_and_generate(ctx, page, cs_offset, state_flags)
|
||||
if ctx.compiling.is_some() {
|
||||
return;
|
||||
}
|
||||
// only try generating if we're in the correct address space
|
||||
if cpu::translate_address_read_no_side_effects(virt_address) == Some(phys_address) {
|
||||
ctx.hot_pages[address_hash] = 0;
|
||||
jit_analyze_and_generate(ctx, virt_address, phys_address, cs_offset, state_flags)
|
||||
}
|
||||
else {
|
||||
profiler::stat_increment(stat::COMPILE_WRONG_ADDRESS_SPACE);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
fn free_wasm_table_index(ctx: &mut JitState, wasm_table_index: u16) {
|
||||
if CHECK_JIT_CACHE_ARRAY_INVARIANTS {
|
||||
if CHECK_JIT_STATE_INVARIANTS {
|
||||
dbg_assert!(!ctx.wasm_table_index_free_list.contains(&wasm_table_index));
|
||||
|
||||
match &ctx.compiling {
|
||||
Some((wasm_table_index_compiling, _)) => {
|
||||
dbg_assert!(
|
||||
*wasm_table_index_compiling != wasm_table_index,
|
||||
"Attempt to free wasm table index that is currently being compiled"
|
||||
);
|
||||
},
|
||||
_ => {},
|
||||
}
|
||||
}
|
||||
|
||||
match ctx.used_wasm_table_indices.remove(&wasm_table_index) {
|
||||
None => dbg_assert!(false),
|
||||
Some(_pages) => {
|
||||
//dbg_assert!(!pages.is_empty()); // only if CompilingWritten
|
||||
},
|
||||
}
|
||||
ctx.wasm_table_index_free_list.push(wasm_table_index);
|
||||
|
||||
dbg_assert!(
|
||||
ctx.wasm_table_index_free_list.len() + ctx.used_wasm_table_indices.len()
|
||||
== WASM_TABLE_SIZE as usize - 1
|
||||
);
|
||||
|
||||
// It is not strictly necessary to clear the function, but it will fail more predictably if we
|
||||
// accidentally use the function and may garbage collect unused modules earlier
|
||||
jit_clear_func(wasm_table_index);
|
||||
|
||||
rebuild_all_pages(ctx);
|
||||
|
||||
check_jit_state_invariants(ctx);
|
||||
}
|
||||
|
||||
pub fn rebuild_all_pages(ctx: &mut JitState) {
|
||||
// rebuild ctx.all_pages
|
||||
let mut all_pages = HashSet::new();
|
||||
for pages in ctx.used_wasm_table_indices.values() {
|
||||
all_pages.extend(pages);
|
||||
}
|
||||
ctx.all_pages = all_pages;
|
||||
}
|
||||
|
||||
/// Register a write in this page: Delete all present code
|
||||
pub fn jit_dirty_page(ctx: &mut JitState, page: Page) {
|
||||
let mut did_have_code = false;
|
||||
|
||||
let entries: Vec<u32> = ctx
|
||||
.cache
|
||||
.range(page.address_range())
|
||||
.map(|(i, _)| *i)
|
||||
.collect();
|
||||
|
||||
let mut index_to_free = HashSet::new();
|
||||
|
||||
for phys_addr in entries {
|
||||
let entry = ctx.cache.remove(&phys_addr).unwrap();
|
||||
if ctx.all_pages.contains(&page) {
|
||||
profiler::stat_increment(stat::INVALIDATE_PAGE_HAD_CODE);
|
||||
did_have_code = true;
|
||||
index_to_free.insert(entry.wasm_table_index);
|
||||
}
|
||||
let mut index_to_free = HashSet::new();
|
||||
|
||||
for index in index_to_free {
|
||||
free_wasm_table_index(ctx, index)
|
||||
let compiling = match &ctx.compiling {
|
||||
Some((wasm_table_index, _)) => Some(*wasm_table_index),
|
||||
None => None,
|
||||
};
|
||||
|
||||
for (&wasm_table_index, pages) in &ctx.used_wasm_table_indices {
|
||||
if Some(wasm_table_index) != compiling && pages.contains(&page) {
|
||||
index_to_free.insert(wasm_table_index);
|
||||
}
|
||||
}
|
||||
|
||||
match &ctx.compiling {
|
||||
None => {},
|
||||
Some((_, PageState::CompilingWritten)) => {},
|
||||
Some((wasm_table_index, PageState::Compiling { .. })) => {
|
||||
let pages = ctx
|
||||
.used_wasm_table_indices
|
||||
.get_mut(wasm_table_index)
|
||||
.unwrap();
|
||||
if pages.contains(&page) {
|
||||
pages.clear();
|
||||
ctx.compiling = Some((*wasm_table_index, PageState::CompilingWritten));
|
||||
rebuild_all_pages(ctx);
|
||||
}
|
||||
},
|
||||
}
|
||||
|
||||
for index in &index_to_free {
|
||||
match ctx.used_wasm_table_indices.get(&index) {
|
||||
None => dbg_assert!(false),
|
||||
Some(pages) => {
|
||||
for &p in pages {
|
||||
for addr in p.address_range() {
|
||||
if let Some(e) = ctx.cache.get(&addr) {
|
||||
if index_to_free.contains(&e.wasm_table_index) {
|
||||
ctx.cache.remove(&addr);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
for index in index_to_free {
|
||||
profiler::stat_increment(stat::INVALIDATE_MODULE_DIRTY_PAGE);
|
||||
free_wasm_table_index(ctx, index)
|
||||
}
|
||||
}
|
||||
|
||||
match ctx.entry_points.remove(&page) {
|
||||
None => {},
|
||||
Some(_entry_points) => {
|
||||
profiler::stat_increment(stat::INVALIDATE_PAGE_HAD_ENTRY_POINTS);
|
||||
did_have_code = true;
|
||||
|
||||
// don't try to compile code in this page anymore until it's hot again
|
||||
|
@ -1087,22 +1338,22 @@ pub fn jit_dirty_page(ctx: &mut JitState, page: Page) {
|
|||
},
|
||||
}
|
||||
|
||||
match ctx.page_has_pending_code.get(&page) {
|
||||
None => {},
|
||||
Some((_, PageState::CompilingWritten)) => {},
|
||||
Some((wasm_table_index, PageState::Compiling { .. })) => {
|
||||
let wasm_table_index = *wasm_table_index;
|
||||
did_have_code = true;
|
||||
ctx.page_has_pending_code
|
||||
.insert(page, (wasm_table_index, PageState::CompilingWritten));
|
||||
},
|
||||
for pages in ctx.used_wasm_table_indices.values() {
|
||||
dbg_assert!(!pages.contains(&page));
|
||||
}
|
||||
|
||||
dbg_assert!(!jit_page_has_code(page));
|
||||
check_jit_state_invariants(ctx);
|
||||
|
||||
dbg_assert!(!ctx.all_pages.contains(&page));
|
||||
dbg_assert!(!jit_page_has_code_ctx(ctx, page));
|
||||
|
||||
if did_have_code {
|
||||
cpu::tlb_set_has_code(page, false);
|
||||
}
|
||||
|
||||
if !did_have_code {
|
||||
profiler::stat_increment(stat::DIRTY_PAGE_DID_NOT_HAVE_CODE);
|
||||
}
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
|
@ -1124,13 +1375,14 @@ pub fn jit_dirty_cache_small(start_addr: u32, end_addr: u32) {
|
|||
let start_page = Page::page_of(start_addr);
|
||||
let end_page = Page::page_of(end_addr - 1);
|
||||
|
||||
jit_dirty_page(get_jit_state(), start_page);
|
||||
let ctx = get_jit_state();
|
||||
jit_dirty_page(ctx, start_page);
|
||||
|
||||
// Note: This can't happen when paging is enabled, as writes across
|
||||
// boundaries are split up on two pages
|
||||
if start_page != end_page {
|
||||
dbg_assert!(start_page.to_u32() + 1 == end_page.to_u32());
|
||||
jit_dirty_page(get_jit_state(), end_page);
|
||||
jit_dirty_page(ctx, end_page);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1143,11 +1395,14 @@ pub fn jit_clear_cache(ctx: &mut JitState) {
|
|||
for page in ctx.entry_points.keys() {
|
||||
pages_with_code.insert(*page);
|
||||
}
|
||||
for addr in ctx.cache.keys() {
|
||||
pages_with_code.insert(Page::page_of(*addr));
|
||||
for &p in &ctx.all_pages {
|
||||
pages_with_code.insert(p);
|
||||
}
|
||||
for page in ctx.page_has_pending_code.keys() {
|
||||
pages_with_code.insert(*page);
|
||||
for addr in ctx.cache.keys() {
|
||||
dbg_assert!(pages_with_code.contains(&Page::page_of(*addr)));
|
||||
}
|
||||
for pages in ctx.used_wasm_table_indices.values() {
|
||||
dbg_assert!(pages_with_code.is_superset(pages));
|
||||
}
|
||||
|
||||
for page in pages_with_code {
|
||||
|
@ -1155,20 +1410,15 @@ pub fn jit_clear_cache(ctx: &mut JitState) {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn jit_page_has_code(page: Page) -> bool {
|
||||
let ctx = get_jit_state();
|
||||
let mut entries = ctx.cache.range(page.address_range());
|
||||
// Does the page have compiled code
|
||||
entries.next().is_some() ||
|
||||
// Or are there any entry points that need to be removed on write to the page
|
||||
// (this function is used to mark the has_code bit in the tlb to optimise away calls to jit_dirty_page)
|
||||
ctx.entry_points.contains_key(&page) ||
|
||||
match ctx.page_has_pending_code.get(&page) { Some(&(_, PageState::Compiling { .. })) => true, _ => false }
|
||||
pub fn jit_page_has_code(page: Page) -> bool { jit_page_has_code_ctx(get_jit_state(), page) }
|
||||
|
||||
pub fn jit_page_has_code_ctx(ctx: &mut JitState, page: Page) -> bool {
|
||||
ctx.all_pages.contains(&page) || ctx.entry_points.contains_key(&page)
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub fn jit_get_wasm_table_index_free_list_count() -> u32 {
|
||||
if cfg!(debug_assertions) {
|
||||
if cfg!(feature = "profiler") {
|
||||
get_jit_state().wasm_table_index_free_list.len() as u32
|
||||
}
|
||||
else {
|
||||
|
@ -1179,13 +1429,18 @@ pub fn jit_get_wasm_table_index_free_list_count() -> u32 {
|
|||
#[cfg(feature = "profiler")]
|
||||
pub fn check_missed_entry_points(phys_address: u32, state_flags: CachedStateFlags) {
|
||||
let ctx = get_jit_state();
|
||||
let page = Page::page_of(phys_address);
|
||||
|
||||
for (addr, entry) in ctx.cache.range(page.address_range()) {
|
||||
if entry.state_flags == state_flags
|
||||
&& phys_address >= *addr
|
||||
&& phys_address < *addr + entry.len
|
||||
{
|
||||
// backwards until beginning of page
|
||||
for offset in 0..=(phys_address & 0xFFF) {
|
||||
let addr = phys_address - offset;
|
||||
dbg_assert!(phys_address >= addr);
|
||||
|
||||
if let Some(entry) = ctx.cache.get(&addr) {
|
||||
if entry.state_flags != state_flags || phys_address >= addr + entry.len {
|
||||
// give up search on first entry that is not a match
|
||||
break;
|
||||
}
|
||||
|
||||
profiler::stat_increment(stat::RUN_INTERPRETED_MISSED_COMPILED_ENTRY_LOOKUP);
|
||||
|
||||
let last_jump_type = unsafe { cpu::debug_last_jump.name() };
|
||||
|
@ -1198,8 +1453,8 @@ pub fn check_missed_entry_points(phys_address: u32, state_flags: CachedStateFlag
|
|||
"Compiled exists, but no entry point, \
|
||||
start={:x} end={:x} phys_addr={:x} opcode={:02x} {:02x} {:02x} {:02x}. \
|
||||
Last jump at {:x} ({}) opcode={:02x} {:02x} {:02x} {:02x}",
|
||||
*addr,
|
||||
*addr + entry.len,
|
||||
addr,
|
||||
addr + entry.len,
|
||||
phys_address,
|
||||
opcode & 0xFF,
|
||||
opcode >> 8 & 0xFF,
|
||||
|
|
|
@ -2,11 +2,12 @@
|
|||
pub enum stat {
|
||||
COMPILE,
|
||||
COMPILE_SUCCESS,
|
||||
COMPILE_WRONG_ADDRESS_SPACE,
|
||||
COMPILE_CUT_OFF_AT_END_OF_PAGE,
|
||||
COMPILE_WITH_LOOP_SAFETY,
|
||||
COMPILE_PAGE,
|
||||
COMPILE_BASIC_BLOCK,
|
||||
COMPILE_ENTRY_POINT,
|
||||
COMPILE_DUPLICATE_ENTRY,
|
||||
COMPILE_WASM_TOTAL_BYTES,
|
||||
CACHE_MISMATCH,
|
||||
|
||||
|
@ -21,6 +22,8 @@ pub enum stat {
|
|||
RUN_FROM_CACHE,
|
||||
RUN_FROM_CACHE_STEPS,
|
||||
|
||||
FAILED_PAGE_CHANGE,
|
||||
|
||||
SAFE_READ_FAST,
|
||||
SAFE_READ_SLOW_PAGE_CROSSED,
|
||||
SAFE_READ_SLOW_NOT_VALID,
|
||||
|
@ -44,20 +47,23 @@ pub enum stat {
|
|||
SAFE_READ_WRITE_SLOW_HAS_CODE,
|
||||
|
||||
PAGE_FAULT,
|
||||
TLB_MISS,
|
||||
|
||||
DO_RUN,
|
||||
DO_MANY_CYCLES,
|
||||
CYCLE_INTERNAL,
|
||||
|
||||
INVALIDATE_ALL_MODULES_NO_FREE_WASM_INDICES,
|
||||
INVALIDATE_PAGE,
|
||||
INVALIDATE_MODULE,
|
||||
INVALIDATE_CACHE_ENTRY,
|
||||
INVALIDATE_MODULE_WRITTEN_WHILE_COMPILED,
|
||||
INVALIDATE_MODULE_UNUSED_AFTER_OVERWRITE,
|
||||
INVALIDATE_MODULE_DIRTY_PAGE,
|
||||
|
||||
INVALIDATE_MODULE_CACHE_FULL,
|
||||
INVALIDATE_SINGLE_ENTRY_CACHE_FULL,
|
||||
INVALIDATE_PAGE_HAD_CODE,
|
||||
INVALIDATE_PAGE_HAD_ENTRY_POINTS,
|
||||
DIRTY_PAGE_DID_NOT_HAVE_CODE,
|
||||
|
||||
RUN_FROM_CACHE_EXIT_SAME_PAGE,
|
||||
RUN_FROM_CACHE_EXIT_NEAR_END_OF_PAGE,
|
||||
RUN_FROM_CACHE_EXIT_DIFFERENT_PAGE,
|
||||
|
||||
CLEAR_TLB,
|
||||
|
|
|
@ -25,7 +25,6 @@ enum FunctionType {
|
|||
FN2_I64_I32_RET_I64_TYPE_INDEX,
|
||||
|
||||
FN3_RET_TYPE_INDEX,
|
||||
FN4_RET_TYPE_INDEX,
|
||||
|
||||
FN3_I64_I32_I32_TYPE_INDEX,
|
||||
FN3_I32_I64_I32_TYPE_INDEX,
|
||||
|
@ -314,16 +313,6 @@ impl WasmBuilder {
|
|||
self.output.push(1);
|
||||
self.output.push(op::TYPE_I32);
|
||||
},
|
||||
FunctionType::FN4_RET_TYPE_INDEX => {
|
||||
self.output.push(op::TYPE_FUNC);
|
||||
self.output.push(4);
|
||||
self.output.push(op::TYPE_I32);
|
||||
self.output.push(op::TYPE_I32);
|
||||
self.output.push(op::TYPE_I32);
|
||||
self.output.push(op::TYPE_I32);
|
||||
self.output.push(1);
|
||||
self.output.push(op::TYPE_I32);
|
||||
},
|
||||
FunctionType::FN3_I64_I32_I32_TYPE_INDEX => {
|
||||
self.output.push(op::TYPE_FUNC);
|
||||
self.output.push(3);
|
||||
|
@ -869,9 +858,6 @@ impl WasmBuilder {
|
|||
pub fn call_fn3_i32_i64_i32_ret(&mut self, name: &str) {
|
||||
self.call_fn(name, FunctionType::FN3_I32_I64_I32_RET_TYPE_INDEX)
|
||||
}
|
||||
pub fn call_fn4_ret(&mut self, name: &str) {
|
||||
self.call_fn(name, FunctionType::FN4_RET_TYPE_INDEX)
|
||||
}
|
||||
pub fn call_fn4_i32_i64_i64_i32_ret(&mut self, name: &str) {
|
||||
self.call_fn(name, FunctionType::FN4_I32_I64_I64_I32_RET_TYPE_INDEX)
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue