// v86/src/rust/jit.rs
use std::collections::{BTreeMap, HashMap, HashSet};
use std::iter::FromIterator;
use analysis::AnalysisType;
use codegen;
use cpu;
use cpu_context::CpuContext;
use global_pointers;
use jit_instructions;
use page::Page;
use profiler;
use profiler::stat;
use state_flags::CachedStateFlags;
use util::SafeToU16;
use wasmgen::module_init::{WasmBuilder, WasmLocal};
use wasmgen::wasm_util::WasmBuf;
pub const WASM_TABLE_SIZE: u32 = 900;
pub const HASH_PRIME: u32 = 6151;
pub const CHECK_JIT_CACHE_ARRAY_INVARIANTS: bool = false;
pub const JIT_MAX_ITERATIONS_PER_FUNCTION: u32 = 10000;
pub const JIT_ALWAYS_USE_LOOP_SAFETY: bool = true;
pub const JIT_THRESHOLD: u32 = 200 * 1000;
const CODE_CACHE_SEARCH_SIZE: u32 = 8;
const MAX_INSTRUCTION_LENGTH: u32 = 16;
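/// The code cache: an open-addressing hash table of compiled-code entries,
/// keyed by physical start address. Entries of the same physical page are
/// additionally threaded into a singly linked list (via `next_index_same_page`,
/// rooted in `page_first_entry`), so that a whole page can be invalidated at once.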
mod jit_cache_array {
use page::Page;
use state_flags::CachedStateFlags;
// Note: For performance reasons, this is global state. See jit_find_cache_entry
const NO_NEXT_ENTRY: u32 = 0xffff_ffff;
// When changing this, you also need to bump global-base
pub const SIZE: u32 = 0x40000;
pub const MASK: u32 = SIZE - 1;
#[derive(Copy, Clone)]
pub struct Entry {
pub start_addr: u32,
#[cfg(any(debug_assertions, feature = "profiler"))]
pub len: u32,
#[cfg(debug_assertions)]
pub opcode: u32,
// an index into jit_cache_array for the next code_cache entry within the same physical page
next_index_same_page: u32,
pub initial_state: u16,
pub wasm_table_index: u16,
pub state_flags: CachedStateFlags,
pub pending: bool,
}
impl Entry {
pub fn create(
start_addr: u32,
next_index_same_page: Option<u32>,
wasm_table_index: u16,
initial_state: u16,
state_flags: CachedStateFlags,
pending: bool,
) -> Entry {
let next_index_same_page = next_index_same_page.unwrap_or(NO_NEXT_ENTRY);
Entry {
start_addr,
next_index_same_page,
wasm_table_index,
initial_state,
state_flags,
pending,
#[cfg(any(debug_assertions, feature = "profiler"))]
len: 0,
#[cfg(debug_assertions)]
opcode: 0,
}
}
pub fn next_index_same_page(&self) -> Option<u32> {
if self.next_index_same_page == NO_NEXT_ENTRY {
None
}
else {
Some(self.next_index_same_page)
}
}
pub fn set_next_index_same_page(&mut self, next_index: Option<u32>) {
if let Some(i) = next_index {
self.next_index_same_page = i
}
else {
self.next_index_same_page = NO_NEXT_ENTRY
}
}
}
const DEFAULT_ENTRY: Entry = Entry {
start_addr: 0,
next_index_same_page: NO_NEXT_ENTRY,
wasm_table_index: 0,
initial_state: 0,
state_flags: CachedStateFlags::EMPTY,
pending: false,
#[cfg(any(debug_assertions, feature = "profiler"))]
len: 0,
#[cfg(debug_assertions)]
opcode: 0,
};
#[allow(non_upper_case_globals)]
pub const jit_cache_array: *mut Entry = ::global_pointers::JIT_CACHE_ARRAY as *mut Entry;
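// Poor man's static assertions: transmuting to a fixed-size byte array only
// compiles if Entry has exactly that size (24 bytes with the debug/profiler
// fields, 16 bytes without). These functions are never called.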
#[allow(unreachable_code)]
#[cfg(debug_assertions)]
unsafe fn _static_assert() { std::mem::transmute::<Entry, [u8; 24]>(panic!()); }
#[allow(unreachable_code)]
#[cfg(all(not(debug_assertions), not(feature = "profiler")))]
unsafe fn _static_assert() { std::mem::transmute::<Entry, [u8; 16]>(panic!()); }
// XXX: Probably doesn't need to be statically allocated
#[allow(non_upper_case_globals)]
pub const page_first_entry: *mut u32 = ::global_pointers::JIT_PAGE_FIRST_ENTRY as *mut u32;
pub fn get_page_index(page: Page) -> Option<u32> {
let index = unsafe { *page_first_entry.offset(page.to_u32() as isize) };
if index == NO_NEXT_ENTRY { None } else { Some(index) }
}
pub fn set_page_index(page: Page, index: Option<u32>) {
let index = index.unwrap_or(NO_NEXT_ENTRY);
unsafe { *page_first_entry.offset(page.to_u32() as isize) = index }
}
pub fn get(i: u32) -> &'static Entry { unsafe { &*jit_cache_array.offset(i as isize) } }
pub fn get_mut(i: u32) -> &'static mut Entry {
unsafe { &mut *jit_cache_array.offset(i as isize) }
}
fn set(i: u32, entry: Entry) {
unsafe {
*jit_cache_array.offset(i as isize) = entry
};
}
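/// Store `entry` at `index` and prepend it to the linked list of entries in
/// its physical page.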
pub fn insert(index: u32, mut entry: Entry) {
let page = Page::page_of(entry.start_addr);
let previous_entry_index = get_page_index(page);
if let Some(previous_entry_index) = previous_entry_index {
let previous_entry = get(previous_entry_index);
if previous_entry.start_addr != 0 {
dbg_assert!(
Page::page_of(previous_entry.start_addr) == Page::page_of(entry.start_addr)
);
}
}
set_page_index(page, Some(index));
entry.set_next_index_same_page(previous_entry_index);
set(index, entry);
}
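/// Unlink the entry at `index` from its page's linked list. The entry's other
/// fields are left untouched; callers reset them afterwards.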
pub fn remove(index: u32) {
let page = Page::page_of((get(index)).start_addr);
let mut page_index = get_page_index(page);
let mut did_remove = false;
if page_index == Some(index) {
set_page_index(page, (get(index)).next_index_same_page());
did_remove = true;
}
else {
while let Some(page_index_ok) = page_index {
let next_index = (get(page_index_ok)).next_index_same_page();
if next_index == Some(index) {
(get_mut(page_index_ok))
.set_next_index_same_page((get(index)).next_index_same_page());
did_remove = true;
break;
}
page_index = next_index;
}
}
(get_mut(index)).set_next_index_same_page(None);
dbg_assert!(did_remove);
}
pub fn clear() {
unsafe {
for i in 0..SIZE {
*jit_cache_array.offset(i as isize) = DEFAULT_ENTRY;
}
for i in 0..0x100000 {
*page_first_entry.offset(i) = NO_NEXT_ENTRY;
}
}
}
pub fn check_invariants() {
if !::jit::CHECK_JIT_CACHE_ARRAY_INVARIANTS {
return;
}
// there are no loops in the linked lists
// https://en.wikipedia.org/wiki/Cycle_detection#Floyd's_Tortoise_and_Hare
for i in 0..(1 << 20) {
let mut slow = get_page_index(Page::page_of(i << 12));
let mut fast = slow;
while let Some(fast_ok) = fast {
fast = (get(fast_ok)).next_index_same_page();
slow = (get(slow.unwrap())).next_index_same_page();
if let Some(fast_ok) = fast {
fast = (get(fast_ok)).next_index_same_page();
}
else {
break;
}
dbg_assert!(slow != fast);
}
}
let mut wasm_table_index_to_jit_cache_index = [0; ::jit::WASM_TABLE_SIZE as usize];
for i in 0..SIZE {
let entry = get(i);
dbg_assert!(entry.next_index_same_page().map_or(true, |i| i < SIZE));
if entry.pending {
dbg_assert!(entry.start_addr != 0);
dbg_assert!(entry.wasm_table_index != 0);
}
else {
// an invalid entry has both its start_addr and wasm_table_index set to 0
// neither start_addr nor wasm_table_index are 0 for any valid entry
dbg_assert!((entry.start_addr == 0) == (entry.wasm_table_index == 0));
}
// having a next entry implies validity
dbg_assert!(entry.next_index_same_page() == None || entry.start_addr != 0);
// any valid wasm_table_index can only be used within a single page
if entry.wasm_table_index != 0 {
let j = wasm_table_index_to_jit_cache_index[entry.wasm_table_index as usize];
if j != 0 {
let other_entry = get(j);
dbg_assert!(other_entry.wasm_table_index == entry.wasm_table_index);
dbg_assert!(
Page::page_of(other_entry.start_addr) == Page::page_of(entry.start_addr)
);
}
else {
wasm_table_index_to_jit_cache_index[entry.wasm_table_index as usize] = i as u32;
}
}
if entry.start_addr != 0 {
// valid entries can be reached from page_first_entry
let mut reached = false;
let page = Page::page_of(entry.start_addr);
let mut cache_array_index = get_page_index(page);
while let Some(index) = cache_array_index {
let other_entry = get(index);
if i as u32 == index {
reached = true;
break;
}
cache_array_index = other_entry.next_index_same_page();
}
dbg_assert!(reached);
}
}
}
}
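/// Mutable state of the JIT, created once and threaded through all public
/// entry points of this module.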
pub struct JitState {
// as an alternative to HashSet, we could use a bitmap of 4096 bits here
// (faster, but uses much more memory)
// or a compressed bitmap (likely faster)
hot_code_addresses: [u32; HASH_PRIME as usize],
wasm_table_index_free_list: Vec<u16>,
wasm_table_index_pending_free: Vec<u16>,
entry_points: HashMap<Page, HashSet<u16>>,
wasm_builder: WasmBuilder,
}
impl JitState {
pub fn create_and_initialise() -> JitState {
let mut wasm_builder = WasmBuilder::new();
jit_cache_array::clear();
wasm_builder.init();
// don't assign 0 (XXX: Check)
let wasm_table_indices = 1..=(WASM_TABLE_SIZE - 1) as u16;
let mut c = JitState {
hot_code_addresses: [0; HASH_PRIME as usize],
wasm_table_index_free_list: Vec::from_iter(wasm_table_indices),
wasm_table_index_pending_free: vec![],
entry_points: HashMap::new(),
wasm_builder,
};
jit_empty_cache(&mut c);
c
}
}
#[derive(PartialEq, Eq)]
enum BasicBlockType {
Normal {
next_block_addr: u32,
},
ConditionalJump {
next_block_addr: Option<u32>,
next_block_branch_taken_addr: Option<u32>,
condition: u8,
jump_offset: i32,
jump_offset_is_32: bool,
},
Exit,
}
struct BasicBlock {
addr: u32,
last_instruction_addr: u32,
end_addr: u32,
is_entry_block: bool,
ty: BasicBlockType,
number_of_instructions: u32,
}
#[repr(C)]
#[derive(Copy, Clone)]
pub struct cached_code {
pub wasm_table_index: u16,
pub initial_state: u16,
}
impl cached_code {
const NONE: cached_code = cached_code {
wasm_table_index: 0,
initial_state: 0,
};
}
pub struct JitContext<'a> {
pub cpu: &'a mut CpuContext,
pub builder: &'a mut WasmBuilder,
pub register_locals: &'a mut Vec<WasmLocal>,
pub start_of_current_instruction: u32,
pub current_brtable_depth: u32,
}
pub const JIT_INSTR_BLOCK_BOUNDARY_FLAG: u32 = 1 << 0;
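/// Hash a page number into a bucket of `JitState::hot_code_addresses`.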
fn jit_hot_hash_page(page: Page) -> u32 { page.to_u32() % HASH_PRIME }
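/// Instructions can be up to MAX_INSTRUCTION_LENGTH bytes long and may then
/// cross into the next, possibly non-contiguous physical page. Addresses this
/// close to the end of a page are never compiled and always run interpreted.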
fn is_near_end_of_page(address: u32) -> bool { address & 0xFFF >= 0x1000 - MAX_INSTRUCTION_LENGTH }
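/// Look up compiled code for `phys_address` by linearly probing the
/// CODE_CACHE_SEARCH_SIZE slots starting at its position in the cache array.
/// Returns `cached_code::NONE` on a miss (pending or state-flag-mismatched
/// entries count as misses and are recorded in the profiler).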
pub fn jit_find_cache_entry(phys_address: u32, state_flags: CachedStateFlags) -> cached_code {
if is_near_end_of_page(phys_address) {
profiler::stat_increment(stat::RUN_INTERPRETED_NEAR_END_OF_PAGE);
}
let mut run_interpreted_reason = None;
for i in 0..CODE_CACHE_SEARCH_SIZE {
let index = (phys_address + i) & jit_cache_array::MASK;
let entry = jit_cache_array::get(index);
if entry.start_addr == phys_address {
if entry.pending {
run_interpreted_reason = Some(stat::RUN_INTERPRETED_PENDING)
}
if entry.state_flags != state_flags {
run_interpreted_reason = Some(stat::RUN_INTERPRETED_DIFFERENT_STATE)
}
}
if is_near_end_of_page(phys_address) {
dbg_assert!(entry.start_addr != phys_address);
}
if !entry.pending && entry.start_addr == phys_address && entry.state_flags == state_flags {
#[cfg(debug_assertions)] // entry.opcode is not defined otherwise
{
dbg_assert!(cpu::read32(entry.start_addr) == entry.opcode);
}
return cached_code {
wasm_table_index: entry.wasm_table_index,
initial_state: entry.initial_state,
};
}
}
if let Some(reason) = run_interpreted_reason {
profiler::stat_increment(reason);
}
cached_code::NONE
}
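/// Record `phys_address` as a compilation entry point of its page; on the
/// first entry point of a page, also mark the page as containing code in the
/// TLB.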
pub fn record_entry_point(ctx: &mut JitState, phys_address: u32) {
if is_near_end_of_page(phys_address) {
return;
}
let page = Page::page_of(phys_address);
let offset_in_page = phys_address as u16 & 0xFFF;
let mut is_new = false;
ctx.entry_points
.entry(page)
.or_insert_with(|| {
is_new = true;
HashSet::new()
})
.insert(offset_in_page);
if is_new {
cpu::tlb_set_has_code(page, true);
}
}
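/// Discover the basic blocks of a page by depth-first traversal from its entry
/// points. Returns the blocks ordered by address, plus a flag indicating that
/// some block jumps back to its own start, in which case the generated module
/// needs a loop iteration limit.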
fn jit_find_basic_blocks(
page: Page,
entry_points: &HashSet<u16>,
cpu: CpuContext,
) -> (Vec<BasicBlock>, bool) {
let mut to_visit_stack: Vec<u16> = entry_points.iter().cloned().collect();
let mut marked_as_entry: HashSet<u16> = entry_points.clone();
let page_high_bits = page.to_address();
let mut basic_blocks: BTreeMap<u32, BasicBlock> = BTreeMap::new();
let mut requires_loop_limit = false;
while let Some(to_visit_offset) = to_visit_stack.pop() {
let to_visit = to_visit_offset as u32 | page_high_bits;
if basic_blocks.contains_key(&to_visit) {
continue;
}
if is_near_end_of_page(to_visit) {
// Empty basic block, don't insert
profiler::stat_increment(stat::COMPILE_CUT_OFF_AT_END_OF_PAGE);
continue;
}
let mut current_address = to_visit;
let mut current_block = BasicBlock {
addr: current_address,
last_instruction_addr: 0,
end_addr: 0,
ty: BasicBlockType::Exit,
is_entry_block: false,
number_of_instructions: 0,
};
loop {
let addr_before_instruction = current_address;
let mut ctx = &mut CpuContext {
eip: current_address,
..cpu
};
let analysis = ::analysis::analyze_step(&mut ctx);
current_block.number_of_instructions += 1;
let has_next_instruction = !analysis.no_next_instruction;
current_address = ctx.eip;
match analysis.ty {
AnalysisType::Normal => {
dbg_assert!(has_next_instruction);
if basic_blocks.contains_key(&current_address) {
current_block.last_instruction_addr = addr_before_instruction;
current_block.end_addr = current_address;
dbg_assert!(!is_near_end_of_page(current_address));
current_block.ty = BasicBlockType::Normal {
next_block_addr: current_address,
};
break;
}
},
AnalysisType::Jump {
offset,
is_32,
condition: Some(condition),
} => {
// conditional jump: continue at next and continue at jump target
let jump_target = if is_32 {
current_address.wrapping_add(offset as u32)
}
else {
ctx.cs_offset.wrapping_add(
(current_address
.wrapping_sub(ctx.cs_offset)
.wrapping_add(offset as u32))
& 0xFFFF,
)
};
dbg_assert!(has_next_instruction);
to_visit_stack.push(current_address as u16 & 0xFFF);
let next_block_branch_taken_addr;
if Page::page_of(jump_target) == page && !is_near_end_of_page(jump_target) {
to_visit_stack.push(jump_target as u16 & 0xFFF);
next_block_branch_taken_addr = Some(jump_target);
// Very simple heuristic for "infinite loops": This
// detects Linux's "calibrating delay loop"
if jump_target == current_block.addr {
dbg_log!("Basic block looping back to front");
requires_loop_limit = true;
}
}
else {
next_block_branch_taken_addr = None;
}
let next_block_addr = if is_near_end_of_page(current_address) {
None
}
else {
Some(current_address)
};
current_block.ty = BasicBlockType::ConditionalJump {
next_block_addr,
next_block_branch_taken_addr,
condition,
jump_offset: offset,
jump_offset_is_32: is_32,
};
current_block.last_instruction_addr = addr_before_instruction;
current_block.end_addr = current_address;
break;
},
AnalysisType::Jump {
offset,
is_32,
condition: None,
} => {
// non-conditional jump: continue at jump target
let jump_target = if is_32 {
current_address.wrapping_add(offset as u32)
}
else {
ctx.cs_offset.wrapping_add(
(current_address
.wrapping_sub(ctx.cs_offset)
.wrapping_add(offset as u32))
& 0xFFFF,
)
};
if has_next_instruction {
// Execution will eventually come back to the next instruction (CALL)
marked_as_entry.insert(current_address as u16 & 0xFFF);
to_visit_stack.push(current_address as u16 & 0xFFF);
}
if Page::page_of(jump_target) == page && !is_near_end_of_page(jump_target) {
current_block.ty = BasicBlockType::Normal {
next_block_addr: jump_target,
};
to_visit_stack.push(jump_target as u16 & 0xFFF);
}
else {
current_block.ty = BasicBlockType::Exit;
}
current_block.last_instruction_addr = addr_before_instruction;
current_block.end_addr = current_address;
break;
},
AnalysisType::BlockBoundary => {
// a block boundary but not a jump, get out
if has_next_instruction {
// block boundary, but execution will eventually come back
// to the next instruction. Create a new basic block
// starting at the next instruction and register it as an
// entry point
marked_as_entry.insert(current_address as u16 & 0xFFF);
to_visit_stack.push(current_address as u16 & 0xFFF);
}
current_block.last_instruction_addr = addr_before_instruction;
current_block.end_addr = current_address;
break;
},
}
if is_near_end_of_page(current_address) {
current_block.last_instruction_addr = addr_before_instruction;
current_block.end_addr = current_address;
profiler::stat_increment(stat::COMPILE_CUT_OFF_AT_END_OF_PAGE);
break;
}
}
let previous_block = basic_blocks
.range(..current_block.addr)
.next_back()
.map(|(_, previous_block)| (previous_block.addr, previous_block.end_addr));
if let Some((start_addr, end_addr)) = previous_block {
if current_block.addr < end_addr {
// If this block overlaps with the previous block, re-analyze the previous block
let old_block = basic_blocks.remove(&start_addr);
dbg_assert!(old_block.is_some());
to_visit_stack.push(start_addr as u16 & 0xFFF);
// Note that this does not ensure the invariant that two consecutive blocks don't
// overlap. For that, we also need to check the following block.
}
}
dbg_assert!(current_block.addr < current_block.end_addr);
dbg_assert!(current_block.addr <= current_block.last_instruction_addr);
dbg_assert!(current_block.last_instruction_addr < current_block.end_addr);
basic_blocks.insert(current_block.addr, current_block);
}
for block in basic_blocks.values_mut() {
if marked_as_entry.contains(&(block.addr as u16 & 0xFFF)) {
block.is_entry_block = true;
}
}
let basic_blocks: Vec<BasicBlock> = basic_blocks.into_iter().map(|(_, block)| block).collect();
for i in 0..basic_blocks.len() - 1 {
let next_block_addr = basic_blocks[i + 1].addr;
let next_block_end_addr = basic_blocks[i + 1].end_addr;
let next_block_is_entry = basic_blocks[i + 1].is_entry_block;
let block = &basic_blocks[i];
dbg_assert!(block.addr < next_block_addr);
if next_block_addr < block.end_addr {
dbg_log!(
"Overlapping first=[from={:x} to={:x} is_entry={}] second=[from={:x} to={:x} is_entry={}]",
block.addr,
block.end_addr,
block.is_entry_block as u8,
next_block_addr,
next_block_end_addr,
next_block_is_entry as u8
);
}
}
(basic_blocks, requires_loop_limit)
}
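/// Store `entry` in the cache array, probing CODE_CACHE_SEARCH_SIZE slots for
/// a free one and evicting an existing entry (or, if it belongs to a different
/// module, that entire module) when none is free.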
fn create_cache_entry(ctx: &mut JitState, entry: jit_cache_array::Entry) {
let mut found_entry_index = None;
let phys_addr = entry.start_addr;
for i in 0..CODE_CACHE_SEARCH_SIZE {
let addr_index = (phys_addr + i) & jit_cache_array::MASK;
let existing_entry = jit_cache_array::get(addr_index);
if existing_entry.start_addr == entry.start_addr
&& existing_entry.state_flags == entry.state_flags
{
profiler::stat_increment(stat::COMPILE_DUPLICATE_ENTRY);
}
if existing_entry.start_addr == 0 {
found_entry_index = Some(addr_index);
break;
}
}
let found_entry_index = match found_entry_index {
Some(i) => i,
None => {
profiler::stat_increment(stat::CACHE_MISMATCH);
// no free slots, overwrite the first one
let found_entry_index = phys_addr & jit_cache_array::MASK;
let old_entry = jit_cache_array::get_mut(found_entry_index);
// if we're here, we expect to overwrite a valid entry
dbg_assert!(old_entry.start_addr != 0);
dbg_assert!(old_entry.wasm_table_index != 0);
if old_entry.wasm_table_index == entry.wasm_table_index {
profiler::stat_increment(stat::INVALIDATE_SINGLE_ENTRY_CACHE_FULL);
dbg_assert!(old_entry.pending);
dbg_assert!(Page::page_of(old_entry.start_addr) == Page::page_of(phys_addr));
// The old entry belongs to the same wasm table index as this entry.
// *Don't* free the wasm table index, instead just delete the old entry
// and use its slot for this entry.
// TODO: Optimally, we should pick another slot instead of dropping
// an entry that has just been created.
jit_cache_array::remove(found_entry_index);
dbg_assert!(old_entry.next_index_same_page() == None);
old_entry.pending = false;
old_entry.start_addr = 0;
}
else {
profiler::stat_increment(stat::INVALIDATE_MODULE_CACHE_FULL);
let old_wasm_table_index = old_entry.wasm_table_index;
let old_page = Page::page_of(old_entry.start_addr);
remove_jit_cache_wasm_index(ctx, old_page, old_wasm_table_index);
//jit_cache_array::check_invariants();
// old entry should be removed after calling remove_jit_cache_wasm_index
dbg_assert!(!old_entry.pending);
dbg_assert!(old_entry.start_addr == 0);
dbg_assert!(old_entry.wasm_table_index == 0);
dbg_assert!(old_entry.next_index_same_page() == None);
}
found_entry_index
},
};
jit_cache_array::insert(found_entry_index, entry);
}
#[cfg(debug_assertions)]
pub fn jit_force_generate_unsafe(
ctx: &mut JitState,
phys_addr: u32,
cs_offset: u32,
state_flags: CachedStateFlags,
) {
record_entry_point(ctx, phys_addr);
jit_analyze_and_generate(ctx, Page::page_of(phys_addr), cs_offset, state_flags);
}
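/// Compile one page: find its basic blocks, generate a single wasm module
/// containing all of them, create a pending cache entry per entry-point block,
/// and hand the module to the embedder, which instantiates it asynchronously
/// and then calls codegen_finalize_finished.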
#[inline(never)]
fn jit_analyze_and_generate(
ctx: &mut JitState,
page: Page,
cs_offset: u32,
state_flags: CachedStateFlags,
) {
profiler::stat_increment(stat::COMPILE);
let entry_points = ctx.entry_points.remove(&page);
let cpu = CpuContext {
eip: 0,
prefixes: 0,
cs_offset,
state_flags,
};
if let Some(entry_points) = entry_points {
let (basic_blocks, requires_loop_limit) =
jit_find_basic_blocks(page, &entry_points, cpu.clone());
//for b in basic_blocks.iter() {
// dbg_log!(
// "> Basic block from {:x} to {:x}, is_entry={}",
// b.addr,
// b.end_addr,
// b.is_entry_block
// );
//}
if ctx.wasm_table_index_free_list.is_empty() {
dbg_log!(
"wasm_table_index_free_list empty ({} pending_free), clearing cache",
ctx.wasm_table_index_pending_free.len(),
);
// When no free slots are available, delete all cached modules. We could increase the
// size of the table, but this way the initial size acts as an upper bound for the
// number of wasm modules that we generate, which we want anyway to avoid getting our
// tab killed by browsers due to memory constraints.
cpu::jit_clear_cache();
profiler::stat_increment(stat::INVALIDATE_ALL_MODULES_NO_FREE_WASM_INDICES);
dbg_log!(
"after jit_clear_cache: {} pending_free {} free",
ctx.wasm_table_index_pending_free.len(),
ctx.wasm_table_index_free_list.len(),
);
// This assertion can fail if all entries are pending (not possible unless
// WASM_TABLE_SIZE is set very low)
dbg_assert!(!ctx.wasm_table_index_free_list.is_empty());
}
// allocate an index in the wasm table
let wasm_table_index = ctx
.wasm_table_index_free_list
.pop()
.expect("allocate wasm table index");
dbg_assert!(wasm_table_index != 0);
jit_generate_module(
&basic_blocks,
requires_loop_limit,
cpu.clone(),
&mut ctx.wasm_builder,
);
// create entries for each basic block that is marked as an entry point
let mut entry_point_count = 0;
for (i, block) in basic_blocks.iter().enumerate() {
profiler::stat_increment(stat::COMPILE_BASIC_BLOCK);
if block.is_entry_block && block.addr != block.end_addr {
dbg_assert!(block.addr != 0);
let initial_state = i.safe_to_u16();
#[allow(unused_mut)]
let mut entry = jit_cache_array::Entry::create(
block.addr,
None, // to be filled in by create_cache_entry
wasm_table_index,
initial_state,
state_flags,
true,
);
#[cfg(any(debug_assertions, feature = "profiler"))]
{
entry.len = block.end_addr - block.addr;
}
#[cfg(debug_assertions)]
{
entry.opcode = cpu::read32(block.addr);
}
create_cache_entry(ctx, entry);
entry_point_count += 1;
profiler::stat_increment(stat::COMPILE_ENTRY_POINT);
}
}
profiler::stat_increment_by(
stat::COMPILE_WASM_TOTAL_BYTES,
::c_api::jit_get_op_len() as u64,
);
dbg_assert!(entry_point_count > 0);
cpu::tlb_set_has_code(page, true);
jit_cache_array::check_invariants();
cpu::check_tlb_invariants();
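// end_addr and first_opcode are placeholder values; codegen_finalize_finished
// ignores them (note its underscore-prefixed parameters)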
let end_addr = 0;
let first_opcode = 0;
let phys_addr = page.to_address();
// will call codegen_finalize_finished asynchronously when finished
cpu::codegen_finalize(
wasm_table_index,
phys_addr,
end_addr,
first_opcode,
state_flags,
);
profiler::stat_increment(stat::COMPILE_SUCCESS);
}
else {
//dbg_log("No basic blocks, not generating code");
// Nothing to do
}
}
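/// Called by the embedder when the wasm module for `wasm_table_index` has been
/// instantiated: either clear the pending flag on all of the module's cache
/// entries, or, if the module was invalidated while it was being compiled,
/// free its table index again.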
pub fn codegen_finalize_finished(
ctx: &mut JitState,
wasm_table_index: u16,
phys_addr: u32,
_end_addr: u32,
_first_opcode: u32,
_state_flags: CachedStateFlags,
) {
dbg_assert!(wasm_table_index != 0);
match ctx
.wasm_table_index_pending_free
.iter()
.position(|i| *i == wasm_table_index)
{
Some(i) => {
ctx.wasm_table_index_pending_free.swap_remove(i);
free_wasm_table_index(ctx, wasm_table_index);
},
None => {
let page = Page::page_of(phys_addr);
let mut cache_array_index = jit_cache_array::get_page_index(page);
while let Some(index) = cache_array_index {
let entry = jit_cache_array::get_mut(index);
if entry.wasm_table_index == wasm_table_index {
dbg_assert!(entry.pending);
entry.pending = false;
}
cache_array_index = entry.next_index_same_page();
}
},
}
jit_cache_array::check_invariants();
if CHECK_JIT_CACHE_ARRAY_INVARIANTS {
// sanity check that the above iteration marked all entries as not pending
for i in 0..jit_cache_array::SIZE {
let entry = jit_cache_array::get(i);
if entry.wasm_table_index == wasm_table_index {
dbg_assert!(!entry.pending);
}
}
}
}
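/// Generate one wasm function covering all basic blocks of a page, structured
/// as a state machine: a `state` local is dispatched over a br_table inside a
/// loop, and each basic block either falls through to the next block, sets
/// `state` and branches back to the loop, or returns. Schematically (a sketch
/// of the emitted structure, not the exact opcodes):
///
///     loop
///       ;; if enabled: decrement the iteration counter, return when it hits 0
///       block                ;; default case: unreachable
///         block              ;; exit-with-pagefault case
///           block ... block  ;; one block per basic block
///             br_table state
///           end
///           ;; code for basic block 0, then 1, ...
///     end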
fn jit_generate_module(
basic_blocks: &Vec<BasicBlock>,
requires_loop_limit: bool,
mut cpu: CpuContext,
builder: &mut WasmBuilder,
) {
builder.reset();
let basic_block_indices: HashMap<u32, u32> = basic_blocks
.iter()
.enumerate()
.map(|(index, block)| (block.addr, index as u32))
.collect();
// set state local variable to the initial state passed as the first argument
builder
.instruction_body
.get_local(&builder.arg_local_initial_state);
let gen_local_state = builder.set_new_local();
// initialise max_iterations
let gen_local_iteration_counter = if JIT_ALWAYS_USE_LOOP_SAFETY || requires_loop_limit {
builder
.instruction_body
.const_i32(JIT_MAX_ITERATIONS_PER_FUNCTION as i32);
Some(builder.set_new_local())
}
else {
None
};
let mut register_locals = (0..8)
.map(|i| {
builder
.instruction_body
.const_i32(global_pointers::get_reg32_offset(i) as i32);
builder.instruction_body.load_aligned_i32_from_stack(0);
builder.set_new_local()
})
.collect();
let ctx = &mut JitContext {
cpu: &mut cpu,
builder,
register_locals: &mut register_locals,
start_of_current_instruction: 0,
current_brtable_depth: 0,
};
// main state machine loop
ctx.builder.instruction_body.loop_void();
if let Some(gen_local_iteration_counter) = gen_local_iteration_counter.as_ref() {
profiler::stat_increment(stat::COMPILE_WITH_LOOP_SAFETY);
// decrement max_iterations
ctx.builder
.instruction_body
.get_local(gen_local_iteration_counter);
ctx.builder.instruction_body.const_i32(-1);
ctx.builder.instruction_body.add_i32();
ctx.builder
.instruction_body
.set_local(gen_local_iteration_counter);
// if max_iterations == 0: return
ctx.builder
.instruction_body
.get_local(gen_local_iteration_counter);
ctx.builder.instruction_body.eqz_i32();
ctx.builder.instruction_body.if_void();
codegen::gen_debug_track_jit_exit(ctx.builder, 0);
codegen::gen_move_registers_from_locals_to_memory(ctx);
ctx.builder.instruction_body.return_();
ctx.builder.instruction_body.block_end();
}
ctx.builder.instruction_body.block_void(); // for the default case
ctx.builder.instruction_body.block_void(); // for the exit-with-pagefault case
// generate the opening blocks for the cases
for _ in 0..basic_blocks.len() {
ctx.builder.instruction_body.block_void();
}
ctx.builder.instruction_body.get_local(&gen_local_state);
ctx.builder
.instruction_body
.brtable_and_cases(basic_blocks.len() as u32 + 1); // plus one for the exit-with-pagefault case
for (i, block) in basic_blocks.iter().enumerate() {
// Case [i] will jump after the [i]th block, so we first generate the
// block end opcode and then the code for that block
ctx.builder.instruction_body.block_end();
ctx.current_brtable_depth = basic_blocks.len() as u32 + 1 - i as u32;
dbg_assert!(block.addr < block.end_addr);
jit_generate_basic_block(ctx, block);
let invalid_connection_to_next_block = block.end_addr != ctx.cpu.eip;
dbg_assert!(!invalid_connection_to_next_block);
match &block.ty {
BasicBlockType::Exit => {
// Exit this function
codegen::gen_debug_track_jit_exit(ctx.builder, block.last_instruction_addr);
codegen::gen_move_registers_from_locals_to_memory(ctx);
ctx.builder.instruction_body.return_();
},
BasicBlockType::Normal { next_block_addr } => {
// Unconditional jump to next basic block
// - All instructions that don't change eip
// - Unconditional jump
let next_basic_block_index = *basic_block_indices
.get(&next_block_addr)
.expect("basic_block_indices.get (Normal)");
if next_basic_block_index == (i as u32) + 1 {
// fallthru
}
else {
// set state variable to next basic block
ctx.builder
.instruction_body
.const_i32(next_basic_block_index as i32);
ctx.builder.instruction_body.set_local(&gen_local_state);
ctx.builder.instruction_body.br(ctx.current_brtable_depth); // to the loop
}
},
&BasicBlockType::ConditionalJump {
next_block_addr,
next_block_branch_taken_addr,
condition,
jump_offset,
jump_offset_is_32,
} => {
// Conditional jump to next basic block
// - jnz, jc, loop, jcxz, etc.
codegen::gen_condition_fn(ctx, condition);
ctx.builder.instruction_body.if_void();
// Branch taken
if jump_offset_is_32 {
codegen::gen_relative_jump(ctx.builder, jump_offset);
}
else {
codegen::gen_jmp_rel16(ctx.builder, jump_offset as u16);
}
if let Some(next_block_branch_taken_addr) = next_block_branch_taken_addr {
let next_basic_block_branch_taken_index = *basic_block_indices
.get(&next_block_branch_taken_addr)
.expect("basic_block_indices.get (branch taken)");
ctx.builder
.instruction_body
.const_i32(next_basic_block_branch_taken_index as i32);
ctx.builder.instruction_body.set_local(&gen_local_state);
ctx.builder
.instruction_body
.br(basic_blocks.len() as u32 + 2 - i as u32); // to the loop
}
else {
// Jump to different page
codegen::gen_debug_track_jit_exit(ctx.builder, block.last_instruction_addr);
codegen::gen_move_registers_from_locals_to_memory(ctx);
ctx.builder.instruction_body.return_();
}
if let Some(next_block_addr) = next_block_addr {
// Branch not taken
let next_basic_block_index = *basic_block_indices
.get(&next_block_addr)
.expect("basic_block_indices.get (branch not taken)");
if next_basic_block_index == (i as u32) + 1 {
// fallthru
ctx.builder.instruction_body.block_end();
}
else {
ctx.builder.instruction_body.else_();
ctx.builder
.instruction_body
.const_i32(next_basic_block_index as i32);
ctx.builder.instruction_body.set_local(&gen_local_state);
ctx.builder
.instruction_body
.br(basic_blocks.len() as u32 + 2 - i as u32); // to the loop
ctx.builder.instruction_body.block_end();
}
}
else {
ctx.builder.instruction_body.else_();
// End of this page
codegen::gen_debug_track_jit_exit(ctx.builder, block.last_instruction_addr);
codegen::gen_move_registers_from_locals_to_memory(ctx);
ctx.builder.instruction_body.return_();
ctx.builder.instruction_body.block_end();
}
},
}
}
{
// exit-with-pagefault case
ctx.builder.instruction_body.block_end();
codegen::gen_move_registers_from_locals_to_memory(ctx);
codegen::gen_fn0_const(ctx.builder, "trigger_pagefault_end_jit");
codegen::gen_clear_prefixes(ctx);
ctx.builder.instruction_body.return_();
}
ctx.builder.instruction_body.block_end(); // default case
ctx.builder.instruction_body.unreachable();
ctx.builder.instruction_body.block_end(); // loop
ctx.builder.free_local(gen_local_state);
if let Some(local) = gen_local_iteration_counter {
ctx.builder.free_local(local);
}
for local in ctx.register_locals.drain(..) {
ctx.builder.free_local(local);
}
ctx.builder.finish();
}
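/// Emit the body of a single basic block, one instruction at a time via
/// jit_instructions::jit_instruction, updating previous_eip/eip just before
/// the block's last instruction.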
fn jit_generate_basic_block(ctx: &mut JitContext, block: &BasicBlock) {
let start_addr = block.addr;
let last_instruction_addr = block.last_instruction_addr;
let stop_addr = block.end_addr;
// The caller is expected to have checked this condition for the first
// iteration; the loop below re-checks it after every instruction
dbg_assert!(!is_near_end_of_page(start_addr));
codegen::gen_increment_timestamp_counter(ctx.builder, block.number_of_instructions as i32);
ctx.cpu.eip = start_addr;
loop {
let mut instruction = 0;
if cfg!(feature = "profiler") {
instruction = cpu::read32(ctx.cpu.eip);
::opstats::gen_opstats(ctx.builder, instruction);
::opstats::record_opstat_compiled(instruction);
}
if ctx.cpu.eip == last_instruction_addr {
// Before the last instruction:
// - Set eip to *after* the instruction
// - Set previous_eip to *before* the instruction
codegen::gen_set_previous_eip_offset_from_eip(
ctx.builder,
last_instruction_addr - start_addr,
);
codegen::gen_increment_instruction_pointer(ctx.builder, stop_addr - start_addr);
}
let wasm_length_before = ctx.builder.instruction_body.len();
ctx.start_of_current_instruction = ctx.cpu.eip;
let start_eip = ctx.cpu.eip;
let mut instruction_flags = 0;
jit_instructions::jit_instruction(ctx, &mut instruction_flags);
let end_eip = ctx.cpu.eip;
let instruction_length = end_eip - start_eip;
let was_block_boundary = instruction_flags & JIT_INSTR_BLOCK_BOUNDARY_FLAG != 0;
let wasm_length = ctx.builder.instruction_body.len() - wasm_length_before;
::opstats::record_opstat_size_wasm(instruction, wasm_length as u32);
dbg_assert!((end_eip == stop_addr) == (start_eip == last_instruction_addr));
dbg_assert!(instruction_length < MAX_INSTRUCTION_LENGTH);
let end_addr = ctx.cpu.eip;
if end_addr == stop_addr {
// no page was crossed
dbg_assert!(Page::page_of(end_addr) == Page::page_of(start_addr));
break;
}
if was_block_boundary || is_near_end_of_page(end_addr) || end_addr > stop_addr {
dbg_log!(
"Overlapping basic blocks start={:x} expected_end={:x} end={:x} was_block_boundary={} near_end_of_page={}",
start_addr,
stop_addr,
end_addr,
was_block_boundary,
is_near_end_of_page(end_addr)
);
dbg_assert!(false);
break;
}
}
}
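/// Add `hotness` to the page's hash bucket and compile the page once the
/// bucket reaches JIT_THRESHOLD. Since HASH_PRIME buckets are shared between
/// pages, hash collisions can make a page appear hot early.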
pub fn jit_increase_hotness_and_maybe_compile(
ctx: &mut JitState,
phys_address: u32,
cs_offset: u32,
state_flags: CachedStateFlags,
hotness: u32,
) {
let page = Page::page_of(phys_address);
let address_hash = jit_hot_hash_page(page) as usize;
ctx.hot_code_addresses[address_hash] += hotness;
if ctx.hot_code_addresses[address_hash] >= JIT_THRESHOLD {
ctx.hot_code_addresses[address_hash] = 0;
jit_analyze_and_generate(ctx, page, cs_offset, state_flags)
};
}
fn free_wasm_table_index(ctx: &mut JitState, wasm_table_index: u16) {
if CHECK_JIT_CACHE_ARRAY_INVARIANTS {
dbg_assert!(!ctx.wasm_table_index_free_list.contains(&wasm_table_index));
}
ctx.wasm_table_index_free_list.push(wasm_table_index);
// Clearing the function is not strictly necessary, but it makes accidental use
// of the stale index fail more predictably and may allow unused modules to be
// garbage-collected earlier
cpu::jit_clear_func(wasm_table_index);
}
/// Remove all entries with the given wasm_table_index in page
fn remove_jit_cache_wasm_index(ctx: &mut JitState, page: Page, wasm_table_index: u16) {
let mut cache_array_index = jit_cache_array::get_page_index(page).unwrap();
let mut pending = false;
loop {
let entry = jit_cache_array::get_mut(cache_array_index);
let next_cache_array_index = entry.next_index_same_page();
if entry.wasm_table_index == wasm_table_index {
// if one entry is pending, all must be pending
dbg_assert!(!pending || entry.pending);
pending = entry.pending;
jit_cache_array::remove(cache_array_index);
dbg_assert!(entry.next_index_same_page() == None);
entry.wasm_table_index = 0;
entry.start_addr = 0;
entry.pending = false;
}
if let Some(i) = next_cache_array_index {
cache_array_index = i;
}
else {
break;
}
}
if pending {
ctx.wasm_table_index_pending_free.push(wasm_table_index);
}
else {
free_wasm_table_index(ctx, wasm_table_index);
}
if !jit_page_has_code(ctx, page) {
cpu::tlb_set_has_code(page, false);
}
if CHECK_JIT_CACHE_ARRAY_INVARIANTS {
// sanity check that the above iteration deleted all entries
for i in 0..jit_cache_array::SIZE {
let entry = jit_cache_array::get(i);
dbg_assert!(entry.wasm_table_index != wasm_table_index);
}
}
}
/// Register a write in this page: Delete all present code
pub fn jit_dirty_page(ctx: &mut JitState, page: Page) {
let mut did_have_code = false;
if let Some(mut cache_array_index) = jit_cache_array::get_page_index(page) {
did_have_code = true;
let mut index_to_free = HashSet::new();
let mut index_to_pending_free = HashSet::new();
jit_cache_array::set_page_index(page, None);
profiler::stat_increment(stat::INVALIDATE_PAGE);
loop {
profiler::stat_increment(stat::INVALIDATE_CACHE_ENTRY);
let entry = jit_cache_array::get_mut(cache_array_index);
let wasm_table_index = entry.wasm_table_index;
dbg_assert!(page == Page::page_of(entry.start_addr));
let next_cache_array_index = entry.next_index_same_page();
entry.set_next_index_same_page(None);
entry.start_addr = 0;
entry.wasm_table_index = 0;
if entry.pending {
dbg_assert!(!index_to_free.contains(&wasm_table_index));
entry.pending = false;
index_to_pending_free.insert(wasm_table_index);
}
else {
dbg_assert!(!index_to_pending_free.contains(&wasm_table_index));
index_to_free.insert(wasm_table_index);
}
if let Some(i) = next_cache_array_index {
cache_array_index = i;
}
else {
break;
}
}
profiler::stat_increment_by(
stat::INVALIDATE_MODULE,
index_to_pending_free.len() as u64 + index_to_free.len() as u64,
);
for index in index_to_free.iter().cloned() {
free_wasm_table_index(ctx, index)
}
for index in index_to_pending_free {
ctx.wasm_table_index_pending_free.push(index);
}
}
match ctx.entry_points.remove(&page) {
None => {},
Some(_entry_points) => {
did_have_code = true;
// don't try to compile code in this page anymore until it's hot again
ctx.hot_code_addresses[jit_hot_hash_page(page) as usize] = 0;
},
}
if did_have_code {
cpu::tlb_set_has_code(page, false);
}
}
pub fn jit_dirty_cache(ctx: &mut JitState, start_addr: u32, end_addr: u32) {
dbg_assert!(start_addr < end_addr);
let start_page = Page::page_of(start_addr);
let end_page = Page::page_of(end_addr - 1);
for page in start_page.to_u32()..=end_page.to_u32() {
jit_dirty_page(ctx, Page::page_of(page << 12));
}
}
pub fn jit_dirty_cache_small(ctx: &mut JitState, start_addr: u32, end_addr: u32) {
dbg_assert!(start_addr < end_addr);
let start_page = Page::page_of(start_addr);
let end_page = Page::page_of(end_addr - 1);
jit_dirty_page(ctx, start_page);
// Note: This can't happen when paging is enabled, as writes across page
// boundaries are split into one write per page
if start_page != end_page {
dbg_assert!(start_page.to_u32() + 1 == end_page.to_u32());
jit_dirty_page(ctx, end_page);
}
}
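/// Invalidate the entire code cache by dirtying every page.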
pub fn jit_empty_cache(ctx: &mut JitState) {
ctx.entry_points.clear();
for page_index in 0..0x100000 {
jit_dirty_page(ctx, Page::page_of(page_index << 12))
}
}
pub fn jit_page_has_code(ctx: &JitState, page: Page) -> bool {
jit_cache_array::get_page_index(page) != None || ctx.entry_points.contains_key(&page)
}
#[cfg(debug_assertions)]
pub fn jit_unused_cache_stat() -> u32 {
let mut count = 0;
for i in 0..jit_cache_array::SIZE {
if (jit_cache_array::get(i)).start_addr == 0 {
count += 1
}
}
count
}
#[cfg(debug_assertions)]
pub fn jit_get_entry_length(i: u32) -> u32 { (jit_cache_array::get(i)).len }
#[cfg(debug_assertions)]
pub fn jit_get_entry_address(i: u32) -> u32 { (jit_cache_array::get(i)).start_addr }
#[cfg(debug_assertions)]
pub fn jit_get_entry_pending(i: u32) -> bool { (jit_cache_array::get(i)).pending }
#[cfg(debug_assertions)]
pub fn jit_get_wasm_table_index_free_list_count(ctx: &JitState) -> u32 {
ctx.wasm_table_index_free_list.len() as u32
}
pub fn jit_get_op_len(ctx: &JitState) -> u32 { ctx.wasm_builder.get_op_len() }
pub fn jit_get_op_ptr(ctx: &JitState) -> *const u8 { ctx.wasm_builder.get_op_ptr() }
#[cfg(feature = "profiler")]
pub fn check_missed_entry_points(phys_address: u32, state_flags: CachedStateFlags) {
let page = Page::page_of(phys_address);
for i in page.to_address()..page.to_address() + 4096 {
// No need to check [CODE_CACHE_SEARCH_SIZE] entries here as we look at consecutive
// addresses anyway
let index = i & jit_cache_array::MASK;
let entry = jit_cache_array::get(index);
if !entry.pending
&& entry.state_flags == state_flags
&& phys_address >= entry.start_addr
&& phys_address < entry.start_addr + entry.len
{
profiler::stat_increment(stat::RUN_INTERPRETED_MISSED_COMPILED_ENTRY_LOOKUP);
let last_jump_type = unsafe { ::cpu2::cpu::debug_last_jump.name() };
let last_jump_addr =
unsafe { ::cpu2::cpu::debug_last_jump.phys_address() }.unwrap_or(0);
let last_jump_opcode =
if last_jump_addr != 0 { cpu::read32(last_jump_addr) } else { 0 };
let opcode = cpu::read32(phys_address);
dbg_log!(
"Compiled exists, but no entry point, \
start={:x} end={:x} phys_addr={:x} opcode={:02x} {:02x} {:02x} {:02x}. \
Last jump at {:x} ({}) opcode={:02x} {:02x} {:02x} {:02x}",
entry.start_addr,
entry.start_addr + entry.len,
phys_address,
opcode & 0xFF,
opcode >> 8 & 0xFF,
opcode >> 16 & 0xFF,
opcode >> 24 & 0xFF,
last_jump_addr,
last_jump_type,
last_jump_opcode & 0xFF,
last_jump_opcode >> 8 & 0xFF,
last_jump_opcode >> 16 & 0xFF,
last_jump_opcode >> 24 & 0xFF,
);
}
}
}