2018-08-21 19:24:35 +02:00
|
|
|
use std::collections::{BTreeMap, HashMap, HashSet};
|
2018-11-14 21:49:13 +01:00
|
|
|
use std::iter::FromIterator;
|
2018-07-03 17:28:33 +02:00
|
|
|
|
|
|
|
use analysis::AnalysisType;
|
|
|
|
use codegen;
|
|
|
|
use cpu;
|
|
|
|
use cpu_context::CpuContext;
|
Store registers in locals
This changes registers to be temporarily stored in wasm locals, across
each complete wasm module. Registers are moved from memory to locals
upon entering the wasm module and moved from locals to memory upon
leaving. Additionally, calls to functions that modify registers are
wrapped between moving registers to memory before and moving back to
locals after. This affects:
1. All non-custom instructions
2. safe_{read,write}_slow, since it may page fault (the slow path of all memory accesses)
3. task_switch_test* and trigger_ud
4. All block boundaries
5. The fallback functions of gen_safe_read_write (read-modify-write memory accesses)
The performance benefits are currently mostly eaten up by 1. and 4. (if
one calculates the total number of read/writes to registers in memory,
they are higher after this patch, as each instruction of type 1. or 4.
requires moving all 8 registers twice). This can be improved later by the
relatively mechanical work of making instructions custom (not
necessarily full code generation, only the part of the instruction where
registers are accessed). Multi-page wasm module generation will
significantly reduce the number of type 4. instructions.
Due to 2., the overall code size has significantly increased. This case
(the slow path of memory access) is often generated but rarely executed.
These moves can be removed in a later patch by a different scheme for
safe_{read,write}_slow, which has been left out of this patch for
simplicity of reviewing.
This also simplifies our code generation for storing registers, as
instructions_body.const_i32(register_offset);
// some computations ...
instruction_body.store_i32();
turns into:
// some computations ...
write_register(register_index);
I.e., a prefix is not necessary anymore as locals are indexed directly.
Further patches will allow getting rid of some temporary locals, as
registers now can be used directly.
2018-12-25 11:42:03 +01:00
|
|
|
use global_pointers;
|
2018-07-03 17:28:33 +02:00
|
|
|
use jit_instructions;
|
|
|
|
use page::Page;
|
|
|
|
use profiler;
|
|
|
|
use profiler::stat;
|
|
|
|
use state_flags::CachedStateFlags;
|
|
|
|
use util::SafeToU16;
|
Store registers in locals
This changes registers to be temporarily stored in wasm locals, across
each complete wasm module. Registers are moved from memory to locals
upon entering the wasm module and moved from locals to memory upon
leaving. Additionally, calls to functions that modify registers are
wrapped between moving registers to memory before and moving back to
locals after. This affects:
1. All non-custom instructions
2. safe_{read,write}_slow, since it may page fault (the slow path of all memory accesses)
3. task_switch_test* and trigger_ud
4. All block boundaries
5. The fallback functions of gen_safe_read_write (read-modify-write memory accesses)
The performance benefits are currently mostly eaten up by 1. and 4. (if
one calculates the total number of read/writes to registers in memory,
they are higher after this patch, as each instruction of type 1. or 4.
requires moving all 8 registers twice). This can be improved later by the
relatively mechanical work of making instructions custom (not
necessarily full code generation, only the part of the instruction where
registers are accessed). Multi-page wasm module generation will
significantly reduce the number of type 4. instructions.
Due to 2., the overall code size has significantly increased. This case
(the slow path of memory access) is often generated but rarely executed.
These moves can be removed in a later patch by a different scheme for
safe_{read,write}_slow, which has been left out of this patch for
simplicity of reviewing.
This also simplifies our code generation for storing registers, as
instructions_body.const_i32(register_offset);
// some computations ...
instruction_body.store_i32();
turns into:
// some computations ...
write_register(register_index);
I.e., a prefix is not necessary anymore as locals are indexed directly.
Further patches will allow getting rid of some temporary locals, as
registers now can be used directly.
2018-12-25 11:42:03 +01:00
|
|
|
use wasmgen::module_init::{WasmBuilder, WasmLocal};
|
2018-07-28 20:33:40 +02:00
|
|
|
use wasmgen::wasm_util::WasmBuf;
|
2018-07-03 17:28:33 +02:00
|
|
|
|
2018-11-20 23:28:55 +01:00
|
|
|
/// Number of wasm table slots. Index 0 is never handed out as a free index
/// (see `JitState::create_and_initialise`), so `WASM_TABLE_SIZE - 1` are usable.
pub const WASM_TABLE_SIZE: u32 = 900;

/// Modulus used by `jit_hot_hash_page` and length of `JitState::hot_code_addresses`.
pub const HASH_PRIME: u32 = 6151;

/// When `false`, `jit_cache_array::check_invariants` returns immediately.
pub const CHECK_JIT_CACHE_ARRAY_INVARIANTS: bool = false;

// NOTE(review): the three constants below are not used in the part of the file
// reviewed here; their names suggest a per-function loop iteration cap, a switch
// forcing that cap on, and a hotness threshold for compilation -- confirm at the
// use sites.
pub const JIT_MAX_ITERATIONS_PER_FUNCTION: u32 = 10_000;

pub const JIT_ALWAYS_USE_LOOP_SAFETY: bool = true;

pub const JIT_THRESHOLD: u32 = 200 * 1000;

/// Number of consecutive cache slots probed, starting at the slot selected by
/// the physical address, when looking up or inserting a cache entry.
const CODE_CACHE_SEARCH_SIZE: u32 = 8;

/// Addresses within this many bytes of the end of a page are treated as
/// "near the end of the page" by `is_near_end_of_page`.
const MAX_INSTRUCTION_LENGTH: u32 = 16;
|
|
|
|
|
|
|
|
mod jit_cache_array {
|
|
|
|
use page::Page;
|
|
|
|
use state_flags::CachedStateFlags;
|
|
|
|
|
|
|
|
// Note: For performance reasons, this is global state. See jit_find_cache_entry
|
|
|
|
|
|
|
|
const NO_NEXT_ENTRY: u32 = 0xffff_ffff;
|
|
|
|
|
2021-01-01 02:14:29 +01:00
|
|
|
// When changing this, you also need to bump global-base
|
2018-07-03 17:28:33 +02:00
|
|
|
pub const SIZE: u32 = 0x40000;
|
2018-10-01 23:48:15 +02:00
|
|
|
pub const MASK: u32 = SIZE - 1;
|
2018-07-03 17:28:33 +02:00
|
|
|
|
|
|
|
#[derive(Copy, Clone)]
|
|
|
|
pub struct Entry {
|
|
|
|
pub start_addr: u32,
|
|
|
|
|
2018-09-18 00:14:10 +02:00
|
|
|
#[cfg(any(debug_assertions, feature = "profiler"))]
|
2018-07-03 17:28:33 +02:00
|
|
|
pub len: u32,
|
|
|
|
|
|
|
|
#[cfg(debug_assertions)]
|
|
|
|
pub opcode: u32,
|
|
|
|
|
|
|
|
// an index into jit_cache_array for the next code_cache entry within the same physical page
|
|
|
|
next_index_same_page: u32,
|
|
|
|
|
|
|
|
pub initial_state: u16,
|
|
|
|
pub wasm_table_index: u16,
|
|
|
|
pub state_flags: CachedStateFlags,
|
|
|
|
pub pending: bool,
|
|
|
|
}
|
|
|
|
|
|
|
|
impl Entry {
|
|
|
|
pub fn create(
|
|
|
|
start_addr: u32,
|
|
|
|
next_index_same_page: Option<u32>,
|
|
|
|
wasm_table_index: u16,
|
|
|
|
initial_state: u16,
|
|
|
|
state_flags: CachedStateFlags,
|
|
|
|
pending: bool,
|
|
|
|
) -> Entry {
|
|
|
|
let next_index_same_page = next_index_same_page.unwrap_or(NO_NEXT_ENTRY);
|
|
|
|
Entry {
|
|
|
|
start_addr,
|
|
|
|
next_index_same_page,
|
|
|
|
wasm_table_index,
|
|
|
|
initial_state,
|
|
|
|
state_flags,
|
|
|
|
pending,
|
|
|
|
|
2018-09-18 00:14:10 +02:00
|
|
|
#[cfg(any(debug_assertions, feature = "profiler"))]
|
2018-07-03 17:28:33 +02:00
|
|
|
len: 0,
|
|
|
|
|
|
|
|
#[cfg(debug_assertions)]
|
|
|
|
opcode: 0,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
pub fn next_index_same_page(&self) -> Option<u32> {
|
|
|
|
if self.next_index_same_page == NO_NEXT_ENTRY {
|
|
|
|
None
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
Some(self.next_index_same_page)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn set_next_index_same_page(&mut self, next_index: Option<u32>) {
|
|
|
|
if let Some(i) = next_index {
|
|
|
|
self.next_index_same_page = i
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
self.next_index_same_page = NO_NEXT_ENTRY
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
const DEFAULT_ENTRY: Entry = Entry {
|
|
|
|
start_addr: 0,
|
|
|
|
next_index_same_page: NO_NEXT_ENTRY,
|
|
|
|
wasm_table_index: 0,
|
|
|
|
initial_state: 0,
|
|
|
|
state_flags: CachedStateFlags::EMPTY,
|
|
|
|
pending: false,
|
|
|
|
|
2018-09-18 00:14:10 +02:00
|
|
|
#[cfg(any(debug_assertions, feature = "profiler"))]
|
2018-07-03 17:28:33 +02:00
|
|
|
len: 0,
|
|
|
|
#[cfg(debug_assertions)]
|
|
|
|
opcode: 0,
|
|
|
|
};
|
|
|
|
|
|
|
|
#[allow(non_upper_case_globals)]
|
2021-01-01 02:14:29 +01:00
|
|
|
pub const jit_cache_array: *mut Entry = ::global_pointers::JIT_CACHE_ARRAY as *mut Entry;
|
2018-07-03 17:28:33 +02:00
|
|
|
|
2021-01-01 02:14:29 +01:00
|
|
|
#[allow(unreachable_code)]
|
|
|
|
#[cfg(debug_assertions)]
|
|
|
|
unsafe fn _static_assert() { std::mem::transmute::<Entry, [u8; 24]>(panic!()); }
|
2018-07-03 17:28:33 +02:00
|
|
|
|
2021-01-01 02:14:29 +01:00
|
|
|
#[allow(unreachable_code)]
|
|
|
|
#[cfg(all(not(debug_assertions), not(feature = "profiler")))]
|
|
|
|
unsafe fn _static_assert() { std::mem::transmute::<Entry, [u8; 16]>(panic!()); }
|
|
|
|
|
|
|
|
// XXX: Probably doesn't need to be statically allocated
|
2018-07-03 17:28:33 +02:00
|
|
|
#[allow(non_upper_case_globals)]
|
2021-01-01 02:14:29 +01:00
|
|
|
pub const page_first_entry: *mut u32 = ::global_pointers::JIT_PAGE_FIRST_ENTRY as *mut u32;
|
2018-07-03 17:28:33 +02:00
|
|
|
|
|
|
|
pub fn get_page_index(page: Page) -> Option<u32> {
|
2021-01-01 02:14:29 +01:00
|
|
|
let index = unsafe { *page_first_entry.offset(page.to_u32() as isize) };
|
2021-01-01 02:14:30 +01:00
|
|
|
if index == NO_NEXT_ENTRY { None } else { Some(index) }
|
2018-07-03 17:28:33 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
pub fn set_page_index(page: Page, index: Option<u32>) {
|
|
|
|
let index = index.unwrap_or(NO_NEXT_ENTRY);
|
2021-01-01 02:14:29 +01:00
|
|
|
unsafe { *page_first_entry.offset(page.to_u32() as isize) = index }
|
2018-07-03 17:28:33 +02:00
|
|
|
}
|
|
|
|
|
2021-01-01 02:14:29 +01:00
|
|
|
pub fn get(i: u32) -> &'static Entry { unsafe { &*jit_cache_array.offset(i as isize) } }
|
|
|
|
pub fn get_mut(i: u32) -> &'static mut Entry {
|
|
|
|
unsafe { &mut *jit_cache_array.offset(i as isize) }
|
2018-07-03 17:28:33 +02:00
|
|
|
}
|
|
|
|
|
2021-01-01 02:14:30 +01:00
|
|
|
fn set(i: u32, entry: Entry) {
|
|
|
|
unsafe {
|
|
|
|
*jit_cache_array.offset(i as isize) = entry
|
|
|
|
};
|
|
|
|
}
|
2018-07-03 17:28:33 +02:00
|
|
|
|
|
|
|
pub fn insert(index: u32, mut entry: Entry) {
|
|
|
|
let page = Page::page_of(entry.start_addr);
|
|
|
|
|
|
|
|
let previous_entry_index = get_page_index(page);
|
|
|
|
|
|
|
|
if let Some(previous_entry_index) = previous_entry_index {
|
|
|
|
let previous_entry = get(previous_entry_index);
|
|
|
|
|
|
|
|
if previous_entry.start_addr != 0 {
|
|
|
|
dbg_assert!(
|
|
|
|
Page::page_of(previous_entry.start_addr) == Page::page_of(entry.start_addr)
|
|
|
|
);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
set_page_index(page, Some(index));
|
|
|
|
entry.set_next_index_same_page(previous_entry_index);
|
|
|
|
|
|
|
|
set(index, entry);
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn remove(index: u32) {
|
2021-01-01 02:14:29 +01:00
|
|
|
let page = Page::page_of((get(index)).start_addr);
|
2018-07-03 17:28:33 +02:00
|
|
|
|
|
|
|
let mut page_index = get_page_index(page);
|
|
|
|
let mut did_remove = false;
|
|
|
|
|
|
|
|
if page_index == Some(index) {
|
2021-01-01 02:14:29 +01:00
|
|
|
set_page_index(page, (get(index)).next_index_same_page());
|
2018-07-03 17:28:33 +02:00
|
|
|
did_remove = true;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
while let Some(page_index_ok) = page_index {
|
2021-01-01 02:14:29 +01:00
|
|
|
let next_index = (get(page_index_ok)).next_index_same_page();
|
2018-07-03 17:28:33 +02:00
|
|
|
if next_index == Some(index) {
|
2021-01-01 02:14:29 +01:00
|
|
|
(get_mut(page_index_ok))
|
|
|
|
.set_next_index_same_page((get(index)).next_index_same_page());
|
2018-07-03 17:28:33 +02:00
|
|
|
did_remove = true;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
page_index = next_index;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-01-01 02:14:29 +01:00
|
|
|
(get_mut(index)).set_next_index_same_page(None);
|
2018-07-03 17:28:33 +02:00
|
|
|
|
|
|
|
dbg_assert!(did_remove);
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn clear() {
|
|
|
|
unsafe {
|
2021-01-01 02:14:29 +01:00
|
|
|
for i in 0..SIZE {
|
|
|
|
*jit_cache_array.offset(i as isize) = DEFAULT_ENTRY;
|
2018-07-03 17:28:33 +02:00
|
|
|
}
|
|
|
|
|
2021-01-01 02:14:29 +01:00
|
|
|
for i in 0..0x100000 {
|
|
|
|
*page_first_entry.offset(i) = NO_NEXT_ENTRY;
|
2018-07-03 17:28:33 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn check_invariants() {
|
|
|
|
if !::jit::CHECK_JIT_CACHE_ARRAY_INVARIANTS {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
// there are no loops in the linked lists
|
|
|
|
// https://en.wikipedia.org/wiki/Cycle_detection#Floyd's_Tortoise_and_Hare
|
|
|
|
for i in 0..(1 << 20) {
|
|
|
|
let mut slow = get_page_index(Page::page_of(i << 12));
|
|
|
|
let mut fast = slow;
|
|
|
|
|
|
|
|
while let Some(fast_ok) = fast {
|
2021-01-01 02:14:29 +01:00
|
|
|
fast = (get(fast_ok)).next_index_same_page();
|
|
|
|
slow = (get(slow.unwrap())).next_index_same_page();
|
2018-07-03 17:28:33 +02:00
|
|
|
|
|
|
|
if let Some(fast_ok) = fast {
|
2021-01-01 02:14:29 +01:00
|
|
|
fast = (get(fast_ok)).next_index_same_page();
|
2018-07-03 17:28:33 +02:00
|
|
|
}
|
|
|
|
else {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
dbg_assert!(slow != fast);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
let mut wasm_table_index_to_jit_cache_index = [0; ::jit::WASM_TABLE_SIZE as usize];
|
|
|
|
|
2021-01-01 02:14:29 +01:00
|
|
|
for i in 0..SIZE {
|
|
|
|
let entry = get(i);
|
2018-07-03 17:28:33 +02:00
|
|
|
dbg_assert!(entry.next_index_same_page().map_or(true, |i| i < SIZE));
|
|
|
|
|
|
|
|
if entry.pending {
|
|
|
|
dbg_assert!(entry.start_addr != 0);
|
|
|
|
dbg_assert!(entry.wasm_table_index != 0);
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
// an invalid entry has both its start_addr and wasm_table_index set to 0
|
|
|
|
// neither start_addr nor wasm_table_index are 0 for any valid entry
|
|
|
|
|
|
|
|
dbg_assert!((entry.start_addr == 0) == (entry.wasm_table_index == 0));
|
|
|
|
}
|
|
|
|
|
|
|
|
// having a next entry implies validity
|
|
|
|
dbg_assert!(entry.next_index_same_page() == None || entry.start_addr != 0);
|
|
|
|
|
|
|
|
// any valid wasm_table_index can only be used within a single page
|
|
|
|
if entry.wasm_table_index != 0 {
|
|
|
|
let j = wasm_table_index_to_jit_cache_index[entry.wasm_table_index as usize];
|
|
|
|
|
|
|
|
if j != 0 {
|
|
|
|
let other_entry = get(j);
|
|
|
|
dbg_assert!(other_entry.wasm_table_index == entry.wasm_table_index);
|
|
|
|
dbg_assert!(
|
|
|
|
Page::page_of(other_entry.start_addr) == Page::page_of(entry.start_addr)
|
|
|
|
);
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
wasm_table_index_to_jit_cache_index[entry.wasm_table_index as usize] = i as u32;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if entry.start_addr != 0 {
|
|
|
|
// valid entries can be reached from page_first_entry
|
|
|
|
let mut reached = false;
|
|
|
|
|
|
|
|
let page = Page::page_of(entry.start_addr);
|
|
|
|
let mut cache_array_index = get_page_index(page);
|
|
|
|
|
|
|
|
while let Some(index) = cache_array_index {
|
|
|
|
let other_entry = get(index);
|
|
|
|
|
|
|
|
if i as u32 == index {
|
|
|
|
reached = true;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
cache_array_index = other_entry.next_index_same_page();
|
|
|
|
}
|
|
|
|
|
|
|
|
dbg_assert!(reached);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
pub struct JitState {
|
|
|
|
// as an alternative to HashSet, we could use a bitmap of 4096 bits here
|
|
|
|
// (faster, but uses much more memory)
|
|
|
|
// or a compressed bitmap (likely faster)
|
|
|
|
hot_code_addresses: [u32; HASH_PRIME as usize],
|
|
|
|
wasm_table_index_free_list: Vec<u16>,
|
|
|
|
wasm_table_index_pending_free: Vec<u16>,
|
|
|
|
entry_points: HashMap<Page, HashSet<u16>>,
|
|
|
|
wasm_builder: WasmBuilder,
|
|
|
|
}
|
|
|
|
|
|
|
|
impl JitState {
|
|
|
|
pub fn create_and_initialise() -> JitState {
|
|
|
|
let mut wasm_builder = WasmBuilder::new();
|
2018-11-14 21:49:13 +01:00
|
|
|
jit_cache_array::clear();
|
2018-07-03 17:28:33 +02:00
|
|
|
wasm_builder.init();
|
2018-11-14 21:49:13 +01:00
|
|
|
|
|
|
|
// don't assign 0 (XXX: Check)
|
|
|
|
let wasm_table_indices = 1..=(WASM_TABLE_SIZE - 1) as u16;
|
2018-07-03 17:28:33 +02:00
|
|
|
let mut c = JitState {
|
|
|
|
hot_code_addresses: [0; HASH_PRIME as usize],
|
2018-11-14 21:49:13 +01:00
|
|
|
wasm_table_index_free_list: Vec::from_iter(wasm_table_indices),
|
2018-07-03 17:28:33 +02:00
|
|
|
wasm_table_index_pending_free: vec![],
|
|
|
|
entry_points: HashMap::new(),
|
|
|
|
wasm_builder,
|
|
|
|
};
|
|
|
|
jit_empty_cache(&mut c);
|
|
|
|
c
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// How control leaves a basic block, as determined by `jit_find_basic_blocks`.
#[derive(PartialEq, Eq)]
enum BasicBlockType {
    /// Control continues unconditionally at `next_block_addr`, which lies in
    /// the same page (either by falling through into an already known block
    /// or via an unconditional jump).
    Normal {
        next_block_addr: u32,
    },
    /// The block ends in a conditional jump.
    ConditionalJump {
        /// Address following the jump; `None` when that address is too close
        /// to the end of the page.
        next_block_addr: Option<u32>,
        /// Jump target; `None` when it is outside the page or too close to
        /// the end of the page.
        next_block_branch_taken_addr: Option<u32>,
        /// Condition code reported by the instruction analysis.
        condition: u8,
        /// Relative jump offset as reported by the instruction analysis.
        jump_offset: i32,
        /// Whether the offset is applied as a 32-bit (rather than 16-bit) jump.
        jump_offset_is_32: bool,
    },
    /// No successor block inside the page is known.
    Exit,
}
|
|
|
|
|
|
|
|
struct BasicBlock {
|
|
|
|
addr: u32,
|
2018-08-21 19:24:35 +02:00
|
|
|
last_instruction_addr: u32,
|
2018-07-03 17:28:33 +02:00
|
|
|
end_addr: u32,
|
|
|
|
is_entry_block: bool,
|
|
|
|
ty: BasicBlockType,
|
2021-01-01 02:14:29 +01:00
|
|
|
number_of_instructions: u32,
|
2018-07-03 17:28:33 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Result of a cache lookup (see `jit_find_cache_entry`).
///
/// `repr(C)` fixes the layout to two consecutive `u16` fields; the field order
/// must not be changed.
#[repr(C)]
#[derive(Copy, Clone)]
pub struct cached_code {
    /// Wasm table index of the compiled code; 0 in `cached_code::NONE`.
    pub wasm_table_index: u16,
    /// Initial state value copied from the matching cache entry.
    pub initial_state: u16,
}
|
|
|
|
|
|
|
|
impl cached_code {
|
|
|
|
const NONE: cached_code = cached_code {
|
|
|
|
wasm_table_index: 0,
|
|
|
|
initial_state: 0,
|
|
|
|
};
|
|
|
|
}
|
|
|
|
|
|
|
|
pub struct JitContext<'a> {
|
|
|
|
pub cpu: &'a mut CpuContext,
|
|
|
|
pub builder: &'a mut WasmBuilder,
|
Store registers in locals
This changes registers to be temporarily stored in wasm locals, across
each complete wasm module. Registers are moved from memory to locals
upon entering the wasm module and moved from locals to memory upon
leaving. Additionally, calls to functions that modify registers are
wrapped between moving registers to memory before and moving back to
locals after. This affects:
1. All non-custom instructions
2. safe_{read,write}_slow, since it may page fault (the slow path of all memory accesses)
3. task_switch_test* and trigger_ud
4. All block boundaries
5. The fallback functions of gen_safe_read_write (read-modify-write memory accesses)
The performance benefits are currently mostly eaten up by 1. and 4. (if
one calculates the total number of read/writes to registers in memory,
they are higher after this patch, as each instructions of typ 1. or 4.
requires moving all 8 register twice). This can be improved later by the
relatively mechanical work of making instructions custom (not
necessarily full code generation, only the part of the instruction where
registers are accessed). Multi-page wasm module generation will
significantly reduce the number of type 4. instructions.
Due to 2., the overall code size has significantly increased. This case
(the slow path of memory access) is often generated but rarely executed.
These moves can be removed in a later patch by a different scheme for
safe_{read,write}_slow, which has been left out of this patch for
simplicity of reviewing.
This also simplifies our code generation for storing registers, as
instructions_body.const_i32(register_offset);
// some computations ...
instruction_body.store_i32();
turns into:
// some computations ...
write_register(register_index);
I.e., a prefix is not necessary anymore as locals are indexed directly.
Further patches will allow getting rid of some temporary locals, as
registers now can be used directly.
2018-12-25 11:42:03 +01:00
|
|
|
pub register_locals: &'a mut Vec<WasmLocal>,
|
2018-08-22 01:07:45 +02:00
|
|
|
pub start_of_current_instruction: u32,
|
2018-12-26 14:25:46 +01:00
|
|
|
pub current_brtable_depth: u32,
|
2018-07-03 17:28:33 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
// Bit 0 of an instruction flag word.
// NOTE(review): presumably set for instructions that end a basic block; the
// use sites are outside the reviewed part of the file -- confirm.
pub const JIT_INSTR_BLOCK_BOUNDARY_FLAG: u32 = 1 << 0;
|
|
|
|
|
|
|
|
/// Maps a page to a bucket in `0..HASH_PRIME` by reducing its page number
/// modulo `HASH_PRIME`.
fn jit_hot_hash_page(page: Page) -> u32 {
    let page_number = page.to_u32();
    page_number % HASH_PRIME
}
|
|
|
|
|
|
|
|
/// Returns `true` when `address` lies within the last `MAX_INSTRUCTION_LENGTH`
/// bytes of its 4 KiB page.
fn is_near_end_of_page(address: u32) -> bool {
    let offset_in_page = address & 0xFFF;
    let first_near_end_offset = 0x1000 - MAX_INSTRUCTION_LENGTH;
    offset_in_page >= first_near_end_offset
}
|
|
|
|
|
|
|
|
pub fn jit_find_cache_entry(phys_address: u32, state_flags: CachedStateFlags) -> cached_code {
|
2018-09-13 19:38:53 +02:00
|
|
|
if is_near_end_of_page(phys_address) {
|
2018-09-18 20:23:05 +02:00
|
|
|
profiler::stat_increment(stat::RUN_INTERPRETED_NEAR_END_OF_PAGE);
|
2018-09-13 19:38:53 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
let mut run_interpreted_reason = None;
|
|
|
|
|
2018-07-03 17:28:33 +02:00
|
|
|
for i in 0..CODE_CACHE_SEARCH_SIZE {
|
|
|
|
let index = (phys_address + i) & jit_cache_array::MASK;
|
2021-01-01 02:14:29 +01:00
|
|
|
let entry = jit_cache_array::get(index);
|
2018-07-03 17:28:33 +02:00
|
|
|
|
2018-09-13 19:38:53 +02:00
|
|
|
if entry.start_addr == phys_address {
|
|
|
|
if entry.pending {
|
2018-09-18 20:23:05 +02:00
|
|
|
run_interpreted_reason = Some(stat::RUN_INTERPRETED_PENDING)
|
2018-07-03 17:28:33 +02:00
|
|
|
}
|
2018-09-13 19:38:53 +02:00
|
|
|
if entry.state_flags != state_flags {
|
2018-09-18 20:23:05 +02:00
|
|
|
run_interpreted_reason = Some(stat::RUN_INTERPRETED_DIFFERENT_STATE)
|
2018-07-03 17:28:33 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-09-13 19:38:53 +02:00
|
|
|
if is_near_end_of_page(phys_address) {
|
|
|
|
dbg_assert!(entry.start_addr != phys_address);
|
|
|
|
}
|
|
|
|
|
2018-07-03 17:28:33 +02:00
|
|
|
if !entry.pending && entry.start_addr == phys_address && entry.state_flags == state_flags {
|
2021-01-01 02:14:30 +01:00
|
|
|
#[cfg(debug_assertions)] // entry.opcode is not defined otherwise
|
2018-07-03 17:28:33 +02:00
|
|
|
{
|
2021-01-01 02:14:30 +01:00
|
|
|
dbg_assert!(cpu::read32(entry.start_addr) == entry.opcode);
|
2018-07-03 17:28:33 +02:00
|
|
|
}
|
|
|
|
return cached_code {
|
|
|
|
wasm_table_index: entry.wasm_table_index,
|
|
|
|
initial_state: entry.initial_state,
|
|
|
|
};
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-09-13 19:38:53 +02:00
|
|
|
if let Some(reason) = run_interpreted_reason {
|
|
|
|
profiler::stat_increment(reason);
|
|
|
|
}
|
|
|
|
|
2018-07-03 17:28:33 +02:00
|
|
|
cached_code::NONE
|
|
|
|
}
|
|
|
|
|
2018-09-18 01:34:10 +02:00
|
|
|
pub fn record_entry_point(ctx: &mut JitState, phys_address: u32) {
|
2018-07-03 17:28:33 +02:00
|
|
|
if is_near_end_of_page(phys_address) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
let page = Page::page_of(phys_address);
|
|
|
|
let offset_in_page = phys_address as u16 & 0xFFF;
|
|
|
|
let mut is_new = false;
|
|
|
|
ctx.entry_points
|
|
|
|
.entry(page)
|
|
|
|
.or_insert_with(|| {
|
|
|
|
is_new = true;
|
|
|
|
HashSet::new()
|
2018-10-02 04:43:53 +02:00
|
|
|
})
|
|
|
|
.insert(offset_in_page);
|
2018-07-03 17:28:33 +02:00
|
|
|
|
|
|
|
if is_new {
|
|
|
|
cpu::tlb_set_has_code(page, true);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
fn jit_find_basic_blocks(
|
|
|
|
page: Page,
|
|
|
|
entry_points: &HashSet<u16>,
|
|
|
|
cpu: CpuContext,
|
|
|
|
) -> (Vec<BasicBlock>, bool) {
|
|
|
|
let mut to_visit_stack: Vec<u16> = entry_points.iter().cloned().collect();
|
|
|
|
let mut marked_as_entry: HashSet<u16> = entry_points.clone();
|
|
|
|
let page_high_bits = page.to_address();
|
2018-08-21 19:24:35 +02:00
|
|
|
let mut basic_blocks: BTreeMap<u32, BasicBlock> = BTreeMap::new();
|
2018-07-03 17:28:33 +02:00
|
|
|
let mut requires_loop_limit = false;
|
|
|
|
|
|
|
|
while let Some(to_visit_offset) = to_visit_stack.pop() {
|
|
|
|
let to_visit = to_visit_offset as u32 | page_high_bits;
|
|
|
|
if basic_blocks.contains_key(&to_visit) {
|
|
|
|
continue;
|
|
|
|
}
|
2018-08-21 19:24:35 +02:00
|
|
|
if is_near_end_of_page(to_visit) {
|
|
|
|
// Empty basic block, don't insert
|
2018-09-18 20:23:05 +02:00
|
|
|
profiler::stat_increment(stat::COMPILE_CUT_OFF_AT_END_OF_PAGE);
|
2018-08-21 19:24:35 +02:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2018-07-03 17:28:33 +02:00
|
|
|
let mut current_address = to_visit;
|
|
|
|
let mut current_block = BasicBlock {
|
|
|
|
addr: current_address,
|
2018-08-21 19:24:35 +02:00
|
|
|
last_instruction_addr: 0,
|
2018-07-03 17:28:33 +02:00
|
|
|
end_addr: 0,
|
|
|
|
ty: BasicBlockType::Exit,
|
|
|
|
is_entry_block: false,
|
2021-01-01 02:14:29 +01:00
|
|
|
number_of_instructions: 0,
|
2018-07-03 17:28:33 +02:00
|
|
|
};
|
|
|
|
loop {
|
2018-08-21 19:24:35 +02:00
|
|
|
let addr_before_instruction = current_address;
|
2018-07-03 17:28:33 +02:00
|
|
|
let mut ctx = &mut CpuContext {
|
|
|
|
eip: current_address,
|
|
|
|
..cpu
|
|
|
|
};
|
|
|
|
let analysis = ::analysis::analyze_step(&mut ctx);
|
2021-01-01 02:14:29 +01:00
|
|
|
current_block.number_of_instructions += 1;
|
2018-07-03 17:28:33 +02:00
|
|
|
let has_next_instruction = !analysis.no_next_instruction;
|
|
|
|
current_address = ctx.eip;
|
|
|
|
|
|
|
|
match analysis.ty {
|
|
|
|
AnalysisType::Normal => {
|
|
|
|
dbg_assert!(has_next_instruction);
|
|
|
|
|
|
|
|
if basic_blocks.contains_key(¤t_address) {
|
2018-08-21 19:24:35 +02:00
|
|
|
current_block.last_instruction_addr = addr_before_instruction;
|
2018-07-03 17:28:33 +02:00
|
|
|
current_block.end_addr = current_address;
|
2018-08-21 19:24:35 +02:00
|
|
|
dbg_assert!(!is_near_end_of_page(current_address));
|
2018-07-03 17:28:33 +02:00
|
|
|
current_block.ty = BasicBlockType::Normal {
|
|
|
|
next_block_addr: current_address,
|
|
|
|
};
|
2018-08-21 19:24:35 +02:00
|
|
|
break;
|
2018-07-03 17:28:33 +02:00
|
|
|
}
|
|
|
|
},
|
|
|
|
AnalysisType::Jump {
|
|
|
|
offset,
|
|
|
|
is_32,
|
2019-01-05 19:29:42 +01:00
|
|
|
condition: Some(condition),
|
2018-07-03 17:28:33 +02:00
|
|
|
} => {
|
2019-01-05 19:29:42 +01:00
|
|
|
// conditional jump: continue at next and continue at jump target
|
|
|
|
|
2018-07-03 17:28:33 +02:00
|
|
|
let jump_target = if is_32 {
|
|
|
|
current_address.wrapping_add(offset as u32)
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
ctx.cs_offset.wrapping_add(
|
|
|
|
(current_address
|
|
|
|
.wrapping_sub(ctx.cs_offset)
|
2018-08-16 02:05:44 +02:00
|
|
|
.wrapping_add(offset as u32))
|
|
|
|
& 0xFFFF,
|
2018-07-03 17:28:33 +02:00
|
|
|
)
|
|
|
|
};
|
|
|
|
|
2019-01-05 19:29:42 +01:00
|
|
|
dbg_assert!(has_next_instruction);
|
|
|
|
to_visit_stack.push(current_address as u16 & 0xFFF);
|
2018-07-03 17:28:33 +02:00
|
|
|
|
2019-01-05 19:29:42 +01:00
|
|
|
let next_block_branch_taken_addr;
|
2018-07-03 17:28:33 +02:00
|
|
|
|
2019-01-05 19:29:42 +01:00
|
|
|
if Page::page_of(jump_target) == page && !is_near_end_of_page(jump_target) {
|
|
|
|
to_visit_stack.push(jump_target as u16 & 0xFFF);
|
2018-07-03 17:28:33 +02:00
|
|
|
|
2019-01-05 19:29:42 +01:00
|
|
|
next_block_branch_taken_addr = Some(jump_target);
|
2018-07-03 17:28:33 +02:00
|
|
|
|
2019-01-05 19:29:42 +01:00
|
|
|
// Very simple heuristic for "infinite loops": This
|
|
|
|
// detects Linux's "calibrating delay loop"
|
|
|
|
if jump_target == current_block.addr {
|
|
|
|
dbg_log!("Basic block looping back to front");
|
|
|
|
requires_loop_limit = true;
|
2018-07-03 17:28:33 +02:00
|
|
|
}
|
2019-01-05 19:29:42 +01:00
|
|
|
}
|
|
|
|
else {
|
|
|
|
next_block_branch_taken_addr = None;
|
|
|
|
}
|
2018-07-03 17:28:33 +02:00
|
|
|
|
2019-01-05 19:29:42 +01:00
|
|
|
let next_block_addr = if is_near_end_of_page(current_address) {
|
|
|
|
None
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
Some(current_address)
|
|
|
|
};
|
2018-08-21 19:24:35 +02:00
|
|
|
|
2019-01-05 19:29:42 +01:00
|
|
|
current_block.ty = BasicBlockType::ConditionalJump {
|
|
|
|
next_block_addr,
|
|
|
|
next_block_branch_taken_addr,
|
|
|
|
condition,
|
|
|
|
jump_offset: offset,
|
|
|
|
jump_offset_is_32: is_32,
|
|
|
|
};
|
2018-07-03 17:28:33 +02:00
|
|
|
|
2019-01-05 19:29:42 +01:00
|
|
|
current_block.last_instruction_addr = addr_before_instruction;
|
|
|
|
current_block.end_addr = current_address;
|
2018-07-03 17:28:33 +02:00
|
|
|
|
2019-01-05 19:29:42 +01:00
|
|
|
break;
|
|
|
|
},
|
|
|
|
AnalysisType::Jump {
|
|
|
|
offset,
|
|
|
|
is_32,
|
|
|
|
condition: None,
|
|
|
|
} => {
|
|
|
|
// non-conditional jump: continue at jump target
|
|
|
|
|
|
|
|
let jump_target = if is_32 {
|
|
|
|
current_address.wrapping_add(offset as u32)
|
2018-07-03 17:28:33 +02:00
|
|
|
}
|
|
|
|
else {
|
2019-01-05 19:29:42 +01:00
|
|
|
ctx.cs_offset.wrapping_add(
|
|
|
|
(current_address
|
|
|
|
.wrapping_sub(ctx.cs_offset)
|
|
|
|
.wrapping_add(offset as u32))
|
|
|
|
& 0xFFFF,
|
|
|
|
)
|
|
|
|
};
|
2018-07-03 17:28:33 +02:00
|
|
|
|
2019-01-05 19:29:42 +01:00
|
|
|
if has_next_instruction {
|
|
|
|
// Execution will eventually come back to the next instruction (CALL)
|
|
|
|
marked_as_entry.insert(current_address as u16 & 0xFFF);
|
|
|
|
to_visit_stack.push(current_address as u16 & 0xFFF);
|
|
|
|
}
|
2018-07-03 17:28:33 +02:00
|
|
|
|
2019-01-05 19:29:42 +01:00
|
|
|
if Page::page_of(jump_target) == page && !is_near_end_of_page(jump_target) {
|
|
|
|
current_block.ty = BasicBlockType::Normal {
|
|
|
|
next_block_addr: jump_target,
|
|
|
|
};
|
|
|
|
to_visit_stack.push(jump_target as u16 & 0xFFF);
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
current_block.ty = BasicBlockType::Exit;
|
|
|
|
}
|
2018-07-03 17:28:33 +02:00
|
|
|
|
2019-01-05 19:29:42 +01:00
|
|
|
current_block.last_instruction_addr = addr_before_instruction;
|
|
|
|
current_block.end_addr = current_address;
|
2018-07-03 17:28:33 +02:00
|
|
|
|
2019-01-05 19:29:42 +01:00
|
|
|
break;
|
2018-07-03 17:28:33 +02:00
|
|
|
},
|
|
|
|
AnalysisType::BlockBoundary => {
|
|
|
|
// a block boundary but not a jump, get out
|
|
|
|
|
|
|
|
if has_next_instruction {
|
|
|
|
// block boundary, but execution will eventually come back
|
|
|
|
// to the next instruction. Create a new basic block
|
|
|
|
// starting at the next instruction and register it as an
|
|
|
|
// entry point
|
|
|
|
marked_as_entry.insert(current_address as u16 & 0xFFF);
|
|
|
|
to_visit_stack.push(current_address as u16 & 0xFFF);
|
|
|
|
}
|
|
|
|
|
2018-08-21 19:24:35 +02:00
|
|
|
current_block.last_instruction_addr = addr_before_instruction;
|
2018-07-03 17:28:33 +02:00
|
|
|
current_block.end_addr = current_address;
|
|
|
|
break;
|
|
|
|
},
|
|
|
|
}
|
2018-08-21 19:24:35 +02:00
|
|
|
|
|
|
|
if is_near_end_of_page(current_address) {
|
|
|
|
current_block.last_instruction_addr = addr_before_instruction;
|
|
|
|
current_block.end_addr = current_address;
|
2018-09-18 20:23:05 +02:00
|
|
|
profiler::stat_increment(stat::COMPILE_CUT_OFF_AT_END_OF_PAGE);
|
2018-08-21 19:24:35 +02:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
let previous_block = basic_blocks
|
|
|
|
.range(..current_block.addr)
|
|
|
|
.next_back()
|
|
|
|
.map(|(_, previous_block)| (previous_block.addr, previous_block.end_addr));
|
|
|
|
|
|
|
|
if let Some((start_addr, end_addr)) = previous_block {
|
|
|
|
if current_block.addr < end_addr {
|
|
|
|
// If this block overlaps with the previous block, re-analyze the previous block
|
|
|
|
let old_block = basic_blocks.remove(&start_addr);
|
|
|
|
dbg_assert!(old_block.is_some());
|
|
|
|
to_visit_stack.push(start_addr as u16 & 0xFFF);
|
|
|
|
|
|
|
|
// Note that this does not ensure the invariant that two consecutive blocks don't
|
|
|
|
// overlay. For that, we also need to check the following block.
|
|
|
|
}
|
2018-07-03 17:28:33 +02:00
|
|
|
}
|
|
|
|
|
2018-08-21 19:24:35 +02:00
|
|
|
dbg_assert!(current_block.addr < current_block.end_addr);
|
|
|
|
dbg_assert!(current_block.addr <= current_block.last_instruction_addr);
|
|
|
|
dbg_assert!(current_block.last_instruction_addr < current_block.end_addr);
|
|
|
|
|
|
|
|
basic_blocks.insert(current_block.addr, current_block);
|
2018-07-03 17:28:33 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
for block in basic_blocks.values_mut() {
|
|
|
|
if marked_as_entry.contains(&(block.addr as u16 & 0xFFF)) {
|
|
|
|
block.is_entry_block = true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-08-22 01:07:45 +02:00
|
|
|
let basic_blocks: Vec<BasicBlock> = basic_blocks.into_iter().map(|(_, block)| block).collect();
|
2018-07-03 17:28:33 +02:00
|
|
|
|
|
|
|
for i in 0..basic_blocks.len() - 1 {
|
|
|
|
let next_block_addr = basic_blocks[i + 1].addr;
|
2018-08-21 19:24:35 +02:00
|
|
|
let next_block_end_addr = basic_blocks[i + 1].end_addr;
|
|
|
|
let next_block_is_entry = basic_blocks[i + 1].is_entry_block;
|
|
|
|
let block = &basic_blocks[i];
|
2018-08-22 01:07:45 +02:00
|
|
|
dbg_assert!(block.addr < next_block_addr);
|
2018-07-03 17:28:33 +02:00
|
|
|
if next_block_addr < block.end_addr {
|
2018-08-21 19:24:35 +02:00
|
|
|
dbg_log!(
|
|
|
|
"Overlapping first=[from={:x} to={:x} is_entry={}] second=[from={:x} to={:x} is_entry={}]",
|
|
|
|
block.addr,
|
|
|
|
block.end_addr,
|
|
|
|
block.is_entry_block as u8,
|
|
|
|
next_block_addr,
|
|
|
|
next_block_end_addr,
|
|
|
|
next_block_is_entry as u8
|
|
|
|
);
|
2018-07-03 17:28:33 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
(basic_blocks, requires_loop_limit)
|
|
|
|
}
|
|
|
|
|
|
|
|
fn create_cache_entry(ctx: &mut JitState, entry: jit_cache_array::Entry) {
|
|
|
|
let mut found_entry_index = None;
|
|
|
|
let phys_addr = entry.start_addr;
|
|
|
|
|
|
|
|
for i in 0..CODE_CACHE_SEARCH_SIZE {
|
|
|
|
let addr_index = (phys_addr + i) & jit_cache_array::MASK;
|
2019-01-06 12:09:28 +01:00
|
|
|
let existing_entry = jit_cache_array::get(addr_index);
|
2018-07-03 17:28:33 +02:00
|
|
|
|
2019-01-06 12:09:28 +01:00
|
|
|
if existing_entry.start_addr == entry.start_addr
|
|
|
|
&& existing_entry.state_flags == entry.state_flags
|
|
|
|
{
|
|
|
|
profiler::stat_increment(stat::COMPILE_DUPLICATE_ENTRY);
|
|
|
|
}
|
|
|
|
|
|
|
|
if existing_entry.start_addr == 0 {
|
2018-07-03 17:28:33 +02:00
|
|
|
found_entry_index = Some(addr_index);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
let found_entry_index = match found_entry_index {
|
|
|
|
Some(i) => i,
|
|
|
|
None => {
|
2018-09-18 20:23:05 +02:00
|
|
|
profiler::stat_increment(stat::CACHE_MISMATCH);
|
2018-07-03 17:28:33 +02:00
|
|
|
|
|
|
|
// no free slots, overwrite the first one
|
|
|
|
let found_entry_index = phys_addr & jit_cache_array::MASK;
|
|
|
|
|
|
|
|
let old_entry = jit_cache_array::get_mut(found_entry_index);
|
|
|
|
|
|
|
|
// if we're here, we expect to overwrite a valid index
|
|
|
|
dbg_assert!(old_entry.start_addr != 0);
|
|
|
|
dbg_assert!(old_entry.wasm_table_index != 0);
|
|
|
|
|
|
|
|
if old_entry.wasm_table_index == entry.wasm_table_index {
|
2018-09-18 20:23:05 +02:00
|
|
|
profiler::stat_increment(stat::INVALIDATE_SINGLE_ENTRY_CACHE_FULL);
|
2018-09-02 00:18:49 +02:00
|
|
|
|
2018-07-03 17:28:33 +02:00
|
|
|
dbg_assert!(old_entry.pending);
|
|
|
|
dbg_assert!(Page::page_of(old_entry.start_addr) == Page::page_of(phys_addr));
|
|
|
|
|
|
|
|
// The old entry belongs to the same wasm table index as this entry.
|
|
|
|
// *Don't* free the wasm table index, instead just delete the old entry
|
|
|
|
// and use its slot for this entry.
|
|
|
|
// TODO: Optimally, we should pick another slot instead of dropping
|
|
|
|
// an entry has just been created.
|
|
|
|
jit_cache_array::remove(found_entry_index);
|
|
|
|
|
|
|
|
dbg_assert!(old_entry.next_index_same_page() == None);
|
|
|
|
old_entry.pending = false;
|
|
|
|
old_entry.start_addr = 0;
|
|
|
|
}
|
|
|
|
else {
|
2018-09-18 20:23:05 +02:00
|
|
|
profiler::stat_increment(stat::INVALIDATE_MODULE_CACHE_FULL);
|
2018-09-02 00:18:49 +02:00
|
|
|
|
2018-07-03 17:28:33 +02:00
|
|
|
let old_wasm_table_index = old_entry.wasm_table_index;
|
|
|
|
let old_page = Page::page_of(old_entry.start_addr);
|
|
|
|
|
|
|
|
remove_jit_cache_wasm_index(ctx, old_page, old_wasm_table_index);
|
|
|
|
|
|
|
|
//jit_cache_array::check_invariants();
|
|
|
|
|
|
|
|
// old entry should be removed after calling remove_jit_cache_wasm_index
|
|
|
|
|
|
|
|
dbg_assert!(!old_entry.pending);
|
|
|
|
dbg_assert!(old_entry.start_addr == 0);
|
|
|
|
dbg_assert!(old_entry.wasm_table_index == 0);
|
|
|
|
dbg_assert!(old_entry.next_index_same_page() == None);
|
|
|
|
}
|
|
|
|
|
|
|
|
found_entry_index
|
|
|
|
},
|
|
|
|
};
|
|
|
|
|
|
|
|
jit_cache_array::insert(found_entry_index, entry);
|
|
|
|
}
|
|
|
|
|
|
|
|
#[cfg(debug_assertions)]
|
|
|
|
pub fn jit_force_generate_unsafe(
|
|
|
|
ctx: &mut JitState,
|
|
|
|
phys_addr: u32,
|
|
|
|
cs_offset: u32,
|
|
|
|
state_flags: CachedStateFlags,
|
|
|
|
) {
|
|
|
|
record_entry_point(ctx, phys_addr);
|
|
|
|
jit_analyze_and_generate(ctx, Page::page_of(phys_addr), cs_offset, state_flags);
|
|
|
|
}
|
|
|
|
|
2018-07-13 02:17:24 +02:00
|
|
|
#[inline(never)]
|
2018-07-03 17:28:33 +02:00
|
|
|
fn jit_analyze_and_generate(
|
|
|
|
ctx: &mut JitState,
|
|
|
|
page: Page,
|
|
|
|
cs_offset: u32,
|
|
|
|
state_flags: CachedStateFlags,
|
|
|
|
) {
|
2018-09-18 20:23:05 +02:00
|
|
|
profiler::stat_increment(stat::COMPILE);
|
2018-07-03 17:28:33 +02:00
|
|
|
|
|
|
|
let entry_points = ctx.entry_points.remove(&page);
|
|
|
|
let cpu = CpuContext {
|
|
|
|
eip: 0,
|
|
|
|
prefixes: 0,
|
|
|
|
cs_offset,
|
|
|
|
state_flags,
|
|
|
|
};
|
|
|
|
|
|
|
|
if let Some(entry_points) = entry_points {
|
2021-01-01 02:14:29 +01:00
|
|
|
let (basic_blocks, requires_loop_limit) =
|
2018-07-03 17:28:33 +02:00
|
|
|
jit_find_basic_blocks(page, &entry_points, cpu.clone());
|
|
|
|
|
|
|
|
//for b in basic_blocks.iter() {
|
|
|
|
// dbg_log!(
|
|
|
|
// "> Basic block from {:x} to {:x}, is_entry={}",
|
|
|
|
// b.addr,
|
|
|
|
// b.end_addr,
|
|
|
|
// b.is_entry_block
|
|
|
|
// );
|
|
|
|
//}
|
|
|
|
|
2018-11-14 21:49:13 +01:00
|
|
|
if ctx.wasm_table_index_free_list.is_empty() {
|
|
|
|
dbg_log!(
|
|
|
|
"wasm_table_index_free_list empty ({} pending_free), clearing cache",
|
|
|
|
ctx.wasm_table_index_pending_free.len(),
|
|
|
|
);
|
|
|
|
|
|
|
|
// When no free slots are available, delete all cached modules. We could increase the
|
|
|
|
// size of the table, but this way the initial size acts as an upper bound for the
|
|
|
|
// number of wasm modules that we generate, which we want anyway to avoid getting our
|
|
|
|
// tab killed by browsers due to memory constraints.
|
|
|
|
cpu::jit_clear_cache();
|
|
|
|
|
2018-11-19 17:47:58 +01:00
|
|
|
profiler::stat_increment(stat::INVALIDATE_ALL_MODULES_NO_FREE_WASM_INDICES);
|
|
|
|
|
2018-11-14 21:49:13 +01:00
|
|
|
dbg_log!(
|
|
|
|
"after jit_clear_cache: {} pending_free {} free",
|
|
|
|
ctx.wasm_table_index_pending_free.len(),
|
|
|
|
ctx.wasm_table_index_free_list.len(),
|
|
|
|
);
|
|
|
|
|
|
|
|
// This assertion can fail if all entries are pending (not possible unless
|
|
|
|
// WASM_TABLE_SIZE is set very low)
|
|
|
|
dbg_assert!(!ctx.wasm_table_index_free_list.is_empty());
|
|
|
|
}
|
2018-07-03 17:28:33 +02:00
|
|
|
|
|
|
|
// allocate an index in the wasm table
|
|
|
|
let wasm_table_index = ctx
|
|
|
|
.wasm_table_index_free_list
|
|
|
|
.pop()
|
|
|
|
.expect("allocate wasm table index");
|
|
|
|
dbg_assert!(wasm_table_index != 0);
|
|
|
|
|
2018-11-14 21:49:13 +01:00
|
|
|
jit_generate_module(
|
|
|
|
&basic_blocks,
|
|
|
|
requires_loop_limit,
|
|
|
|
cpu.clone(),
|
|
|
|
&mut ctx.wasm_builder,
|
|
|
|
);
|
|
|
|
|
2018-07-03 17:28:33 +02:00
|
|
|
// create entries for each basic block that is marked as an entry point
|
|
|
|
let mut entry_point_count = 0;
|
|
|
|
|
|
|
|
for (i, block) in basic_blocks.iter().enumerate() {
|
2018-09-18 20:23:05 +02:00
|
|
|
profiler::stat_increment(stat::COMPILE_BASIC_BLOCK);
|
2018-07-03 17:28:33 +02:00
|
|
|
|
|
|
|
if block.is_entry_block && block.addr != block.end_addr {
|
|
|
|
dbg_assert!(block.addr != 0);
|
|
|
|
|
|
|
|
let initial_state = i.safe_to_u16();
|
|
|
|
|
2021-01-01 02:14:29 +01:00
|
|
|
#[allow(unused_mut)]
|
2018-07-03 17:28:33 +02:00
|
|
|
let mut entry = jit_cache_array::Entry::create(
|
|
|
|
block.addr,
|
|
|
|
None, // to be filled in by create_cache_entry
|
|
|
|
wasm_table_index,
|
|
|
|
initial_state,
|
|
|
|
state_flags,
|
|
|
|
true,
|
|
|
|
);
|
|
|
|
|
2018-09-18 00:14:10 +02:00
|
|
|
#[cfg(any(debug_assertions, feature = "profiler"))]
|
2018-07-03 17:28:33 +02:00
|
|
|
{
|
|
|
|
entry.len = block.end_addr - block.addr;
|
2018-09-18 00:14:10 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
#[cfg(debug_assertions)]
|
|
|
|
{
|
2018-07-03 17:28:33 +02:00
|
|
|
entry.opcode = cpu::read32(block.addr);
|
|
|
|
}
|
|
|
|
|
|
|
|
create_cache_entry(ctx, entry);
|
|
|
|
|
|
|
|
entry_point_count += 1;
|
2018-09-18 20:23:05 +02:00
|
|
|
profiler::stat_increment(stat::COMPILE_ENTRY_POINT);
|
2018-07-03 17:28:33 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-12-21 04:03:46 +01:00
|
|
|
profiler::stat_increment_by(
|
|
|
|
stat::COMPILE_WASM_TOTAL_BYTES,
|
|
|
|
::c_api::jit_get_op_len() as u64,
|
|
|
|
);
|
|
|
|
|
2018-07-03 17:28:33 +02:00
|
|
|
dbg_assert!(entry_point_count > 0);
|
|
|
|
|
|
|
|
cpu::tlb_set_has_code(page, true);
|
|
|
|
|
|
|
|
jit_cache_array::check_invariants();
|
|
|
|
cpu::check_tlb_invariants();
|
|
|
|
|
|
|
|
let end_addr = 0;
|
|
|
|
let first_opcode = 0;
|
|
|
|
let phys_addr = page.to_address();
|
|
|
|
|
|
|
|
// will call codegen_finalize_finished asynchronously when finished
|
|
|
|
cpu::codegen_finalize(
|
|
|
|
wasm_table_index,
|
|
|
|
phys_addr,
|
|
|
|
end_addr,
|
|
|
|
first_opcode,
|
|
|
|
state_flags,
|
|
|
|
);
|
|
|
|
|
2018-09-18 20:23:05 +02:00
|
|
|
profiler::stat_increment(stat::COMPILE_SUCCESS);
|
2018-07-03 17:28:33 +02:00
|
|
|
}
|
|
|
|
else {
|
|
|
|
//dbg_log("No basic blocks, not generating code");
|
|
|
|
// Nothing to do
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn codegen_finalize_finished(
|
|
|
|
ctx: &mut JitState,
|
|
|
|
wasm_table_index: u16,
|
|
|
|
phys_addr: u32,
|
|
|
|
_end_addr: u32,
|
|
|
|
_first_opcode: u32,
|
|
|
|
_state_flags: CachedStateFlags,
|
|
|
|
) {
|
2018-07-13 02:04:28 +02:00
|
|
|
dbg_assert!(wasm_table_index != 0);
|
2018-07-03 17:28:33 +02:00
|
|
|
|
|
|
|
match ctx
|
|
|
|
.wasm_table_index_pending_free
|
|
|
|
.iter()
|
|
|
|
.position(|i| *i == wasm_table_index)
|
|
|
|
{
|
|
|
|
Some(i) => {
|
|
|
|
ctx.wasm_table_index_pending_free.swap_remove(i);
|
|
|
|
free_wasm_table_index(ctx, wasm_table_index);
|
|
|
|
},
|
|
|
|
None => {
|
|
|
|
let page = Page::page_of(phys_addr);
|
|
|
|
let mut cache_array_index = jit_cache_array::get_page_index(page);
|
|
|
|
|
|
|
|
while let Some(index) = cache_array_index {
|
|
|
|
let mut entry = jit_cache_array::get_mut(index);
|
|
|
|
|
2021-01-01 02:14:29 +01:00
|
|
|
if (*entry).wasm_table_index == wasm_table_index {
|
|
|
|
dbg_assert!((*entry).pending);
|
|
|
|
(*entry).pending = false;
|
2018-07-03 17:28:33 +02:00
|
|
|
}
|
|
|
|
|
2021-01-01 02:14:29 +01:00
|
|
|
cache_array_index = (*entry).next_index_same_page();
|
2018-07-03 17:28:33 +02:00
|
|
|
}
|
|
|
|
},
|
|
|
|
}
|
|
|
|
|
|
|
|
jit_cache_array::check_invariants();
|
|
|
|
|
|
|
|
if CHECK_JIT_CACHE_ARRAY_INVARIANTS {
|
|
|
|
// sanity check that the above iteration marked all entries as not pending
|
|
|
|
|
2021-01-01 02:14:29 +01:00
|
|
|
for i in 0..jit_cache_array::SIZE {
|
|
|
|
let entry = jit_cache_array::get(i);
|
2018-07-03 17:28:33 +02:00
|
|
|
if entry.wasm_table_index == wasm_table_index {
|
2018-07-13 02:04:28 +02:00
|
|
|
dbg_assert!(!entry.pending);
|
2018-07-03 17:28:33 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
fn jit_generate_module(
|
|
|
|
basic_blocks: &Vec<BasicBlock>,
|
|
|
|
requires_loop_limit: bool,
|
|
|
|
mut cpu: CpuContext,
|
|
|
|
builder: &mut WasmBuilder,
|
|
|
|
) {
|
|
|
|
builder.reset();
|
|
|
|
|
|
|
|
let basic_block_indices: HashMap<u32, u32> = basic_blocks
|
|
|
|
.iter()
|
|
|
|
.enumerate()
|
|
|
|
.map(|(index, block)| (block.addr, index as u32))
|
|
|
|
.collect();
|
|
|
|
|
|
|
|
// set state local variable to the initial state passed as the first argument
|
2018-07-28 20:33:40 +02:00
|
|
|
builder
|
|
|
|
.instruction_body
|
|
|
|
.get_local(&builder.arg_local_initial_state);
|
2018-07-30 09:05:33 +02:00
|
|
|
let gen_local_state = builder.set_new_local();
|
2018-07-03 17:28:33 +02:00
|
|
|
|
|
|
|
// initialise max_iterations
|
2018-07-29 11:13:06 +02:00
|
|
|
let gen_local_iteration_counter = if JIT_ALWAYS_USE_LOOP_SAFETY || requires_loop_limit {
|
2018-07-28 20:33:40 +02:00
|
|
|
builder
|
|
|
|
.instruction_body
|
2018-08-22 04:06:52 +02:00
|
|
|
.const_i32(JIT_MAX_ITERATIONS_PER_FUNCTION as i32);
|
2018-07-30 09:05:33 +02:00
|
|
|
Some(builder.set_new_local())
|
2018-07-13 03:10:34 +02:00
|
|
|
}
|
2018-07-29 11:13:06 +02:00
|
|
|
else {
|
|
|
|
None
|
|
|
|
};
|
2018-07-03 17:28:33 +02:00
|
|
|
|
Store registers in locals
This changes registers to be temporarily stored in wasm locals, across
each complete wasm module. Registers are moved from memory to locals
upon entering the wasm module and moved from locals to memory upon
leaving. Additionally, calls to functions that modify registers are
wrapped between moving registers to memory before and moving back to
locals after. This affects:
1. All non-custom instructions
2. safe_{read,write}_slow, since it may page fault (the slow path of all memory accesses)
3. task_switch_test* and trigger_ud
4. All block boundaries
5. The fallback functions of gen_safe_read_write (read-modify-write memory accesses)
The performance benefits are currently mostly eaten up by 1. and 4. (if
one calculates the total number of read/writes to registers in memory,
they are higher after this patch, as each instruction of type 1. or 4.
requires moving all 8 registers twice). This can be improved later by the
relatively mechanical work of making instructions custom (not
necessarily full code generation, only the part of the instruction where
registers are accessed). Multi-page wasm module generation will
significantly reduce the number of type 4. instructions.
Due to 2., the overall code size has significantly increased. This case
(the slow path of memory access) is often generated but rarely executed.
These moves can be removed in a later patch by a different scheme for
safe_{read,write}_slow, which has been left out of this patch for
simplicity of reviewing.
This also simplifies our code generation for storing registers, as
instructions_body.const_i32(register_offset);
// some computations ...
instruction_body.store_i32();
turns into:
// some computations ...
write_register(register_index);
I.e., a prefix is not necessary anymore as locals are indexed directly.
Further patches will allow getting rid of some temporary locals, as
registers now can be used directly.
2018-12-25 11:42:03 +01:00
|
|
|
let mut register_locals = (0..8)
|
|
|
|
.map(|i| {
|
|
|
|
builder
|
|
|
|
.instruction_body
|
|
|
|
.const_i32(global_pointers::get_reg32_offset(i) as i32);
|
|
|
|
builder.instruction_body.load_aligned_i32_from_stack(0);
|
|
|
|
let local = builder.set_new_local();
|
|
|
|
local
|
|
|
|
})
|
|
|
|
.collect();
|
|
|
|
|
|
|
|
let ctx = &mut JitContext {
|
|
|
|
cpu: &mut cpu,
|
|
|
|
builder,
|
|
|
|
register_locals: &mut register_locals,
|
|
|
|
start_of_current_instruction: 0,
|
2018-12-26 14:25:46 +01:00
|
|
|
current_brtable_depth: 0,
|
Store registers in locals
This changes registers to be temporarily stored in wasm locals, across
each complete wasm module. Registers are moved from memory to locals
upon entering the wasm module and moved from locals to memory upon
leaving. Additionally, calls to functions that modify registers are
wrapped between moving registers to memory before and moving back to
locals after. This affects:
1. All non-custom instructions
2. safe_{read,write}_slow, since it may page fault (the slow path of all memory accesses)
3. task_switch_test* and trigger_ud
4. All block boundaries
5. The fallback functions of gen_safe_read_write (read-modify-write memory accesses)
The performance benefits are currently mostly eaten up by 1. and 4. (if
one calculates the total number of read/writes to registers in memory,
they are higher after this patch, as each instruction of type 1. or 4.
requires moving all 8 registers twice). This can be improved later by the
relatively mechanical work of making instructions custom (not
necessarily full code generation, only the part of the instruction where
registers are accessed). Multi-page wasm module generation will
significantly reduce the number of type 4. instructions.
Due to 2., the overall code size has significantly increased. This case
(the slow path of memory access) is often generated but rarely executed.
These moves can be removed in a later patch by a different scheme for
safe_{read,write}_slow, which has been left out of this patch for
simplicity of reviewing.
This also simplifies our code generation for storing registers, as
instructions_body.const_i32(register_offset);
// some computations ...
instruction_body.store_i32();
turns into:
// some computations ...
write_register(register_index);
I.e., a prefix is not necessary anymore as locals are indexed directly.
Further patches will allow getting rid of some temporary locals, as
registers now can be used directly.
2018-12-25 11:42:03 +01:00
|
|
|
};
|
|
|
|
|
2018-07-03 17:28:33 +02:00
|
|
|
// main state machine loop
|
Store registers in locals
This changes registers to be temporarily stored in wasm locals, across
each complete wasm module. Registers are moved from memory to locals
upon entering the wasm module and moved from locals to memory upon
leaving. Additionally, calls to functions that modify registers are
wrapped between moving registers to memory before and moving back to
locals after. This affects:
1. All non-custom instructions
2. safe_{read,write}_slow, since it may page fault (the slow path of all memory accesses)
3. task_switch_test* and trigger_ud
4. All block boundaries
5. The fallback functions of gen_safe_read_write (read-modify-write memory accesses)
The performance benefits are currently mostly eaten up by 1. and 4. (if
one calculates the total number of read/writes to registers in memory,
they are higher after this patch, as each instruction of type 1. or 4.
requires moving all 8 registers twice). This can be improved later by the
relatively mechanical work of making instructions custom (not
necessarily full code generation, only the part of the instruction where
registers are accessed). Multi-page wasm module generation will
significantly reduce the number of type 4. instructions.
Due to 2., the overall code size has significantly increased. This case
(the slow path of memory access) is often generated but rarely executed.
These moves can be removed in a later patch by a different scheme for
safe_{read,write}_slow, which has been left out of this patch for
simplicity of reviewing.
This also simplifies our code generation for storing registers, as
instructions_body.const_i32(register_offset);
// some computations ...
instruction_body.store_i32();
turns into:
// some computations ...
write_register(register_index);
I.e., a prefix is not necessary anymore as locals are indexed directly.
Further patches will allow getting rid of some temporary locals, as
registers now can be used directly.
2018-12-25 11:42:03 +01:00
|
|
|
ctx.builder.instruction_body.loop_void();
|
2018-07-03 17:28:33 +02:00
|
|
|
|
2018-07-29 11:13:06 +02:00
|
|
|
if let Some(gen_local_iteration_counter) = gen_local_iteration_counter.as_ref() {
|
2018-09-18 20:23:05 +02:00
|
|
|
profiler::stat_increment(stat::COMPILE_WITH_LOOP_SAFETY);
|
2018-07-03 17:28:33 +02:00
|
|
|
|
|
|
|
// decrement max_iterations
|
Store registers in locals
This changes registers to be temporarily stored in wasm locals, across
each complete wasm module. Registers are moved from memory to locals
upon entering the wasm module and moved from locals to memory upon
leaving. Additionally, calls to functions that modify registers are
wrapped between moving registers to memory before and moving back to
locals after. This affects:
1. All non-custom instructions
2. safe_{read,write}_slow, since it may page fault (the slow path of all memory accesses)
3. task_switch_test* and trigger_ud
4. All block boundaries
5. The fallback functions of gen_safe_read_write (read-modify-write memory accesses)
The performance benefits are currently mostly eaten up by 1. and 4. (if
one calculates the total number of read/writes to registers in memory,
they are higher after this patch, as each instruction of type 1. or 4.
requires moving all 8 registers twice). This can be improved later by the
relatively mechanical work of making instructions custom (not
necessarily full code generation, only the part of the instruction where
registers are accessed). Multi-page wasm module generation will
significantly reduce the number of type 4. instructions.
Due to 2., the overall code size has significantly increased. This case
(the slow path of memory access) is often generated but rarely executed.
These moves can be removed in a later patch by a different scheme for
safe_{read,write}_slow, which has been left out of this patch for
simplicity of reviewing.
This also simplifies our code generation for storing registers, as
instructions_body.const_i32(register_offset);
// some computations ...
instruction_body.store_i32();
turns into:
// some computations ...
write_register(register_index);
I.e., a prefix is not necessary anymore as locals are indexed directly.
Further patches will allow getting rid of some temporary locals, as
registers now can be used directly.
2018-12-25 11:42:03 +01:00
|
|
|
ctx.builder
|
2018-07-28 20:33:40 +02:00
|
|
|
.instruction_body
|
|
|
|
.get_local(gen_local_iteration_counter);
|
Store registers in locals
This changes registers to be temporarily stored in wasm locals, across
each complete wasm module. Registers are moved from memory to locals
upon entering the wasm module and moved from locals to memory upon
leaving. Additionally, calls to functions that modify registers are
wrapped between moving registers to memory before and moving back to
locals after. This affects:
1. All non-custom instructions
2. safe_{read,write}_slow, since it may page fault (the slow path of all memory accesses)
3. task_switch_test* and trigger_ud
4. All block boundaries
5. The fallback functions of gen_safe_read_write (read-modify-write memory accesses)
The performance benefits are currently mostly eaten up by 1. and 4. (if
one calculates the total number of read/writes to registers in memory,
they are higher after this patch, as each instruction of type 1. or 4.
requires moving all 8 registers twice). This can be improved later by the
relatively mechanical work of making instructions custom (not
necessarily full code generation, only the part of the instruction where
registers are accessed). Multi-page wasm module generation will
significantly reduce the number of type 4. instructions.
Due to 2., the overall code size has significantly increased. This case
(the slow path of memory access) is often generated but rarely executed.
These moves can be removed in a later patch by a different scheme for
safe_{read,write}_slow, which has been left out of this patch for
simplicity of reviewing.
This also simplifies our code generation for storing registers, as
instructions_body.const_i32(register_offset);
// some computations ...
instruction_body.store_i32();
turns into:
// some computations ...
write_register(register_index);
I.e., a prefix is not necessary anymore as locals are indexed directly.
Further patches will allow getting rid of some temporary locals, as
registers now can be used directly.
2018-12-25 11:42:03 +01:00
|
|
|
ctx.builder.instruction_body.const_i32(-1);
|
|
|
|
ctx.builder.instruction_body.add_i32();
|
|
|
|
ctx.builder
|
2018-07-28 20:33:40 +02:00
|
|
|
.instruction_body
|
|
|
|
.set_local(gen_local_iteration_counter);
|
2018-07-03 17:28:33 +02:00
|
|
|
|
|
|
|
// if max_iterations == 0: return
|
Store registers in locals
This changes registers to be temporarily stored in wasm locals, across
each complete wasm module. Registers are moved from memory to locals
upon entering the wasm module and moved from locals to memory upon
leaving. Additionally, calls to functions that modify registers are
wrapped between moving registers to memory before and moving back to
locals after. This affects:
1. All non-custom instructions
2. safe_{read,write}_slow, since it may page fault (the slow path of all memory accesses)
3. task_switch_test* and trigger_ud
4. All block boundaries
5. The fallback functions of gen_safe_read_write (read-modify-write memory accesses)
The performance benefits are currently mostly eaten up by 1. and 4. (if
one calculates the total number of read/writes to registers in memory,
they are higher after this patch, as each instruction of type 1. or 4.
requires moving all 8 registers twice). This can be improved later by the
relatively mechanical work of making instructions custom (not
necessarily full code generation, only the part of the instruction where
registers are accessed). Multi-page wasm module generation will
significantly reduce the number of type 4. instructions.
Due to 2., the overall code size has significantly increased. This case
(the slow path of memory access) is often generated but rarely executed.
These moves can be removed in a later patch by a different scheme for
safe_{read,write}_slow, which has been left out of this patch for
simplicity of reviewing.
This also simplifies our code generation for storing registers, as
instructions_body.const_i32(register_offset);
// some computations ...
instruction_body.store_i32();
turns into:
// some computations ...
write_register(register_index);
I.e., a prefix is not necessary anymore as locals are indexed directly.
Further patches will allow getting rid of some temporary locals, as
registers now can be used directly.
2018-12-25 11:42:03 +01:00
|
|
|
ctx.builder
|
2018-07-28 20:33:40 +02:00
|
|
|
.instruction_body
|
|
|
|
.get_local(gen_local_iteration_counter);
|
Store registers in locals
This changes registers to be temporarily stored in wasm locals, across
each complete wasm module. Registers are moved from memory to locals
upon entering the wasm module and moved from locals to memory upon
leaving. Additionally, calls to functions that modify registers are
wrapped between moving registers to memory before and moving back to
locals after. This affects:
1. All non-custom instructions
2. safe_{read,write}_slow, since it may page fault (the slow path of all memory accesses)
3. task_switch_test* and trigger_ud
4. All block boundaries
5. The fallback functions of gen_safe_read_write (read-modify-write memory accesses)
The performance benefits are currently mostly eaten up by 1. and 4. (if
one calculates the total number of read/writes to registers in memory,
they are higher after this patch, as each instruction of type 1. or 4.
requires moving all 8 registers twice). This can be improved later by the
relatively mechanical work of making instructions custom (not
necessarily full code generation, only the part of the instruction where
registers are accessed). Multi-page wasm module generation will
significantly reduce the number of type 4. instructions.
Due to 2., the overall code size has significantly increased. This case
(the slow path of memory access) is often generated but rarely executed.
These moves can be removed in a later patch by a different scheme for
safe_{read,write}_slow, which has been left out of this patch for
simplicity of reviewing.
This also simplifies our code generation for storing registers, as
instructions_body.const_i32(register_offset);
// some computations ...
instruction_body.store_i32();
turns into:
// some computations ...
write_register(register_index);
I.e., a prefix is not necessary anymore as locals are indexed directly.
Further patches will allow getting rid of some temporary locals, as
registers now can be used directly.
2018-12-25 11:42:03 +01:00
|
|
|
ctx.builder.instruction_body.eqz_i32();
|
|
|
|
ctx.builder.instruction_body.if_void();
|
|
|
|
codegen::gen_debug_track_jit_exit(ctx.builder, 0);
|
|
|
|
codegen::gen_move_registers_from_locals_to_memory(ctx);
|
|
|
|
ctx.builder.instruction_body.return_();
|
|
|
|
ctx.builder.instruction_body.block_end();
|
2018-07-03 17:28:33 +02:00
|
|
|
}
|
|
|
|
|
Store registers in locals
This changes registers to be temporarily stored in wasm locals, across
each complete wasm module. Registers are moved from memory to locals
upon entering the wasm module and moved from locals to memory upon
leaving. Additionally, calls to functions that modify registers are
wrapped between moving registers to memory before and moving back to
locals after. This affects:
1. All non-custom instructions
2. safe_{read,write}_slow, since it may page fault (the slow path of all memory accesses)
3. task_switch_test* and trigger_ud
4. All block boundaries
5. The fallback functions of gen_safe_read_write (read-modify-write memory accesses)
The performance benefits are currently mostly eaten up by 1. and 4. (if
one calculates the total number of read/writes to registers in memory,
they are higher after this patch, as each instruction of type 1. or 4.
requires moving all 8 registers twice). This can be improved later by the
relatively mechanical work of making instructions custom (not
necessarily full code generation, only the part of the instruction where
registers are accessed). Multi-page wasm module generation will
significantly reduce the number of type 4. instructions.
Due to 2., the overall code size has significantly increased. This case
(the slow path of memory access) is often generated but rarely executed.
These moves can be removed in a later patch by a different scheme for
safe_{read,write}_slow, which has been left out of this patch for
simplicity of reviewing.
This also simplifies our code generation for storing registers, as
instructions_body.const_i32(register_offset);
// some computations ...
instruction_body.store_i32();
turns into:
// some computations ...
write_register(register_index);
I.e., a prefix is not necessary anymore as locals are indexed directly.
Further patches will allow getting rid of some temporary locals, as
registers now can be used directly.
2018-12-25 11:42:03 +01:00
|
|
|
ctx.builder.instruction_body.block_void(); // for the default case
|
2018-07-03 17:28:33 +02:00
|
|
|
|
2018-12-26 14:25:46 +01:00
|
|
|
ctx.builder.instruction_body.block_void(); // for the exit-with-pagefault case
|
|
|
|
|
2018-07-03 17:28:33 +02:00
|
|
|
// generate the opening blocks for the cases
|
|
|
|
|
|
|
|
for _ in 0..basic_blocks.len() {
|
Store registers in locals
This changes registers to be temporarily stored in wasm locals, across
each complete wasm module. Registers are moved from memory to locals
upon entering the wasm module and moved from locals to memory upon
leaving. Additionally, calls to functions that modify registers are
wrapped between moving registers to memory before and moving back to
locals after. This affects:
1. All non-custom instructions
2. safe_{read,write}_slow, since it may page fault (the slow path of all memory accesses)
3. task_switch_test* and trigger_ud
4. All block boundaries
5. The fallback functions of gen_safe_read_write (read-modify-write memory accesses)
The performance benefits are currently mostly eaten up by 1. and 4. (if
one calculates the total number of read/writes to registers in memory,
they are higher after this patch, as each instruction of type 1. or 4.
requires moving all 8 registers twice). This can be improved later by the
relatively mechanical work of making instructions custom (not
necessarily full code generation, only the part of the instruction where
registers are accessed). Multi-page wasm module generation will
significantly reduce the number of type 4. instructions.
Due to 2., the overall code size has significantly increased. This case
(the slow path of memory access) is often generated but rarely executed.
These moves can be removed in a later patch by a different scheme for
safe_{read,write}_slow, which has been left out of this patch for
simplicity of reviewing.
This also simplifies our code generation for storing registers, as
instructions_body.const_i32(register_offset);
// some computations ...
instruction_body.store_i32();
turns into:
// some computations ...
write_register(register_index);
I.e., a prefix is not necessary anymore as locals are indexed directly.
Further patches will allow getting rid of some temporary locals, as
registers now can be used directly.
2018-12-25 11:42:03 +01:00
|
|
|
ctx.builder.instruction_body.block_void();
|
2018-07-03 17:28:33 +02:00
|
|
|
}
|
|
|
|
|
Store registers in locals
This changes registers to be temporarily stored in wasm locals, across
each complete wasm module. Registers are moved from memory to locals
upon entering the wasm module and moved from locals to memory upon
leaving. Additionally, calls to functions that modify registers are
wrapped between moving registers to memory before and moving back to
locals after. This affects:
1. All non-custom instructions
2. safe_{read,write}_slow, since it may page fault (the slow path of all memory accesses)
3. task_switch_test* and trigger_ud
4. All block boundaries
5. The fallback functions of gen_safe_read_write (read-modify-write memory accesses)
The performance benefits are currently mostly eaten up by 1. and 4. (if
one calculates the total number of read/writes to registers in memory,
they are higher after this patch, as each instruction of type 1. or 4.
requires moving all 8 registers twice). This can be improved later by the
relatively mechanical work of making instructions custom (not
necessarily full code generation, only the part of the instruction where
registers are accessed). Multi-page wasm module generation will
significantly reduce the number of type 4. instructions.
Due to 2., the overall code size has significantly increased. This case
(the slow path of memory access) is often generated but rarely executed.
These moves can be removed in a later patch by a different scheme for
safe_{read,write}_slow, which has been left out of this patch for
simplicity of reviewing.
This also simplifies our code generation for storing registers, as
instructions_body.const_i32(register_offset);
// some computations ...
instruction_body.store_i32();
turns into:
// some computations ...
write_register(register_index);
I.e., a prefix is not necessary anymore as locals are indexed directly.
Further patches will allow getting rid of some temporary locals, as
registers now can be used directly.
2018-12-25 11:42:03 +01:00
|
|
|
ctx.builder.instruction_body.get_local(&gen_local_state);
|
|
|
|
ctx.builder
|
2018-07-28 20:33:40 +02:00
|
|
|
.instruction_body
|
2018-12-26 14:25:46 +01:00
|
|
|
.brtable_and_cases(basic_blocks.len() as u32 + 1); // plus one for the exit-with-pagefault case
|
2018-07-03 17:28:33 +02:00
|
|
|
|
|
|
|
for (i, block) in basic_blocks.iter().enumerate() {
|
|
|
|
// Case [i] will jump after the [i]th block, so we first generate the
|
|
|
|
// block end opcode and then the code for that block
|
Store registers in locals
This changes registers to be temporarily stored in wasm locals, across
each complete wasm module. Registers are moved from memory to locals
upon entering the wasm module and moved from locals to memory upon
leaving. Additionally, calls to functions that modify registers are
wrapped between moving registers to memory before and moving back to
locals after. This affects:
1. All non-custom instructions
2. safe_{read,write}_slow, since it may page fault (the slow path of all memory accesses)
3. task_switch_test* and trigger_ud
4. All block boundaries
5. The fallback functions of gen_safe_read_write (read-modify-write memory accesses)
The performance benefits are currently mostly eaten up by 1. and 4. (if
one calculates the total number of read/writes to registers in memory,
they are higher after this patch, as each instruction of type 1. or 4.
requires moving all 8 registers twice). This can be improved later by the
relatively mechanical work of making instructions custom (not
necessarily full code generation, only the part of the instruction where
registers are accessed). Multi-page wasm module generation will
significantly reduce the number of type 4. instructions.
Due to 2., the overall code size has significantly increased. This case
(the slow path of memory access) is often generated but rarely executed.
These moves can be removed in a later patch by a different scheme for
safe_{read,write}_slow, which has been left out of this patch for
simplicity of reviewing.
This also simplifies our code generation for storing registers, as
instructions_body.const_i32(register_offset);
// some computations ...
instruction_body.store_i32();
turns into:
// some computations ...
write_register(register_index);
I.e., a prefix is not necessary anymore as locals are indexed directly.
Further patches will allow getting rid of some temporary locals, as
registers now can be used directly.
2018-12-25 11:42:03 +01:00
|
|
|
ctx.builder.instruction_body.block_end();
|
2018-07-03 17:28:33 +02:00
|
|
|
|
2018-12-26 14:25:46 +01:00
|
|
|
ctx.current_brtable_depth = basic_blocks.len() as u32 + 1 - i as u32;
|
|
|
|
|
2018-08-21 19:24:35 +02:00
|
|
|
dbg_assert!(block.addr < block.end_addr);
|
|
|
|
|
2021-01-01 02:14:29 +01:00
|
|
|
jit_generate_basic_block(ctx, block);
|
2018-07-03 17:28:33 +02:00
|
|
|
|
Store registers in locals
This changes registers to be temporarily stored in wasm locals, across
each complete wasm module. Registers are moved from memory to locals
upon entering the wasm module and moved from locals to memory upon
leaving. Additionally, calls to functions that modify registers are
wrapped between moving registers to memory before and moving back to
locals after. This affects:
1. All non-custom instructions
2. safe_{read,write}_slow, since it may page fault (the slow path of all memory accesses)
3. task_switch_test* and trigger_ud
4. All block boundaries
5. The fallback functions of gen_safe_read_write (read-modify-write memory accesses)
The performance benefits are currently mostly eaten up by 1. and 4. (if
one calculates the total number of read/writes to registers in memory,
they are higher after this patch, as each instruction of type 1. or 4.
requires moving all 8 registers twice). This can be improved later by the
relatively mechanical work of making instructions custom (not
necessarily full code generation, only the part of the instruction where
registers are accessed). Multi-page wasm module generation will
significantly reduce the number of type 4. instructions.
Due to 2., the overall code size has significantly increased. This case
(the slow path of memory access) is often generated but rarely executed.
These moves can be removed in a later patch by a different scheme for
safe_{read,write}_slow, which has been left out of this patch for
simplicity of reviewing.
This also simplifies our code generation for storing registers, as
instructions_body.const_i32(register_offset);
// some computations ...
instruction_body.store_i32();
turns into:
// some computations ...
write_register(register_index);
I.e., a prefix is not necessary anymore as locals are indexed directly.
Further patches will allow getting rid of some temporary locals, as
registers now can be used directly.
2018-12-25 11:42:03 +01:00
|
|
|
let invalid_connection_to_next_block = block.end_addr != ctx.cpu.eip;
|
2018-08-21 19:25:32 +02:00
|
|
|
dbg_assert!(!invalid_connection_to_next_block);
|
2018-07-03 17:28:33 +02:00
|
|
|
|
2018-08-31 23:52:53 +02:00
|
|
|
match &block.ty {
|
|
|
|
BasicBlockType::Exit => {
|
2018-07-03 17:28:33 +02:00
|
|
|
// Exit this function
|
Store registers in locals
This changes registers to be temporarily stored in wasm locals, across
each complete wasm module. Registers are moved from memory to locals
upon entering the wasm module and moved from locals to memory upon
leaving. Additionally, calls to functions that modify registers are
wrapped between moving registers to memory before and moving back to
locals after. This affects:
1. All non-custom instructions
2. safe_{read,write}_slow, since it may page fault (the slow path of all memory accesses)
3. task_switch_test* and trigger_ud
4. All block boundaries
5. The fallback functions of gen_safe_read_write (read-modify-write memory accesses)
The performance benefits are currently mostly eaten up by 1. and 4. (if
one calculates the total number of read/writes to registers in memory,
they are higher after this patch, as each instruction of type 1. or 4.
requires moving all 8 registers twice). This can be improved later by the
relatively mechanical work of making instructions custom (not
necessarily full code generation, only the part of the instruction where
registers are accessed). Multi-page wasm module generation will
significantly reduce the number of type 4. instructions.
Due to 2., the overall code size has significantly increased. This case
(the slow path of memory access) is often generated but rarely executed.
These moves can be removed in a later patch by a different scheme for
safe_{read,write}_slow, which has been left out of this patch for
simplicity of reviewing.
This also simplifies our code generation for storing registers, as
instructions_body.const_i32(register_offset);
// some computations ...
instruction_body.store_i32();
turns into:
// some computations ...
write_register(register_index);
I.e., a prefix is not necessary anymore as locals are indexed directly.
Further patches will allow getting rid of some temporary locals, as
registers now can be used directly.
2018-12-25 11:42:03 +01:00
|
|
|
codegen::gen_debug_track_jit_exit(ctx.builder, block.last_instruction_addr);
|
|
|
|
codegen::gen_move_registers_from_locals_to_memory(ctx);
|
|
|
|
ctx.builder.instruction_body.return_();
|
2018-07-03 17:28:33 +02:00
|
|
|
},
|
2018-08-31 23:52:53 +02:00
|
|
|
BasicBlockType::Normal { next_block_addr } => {
|
2018-07-03 17:28:33 +02:00
|
|
|
// Unconditional jump to next basic block
|
|
|
|
// - All instructions that don't change eip
|
|
|
|
// - Unconditional jump
|
|
|
|
|
2021-01-01 02:14:28 +01:00
|
|
|
let next_basic_block_index = *basic_block_indices
|
2018-08-21 03:10:58 +02:00
|
|
|
.get(&next_block_addr)
|
|
|
|
.expect("basic_block_indices.get (Normal)");
|
2018-07-03 17:28:33 +02:00
|
|
|
|
2021-01-01 02:14:28 +01:00
|
|
|
if next_basic_block_index == (i as u32) + 1 {
|
|
|
|
// fallthru
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
// set state variable to next basic block
|
|
|
|
ctx.builder
|
|
|
|
.instruction_body
|
|
|
|
.const_i32(next_basic_block_index as i32);
|
|
|
|
ctx.builder.instruction_body.set_local(&gen_local_state);
|
2018-07-03 17:28:33 +02:00
|
|
|
|
2021-01-01 02:14:28 +01:00
|
|
|
ctx.builder.instruction_body.br(ctx.current_brtable_depth); // to the loop
|
|
|
|
}
|
2018-07-03 17:28:33 +02:00
|
|
|
},
|
2018-08-31 23:52:53 +02:00
|
|
|
&BasicBlockType::ConditionalJump {
|
|
|
|
next_block_addr,
|
|
|
|
next_block_branch_taken_addr,
|
|
|
|
condition,
|
|
|
|
jump_offset,
|
|
|
|
jump_offset_is_32,
|
|
|
|
} => {
|
2018-07-03 17:28:33 +02:00
|
|
|
// Conditional jump to next basic block
|
2021-01-01 02:14:30 +01:00
|
|
|
// - jnz, jc, loop, jcxz, etc.
|
2018-07-03 17:28:33 +02:00
|
|
|
|
2021-01-01 02:14:30 +01:00
|
|
|
codegen::gen_condition_fn(ctx, condition);
|
Store registers in locals
This changes registers to be temporarily stored in wasm locals, across
each complete wasm module. Registers are moved from memory to locals
upon entering the wasm module and moved from locals to memory upon
leaving. Additionally, calls to functions that modify registers are
wrapped between moving registers to memory before and moving back to
locals after. This affects:
1. All non-custom instructions
2. safe_{read,write}_slow, since it may page fault (the slow path of all memory accesses)
3. task_switch_test* and trigger_ud
4. All block boundaries
5. The fallback functions of gen_safe_read_write (read-modify-write memory accesses)
The performance benefits are currently mostly eaten up by 1. and 4. (if
one calculates the total number of read/writes to registers in memory,
they are higher after this patch, as each instruction of type 1. or 4.
requires moving all 8 registers twice). This can be improved later by the
relatively mechanical work of making instructions custom (not
necessarily full code generation, only the part of the instruction where
registers are accessed). Multi-page wasm module generation will
significantly reduce the number of type 4. instructions.
Due to 2., the overall code size has significantly increased. This case
(the slow path of memory access) is often generated but rarely executed.
These moves can be removed in a later patch by a different scheme for
safe_{read,write}_slow, which has been left out of this patch for
simplicity of reviewing.
This also simplifies our code generation for storing registers, as
instructions_body.const_i32(register_offset);
// some computations ...
instruction_body.store_i32();
turns into:
// some computations ...
write_register(register_index);
I.e., a prefix is not necessary anymore as locals are indexed directly.
Further patches will allow getting rid of some temporary locals, as
registers now can be used directly.
2018-12-25 11:42:03 +01:00
|
|
|
ctx.builder.instruction_body.if_void();
|
2018-07-03 17:28:33 +02:00
|
|
|
|
|
|
|
// Branch taken
|
|
|
|
|
|
|
|
if jump_offset_is_32 {
|
Store registers in locals
This changes registers to be temporarily stored in wasm locals, across
each complete wasm module. Registers are moved from memory to locals
upon entering the wasm module and moved from locals to memory upon
leaving. Additionally, calls to functions that modify registers are
wrapped between moving registers to memory before and moving back to
locals after. This affects:
1. All non-custom instructions
2. safe_{read,write}_slow, since it may page fault (the slow path of all memory accesses)
3. task_switch_test* and trigger_ud
4. All block boundaries
5. The fallback functions of gen_safe_read_write (read-modify-write memory accesses)
The performance benefits are currently mostly eaten up by 1. and 4. (if
one calculates the total number of read/writes to registers in memory,
they are higher after this patch, as each instruction of type 1. or 4.
requires moving all 8 registers twice). This can be improved later by the
relatively mechanical work of making instructions custom (not
necessarily full code generation, only the part of the instruction where
registers are accessed). Multi-page wasm module generation will
significantly reduce the number of type 4. instructions.
Due to 2., the overall code size has significantly increased. This case
(the slow path of memory access) is often generated but rarely executed.
These moves can be removed in a later patch by a different scheme for
safe_{read,write}_slow, which has been left out of this patch for
simplicity of reviewing.
This also simplifies our code generation for storing registers, as
instructions_body.const_i32(register_offset);
// some computations ...
instruction_body.store_i32();
turns into:
// some computations ...
write_register(register_index);
I.e., a prefix is not necessary anymore as locals are indexed directly.
Further patches will allow getting rid of some temporary locals, as
registers now can be used directly.
2018-12-25 11:42:03 +01:00
|
|
|
codegen::gen_relative_jump(ctx.builder, jump_offset);
|
2018-07-03 17:28:33 +02:00
|
|
|
}
|
|
|
|
else {
|
Store registers in locals
This changes registers to be temporarily stored in wasm locals, across
each complete wasm module. Registers are moved from memory to locals
upon entering the wasm module and moved from locals to memory upon
leaving. Additionally, calls to functions that modify registers are
wrapped between moving registers to memory before and moving back to
locals after. This affects:
1. All non-custom instructions
2. safe_{read,write}_slow, since it may page fault (the slow path of all memory accesses)
3. task_switch_test* and trigger_ud
4. All block boundaries
5. The fallback functions of gen_safe_read_write (read-modify-write memory accesses)
The performance benefits are currently mostly eaten up by 1. and 4. (if
one calculates the total number of read/writes to registers in memory,
they are higher after this patch, as each instruction of type 1. or 4.
requires moving all 8 registers twice). This can be improved later by the
relatively mechanical work of making instructions custom (not
necessarily full code generation, only the part of the instruction where
registers are accessed). Multi-page wasm module generation will
significantly reduce the number of type 4. instructions.
Due to 2., the overall code size has significantly increased. This case
(the slow path of memory access) is often generated but rarely executed.
These moves can be removed in a later patch by a different scheme for
safe_{read,write}_slow, which has been left out of this patch for
simplicity of reviewing.
This also simplifies our code generation for storing registers, as
instructions_body.const_i32(register_offset);
// some computations ...
instruction_body.store_i32();
turns into:
// some computations ...
write_register(register_index);
I.e., a prefix is not necessary anymore as locals are indexed directly.
Further patches will allow getting rid of some temporary locals, as
registers now can be used directly.
2018-12-25 11:42:03 +01:00
|
|
|
codegen::gen_jmp_rel16(ctx.builder, jump_offset as u16);
|
2018-07-03 17:28:33 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
if let Some(next_block_branch_taken_addr) = next_block_branch_taken_addr {
|
|
|
|
let next_basic_block_branch_taken_index = *basic_block_indices
|
|
|
|
.get(&next_block_branch_taken_addr)
|
2018-08-21 03:10:58 +02:00
|
|
|
.expect("basic_block_indices.get (branch taken)");
|
2018-07-03 17:28:33 +02:00
|
|
|
|
Store registers in locals
This changes registers to be temporarily stored in wasm locals, across
each complete wasm module. Registers are moved from memory to locals
upon entering the wasm module and moved from locals to memory upon
leaving. Additionally, calls to functions that modify registers are
wrapped between moving registers to memory before and moving back to
locals after. This affects:
1. All non-custom instructions
2. safe_{read,write}_slow, since it may page fault (the slow path of all memory accesses)
3. task_switch_test* and trigger_ud
4. All block boundaries
5. The fallback functions of gen_safe_read_write (read-modify-write memory accesses)
The performance benefits are currently mostly eaten up by 1. and 4. (if
one calculates the total number of read/writes to registers in memory,
they are higher after this patch, as each instruction of type 1. or 4.
requires moving all 8 registers twice). This can be improved later by the
relatively mechanical work of making instructions custom (not
necessarily full code generation, only the part of the instruction where
registers are accessed). Multi-page wasm module generation will
significantly reduce the number of type 4. instructions.
Due to 2., the overall code size has significantly increased. This case
(the slow path of memory access) is often generated but rarely executed.
These moves can be removed in a later patch by a different scheme for
safe_{read,write}_slow, which has been left out of this patch for
simplicity of reviewing.
This also simplifies our code generation for storing registers, as
instructions_body.const_i32(register_offset);
// some computations ...
instruction_body.store_i32();
turns into:
// some computations ...
write_register(register_index);
I.e., a prefix is not necessary anymore as locals are indexed directly.
Further patches will allow getting rid of some temporary locals, as
registers now can be used directly.
2018-12-25 11:42:03 +01:00
|
|
|
ctx.builder
|
2018-07-28 20:33:40 +02:00
|
|
|
.instruction_body
|
2018-08-22 04:06:52 +02:00
|
|
|
.const_i32(next_basic_block_branch_taken_index as i32);
|
Store registers in locals
This changes registers to be temporarily stored in wasm locals, across
each complete wasm module. Registers are moved from memory to locals
upon entering the wasm module and moved from locals to memory upon
leaving. Additionally, calls to functions that modify registers are
wrapped between moving registers to memory before and moving back to
locals after. This affects:
1. All non-custom instructions
2. safe_{read,write}_slow, since it may page fault (the slow path of all memory accesses)
3. task_switch_test* and trigger_ud
4. All block boundaries
5. The fallback functions of gen_safe_read_write (read-modify-write memory accesses)
The performance benefits are currently mostly eaten up by 1. and 4. (if
one calculates the total number of read/writes to registers in memory,
they are higher after this patch, as each instruction of type 1. or 4.
requires moving all 8 registers twice). This can be improved later by the
relatively mechanical work of making instructions custom (not
necessarily full code generation, only the part of the instruction where
registers are accessed). Multi-page wasm module generation will
significantly reduce the number of type 4. instructions.
Due to 2., the overall code size has significantly increased. This case
(the slow path of memory access) is often generated but rarely executed.
These moves can be removed in a later patch by a different scheme for
safe_{read,write}_slow, which has been left out of this patch for
simplicity of reviewing.
This also simplifies our code generation for storing registers, as
instructions_body.const_i32(register_offset);
// some computations ...
instruction_body.store_i32();
turns into:
// some computations ...
write_register(register_index);
I.e., a prefix is not necessary anymore as locals are indexed directly.
Further patches will allow getting rid of some temporary locals, as
registers now can be used directly.
2018-12-25 11:42:03 +01:00
|
|
|
ctx.builder.instruction_body.set_local(&gen_local_state);
|
2021-01-01 02:14:28 +01:00
|
|
|
|
|
|
|
ctx.builder
|
|
|
|
.instruction_body
|
|
|
|
.br(basic_blocks.len() as u32 + 2 - i as u32); // to the loop
|
2018-07-03 17:28:33 +02:00
|
|
|
}
|
|
|
|
else {
|
|
|
|
// Jump to different page
|
Store registers in locals
This changes registers to be temporarily stored in wasm locals, across
each complete wasm module. Registers are moved from memory to locals
upon entering the wasm module and moved from locals to memory upon
leaving. Additionally, calls to functions that modify registers are
wrapped between moving registers to memory before and moving back to
locals after. This affects:
1. All non-custom instructions
2. safe_{read,write}_slow, since it may page fault (the slow path of all memory accesses)
3. task_switch_test* and trigger_ud
4. All block boundaries
5. The fallback functions of gen_safe_read_write (read-modify-write memory accesses)
The performance benefits are currently mostly eaten up by 1. and 4. (if
one calculates the total number of read/writes to registers in memory,
they are higher after this patch, as each instruction of type 1. or 4.
requires moving all 8 registers twice). This can be improved later by the
relatively mechanical work of making instructions custom (not
necessarily full code generation, only the part of the instruction where
registers are accessed). Multi-page wasm module generation will
significantly reduce the number of type 4. instructions.
Due to 2., the overall code size has significantly increased. This case
(the slow path of memory access) is often generated but rarely executed.
These moves can be removed in a later patch by a different scheme for
safe_{read,write}_slow, which has been left out of this patch for
simplicity of reviewing.
This also simplifies our code generation for storing registers, as
instructions_body.const_i32(register_offset);
// some computations ...
instruction_body.store_i32();
turns into:
// some computations ...
write_register(register_index);
I.e., a prefix is not necessary anymore as locals are indexed directly.
Further patches will allow getting rid of some temporary locals, as
registers now can be used directly.
2018-12-25 11:42:03 +01:00
|
|
|
codegen::gen_debug_track_jit_exit(ctx.builder, block.last_instruction_addr);
|
|
|
|
codegen::gen_move_registers_from_locals_to_memory(ctx);
|
|
|
|
ctx.builder.instruction_body.return_();
|
2018-07-03 17:28:33 +02:00
|
|
|
}
|
|
|
|
|
2018-08-21 19:24:35 +02:00
|
|
|
if let Some(next_block_addr) = next_block_addr {
|
2018-07-03 17:28:33 +02:00
|
|
|
// Branch not taken
|
2021-01-01 02:14:28 +01:00
|
|
|
|
2018-08-21 03:10:58 +02:00
|
|
|
let next_basic_block_index = *basic_block_indices
|
|
|
|
.get(&next_block_addr)
|
|
|
|
.expect("basic_block_indices.get (branch not taken)");
|
2018-07-03 17:28:33 +02:00
|
|
|
|
2021-01-01 02:14:28 +01:00
|
|
|
if next_basic_block_index == (i as u32) + 1 {
|
|
|
|
// fallthru
|
|
|
|
ctx.builder.instruction_body.block_end();
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
ctx.builder.instruction_body.else_();
|
|
|
|
|
|
|
|
ctx.builder
|
|
|
|
.instruction_body
|
|
|
|
.const_i32(next_basic_block_index as i32);
|
|
|
|
ctx.builder.instruction_body.set_local(&gen_local_state);
|
|
|
|
|
|
|
|
ctx.builder
|
|
|
|
.instruction_body
|
|
|
|
.br(basic_blocks.len() as u32 + 2 - i as u32); // to the loop
|
|
|
|
|
|
|
|
ctx.builder.instruction_body.block_end();
|
|
|
|
}
|
2018-07-03 17:28:33 +02:00
|
|
|
}
|
2018-08-21 19:24:35 +02:00
|
|
|
else {
|
2021-01-01 02:14:28 +01:00
|
|
|
ctx.builder.instruction_body.else_();
|
|
|
|
|
2018-08-21 19:24:35 +02:00
|
|
|
// End of this page
|
Store registers in locals
This changes registers to be temporarily stored in wasm locals, across
each complete wasm module. Registers are moved from memory to locals
upon entering the wasm module and moved from locals to memory upon
leaving. Additionally, calls to functions that modify registers are
wrapped between moving registers to memory before and moving back to
locals after. This affects:
1. All non-custom instructions
2. safe_{read,write}_slow, since it may page fault (the slow path of all memory accesses)
3. task_switch_test* and trigger_ud
4. All block boundaries
5. The fallback functions of gen_safe_read_write (read-modify-write memory accesses)
The performance benefits are currently mostly eaten up by 1. and 4. (if
one calculates the total number of read/writes to registers in memory,
they are higher after this patch, as each instruction of type 1. or 4.
requires moving all 8 registers twice). This can be improved later by the
relatively mechanical work of making instructions custom (not
necessarily full code generation, only the part of the instruction where
registers are accessed). Multi-page wasm module generation will
significantly reduce the number of type 4. instructions.
Due to 2., the overall code size has significantly increased. This case
(the slow path of memory access) is often generated but rarely executed.
These moves can be removed in a later patch by a different scheme for
safe_{read,write}_slow, which has been left out of this patch for
simplicity of reviewing.
This also simplifies our code generation for storing registers, as
instructions_body.const_i32(register_offset);
// some computations ...
instruction_body.store_i32();
turns into:
// some computations ...
write_register(register_index);
I.e., a prefix is not necessary anymore as locals are indexed directly.
Further patches will allow getting rid of some temporary locals, as
registers now can be used directly.
2018-12-25 11:42:03 +01:00
|
|
|
codegen::gen_debug_track_jit_exit(ctx.builder, block.last_instruction_addr);
|
|
|
|
codegen::gen_move_registers_from_locals_to_memory(ctx);
|
|
|
|
ctx.builder.instruction_body.return_();
|
2018-07-03 17:28:33 +02:00
|
|
|
|
2021-01-01 02:14:28 +01:00
|
|
|
ctx.builder.instruction_body.block_end();
|
|
|
|
}
|
2018-07-03 17:28:33 +02:00
|
|
|
},
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-12-26 14:25:46 +01:00
|
|
|
{
|
|
|
|
// exit-with-pagefault case
|
|
|
|
ctx.builder.instruction_body.block_end();
|
|
|
|
codegen::gen_move_registers_from_locals_to_memory(ctx);
|
2018-12-26 15:22:08 +01:00
|
|
|
codegen::gen_fn0_const(ctx.builder, "trigger_pagefault_end_jit");
|
2018-12-27 19:35:16 +01:00
|
|
|
codegen::gen_clear_prefixes(ctx);
|
2018-12-26 14:25:46 +01:00
|
|
|
ctx.builder.instruction_body.return_();
|
|
|
|
}
|
|
|
|
|
Store registers in locals
This changes registers to be temporarily stored in wasm locals, across
each complete wasm module. Registers are moved from memory to locals
upon entering the wasm module and moved from locals to memory upon
leaving. Additionally, calls to functions that modify registers are
wrapped between moving registers to memory before and moving back to
locals after. This affects:
1. All non-custom instructions
2. safe_{read,write}_slow, since it may page fault (the slow path of all memory accesses)
3. task_switch_test* and trigger_ud
4. All block boundaries
5. The fallback functions of gen_safe_read_write (read-modify-write memory accesses)
The performance benefits are currently mostly eaten up by 1. and 4. (if
one calculates the total number of read/writes to registers in memory,
they are higher after this patch, as each instruction of type 1. or 4.
requires moving all 8 registers twice). This can be improved later by the
relatively mechanical work of making instructions custom (not
necessarily full code generation, only the part of the instruction where
registers are accessed). Multi-page wasm module generation will
significantly reduce the number of type 4. instructions.
Due to 2., the overall code size has significantly increased. This case
(the slow path of memory access) is often generated but rarely executed.
These moves can be removed in a later patch by a different scheme for
safe_{read,write}_slow, which has been left out of this patch for
simplicity of reviewing.
This also simplifies our code generation for storing registers, as
instructions_body.const_i32(register_offset);
// some computations ...
instruction_body.store_i32();
turns into:
// some computations ...
write_register(register_index);
I.e., a prefix is not necessary anymore as locals are indexed directly.
Further patches will allow getting rid of some temporary locals, as
registers now can be used directly.
2018-12-25 11:42:03 +01:00
|
|
|
ctx.builder.instruction_body.block_end(); // default case
|
|
|
|
ctx.builder.instruction_body.unreachable();
|
2018-07-03 17:28:33 +02:00
|
|
|
|
Store registers in locals
This changes registers to be temporarily stored in wasm locals, across
each complete wasm module. Registers are moved from memory to locals
upon entering the wasm module and moved from locals to memory upon
leaving. Additionally, calls to functions that modify registers are
wrapped between moving registers to memory before and moving back to
locals after. This affects:
1. All non-custom instructions
2. safe_{read,write}_slow, since it may page fault (the slow path of all memory accesses)
3. task_switch_test* and trigger_ud
4. All block boundaries
5. The fallback functions of gen_safe_read_write (read-modify-write memory accesses)
The performance benefits are currently mostly eaten up by 1. and 4. (if
one calculates the total number of read/writes to registers in memory,
they are higher after this patch, as each instruction of type 1. or 4.
requires moving all 8 registers twice). This can be improved later by the
relatively mechanical work of making instructions custom (not
necessarily full code generation, only the part of the instruction where
registers are accessed). Multi-page wasm module generation will
significantly reduce the number of type 4. instructions.
Due to 2., the overall code size has significantly increased. This case
(the slow path of memory access) is often generated but rarely executed.
These moves can be removed in a later patch by a different scheme for
safe_{read,write}_slow, which has been left out of this patch for
simplicity of reviewing.
This also simplifies our code generation for storing registers, as
instructions_body.const_i32(register_offset);
// some computations ...
instruction_body.store_i32();
turns into:
// some computations ...
write_register(register_index);
I.e., a prefix is not necessary anymore as locals are indexed directly.
Further patches will allow getting rid of some temporary locals, as
registers now can be used directly.
2018-12-25 11:42:03 +01:00
|
|
|
ctx.builder.instruction_body.block_end(); // loop
|
2018-07-03 17:28:33 +02:00
|
|
|
|
Store registers in locals
This changes registers to be temporarily stored in wasm locals, across
each complete wasm module. Registers are moved from memory to locals
upon entering the wasm module and moved from locals to memory upon
leaving. Additionally, calls to functions that modify registers are
wrapped between moving registers to memory before and moving back to
locals after. This affects:
1. All non-custom instructions
2. safe_{read,write}_slow, since it may page fault (the slow path of all memory accesses)
3. task_switch_test* and trigger_ud
4. All block boundaries
5. The fallback functions of gen_safe_read_write (read-modify-write memory accesses)
The performance benefits are currently mostly eaten up by 1. and 4. (if
one calculates the total number of read/writes to registers in memory,
they are higher after this patch, as each instruction of type 1. or 4.
requires moving all 8 registers twice). This can be improved later by the
relatively mechanical work of making instructions custom (not
necessarily full code generation, only the part of the instruction where
registers are accessed). Multi-page wasm module generation will
significantly reduce the number of type 4. instructions.
Due to 2., the overall code size has significantly increased. This case
(the slow path of memory access) is often generated but rarely executed.
These moves can be removed in a later patch by a different scheme for
safe_{read,write}_slow, which has been left out of this patch for
simplicity of reviewing.
This also simplifies our code generation for storing registers, as
instructions_body.const_i32(register_offset);
// some computations ...
instruction_body.store_i32();
turns into:
// some computations ...
write_register(register_index);
I.e., a prefix is not necessary anymore as locals are indexed directly.
Further patches will allow getting rid of some temporary locals, as
registers now can be used directly.
2018-12-25 11:42:03 +01:00
|
|
|
ctx.builder.free_local(gen_local_state);
|
2018-12-04 22:11:34 +01:00
|
|
|
if let Some(local) = gen_local_iteration_counter {
|
Store registers in locals
This changes registers to be temporarily stored in wasm locals, across
each complete wasm module. Registers are moved from memory to locals
upon entering the wasm module and moved from locals to memory upon
leaving. Additionally, calls to functions that modify registers are
wrapped between moving registers to memory before and moving back to
locals after. This affects:
1. All non-custom instructions
2. safe_{read,write}_slow, since it may page fault (the slow path of all memory accesses)
3. task_switch_test* and trigger_ud
4. All block boundaries
5. The fallback functions of gen_safe_read_write (read-modify-write memory accesses)
The performance benefits are currently mostly eaten up by 1. and 4. (if
one calculates the total number of read/writes to registers in memory,
they are higher after this patch, as each instruction of type 1. or 4.
requires moving all 8 registers twice). This can be improved later by the
relatively mechanical work of making instructions custom (not
necessarily full code generation, only the part of the instruction where
registers are accessed). Multi-page wasm module generation will
significantly reduce the number of type 4. instructions.
Due to 2., the overall code size has significantly increased. This case
(the slow path of memory access) is often generated but rarely executed.
These moves can be removed in a later patch by a different scheme for
safe_{read,write}_slow, which has been left out of this patch for
simplicity of reviewing.
This also simplifies our code generation for storing registers, as
instructions_body.const_i32(register_offset);
// some computations ...
instruction_body.store_i32();
turns into:
// some computations ...
write_register(register_index);
I.e., a prefix is not necessary anymore as locals are indexed directly.
Further patches will allow getting rid of some temporary locals, as
registers now can be used directly.
2018-12-25 11:42:03 +01:00
|
|
|
ctx.builder.free_local(local);
|
2018-12-04 22:11:34 +01:00
|
|
|
}
|
|
|
|
|
Store registers in locals
This changes registers to be temporarily stored in wasm locals, across
each complete wasm module. Registers are moved from memory to locals
upon entering the wasm module and moved from locals to memory upon
leaving. Additionally, calls to functions that modify registers are
wrapped between moving registers to memory before and moving back to
locals after. This affects:
1. All non-custom instructions
2. safe_{read,write}_slow, since it may page fault (the slow path of all memory accesses)
3. task_switch_test* and trigger_ud
4. All block boundaries
5. The fallback functions of gen_safe_read_write (read-modify-write memory accesses)
The performance benefits are currently mostly eaten up by 1. and 4. (if
one calculates the total number of read/writes to registers in memory,
they are higher after this patch, as each instruction of type 1. or 4.
requires moving all 8 registers twice). This can be improved later by the
relatively mechanical work of making instructions custom (not
necessarily full code generation, only the part of the instruction where
registers are accessed). Multi-page wasm module generation will
significantly reduce the number of type 4. instructions.
Due to 2., the overall code size has significantly increased. This case
(the slow path of memory access) is often generated but rarely executed.
These moves can be removed in a later patch by a different scheme for
safe_{read,write}_slow, which has been left out of this patch for
simplicity of reviewing.
This also simplifies our code generation for storing registers, as
instructions_body.const_i32(register_offset);
// some computations ...
instruction_body.store_i32();
turns into:
// some computations ...
write_register(register_index);
I.e., a prefix is not necessary anymore as locals are indexed directly.
Further patches will allow getting rid of some temporary locals, as
registers now can be used directly.
2018-12-25 11:42:03 +01:00
|
|
|
for local in ctx.register_locals.drain(..) {
|
|
|
|
ctx.builder.free_local(local);
|
|
|
|
}
|
|
|
|
|
|
|
|
ctx.builder.finish();
|
2018-07-03 17:28:33 +02:00
|
|
|
}
|
|
|
|
|
2021-01-01 02:14:29 +01:00
|
|
|
fn jit_generate_basic_block(ctx: &mut JitContext, block: &BasicBlock) {
|
|
|
|
let start_addr = block.addr;
|
|
|
|
let last_instruction_addr = block.last_instruction_addr;
|
|
|
|
let stop_addr = block.end_addr;
|
2018-07-03 17:28:33 +02:00
|
|
|
|
|
|
|
// First iteration of do-while assumes the caller confirms this condition
|
|
|
|
dbg_assert!(!is_near_end_of_page(start_addr));
|
|
|
|
|
2021-01-01 02:14:29 +01:00
|
|
|
codegen::gen_increment_timestamp_counter(ctx.builder, block.number_of_instructions as i32);
|
Store registers in locals
This changes registers to be temporarily stored in wasm locals, across
each complete wasm module. Registers are moved from memory to locals
upon entering the wasm module and moved from locals to memory upon
leaving. Additionally, calls to functions that modify registers are
wrapped between moving registers to memory before and moving back to
locals after. This affects:
1. All non-custom instructions
2. safe_{read,write}_slow, since it may page fault (the slow path of all memory accesses)
3. task_switch_test* and trigger_ud
4. All block boundaries
5. The fallback functions of gen_safe_read_write (read-modify-write memory accesses)
The performance benefits are currently mostly eaten up by 1. and 4. (if
one calculates the total number of read/writes to registers in memory,
they are higher after this patch, as each instructions of typ 1. or 4.
requires moving all 8 register twice). This can be improved later by the
relatively mechanical work of making instructions custom (not
necessarily full code generation, only the part of the instruction where
registers are accessed). Multi-page wasm module generation will
significantly reduce the number of type 4. instructions.
Due to 2., the overall code size has significantly increased. This case
(the slow path of memory access) is often generated but rarely executed.
These moves can be removed in a later patch by a different scheme for
safe_{read,write}_slow, which has been left out of this patch for
simplicity of reviewing.
This also simplifies our code generation for storing registers, as
instructions_body.const_i32(register_offset);
// some computations ...
instruction_body.store_i32();
turns into:
// some computations ...
write_register(register_index);
I.e., a prefix is not necessary anymore as locals are indexed directly.
Further patches will allow getting rid of some temporary locals, as
registers now can be used directly.
2018-12-25 11:42:03 +01:00
|
|
|
ctx.cpu.eip = start_addr;
|
2018-07-03 17:28:33 +02:00
|
|
|
|
|
|
|
loop {
|
2021-01-01 02:14:28 +01:00
|
|
|
let mut instruction = 0;
|
Store registers in locals
This changes registers to be temporarily stored in wasm locals, across
each complete wasm module. Registers are moved from memory to locals
upon entering the wasm module and moved from locals to memory upon
leaving. Additionally, calls to functions that modify registers are
wrapped between moving registers to memory before and moving back to
locals after. This affects:
1. All non-custom instructions
2. safe_{read,write}_slow, since it may page fault (the slow path of all memory accesses)
3. task_switch_test* and trigger_ud
4. All block boundaries
5. The fallback functions of gen_safe_read_write (read-modify-write memory accesses)
The performance benefits are currently mostly eaten up by 1. and 4. (if
one calculates the total number of read/writes to registers in memory,
they are higher after this patch, as each instructions of typ 1. or 4.
requires moving all 8 register twice). This can be improved later by the
relatively mechanical work of making instructions custom (not
necessarily full code generation, only the part of the instruction where
registers are accessed). Multi-page wasm module generation will
significantly reduce the number of type 4. instructions.
Due to 2., the overall code size has significantly increased. This case
(the slow path of memory access) is often generated but rarely executed.
These moves can be removed in a later patch by a different scheme for
safe_{read,write}_slow, which has been left out of this patch for
simplicity of reviewing.
This also simplifies our code generation for storing registers, as
instructions_body.const_i32(register_offset);
// some computations ...
instruction_body.store_i32();
turns into:
// some computations ...
write_register(register_index);
I.e., a prefix is not necessary anymore as locals are indexed directly.
Further patches will allow getting rid of some temporary locals, as
registers now can be used directly.
2018-12-25 11:42:03 +01:00
|
|
|
if cfg!(feature = "profiler") {
|
2021-01-01 02:14:28 +01:00
|
|
|
instruction = cpu::read32(ctx.cpu.eip);
|
Store registers in locals
This changes registers to be temporarily stored in wasm locals, across
each complete wasm module. Registers are moved from memory to locals
upon entering the wasm module and moved from locals to memory upon
leaving. Additionally, calls to functions that modify registers are
wrapped between moving registers to memory before and moving back to
locals after. This affects:
1. All non-custom instructions
2. safe_{read,write}_slow, since it may page fault (the slow path of all memory accesses)
3. task_switch_test* and trigger_ud
4. All block boundaries
5. The fallback functions of gen_safe_read_write (read-modify-write memory accesses)
The performance benefits are currently mostly eaten up by 1. and 4. (if
one calculates the total number of read/writes to registers in memory,
they are higher after this patch, as each instructions of typ 1. or 4.
requires moving all 8 register twice). This can be improved later by the
relatively mechanical work of making instructions custom (not
necessarily full code generation, only the part of the instruction where
registers are accessed). Multi-page wasm module generation will
significantly reduce the number of type 4. instructions.
Due to 2., the overall code size has significantly increased. This case
(the slow path of memory access) is often generated but rarely executed.
These moves can be removed in a later patch by a different scheme for
safe_{read,write}_slow, which has been left out of this patch for
simplicity of reviewing.
This also simplifies our code generation for storing registers, as
instructions_body.const_i32(register_offset);
// some computations ...
instruction_body.store_i32();
turns into:
// some computations ...
write_register(register_index);
I.e., a prefix is not necessary anymore as locals are indexed directly.
Further patches will allow getting rid of some temporary locals, as
registers now can be used directly.
2018-12-25 11:42:03 +01:00
|
|
|
::opstats::gen_opstats(ctx.builder, instruction);
|
2018-09-18 00:33:36 +02:00
|
|
|
::opstats::record_opstat_compiled(instruction);
|
2018-07-03 17:28:33 +02:00
|
|
|
}
|
2018-08-22 01:07:45 +02:00
|
|
|
|
Store registers in locals
This changes registers to be temporarily stored in wasm locals, across
each complete wasm module. Registers are moved from memory to locals
upon entering the wasm module and moved from locals to memory upon
leaving. Additionally, calls to functions that modify registers are
wrapped between moving registers to memory before and moving back to
locals after. This affects:
1. All non-custom instructions
2. safe_{read,write}_slow, since it may page fault (the slow path of all memory accesses)
3. task_switch_test* and trigger_ud
4. All block boundaries
5. The fallback functions of gen_safe_read_write (read-modify-write memory accesses)
The performance benefits are currently mostly eaten up by 1. and 4. (if
one calculates the total number of read/writes to registers in memory,
they are higher after this patch, as each instructions of typ 1. or 4.
requires moving all 8 register twice). This can be improved later by the
relatively mechanical work of making instructions custom (not
necessarily full code generation, only the part of the instruction where
registers are accessed). Multi-page wasm module generation will
significantly reduce the number of type 4. instructions.
Due to 2., the overall code size has significantly increased. This case
(the slow path of memory access) is often generated but rarely executed.
These moves can be removed in a later patch by a different scheme for
safe_{read,write}_slow, which has been left out of this patch for
simplicity of reviewing.
This also simplifies our code generation for storing registers, as
instructions_body.const_i32(register_offset);
// some computations ...
instruction_body.store_i32();
turns into:
// some computations ...
write_register(register_index);
I.e., a prefix is not necessary anymore as locals are indexed directly.
Further patches will allow getting rid of some temporary locals, as
registers now can be used directly.
2018-12-25 11:42:03 +01:00
|
|
|
if ctx.cpu.eip == last_instruction_addr {
|
2018-08-22 01:07:45 +02:00
|
|
|
// Before the last instruction:
|
|
|
|
// - Set eip to *after* the instruction
|
|
|
|
// - Set previous_eip to *before* the instruction
|
|
|
|
codegen::gen_set_previous_eip_offset_from_eip(
|
Store registers in locals
This changes registers to be temporarily stored in wasm locals, across
each complete wasm module. Registers are moved from memory to locals
upon entering the wasm module and moved from locals to memory upon
leaving. Additionally, calls to functions that modify registers are
wrapped between moving registers to memory before and moving back to
locals after. This affects:
1. All non-custom instructions
2. safe_{read,write}_slow, since it may page fault (the slow path of all memory accesses)
3. task_switch_test* and trigger_ud
4. All block boundaries
5. The fallback functions of gen_safe_read_write (read-modify-write memory accesses)
The performance benefits are currently mostly eaten up by 1. and 4. (if
one calculates the total number of read/writes to registers in memory,
they are higher after this patch, as each instructions of typ 1. or 4.
requires moving all 8 register twice). This can be improved later by the
relatively mechanical work of making instructions custom (not
necessarily full code generation, only the part of the instruction where
registers are accessed). Multi-page wasm module generation will
significantly reduce the number of type 4. instructions.
Due to 2., the overall code size has significantly increased. This case
(the slow path of memory access) is often generated but rarely executed.
These moves can be removed in a later patch by a different scheme for
safe_{read,write}_slow, which has been left out of this patch for
simplicity of reviewing.
This also simplifies our code generation for storing registers, as
instructions_body.const_i32(register_offset);
// some computations ...
instruction_body.store_i32();
turns into:
// some computations ...
write_register(register_index);
I.e., a prefix is not necessary anymore as locals are indexed directly.
Further patches will allow getting rid of some temporary locals, as
registers now can be used directly.
2018-12-25 11:42:03 +01:00
|
|
|
ctx.builder,
|
2018-08-22 01:07:45 +02:00
|
|
|
last_instruction_addr - start_addr,
|
|
|
|
);
|
Store registers in locals
This changes registers to be temporarily stored in wasm locals, across
each complete wasm module. Registers are moved from memory to locals
upon entering the wasm module and moved from locals to memory upon
leaving. Additionally, calls to functions that modify registers are
wrapped between moving registers to memory before and moving back to
locals after. This affects:
1. All non-custom instructions
2. safe_{read,write}_slow, since it may page fault (the slow path of all memory accesses)
3. task_switch_test* and trigger_ud
4. All block boundaries
5. The fallback functions of gen_safe_read_write (read-modify-write memory accesses)
The performance benefits are currently mostly eaten up by 1. and 4. (if
one calculates the total number of read/writes to registers in memory,
they are higher after this patch, as each instructions of typ 1. or 4.
requires moving all 8 register twice). This can be improved later by the
relatively mechanical work of making instructions custom (not
necessarily full code generation, only the part of the instruction where
registers are accessed). Multi-page wasm module generation will
significantly reduce the number of type 4. instructions.
Due to 2., the overall code size has significantly increased. This case
(the slow path of memory access) is often generated but rarely executed.
These moves can be removed in a later patch by a different scheme for
safe_{read,write}_slow, which has been left out of this patch for
simplicity of reviewing.
This also simplifies our code generation for storing registers, as
instructions_body.const_i32(register_offset);
// some computations ...
instruction_body.store_i32();
turns into:
// some computations ...
write_register(register_index);
I.e., a prefix is not necessary anymore as locals are indexed directly.
Further patches will allow getting rid of some temporary locals, as
registers now can be used directly.
2018-12-25 11:42:03 +01:00
|
|
|
codegen::gen_increment_instruction_pointer(ctx.builder, stop_addr - start_addr);
|
2018-08-22 01:07:45 +02:00
|
|
|
}
|
|
|
|
|
2021-01-01 02:14:28 +01:00
|
|
|
let wasm_length_before = ctx.builder.instruction_body.len();
|
|
|
|
|
Store registers in locals
This changes registers to be temporarily stored in wasm locals, across
each complete wasm module. Registers are moved from memory to locals
upon entering the wasm module and moved from locals to memory upon
leaving. Additionally, calls to functions that modify registers are
wrapped between moving registers to memory before and moving back to
locals after. This affects:
1. All non-custom instructions
2. safe_{read,write}_slow, since it may page fault (the slow path of all memory accesses)
3. task_switch_test* and trigger_ud
4. All block boundaries
5. The fallback functions of gen_safe_read_write (read-modify-write memory accesses)
The performance benefits are currently mostly eaten up by 1. and 4. (if
one calculates the total number of read/writes to registers in memory,
they are higher after this patch, as each instructions of typ 1. or 4.
requires moving all 8 register twice). This can be improved later by the
relatively mechanical work of making instructions custom (not
necessarily full code generation, only the part of the instruction where
registers are accessed). Multi-page wasm module generation will
significantly reduce the number of type 4. instructions.
Due to 2., the overall code size has significantly increased. This case
(the slow path of memory access) is often generated but rarely executed.
These moves can be removed in a later patch by a different scheme for
safe_{read,write}_slow, which has been left out of this patch for
simplicity of reviewing.
This also simplifies our code generation for storing registers, as
instructions_body.const_i32(register_offset);
// some computations ...
instruction_body.store_i32();
turns into:
// some computations ...
write_register(register_index);
I.e., a prefix is not necessary anymore as locals are indexed directly.
Further patches will allow getting rid of some temporary locals, as
registers now can be used directly.
2018-12-25 11:42:03 +01:00
|
|
|
ctx.start_of_current_instruction = ctx.cpu.eip;
|
|
|
|
let start_eip = ctx.cpu.eip;
|
2018-07-03 17:28:33 +02:00
|
|
|
let mut instruction_flags = 0;
|
Store registers in locals
This changes registers to be temporarily stored in wasm locals, across
each complete wasm module. Registers are moved from memory to locals
upon entering the wasm module and moved from locals to memory upon
leaving. Additionally, calls to functions that modify registers are
wrapped between moving registers to memory before and moving back to
locals after. This affects:
1. All non-custom instructions
2. safe_{read,write}_slow, since it may page fault (the slow path of all memory accesses)
3. task_switch_test* and trigger_ud
4. All block boundaries
5. The fallback functions of gen_safe_read_write (read-modify-write memory accesses)
The performance benefits are currently mostly eaten up by 1. and 4. (if
one calculates the total number of read/writes to registers in memory,
they are higher after this patch, as each instructions of typ 1. or 4.
requires moving all 8 register twice). This can be improved later by the
relatively mechanical work of making instructions custom (not
necessarily full code generation, only the part of the instruction where
registers are accessed). Multi-page wasm module generation will
significantly reduce the number of type 4. instructions.
Due to 2., the overall code size has significantly increased. This case
(the slow path of memory access) is often generated but rarely executed.
These moves can be removed in a later patch by a different scheme for
safe_{read,write}_slow, which has been left out of this patch for
simplicity of reviewing.
This also simplifies our code generation for storing registers, as
instructions_body.const_i32(register_offset);
// some computations ...
instruction_body.store_i32();
turns into:
// some computations ...
write_register(register_index);
I.e., a prefix is not necessary anymore as locals are indexed directly.
Further patches will allow getting rid of some temporary locals, as
registers now can be used directly.
2018-12-25 11:42:03 +01:00
|
|
|
jit_instructions::jit_instruction(ctx, &mut instruction_flags);
|
|
|
|
let end_eip = ctx.cpu.eip;
|
2018-07-03 17:28:33 +02:00
|
|
|
|
|
|
|
let instruction_length = end_eip - start_eip;
|
2018-08-22 01:07:45 +02:00
|
|
|
let was_block_boundary = instruction_flags & JIT_INSTR_BLOCK_BOUNDARY_FLAG != 0;
|
2018-07-03 17:28:33 +02:00
|
|
|
|
2021-01-01 02:14:28 +01:00
|
|
|
let wasm_length = ctx.builder.instruction_body.len() - wasm_length_before;
|
|
|
|
::opstats::record_opstat_size_wasm(instruction, wasm_length as u32);
|
|
|
|
|
2018-08-21 19:24:35 +02:00
|
|
|
dbg_assert!((end_eip == stop_addr) == (start_eip == last_instruction_addr));
|
2018-07-03 17:28:33 +02:00
|
|
|
dbg_assert!(instruction_length < MAX_INSTRUCTION_LENGTH);
|
|
|
|
|
Store registers in locals
This changes registers to be temporarily stored in wasm locals, across
each complete wasm module. Registers are moved from memory to locals
upon entering the wasm module and moved from locals to memory upon
leaving. Additionally, calls to functions that modify registers are
wrapped between moving registers to memory before and moving back to
locals after. This affects:
1. All non-custom instructions
2. safe_{read,write}_slow, since it may page fault (the slow path of all memory accesses)
3. task_switch_test* and trigger_ud
4. All block boundaries
5. The fallback functions of gen_safe_read_write (read-modify-write memory accesses)
The performance benefits are currently mostly eaten up by 1. and 4. (if
one calculates the total number of read/writes to registers in memory,
they are higher after this patch, as each instructions of typ 1. or 4.
requires moving all 8 register twice). This can be improved later by the
relatively mechanical work of making instructions custom (not
necessarily full code generation, only the part of the instruction where
registers are accessed). Multi-page wasm module generation will
significantly reduce the number of type 4. instructions.
Due to 2., the overall code size has significantly increased. This case
(the slow path of memory access) is often generated but rarely executed.
These moves can be removed in a later patch by a different scheme for
safe_{read,write}_slow, which has been left out of this patch for
simplicity of reviewing.
This also simplifies our code generation for storing registers, as
instructions_body.const_i32(register_offset);
// some computations ...
instruction_body.store_i32();
turns into:
// some computations ...
write_register(register_index);
I.e., a prefix is not necessary anymore as locals are indexed directly.
Further patches will allow getting rid of some temporary locals, as
registers now can be used directly.
2018-12-25 11:42:03 +01:00
|
|
|
let end_addr = ctx.cpu.eip;
|
2018-07-03 17:28:33 +02:00
|
|
|
|
|
|
|
if end_addr == stop_addr {
|
2018-08-22 01:07:45 +02:00
|
|
|
// no page was crossed
|
|
|
|
dbg_assert!(Page::page_of(end_addr) == Page::page_of(start_addr));
|
|
|
|
|
2018-07-03 17:28:33 +02:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if was_block_boundary || is_near_end_of_page(end_addr) || end_addr > stop_addr {
|
2021-01-01 02:14:30 +01:00
|
|
|
dbg_log!(
|
|
|
|
"Overlapping basic blocks start={:x} expected_end={:x} end={:x} was_block_boundary={} near_end_of_page={}",
|
|
|
|
start_addr,
|
|
|
|
stop_addr,
|
|
|
|
end_addr,
|
|
|
|
was_block_boundary,
|
|
|
|
is_near_end_of_page(end_addr)
|
|
|
|
);
|
2018-08-21 19:24:35 +02:00
|
|
|
dbg_assert!(false);
|
2018-07-03 17:28:33 +02:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn jit_increase_hotness_and_maybe_compile(
|
|
|
|
ctx: &mut JitState,
|
|
|
|
phys_address: u32,
|
|
|
|
cs_offset: u32,
|
|
|
|
state_flags: CachedStateFlags,
|
2018-09-18 01:34:10 +02:00
|
|
|
hotness: u32,
|
2018-07-03 17:28:33 +02:00
|
|
|
) {
|
|
|
|
let page = Page::page_of(phys_address);
|
|
|
|
let address_hash = jit_hot_hash_page(page) as usize;
|
2018-09-18 01:34:10 +02:00
|
|
|
ctx.hot_code_addresses[address_hash] += hotness;
|
2018-07-03 17:28:33 +02:00
|
|
|
if ctx.hot_code_addresses[address_hash] >= JIT_THRESHOLD {
|
|
|
|
ctx.hot_code_addresses[address_hash] = 0;
|
|
|
|
jit_analyze_and_generate(ctx, page, cs_offset, state_flags)
|
|
|
|
};
|
|
|
|
}
|
|
|
|
|
|
|
|
fn free_wasm_table_index(ctx: &mut JitState, wasm_table_index: u16) {
|
|
|
|
if CHECK_JIT_CACHE_ARRAY_INVARIANTS {
|
|
|
|
dbg_assert!(!ctx.wasm_table_index_free_list.contains(&wasm_table_index));
|
|
|
|
}
|
2018-09-12 23:06:29 +02:00
|
|
|
ctx.wasm_table_index_free_list.push(wasm_table_index);
|
2018-07-03 17:28:33 +02:00
|
|
|
|
2018-09-12 23:06:29 +02:00
|
|
|
// It is not strictly necessary to clear the function, but it will fail more predictably if we
|
|
|
|
// accidentally use the function and may garbage collect unused modules earlier
|
|
|
|
cpu::jit_clear_func(wasm_table_index);
|
2018-07-03 17:28:33 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Remove all entries with the given wasm_table_index in page
|
|
|
|
fn remove_jit_cache_wasm_index(ctx: &mut JitState, page: Page, wasm_table_index: u16) {
|
|
|
|
let mut cache_array_index = jit_cache_array::get_page_index(page).unwrap();
|
|
|
|
|
|
|
|
let mut pending = false;
|
|
|
|
|
|
|
|
loop {
|
|
|
|
let entry = jit_cache_array::get_mut(cache_array_index);
|
|
|
|
let next_cache_array_index = entry.next_index_same_page();
|
|
|
|
|
|
|
|
if entry.wasm_table_index == wasm_table_index {
|
|
|
|
// if one entry is pending, all must be pending
|
|
|
|
dbg_assert!(!pending || entry.pending);
|
|
|
|
|
|
|
|
pending = entry.pending;
|
|
|
|
|
|
|
|
jit_cache_array::remove(cache_array_index);
|
|
|
|
|
|
|
|
dbg_assert!(entry.next_index_same_page() == None);
|
|
|
|
entry.wasm_table_index = 0;
|
|
|
|
entry.start_addr = 0;
|
|
|
|
entry.pending = false;
|
|
|
|
}
|
|
|
|
|
|
|
|
if let Some(i) = next_cache_array_index {
|
|
|
|
cache_array_index = i;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if pending {
|
|
|
|
ctx.wasm_table_index_pending_free.push(wasm_table_index);
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
free_wasm_table_index(ctx, wasm_table_index);
|
|
|
|
}
|
|
|
|
|
|
|
|
if !jit_page_has_code(ctx, page) {
|
|
|
|
cpu::tlb_set_has_code(page, false);
|
|
|
|
}
|
|
|
|
|
|
|
|
if CHECK_JIT_CACHE_ARRAY_INVARIANTS {
|
|
|
|
// sanity check that the above iteration deleted all entries
|
|
|
|
|
2021-01-01 02:14:29 +01:00
|
|
|
for i in 0..jit_cache_array::SIZE {
|
|
|
|
let entry = jit_cache_array::get(i);
|
2018-07-03 17:28:33 +02:00
|
|
|
dbg_assert!(entry.wasm_table_index != wasm_table_index);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Register a write in this page: Delete all present code
|
|
|
|
pub fn jit_dirty_page(ctx: &mut JitState, page: Page) {
|
|
|
|
let mut did_have_code = false;
|
|
|
|
|
|
|
|
if let Some(mut cache_array_index) = jit_cache_array::get_page_index(page) {
|
|
|
|
did_have_code = true;
|
|
|
|
|
|
|
|
let mut index_to_free = HashSet::new();
|
|
|
|
let mut index_to_pending_free = HashSet::new();
|
|
|
|
|
|
|
|
jit_cache_array::set_page_index(page, None);
|
2018-09-18 20:23:05 +02:00
|
|
|
profiler::stat_increment(stat::INVALIDATE_PAGE);
|
2018-07-03 17:28:33 +02:00
|
|
|
|
|
|
|
loop {
|
2018-09-18 20:23:05 +02:00
|
|
|
profiler::stat_increment(stat::INVALIDATE_CACHE_ENTRY);
|
2018-07-03 17:28:33 +02:00
|
|
|
let entry = jit_cache_array::get_mut(cache_array_index);
|
|
|
|
let wasm_table_index = entry.wasm_table_index;
|
|
|
|
|
2018-07-13 02:04:28 +02:00
|
|
|
dbg_assert!(page == Page::page_of(entry.start_addr));
|
2018-07-03 17:28:33 +02:00
|
|
|
|
|
|
|
let next_cache_array_index = entry.next_index_same_page();
|
|
|
|
|
|
|
|
entry.set_next_index_same_page(None);
|
|
|
|
entry.start_addr = 0;
|
|
|
|
entry.wasm_table_index = 0;
|
|
|
|
|
|
|
|
if entry.pending {
|
2018-09-02 00:18:49 +02:00
|
|
|
dbg_assert!(!index_to_free.contains(&wasm_table_index));
|
|
|
|
|
2018-07-03 17:28:33 +02:00
|
|
|
entry.pending = false;
|
|
|
|
|
|
|
|
index_to_pending_free.insert(wasm_table_index);
|
|
|
|
}
|
|
|
|
else {
|
2018-09-02 00:18:49 +02:00
|
|
|
dbg_assert!(!index_to_pending_free.contains(&wasm_table_index));
|
2018-07-03 17:28:33 +02:00
|
|
|
index_to_free.insert(wasm_table_index);
|
|
|
|
}
|
|
|
|
|
|
|
|
if let Some(i) = next_cache_array_index {
|
|
|
|
cache_array_index = i;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-09-02 00:18:49 +02:00
|
|
|
profiler::stat_increment_by(
|
2018-09-18 20:23:05 +02:00
|
|
|
stat::INVALIDATE_MODULE,
|
2018-09-13 00:56:32 +02:00
|
|
|
index_to_pending_free.len() as u64 + index_to_free.len() as u64,
|
2018-09-02 00:18:49 +02:00
|
|
|
);
|
|
|
|
|
2018-07-03 17:28:33 +02:00
|
|
|
for index in index_to_free.iter().cloned() {
|
|
|
|
free_wasm_table_index(ctx, index)
|
|
|
|
}
|
|
|
|
|
|
|
|
for index in index_to_pending_free {
|
|
|
|
ctx.wasm_table_index_pending_free.push(index);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
match ctx.entry_points.remove(&page) {
|
|
|
|
None => {},
|
|
|
|
Some(_entry_points) => {
|
|
|
|
did_have_code = true;
|
|
|
|
|
|
|
|
// don't try to compile code in this page anymore until it's hot again
|
|
|
|
ctx.hot_code_addresses[jit_hot_hash_page(page) as usize] = 0;
|
|
|
|
},
|
|
|
|
}
|
|
|
|
|
|
|
|
if did_have_code {
|
|
|
|
cpu::tlb_set_has_code(page, false);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn jit_dirty_cache(ctx: &mut JitState, start_addr: u32, end_addr: u32) {
|
2018-07-13 02:04:28 +02:00
|
|
|
dbg_assert!(start_addr < end_addr);
|
2018-07-03 17:28:33 +02:00
|
|
|
|
|
|
|
let start_page = Page::page_of(start_addr);
|
|
|
|
let end_page = Page::page_of(end_addr - 1);
|
|
|
|
|
|
|
|
for page in start_page.to_u32()..end_page.to_u32() + 1 {
|
|
|
|
jit_dirty_page(ctx, Page::page_of(page << 12));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn jit_dirty_cache_small(ctx: &mut JitState, start_addr: u32, end_addr: u32) {
|
2018-07-13 02:04:28 +02:00
|
|
|
dbg_assert!(start_addr < end_addr);
|
2018-07-03 17:28:33 +02:00
|
|
|
|
|
|
|
let start_page = Page::page_of(start_addr);
|
|
|
|
let end_page = Page::page_of(end_addr - 1);
|
|
|
|
|
|
|
|
jit_dirty_page(ctx, start_page);
|
|
|
|
|
|
|
|
// Note: This can't happen when paging is enabled, as writes across
|
|
|
|
// boundaries are split up on two pages
|
|
|
|
if start_page != end_page {
|
2018-07-13 02:04:28 +02:00
|
|
|
dbg_assert!(start_page.to_u32() + 1 == end_page.to_u32());
|
2018-07-03 17:28:33 +02:00
|
|
|
jit_dirty_page(ctx, end_page);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn jit_empty_cache(ctx: &mut JitState) {
|
|
|
|
ctx.entry_points.clear();
|
|
|
|
|
2018-11-14 21:49:13 +01:00
|
|
|
for page_index in 0..0x100000 {
|
|
|
|
jit_dirty_page(ctx, Page::page_of(page_index << 12))
|
2018-07-03 17:28:33 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn jit_page_has_code(ctx: &JitState, page: Page) -> bool {
|
2021-01-01 02:14:29 +01:00
|
|
|
jit_cache_array::get_page_index(page) != None || ctx.entry_points.contains_key(&page)
|
2018-07-03 17:28:33 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
#[cfg(debug_assertions)]
|
|
|
|
pub fn jit_unused_cache_stat() -> u32 {
|
2021-01-01 02:14:29 +01:00
|
|
|
let mut count = 0;
|
|
|
|
for i in 0..jit_cache_array::SIZE {
|
|
|
|
if (jit_cache_array::get(i)).start_addr == 0 {
|
|
|
|
count += 1
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return count;
|
2018-07-03 17:28:33 +02:00
|
|
|
}
|
|
|
|
#[cfg(debug_assertions)]
|
2021-01-01 02:14:29 +01:00
|
|
|
pub fn jit_get_entry_length(i: u32) -> u32 { (jit_cache_array::get(i)).len }
|
2018-07-03 17:28:33 +02:00
|
|
|
#[cfg(debug_assertions)]
|
2021-01-01 02:14:29 +01:00
|
|
|
pub fn jit_get_entry_address(i: u32) -> u32 { (jit_cache_array::get(i)).start_addr }
|
2018-07-03 17:28:33 +02:00
|
|
|
#[cfg(debug_assertions)]
|
2021-01-01 02:14:29 +01:00
|
|
|
pub fn jit_get_entry_pending(i: u32) -> bool { (jit_cache_array::get(i)).pending }
|
2018-07-03 17:28:33 +02:00
|
|
|
#[cfg(debug_assertions)]
|
|
|
|
pub fn jit_get_wasm_table_index_free_list_count(ctx: &JitState) -> u32 {
|
|
|
|
ctx.wasm_table_index_free_list.len() as u32
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Forwards to `get_op_len` of the wasm builder owned by `ctx`.
pub fn jit_get_op_len(ctx: &JitState) -> u32 {
    let builder = &ctx.wasm_builder;
    builder.get_op_len()
}
|
|
|
|
/// Forwards to `get_op_ptr` of the wasm builder owned by `ctx`.
pub fn jit_get_op_ptr(ctx: &JitState) -> *const u8 {
    let builder = &ctx.wasm_builder;
    builder.get_op_ptr()
}
|
2018-09-18 00:14:10 +02:00
|
|
|
|
|
|
|
/// Profiling aid: reports compiled code that covers `phys_address` although no entry
/// point exists for that address.
///
/// Every cache array slot that an address of the page containing `phys_address` maps
/// to is inspected. For each non-pending entry with matching `state_flags` whose
/// `[start_addr, start_addr + len)` range contains `phys_address`, the
/// `RUN_INTERPRETED_MISSED_COMPILED_ENTRY_LOOKUP` stat is incremented and a debug
/// message is logged with the four opcode bytes at `phys_address` and at the address
/// of the last recorded jump.
#[cfg(feature = "profiler")]
pub fn check_missed_entry_points(phys_address: u32, state_flags: CachedStateFlags) {
    let page = Page::page_of(phys_address);
    let page_start = page.to_address();

    // Iterate over in-page offsets instead of `page_start..page_start + 4096`: the end
    // of that range overflows for the topmost page of a 32-bit address space
    for offset in 0..4096 {
        // No need to check [CODE_CACHE_SEARCH_SIZE] entries here as we look at consecutive
        // addresses anyway
        let index = (page_start + offset) & jit_cache_array::MASK;
        let entry = jit_cache_array::get(index);

        if !entry.pending
            && entry.state_flags == state_flags
            && phys_address >= entry.start_addr
            && phys_address < entry.start_addr + entry.len
        {
            profiler::stat_increment(stat::RUN_INTERPRETED_MISSED_COMPILED_ENTRY_LOOKUP);

            // NOTE(review): `debug_last_jump` is accessed through `unsafe`; presumably it
            // is a mutable global that is only touched from one thread -- confirm
            let last_jump_type = unsafe { ::cpu2::cpu::debug_last_jump.name() };
            let last_jump_addr =
                unsafe { ::cpu2::cpu::debug_last_jump.phys_address() }.unwrap_or(0);
            // Address 0 stands for "no last jump address known"; nothing is read then
            let last_jump_opcode =
                if last_jump_addr != 0 { cpu::read32(last_jump_addr) } else { 0 };

            let opcode = cpu::read32(phys_address);
            // The 32-bit values are printed byte by byte, lowest byte first; the fourth
            // byte is taken from bits 24..32
            dbg_log!(
                "Compiled exists, but no entry point, \
                 start={:x} end={:x} phys_addr={:x} opcode={:02x} {:02x} {:02x} {:02x}. \
                 Last jump at {:x} ({}) opcode={:02x} {:02x} {:02x} {:02x}",
                entry.start_addr,
                entry.start_addr + entry.len,
                phys_address,
                opcode & 0xFF,
                opcode >> 8 & 0xFF,
                opcode >> 16 & 0xFF,
                opcode >> 24 & 0xFF,
                last_jump_addr,
                last_jump_type,
                last_jump_opcode & 0xFF,
                last_jump_opcode >> 8 & 0xFF,
                last_jump_opcode >> 16 & 0xFF,
                last_jump_opcode >> 24 & 0xFF,
            );
        }
    }
}
|