Keep track of cached code entries per page
- Use this data structure to delete cached code immediately when a page is written, rather than later when the wasm index is reused
- Remove the "dirty page" data structure
- Simplify cycle_internal: entries can no longer be found dirty, because they are removed immediately after being overwritten
This commit is contained in:
parent
f2c8957319
commit
2fd4e50c93
|
@ -223,8 +223,8 @@ function V86Starter(options)
|
|||
},
|
||||
"_get_time": Date.now,
|
||||
|
||||
"_codegen_finalize": (wasm_table_index, start, end, first_opcode, state_flags, page_dirtiness) => {
|
||||
cpu.codegen_finalize(wasm_table_index, start, end, first_opcode, state_flags, page_dirtiness);
|
||||
"_codegen_finalize": (wasm_table_index, start, end, first_opcode, state_flags) => {
|
||||
cpu.codegen_finalize(wasm_table_index, start, end, first_opcode, state_flags);
|
||||
},
|
||||
"_coverage_log": (fn_name_offset, num_blocks, visited_block) => {
|
||||
coverage_logger.log(fn_name_offset, num_blocks, visited_block);
|
||||
|
|
|
@ -1248,7 +1248,7 @@ if(PROFILING)
|
|||
var seen_code = {};
|
||||
var seen_code_uncompiled = {};
|
||||
|
||||
CPU.prototype.codegen_finalize = function(wasm_table_index, start, end, first_opcode, state_flags, page_dirtiness)
|
||||
CPU.prototype.codegen_finalize = function(wasm_table_index, start, end, first_opcode, state_flags)
|
||||
{
|
||||
dbg_assert(wasm_table_index >= 0 && wasm_table_index < WASM_TABLE_SIZE);
|
||||
//dbg_log("finalize");
|
||||
|
@ -1300,7 +1300,7 @@ CPU.prototype.codegen_finalize = function(wasm_table_index, start, end, first_op
|
|||
|
||||
this.wm.exports["_codegen_finalize_finished"](
|
||||
wasm_table_index, start, end,
|
||||
first_opcode, state_flags, page_dirtiness);
|
||||
first_opcode, state_flags);
|
||||
|
||||
// The following will throw if f isn't an exported function
|
||||
this.wm.imports["env"].table.set(wasm_table_index, f);
|
||||
|
|
103
src/native/cpu.c
103
src/native/cpu.c
|
@ -26,7 +26,8 @@ struct code_cache jit_cache_arr[WASM_TABLE_SIZE] = {
|
|||
.opcode = {0},
|
||||
.len = 0,
|
||||
#endif
|
||||
.group_status = 0,
|
||||
.next_index_same_page = 0,
|
||||
|
||||
.wasm_table_index = 0,
|
||||
.initial_state = 0,
|
||||
.state_flags = 0,
|
||||
|
@ -38,7 +39,8 @@ uint64_t tsc_offset = 0;
|
|||
|
||||
uint32_t jit_block_boundary = 0;
|
||||
int32_t hot_code_addresses[HASH_PRIME] = {0};
|
||||
uint32_t group_dirtiness[GROUP_DIRTINESS_LENGTH] = {0};
|
||||
|
||||
int32_t page_first_jit_cache_entry[GROUP_DIRTINESS_LENGTH] = {0};
|
||||
|
||||
uint16_t wasm_table_index_free_list[0x10000] = { 0 };
|
||||
int32_t wasm_table_index_free_list_count = 0;
|
||||
|
@ -630,9 +632,7 @@ static struct code_cache* create_cache_entry(uint32_t phys_addr)
|
|||
uint16_t addr_index = (phys_addr + i) & JIT_PHYS_MASK;
|
||||
struct code_cache* entry = &jit_cache_arr[addr_index];
|
||||
|
||||
uint32_t page_dirtiness = group_dirtiness[entry->start_addr >> DIRTY_ARR_SHIFT];
|
||||
|
||||
if(!entry->start_addr || entry->group_status != page_dirtiness)
|
||||
if(!entry->start_addr)
|
||||
{
|
||||
if(i > 0)
|
||||
{
|
||||
|
@ -640,6 +640,22 @@ static struct code_cache* create_cache_entry(uint32_t phys_addr)
|
|||
phys_addr, jit_cache_arr[addr_index - 1].start_addr);
|
||||
}
|
||||
|
||||
uint32_t page = phys_addr >> 12;
|
||||
int32_t previous_entry_index = page_first_jit_cache_entry[page];
|
||||
|
||||
if(previous_entry_index != JIT_CACHE_ARRAY_NO_NEXT_ENTRY)
|
||||
{
|
||||
struct code_cache* previous_entry = &jit_cache_arr[previous_entry_index];
|
||||
|
||||
if(previous_entry->start_addr)
|
||||
{
|
||||
assert(same_page(previous_entry->start_addr, phys_addr));
|
||||
}
|
||||
}
|
||||
|
||||
page_first_jit_cache_entry[page] = addr_index;
|
||||
entry->next_index_same_page = previous_entry_index;
|
||||
|
||||
return entry;
|
||||
}
|
||||
}
|
||||
|
@ -648,6 +664,8 @@ static struct code_cache* create_cache_entry(uint32_t phys_addr)
|
|||
uint16_t addr_index = phys_addr & JIT_PHYS_MASK;
|
||||
struct code_cache* entry = &jit_cache_arr[addr_index];
|
||||
|
||||
// TODO: Free wasm table index
|
||||
|
||||
profiler_stat_increment(S_CACHE_MISMATCH);
|
||||
return entry;
|
||||
}
|
||||
|
@ -657,7 +675,7 @@ static bool is_near_end_of_page(uint32_t addr)
|
|||
return (addr & 0xFFF) >= (0x1000 - 16);
|
||||
}
|
||||
|
||||
static bool same_page(int32_t addr1, int32_t addr2)
|
||||
bool same_page(int32_t addr1, int32_t addr2)
|
||||
{
|
||||
return (addr1 & ~0xFFF) == (addr2 & ~0xFFF);
|
||||
}
|
||||
|
@ -766,9 +784,8 @@ static void jit_generate_basic_block(int32_t start_addr, int32_t stop_addr)
|
|||
|
||||
void codegen_finalize_finished(
|
||||
int32_t wasm_table_index, uint32_t phys_addr, uint32_t end_addr,
|
||||
int32_t first_opcode, cached_state_flags state_flags, uint32_t page_dirtiness)
|
||||
int32_t first_opcode, cached_state_flags state_flags)
|
||||
{
|
||||
if(page_dirtiness == group_dirtiness[phys_addr >> DIRTY_ARR_SHIFT])
|
||||
{
|
||||
// XXX: Avoid looping through entire table here (requires different data structure)
|
||||
|
||||
|
@ -788,20 +805,14 @@ void codegen_finalize_finished(
|
|||
|
||||
// sanity check that we're looking at the right entry
|
||||
assert(entry->pending);
|
||||
assert(entry->group_status == page_dirtiness);
|
||||
assert(entry->start_addr == phys_addr);
|
||||
assert(entry->state_flags == state_flags);
|
||||
#endif
|
||||
UNUSED(page_dirtiness);
|
||||
UNUSED(phys_addr);
|
||||
UNUSED(state_flags);
|
||||
UNUSED(end_addr);
|
||||
UNUSED(first_opcode);
|
||||
}
|
||||
else
|
||||
{
|
||||
// the page has been written, drop this entry
|
||||
}
|
||||
}
|
||||
|
||||
static struct code_cache* find_cache_entry(uint32_t phys_addr)
|
||||
|
@ -823,29 +834,6 @@ static struct code_cache* find_cache_entry(uint32_t phys_addr)
|
|||
return NULL;
|
||||
}
|
||||
|
||||
struct code_cache* find_link_block_target(int32_t target)
|
||||
{
|
||||
int32_t eip = *previous_ip;
|
||||
|
||||
if(same_page(eip, target))
|
||||
{
|
||||
assert((eip & ~0xFFF) == *last_virt_eip);
|
||||
assert((target & ~0xFFF) == *last_virt_eip);
|
||||
|
||||
uint32_t phys_target = *eip_phys ^ target;
|
||||
struct code_cache* entry = find_cache_entry(phys_target);
|
||||
|
||||
if(entry &&
|
||||
!entry->pending &&
|
||||
entry->group_status == group_dirtiness[entry->start_addr >> DIRTY_ARR_SHIFT])
|
||||
{
|
||||
return entry;
|
||||
}
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void jit_link_block_conditional(int32_t offset, const char* condition)
|
||||
{
|
||||
// Note: block linking cannot rely on the absolute value of eip, as blocks
|
||||
|
@ -1232,7 +1220,7 @@ static void jit_find_basic_blocks()
|
|||
}
|
||||
}
|
||||
|
||||
static void jit_generate(uint32_t phys_addr, uint32_t page_dirtiness)
|
||||
static void jit_generate(uint32_t phys_addr)
|
||||
{
|
||||
profiler_stat_increment(S_COMPILE);
|
||||
profiler_start(P_GEN_INSTR);
|
||||
|
@ -1431,7 +1419,6 @@ static void jit_generate(uint32_t phys_addr, uint32_t page_dirtiness)
|
|||
|
||||
entry->start_addr = phys_addr;
|
||||
entry->state_flags = state_flags;
|
||||
entry->group_status = page_dirtiness;
|
||||
entry->pending = true;
|
||||
entry->initial_state = i;
|
||||
entry->wasm_table_index = wasm_table_index;
|
||||
|
@ -1455,7 +1442,7 @@ static void jit_generate(uint32_t phys_addr, uint32_t page_dirtiness)
|
|||
// will call codegen_finalize_finished asynchronously when finished
|
||||
codegen_finalize(
|
||||
wasm_table_index, phys_addr, end_addr,
|
||||
first_opcode, state_flags, page_dirtiness);
|
||||
first_opcode, state_flags);
|
||||
|
||||
profiler_stat_increment(S_COMPILE_SUCCESS);
|
||||
profiler_end(P_GEN_INSTR);
|
||||
|
@ -1468,8 +1455,7 @@ static void jit_generate(uint32_t phys_addr, uint32_t page_dirtiness)
|
|||
void jit_force_generate_unsafe(uint32_t phys_addr)
|
||||
{
|
||||
*instruction_pointer = phys_addr;
|
||||
uint32_t page_dirtiness = group_dirtiness[phys_addr >> DIRTY_ARR_SHIFT];
|
||||
jit_generate(phys_addr, page_dirtiness);
|
||||
jit_generate(phys_addr);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -1482,36 +1468,25 @@ void cycle_internal()
|
|||
|
||||
struct code_cache* entry = find_cache_entry(phys_addr);
|
||||
|
||||
uint32_t page_dirtiness = group_dirtiness[phys_addr >> DIRTY_ARR_SHIFT];
|
||||
|
||||
const bool JIT_COMPILE_ONLY_AFTER_BLOCK_BOUNDARY = true;
|
||||
|
||||
if(entry && entry->group_status == page_dirtiness && !entry->pending)
|
||||
if(entry && !entry->pending)
|
||||
{
|
||||
profiler_start(P_RUN_FROM_CACHE);
|
||||
profiler_stat_increment(S_RUN_FROM_CACHE);
|
||||
|
||||
//assert(entry->opcode[0] == read8(phys_addr));
|
||||
|
||||
uint32_t old_group_status = entry->group_status;
|
||||
uint32_t old_start_address = entry->start_addr;
|
||||
uint32_t old_group_dirtiness = group_dirtiness[entry->start_addr >> DIRTY_ARR_SHIFT];
|
||||
assert(old_group_status == old_group_dirtiness);
|
||||
|
||||
uint16_t wasm_table_index = entry->wasm_table_index;
|
||||
uint16_t initial_state = entry->initial_state;
|
||||
call_indirect1(wasm_table_index, initial_state);
|
||||
|
||||
// These shouldn't fail
|
||||
assert(entry->group_status == old_group_status);
|
||||
assert(entry->start_addr == old_start_address);
|
||||
// XXX: New clearing: This should fail on self-modifying code
|
||||
//assert(entry->start_addr == old_start_address);
|
||||
|
||||
// JIT compiled self-modifying code may trigger this assert
|
||||
//assert(old_group_dirtiness == group_dirtiness[entry->start_addr >> DIRTY_ARR_SHIFT]);
|
||||
|
||||
UNUSED(old_group_status);
|
||||
UNUSED(old_start_address);
|
||||
UNUSED(old_group_dirtiness);
|
||||
|
||||
profiler_end(P_RUN_FROM_CACHE);
|
||||
}
|
||||
|
@ -1520,16 +1495,14 @@ void cycle_internal()
|
|||
bool did_block_boundary = !JIT_COMPILE_ONLY_AFTER_BLOCK_BOUNDARY || jit_block_boundary;
|
||||
const int32_t address_hash = jit_hot_hash(phys_addr);
|
||||
|
||||
// exists | pending | written -> should generate
|
||||
// -------+---------+---------++---------------------
|
||||
// 0 | x | x -> yes
|
||||
// 1 | 0 | 0 -> impossible (handled above)
|
||||
// 1 | 1 | 0 -> no
|
||||
// 1 | 0 | 1 -> yes
|
||||
// 1 | 1 | 1 -> yes
|
||||
// exists | pending -> should generate
|
||||
// -------+---------++---------------------
|
||||
// 0 | x -> yes
|
||||
// 1 | 0 -> impossible (handled above)
|
||||
// 1 | 1 -> no
|
||||
|
||||
if(
|
||||
(!entry || entry->group_status != page_dirtiness) &&
|
||||
!entry &&
|
||||
!is_near_end_of_page(phys_addr) && (
|
||||
ENABLE_JIT_ALWAYS ||
|
||||
(did_block_boundary && ++hot_code_addresses[address_hash] > JIT_THRESHOLD)
|
||||
|
@ -1539,7 +1512,7 @@ void cycle_internal()
|
|||
// don't immediately retry to compile
|
||||
hot_code_addresses[address_hash] = 0;
|
||||
|
||||
jit_generate(phys_addr, page_dirtiness);
|
||||
jit_generate(phys_addr);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
|
|
@ -45,10 +45,9 @@ struct code_cache {
|
|||
int32_t opcode[1];
|
||||
int32_t len;
|
||||
#endif
|
||||
// Cleanliness status of the entry's "group" (based on
|
||||
// DIRTY_ARR_SHIFT). Value only has meaning in relation with the
|
||||
// group_dirtiness value.
|
||||
uint32_t group_status;
|
||||
|
||||
// an index into jit_cache_arr for the next code_cache entry within the same physical page
|
||||
int32_t next_index_same_page;
|
||||
|
||||
uint16_t wasm_table_index;
|
||||
uint16_t initial_state;
|
||||
|
@ -99,9 +98,11 @@ struct basic_block_list {
|
|||
|
||||
// Count of how many times prime_hash(address) has been called through a jump
|
||||
extern int32_t hot_code_addresses[HASH_PRIME];
|
||||
// An array indicating the current "initial group status" for entries that map
|
||||
// to the same group due to the shift
|
||||
extern uint32_t group_dirtiness[GROUP_DIRTINESS_LENGTH];
|
||||
|
||||
// A mapping from physical page to index into jit_cache_arr
|
||||
int32_t page_first_jit_cache_entry[GROUP_DIRTINESS_LENGTH];
|
||||
|
||||
#define JIT_CACHE_ARRAY_NO_NEXT_ENTRY (-1)
|
||||
|
||||
uint16_t wasm_table_index_free_list[0x10000];
|
||||
int32_t wasm_table_index_free_list_count;
|
||||
|
@ -128,6 +129,8 @@ void diverged(void);
|
|||
void branch_taken(void);
|
||||
void branch_not_taken(void);
|
||||
|
||||
bool same_page(int32_t, int32_t);
|
||||
|
||||
int32_t get_eflags(void);
|
||||
uint32_t translate_address_read(int32_t address);
|
||||
uint32_t translate_address_write(int32_t address);
|
||||
|
|
|
@ -25,7 +25,7 @@ extern int32_t set_cr0(int32_t);
|
|||
extern int32_t verr(int32_t);
|
||||
extern int32_t verw(int32_t);
|
||||
|
||||
extern void codegen_finalize(int32_t, int32_t, int32_t, int32_t, int32_t, int32_t);
|
||||
extern void codegen_finalize(int32_t, int32_t, int32_t, int32_t, int32_t);
|
||||
extern void log_uncompiled_code(int32_t, int32_t);
|
||||
extern void dump_function_code(const struct basic_block* basic_block, int32_t basic_block_count, int32_t end);
|
||||
|
||||
|
|
|
@ -14,6 +14,32 @@ bool in_mapped_range(uint32_t addr)
|
|||
return (addr >= 0xA0000 && addr < 0xC0000) || addr >= *memory_size;
|
||||
}
|
||||
|
||||
void jit_dirty_index(uint32_t index)
|
||||
{
|
||||
int32_t cache_array_index = page_first_jit_cache_entry[index];
|
||||
|
||||
if(cache_array_index != JIT_CACHE_ARRAY_NO_NEXT_ENTRY)
|
||||
{
|
||||
page_first_jit_cache_entry[index] = JIT_CACHE_ARRAY_NO_NEXT_ENTRY;
|
||||
|
||||
do
|
||||
{
|
||||
struct code_cache* entry = &jit_cache_arr[cache_array_index];
|
||||
|
||||
assert(same_page(index << DIRTY_ARR_SHIFT, entry->start_addr));
|
||||
entry->start_addr = 0;
|
||||
entry->wasm_table_index = 0;
|
||||
|
||||
// TODO: Free wasm table index
|
||||
|
||||
cache_array_index = entry->next_index_same_page;
|
||||
|
||||
entry->next_index_same_page = 0;
|
||||
}
|
||||
while(cache_array_index != JIT_CACHE_ARRAY_NO_NEXT_ENTRY);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* There are 3 primary ways a cached basic block will be dirtied:
|
||||
* 1. A write dirties basic block A independently (A is clean and
|
||||
|
@ -30,13 +56,9 @@ void jit_dirty_cache(uint32_t start_addr, uint32_t end_addr)
|
|||
assert(start_addr <= end_addr);
|
||||
for(uint32_t i = start_addr; i < end_addr; i++)
|
||||
{
|
||||
uint32_t idx = i >> DIRTY_ARR_SHIFT;
|
||||
// XXX: Overflow _can_ cause a stale cache (with
|
||||
// group_status=0) to be mistakenly run, but the odds are low
|
||||
// since it depends on a compiled block never being
|
||||
// re-compiled or evicted for 2^32 times that
|
||||
// another block in its group is dirtied
|
||||
group_dirtiness[idx]++;
|
||||
uint32_t index = i >> DIRTY_ARR_SHIFT;
|
||||
// XXX: Should only call once per index
|
||||
jit_dirty_index(index);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
@ -49,14 +71,14 @@ void jit_dirty_cache_small(uint32_t start_addr, uint32_t end_addr)
|
|||
uint32_t start_index = start_addr >> DIRTY_ARR_SHIFT;
|
||||
uint32_t end_index = (end_addr - 1) >> DIRTY_ARR_SHIFT;
|
||||
|
||||
group_dirtiness[start_index]++;
|
||||
jit_dirty_index(start_index);
|
||||
|
||||
// Note: This can't happen when paging is enabled, as writes across
|
||||
// boundaries are split up on two pages
|
||||
if(start_index != end_index)
|
||||
{
|
||||
assert(end_index == start_index + 1);
|
||||
group_dirtiness[end_index]++;
|
||||
jit_dirty_index(end_index);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
@ -66,7 +88,7 @@ void jit_dirty_cache_single(uint32_t addr)
|
|||
#if ENABLE_JIT
|
||||
uint32_t index = addr >> DIRTY_ARR_SHIFT;
|
||||
|
||||
group_dirtiness[index]++;
|
||||
jit_dirty_index(index);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -75,6 +97,13 @@ void jit_empty_cache()
|
|||
for(int32_t i = 0; i < WASM_TABLE_SIZE; i++)
|
||||
{
|
||||
jit_cache_arr[i].start_addr = 0;
|
||||
jit_cache_arr[i].next_index_same_page = JIT_CACHE_ARRAY_NO_NEXT_ENTRY;
|
||||
jit_cache_arr[i].wasm_table_index = 0;
|
||||
}
|
||||
|
||||
for(int32_t i = 0; i < GROUP_DIRTINESS_LENGTH; i++)
|
||||
{
|
||||
page_first_jit_cache_entry[i] = JIT_CACHE_ARRAY_NO_NEXT_ENTRY;
|
||||
}
|
||||
|
||||
for(int32_t i = 0; i < 0xFFFF; i++)
|
||||
|
@ -88,20 +117,7 @@ void jit_empty_cache()
|
|||
|
||||
int32_t jit_invalid_cache_stat()
|
||||
{
|
||||
int32_t count = 0;
|
||||
|
||||
for(int32_t i = 0; i < WASM_TABLE_SIZE; i++)
|
||||
{
|
||||
struct code_cache* entry = &jit_cache_arr[i];
|
||||
int32_t phys_addr = entry->start_addr;
|
||||
|
||||
if(phys_addr != 0 && entry->group_status != group_dirtiness[phys_addr >> DIRTY_ARR_SHIFT])
|
||||
{
|
||||
count++;
|
||||
}
|
||||
}
|
||||
|
||||
return count;
|
||||
return 0; // XXX: This stat doesn't make sense anymore after immediate cleaning
|
||||
}
|
||||
|
||||
int32_t jit_unused_cache_stat()
|
||||
|
|
Loading…
Reference in a new issue