Keep track of cached code entries per page

- Use this data structure to delete cached code immediately when a page is
  written, not later when the wasm index is reused (sketched below)
- Remove "dirty page" data structure
- Simplify cycle_internal, as no entries can be found dirty; they are
  removed immediately when their page is written
Fabian 2018-04-24 16:53:19 -05:00
parent f2c8957319
commit 2fd4e50c93
6 changed files with 93 additions and 101 deletions
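
The data structure introduced here is a per-page singly linked list: page_first_jit_cache_entry maps a physical page to the head entry in jit_cache_arr, and each entry points to the next entry on the same page via next_index_same_page, so a write to the page can drop every cached block in a single walk. Below is a minimal standalone sketch of that idea, not the project's code: the struct is reduced to the two relevant fields, and the sizes are placeholder values assumed only for illustration.

// Minimal sketch of the per-page cache index (assumed simplifications:
// reduced struct, placeholder sizes; only the names jit_cache_arr,
// page_first_jit_cache_entry, next_index_same_page and
// JIT_CACHE_ARRAY_NO_NEXT_ENTRY mirror the diff).
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define WASM_TABLE_SIZE 0x10000             // placeholder size
#define GROUP_DIRTINESS_LENGTH 0x100000     // placeholder: one slot per page
#define DIRTY_ARR_SHIFT 12                  // assumption: groups are 4K pages
#define JIT_CACHE_ARRAY_NO_NEXT_ENTRY (-1)

struct code_cache {
    uint32_t start_addr;           // 0 means the slot is free
    int32_t next_index_same_page;  // next entry in the same physical page
};

static struct code_cache jit_cache_arr[WASM_TABLE_SIZE];
static int32_t page_first_jit_cache_entry[GROUP_DIRTINESS_LENGTH];

// Prepend a freshly created entry to its page's list.
static void link_entry(uint16_t addr_index, uint32_t phys_addr)
{
    uint32_t page = phys_addr >> DIRTY_ARR_SHIFT;
    jit_cache_arr[addr_index].start_addr = phys_addr;
    jit_cache_arr[addr_index].next_index_same_page = page_first_jit_cache_entry[page];
    page_first_jit_cache_entry[page] = addr_index;
}

// On a write to the page, walk the list once and free every entry.
static void dirty_page(uint32_t page)
{
    int32_t i = page_first_jit_cache_entry[page];
    page_first_jit_cache_entry[page] = JIT_CACHE_ARRAY_NO_NEXT_ENTRY;
    while(i != JIT_CACHE_ARRAY_NO_NEXT_ENTRY)
    {
        struct code_cache* entry = &jit_cache_arr[i];
        i = entry->next_index_same_page;
        entry->start_addr = 0;
        entry->next_index_same_page = JIT_CACHE_ARRAY_NO_NEXT_ENTRY;
    }
}

int main(void)
{
    for(int32_t i = 0; i < GROUP_DIRTINESS_LENGTH; i++)
    {
        page_first_jit_cache_entry[i] = JIT_CACHE_ARRAY_NO_NEXT_ENTRY;
    }
    link_entry(3, 0x2010);  // two cached blocks on page 2
    link_entry(7, 0x2040);
    dirty_page(0x2010 >> DIRTY_ARR_SHIFT);
    assert(jit_cache_arr[3].start_addr == 0 && jit_cache_arr[7].start_addr == 0);
    printf("entries on page 2 dropped immediately on write\n");
    return 0;
}

The design trades the old lazy check, comparing each entry's group_status against group_dirtiness on every lookup, for eager invalidation at write time, which is why cycle_internal no longer needs the dirtiness comparison.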

View file

@@ -223,8 +223,8 @@ function V86Starter(options)
},
"_get_time": Date.now,
"_codegen_finalize": (wasm_table_index, start, end, first_opcode, state_flags, page_dirtiness) => {
cpu.codegen_finalize(wasm_table_index, start, end, first_opcode, state_flags, page_dirtiness);
"_codegen_finalize": (wasm_table_index, start, end, first_opcode, state_flags) => {
cpu.codegen_finalize(wasm_table_index, start, end, first_opcode, state_flags);
},
"_coverage_log": (fn_name_offset, num_blocks, visited_block) => {
coverage_logger.log(fn_name_offset, num_blocks, visited_block);

View file

@@ -1248,7 +1248,7 @@ if(PROFILING)
var seen_code = {};
var seen_code_uncompiled = {};
CPU.prototype.codegen_finalize = function(wasm_table_index, start, end, first_opcode, state_flags, page_dirtiness)
CPU.prototype.codegen_finalize = function(wasm_table_index, start, end, first_opcode, state_flags)
{
dbg_assert(wasm_table_index >= 0 && wasm_table_index < WASM_TABLE_SIZE);
//dbg_log("finalize");
@@ -1300,7 +1300,7 @@ CPU.prototype.codegen_finalize = function(wasm_table_index, start, end, first_op
this.wm.exports["_codegen_finalize_finished"](
wasm_table_index, start, end,
first_opcode, state_flags, page_dirtiness);
first_opcode, state_flags);
// The following will throw if f isn't an exported function
this.wm.imports["env"].table.set(wasm_table_index, f);

View file

@@ -26,7 +26,8 @@ struct code_cache jit_cache_arr[WASM_TABLE_SIZE] = {
.opcode = {0},
.len = 0,
#endif
.group_status = 0,
.next_index_same_page = 0,
.wasm_table_index = 0,
.initial_state = 0,
.state_flags = 0,
@@ -38,7 +39,8 @@ uint64_t tsc_offset = 0;
uint32_t jit_block_boundary = 0;
int32_t hot_code_addresses[HASH_PRIME] = {0};
uint32_t group_dirtiness[GROUP_DIRTINESS_LENGTH] = {0};
int32_t page_first_jit_cache_entry[GROUP_DIRTINESS_LENGTH] = {0};
uint16_t wasm_table_index_free_list[0x10000] = { 0 };
int32_t wasm_table_index_free_list_count = 0;
@@ -630,9 +632,7 @@ static struct code_cache* create_cache_entry(uint32_t phys_addr)
uint16_t addr_index = (phys_addr + i) & JIT_PHYS_MASK;
struct code_cache* entry = &jit_cache_arr[addr_index];
uint32_t page_dirtiness = group_dirtiness[entry->start_addr >> DIRTY_ARR_SHIFT];
if(!entry->start_addr || entry->group_status != page_dirtiness)
if(!entry->start_addr)
{
if(i > 0)
{
@@ -640,6 +640,22 @@ static struct code_cache* create_cache_entry(uint32_t phys_addr)
phys_addr, jit_cache_arr[addr_index - 1].start_addr);
}
uint32_t page = phys_addr >> 12;
int32_t previous_entry_index = page_first_jit_cache_entry[page];
if(previous_entry_index != JIT_CACHE_ARRAY_NO_NEXT_ENTRY)
{
struct code_cache* previous_entry = &jit_cache_arr[previous_entry_index];
if(previous_entry->start_addr)
{
assert(same_page(previous_entry->start_addr, phys_addr));
}
}
page_first_jit_cache_entry[page] = addr_index;
entry->next_index_same_page = previous_entry_index;
return entry;
}
}
@@ -648,6 +664,8 @@ static struct code_cache* create_cache_entry(uint32_t phys_addr)
uint16_t addr_index = phys_addr & JIT_PHYS_MASK;
struct code_cache* entry = &jit_cache_arr[addr_index];
// TODO: Free wasm table index
profiler_stat_increment(S_CACHE_MISMATCH);
return entry;
}
@@ -657,7 +675,7 @@ static bool is_near_end_of_page(uint32_t addr)
return (addr & 0xFFF) >= (0x1000 - 16);
}
static bool same_page(int32_t addr1, int32_t addr2)
bool same_page(int32_t addr1, int32_t addr2)
{
return (addr1 & ~0xFFF) == (addr2 & ~0xFFF);
}
@@ -766,9 +784,8 @@ static void jit_generate_basic_block(int32_t start_addr, int32_t stop_addr)
void codegen_finalize_finished(
int32_t wasm_table_index, uint32_t phys_addr, uint32_t end_addr,
int32_t first_opcode, cached_state_flags state_flags, uint32_t page_dirtiness)
int32_t first_opcode, cached_state_flags state_flags)
{
if(page_dirtiness == group_dirtiness[phys_addr >> DIRTY_ARR_SHIFT])
{
// XXX: Avoid looping through entire table here (requires different data structure)
@@ -788,20 +805,14 @@ void codegen_finalize_finished(
// sanity check that we're looking at the right entry
assert(entry->pending);
assert(entry->group_status == page_dirtiness);
assert(entry->start_addr == phys_addr);
assert(entry->state_flags == state_flags);
#endif
UNUSED(page_dirtiness);
UNUSED(phys_addr);
UNUSED(state_flags);
UNUSED(end_addr);
UNUSED(first_opcode);
}
else
{
// the page has been written, drop this entry
}
}
static struct code_cache* find_cache_entry(uint32_t phys_addr)
@@ -823,29 +834,6 @@ static struct code_cache* find_cache_entry(uint32_t phys_addr)
return NULL;
}
struct code_cache* find_link_block_target(int32_t target)
{
int32_t eip = *previous_ip;
if(same_page(eip, target))
{
assert((eip & ~0xFFF) == *last_virt_eip);
assert((target & ~0xFFF) == *last_virt_eip);
uint32_t phys_target = *eip_phys ^ target;
struct code_cache* entry = find_cache_entry(phys_target);
if(entry &&
!entry->pending &&
entry->group_status == group_dirtiness[entry->start_addr >> DIRTY_ARR_SHIFT])
{
return entry;
}
}
return NULL;
}
void jit_link_block_conditional(int32_t offset, const char* condition)
{
// Note: block linking cannot rely on the absolute value of eip, as blocks
@@ -1232,7 +1220,7 @@ static void jit_find_basic_blocks()
}
}
static void jit_generate(uint32_t phys_addr, uint32_t page_dirtiness)
static void jit_generate(uint32_t phys_addr)
{
profiler_stat_increment(S_COMPILE);
profiler_start(P_GEN_INSTR);
@@ -1431,7 +1419,6 @@ static void jit_generate(uint32_t phys_addr, uint32_t page_dirtiness)
entry->start_addr = phys_addr;
entry->state_flags = state_flags;
entry->group_status = page_dirtiness;
entry->pending = true;
entry->initial_state = i;
entry->wasm_table_index = wasm_table_index;
@@ -1455,7 +1442,7 @@ static void jit_generate(uint32_t phys_addr, uint32_t page_dirtiness)
// will call codegen_finalize_finished asynchronously when finished
codegen_finalize(
wasm_table_index, phys_addr, end_addr,
first_opcode, state_flags, page_dirtiness);
first_opcode, state_flags);
profiler_stat_increment(S_COMPILE_SUCCESS);
profiler_end(P_GEN_INSTR);
@@ -1468,8 +1455,7 @@ static void jit_generate(uint32_t phys_addr, uint32_t page_dirtiness)
void jit_force_generate_unsafe(uint32_t phys_addr)
{
*instruction_pointer = phys_addr;
uint32_t page_dirtiness = group_dirtiness[phys_addr >> DIRTY_ARR_SHIFT];
jit_generate(phys_addr, page_dirtiness);
jit_generate(phys_addr);
}
#endif
@@ -1482,36 +1468,25 @@ void cycle_internal()
struct code_cache* entry = find_cache_entry(phys_addr);
uint32_t page_dirtiness = group_dirtiness[phys_addr >> DIRTY_ARR_SHIFT];
const bool JIT_COMPILE_ONLY_AFTER_BLOCK_BOUNDARY = true;
if(entry && entry->group_status == page_dirtiness && !entry->pending)
if(entry && !entry->pending)
{
profiler_start(P_RUN_FROM_CACHE);
profiler_stat_increment(S_RUN_FROM_CACHE);
//assert(entry->opcode[0] == read8(phys_addr));
uint32_t old_group_status = entry->group_status;
uint32_t old_start_address = entry->start_addr;
uint32_t old_group_dirtiness = group_dirtiness[entry->start_addr >> DIRTY_ARR_SHIFT];
assert(old_group_status == old_group_dirtiness);
uint16_t wasm_table_index = entry->wasm_table_index;
uint16_t initial_state = entry->initial_state;
call_indirect1(wasm_table_index, initial_state);
// These shouldn't fail
assert(entry->group_status == old_group_status);
assert(entry->start_addr == old_start_address);
// XXX: New clearing: This should fail on self-modifying code
//assert(entry->start_addr == old_start_address);
// JIT compiled self-modifying code may trigger this assert
//assert(old_group_dirtiness == group_dirtiness[entry->start_addr >> DIRTY_ARR_SHIFT]);
UNUSED(old_group_status);
UNUSED(old_start_address);
UNUSED(old_group_dirtiness);
profiler_end(P_RUN_FROM_CACHE);
}
@@ -1520,16 +1495,14 @@ void cycle_internal()
bool did_block_boundary = !JIT_COMPILE_ONLY_AFTER_BLOCK_BOUNDARY || jit_block_boundary;
const int32_t address_hash = jit_hot_hash(phys_addr);
// exists | pending | written -> should generate
// -------+---------+---------++---------------------
// 0 | x | x -> yes
// 1 | 0 | 0 -> impossible (handled above)
// 1 | 1 | 0 -> no
// 1 | 0 | 1 -> yes
// 1 | 1 | 1 -> yes
// exists | pending -> should generate
// -------+---------++---------------------
// 0 | x -> yes
// 1 | 0 -> impossible (handled above)
// 1 | 1 -> no
if(
(!entry || entry->group_status != page_dirtiness) &&
!entry &&
!is_near_end_of_page(phys_addr) && (
ENABLE_JIT_ALWAYS ||
(did_block_boundary && ++hot_code_addresses[address_hash] > JIT_THRESHOLD)
@@ -1539,7 +1512,7 @@ void cycle_internal()
// don't immediately retry to compile
hot_code_addresses[address_hash] = 0;
jit_generate(phys_addr, page_dirtiness);
jit_generate(phys_addr);
}
else
{

View file

@@ -45,10 +45,9 @@ struct code_cache {
int32_t opcode[1];
int32_t len;
#endif
// Cleanliness status of the entry's "group" (based on
// DIRTY_ARR_SHIFT). Value only has meaning in relation with the
// group_dirtiness value.
uint32_t group_status;
// an index into jit_cache_arr for the next code_cache entry within the same physical page
int32_t next_index_same_page;
uint16_t wasm_table_index;
uint16_t initial_state;
@@ -99,9 +98,11 @@ struct basic_block_list {
// Count of how many times prime_hash(address) has been called through a jump
extern int32_t hot_code_addresses[HASH_PRIME];
// An array indicating the current "initial group status" for entries that map
// to the same group due to the shift
extern uint32_t group_dirtiness[GROUP_DIRTINESS_LENGTH];
// A mapping from physical page to index into jit_cache_arr
int32_t page_first_jit_cache_entry[GROUP_DIRTINESS_LENGTH];
#define JIT_CACHE_ARRAY_NO_NEXT_ENTRY (-1)
uint16_t wasm_table_index_free_list[0x10000];
int32_t wasm_table_index_free_list_count;
@@ -128,6 +129,8 @@ void diverged(void);
void branch_taken(void);
void branch_not_taken(void);
bool same_page(int32_t, int32_t);
int32_t get_eflags(void);
uint32_t translate_address_read(int32_t address);
uint32_t translate_address_write(int32_t address);

View file

@@ -25,7 +25,7 @@ extern int32_t set_cr0(int32_t);
extern int32_t verr(int32_t);
extern int32_t verw(int32_t);
extern void codegen_finalize(int32_t, int32_t, int32_t, int32_t, int32_t, int32_t);
extern void codegen_finalize(int32_t, int32_t, int32_t, int32_t, int32_t);
extern void log_uncompiled_code(int32_t, int32_t);
extern void dump_function_code(const struct basic_block* basic_block, int32_t basic_block_count, int32_t end);

View file

@@ -14,6 +14,32 @@ bool in_mapped_range(uint32_t addr)
return (addr >= 0xA0000 && addr < 0xC0000) || addr >= *memory_size;
}
void jit_dirty_index(uint32_t index)
{
int32_t cache_array_index = page_first_jit_cache_entry[index];
if(cache_array_index != JIT_CACHE_ARRAY_NO_NEXT_ENTRY)
{
page_first_jit_cache_entry[index] = JIT_CACHE_ARRAY_NO_NEXT_ENTRY;
do
{
struct code_cache* entry = &jit_cache_arr[cache_array_index];
assert(same_page(index << DIRTY_ARR_SHIFT, entry->start_addr));
entry->start_addr = 0;
entry->wasm_table_index = 0;
// TODO: Free wasm table index
cache_array_index = entry->next_index_same_page;
entry->next_index_same_page = 0;
}
while(cache_array_index != JIT_CACHE_ARRAY_NO_NEXT_ENTRY);
}
}
/*
* There are 3 primary ways a cached basic block will be dirtied:
* 1. A write dirties basic block A independently (A is clean and
@@ -30,13 +56,9 @@ void jit_dirty_cache(uint32_t start_addr, uint32_t end_addr)
assert(start_addr <= end_addr);
for(uint32_t i = start_addr; i < end_addr; i++)
{
uint32_t idx = i >> DIRTY_ARR_SHIFT;
// XXX: Overflow _can_ cause a stale cache (with
// group_status=0) to be mistakenly run, but the odds are low
// since it depends on a compiled block never being
// re-compiled or evicted for 2^32 times that
// another block in its group is dirtied
group_dirtiness[idx]++;
uint32_t index = i >> DIRTY_ARR_SHIFT;
// XXX: Should only call once per index
jit_dirty_index(index);
}
#endif
}
@@ -49,14 +71,14 @@ void jit_dirty_cache_small(uint32_t start_addr, uint32_t end_addr)
uint32_t start_index = start_addr >> DIRTY_ARR_SHIFT;
uint32_t end_index = (end_addr - 1) >> DIRTY_ARR_SHIFT;
group_dirtiness[start_index]++;
jit_dirty_index(start_index);
// Note: This can't happen when paging is enabled, as writes across
// boundaries are split up on two pages
if(start_index != end_index)
{
assert(end_index == start_index + 1);
group_dirtiness[end_index]++;
jit_dirty_index(end_index);
}
#endif
}
@@ -66,7 +88,7 @@ void jit_dirty_cache_single(uint32_t addr)
#if ENABLE_JIT
uint32_t index = addr >> DIRTY_ARR_SHIFT;
group_dirtiness[index]++;
jit_dirty_index(index);
#endif
}
@@ -75,6 +97,13 @@ void jit_empty_cache()
for(int32_t i = 0; i < WASM_TABLE_SIZE; i++)
{
jit_cache_arr[i].start_addr = 0;
jit_cache_arr[i].next_index_same_page = JIT_CACHE_ARRAY_NO_NEXT_ENTRY;
jit_cache_arr[i].wasm_table_index = 0;
}
for(int32_t i = 0; i < GROUP_DIRTINESS_LENGTH; i++)
{
page_first_jit_cache_entry[i] = JIT_CACHE_ARRAY_NO_NEXT_ENTRY;
}
for(int32_t i = 0; i < 0xFFFF; i++)
@@ -88,20 +117,7 @@ void jit_empty_cache()
int32_t jit_invalid_cache_stat()
{
int32_t count = 0;
for(int32_t i = 0; i < WASM_TABLE_SIZE; i++)
{
struct code_cache* entry = &jit_cache_arr[i];
int32_t phys_addr = entry->start_addr;
if(phys_addr != 0 && entry->group_status != group_dirtiness[phys_addr >> DIRTY_ARR_SHIFT])
{
count++;
}
}
return count;
return 0; // XXX: This stat doesn't make sense anymore after immediate cleaning
}
int32_t jit_unused_cache_stat()