Record more stats, improve some existing stats
- How often do_many_cycles is run and how much time it takes per call - How many entries are currently being compiled ("pending") - Relative frequencies of opcodes - Reason for interpreting code (not hot, at end of page, pending or no block boundary) - Number of compiled basic blocks and entry blocks - How many pages and entries are invalidated - How often cycle_internal, do_many_cycles and do_run are called
This commit is contained in:
parent
617d32f238
commit
df6bc4fd28
|
@ -23,9 +23,21 @@ const print_stats = {
|
|||
const stat_names = [
|
||||
"COMPILE",
|
||||
"COMPILE_SUCCESS",
|
||||
"COMPILE_CUT_OFF_AT_END_OF_PAGE",
|
||||
"COMPILE_BASIC_BLOCK",
|
||||
"COMPILE_ENTRY_POINT",
|
||||
"RUN_INTERPRETED",
|
||||
"RUN_INTERPRETED_PENDING",
|
||||
"RUN_INTERPRETED_NEAR_END_OF_PAGE",
|
||||
"RUN_INTERPRETED_NO_BLOCK_BOUNDARY",
|
||||
"RUN_INTERPRETED_NOT_HOT",
|
||||
"RUN_FROM_CACHE",
|
||||
"CACHE_MISMATCH",
|
||||
"DO_RUN",
|
||||
"DO_MANY_CYCLES",
|
||||
"CYCLE_INTERNAL",
|
||||
"INVALIDATE_PAGE",
|
||||
"INVALIDATE_CACHE_ENTRY",
|
||||
"NONFAULTING_OPTIMIZATION",
|
||||
"CLEAR_TLB",
|
||||
"FULL_CLEAR_TLB",
|
||||
|
@ -48,6 +60,11 @@ const print_stats = {
|
|||
let stat = cpu.wm.exports["_profiler_stat_get"](i);
|
||||
stat = stat >= 100e6 ? Math.round(stat / 1e6) + "m" : stat >= 100e3 ? Math.round(stat / 1e3) + "k" : stat;
|
||||
text += stat_names[i] + "=" + stat + " ";
|
||||
|
||||
if(i % Math.floor(stat_names.length / 3) === Math.floor(stat_names.length / 3) - 1)
|
||||
{
|
||||
text += "\n";
|
||||
}
|
||||
}
|
||||
|
||||
text += "\n";
|
||||
|
@ -55,6 +72,8 @@ const print_stats = {
|
|||
text += "CACHE_UNUSED=" + cpu.wm.exports["_jit_unused_cache_stat"]() + "\n";
|
||||
text += "WASM_TABLE_FREE=" + cpu.wm.exports["_get_wasm_table_index_free_list_count"]() + "\n";
|
||||
|
||||
text += "do_many_cycles avg: " + do_many_cycles_total / do_many_cycles_count + "\n";
|
||||
|
||||
return text;
|
||||
},
|
||||
|
||||
|
@ -96,11 +115,13 @@ const print_stats = {
|
|||
print_wasm_basic_block_count_histogram: function(cpu)
|
||||
{
|
||||
let text = "";
|
||||
let pending_count = 0;
|
||||
const histogram = Object.create(null);
|
||||
|
||||
for(let i = 0; i < 0x10000; i++)
|
||||
{
|
||||
const length = cpu.wm.exports["_jit_get_entry_length"](i);
|
||||
pending_count += cpu.wm.exports["_jit_get_entry_pending"](i);
|
||||
histogram[length] = (histogram[length] || 0) + 1;
|
||||
}
|
||||
|
||||
|
@ -122,6 +143,8 @@ const print_stats = {
|
|||
|
||||
text += "32+:" + above + "\n";
|
||||
|
||||
text += "Pending: " + pending_count + "\n";
|
||||
|
||||
return text;
|
||||
},
|
||||
|
||||
|
@ -146,6 +169,17 @@ const print_stats = {
|
|||
const pad_length = String(max_count).length;
|
||||
|
||||
text += "Instruction counts (in 1000):\n";
|
||||
let total = 0;
|
||||
const prefixes = new Set([
|
||||
0x26, 0x2E, 0x36, 0x3E,
|
||||
0x64, 0x65, 0x66, 0x67,
|
||||
0xF0, 0xF2, 0xF3,
|
||||
]);
|
||||
for(let [i, count] of counts)
|
||||
{
|
||||
total += i < 0x100 && !prefixes.has(i) ? count : 0;
|
||||
}
|
||||
text += "Total: " + total + "\n";
|
||||
|
||||
for(let [i, count] of counts)
|
||||
{
|
||||
|
@ -181,7 +215,7 @@ const print_stats = {
|
|||
|
||||
for(let [opcode, count] of top_counts.slice(0, 100))
|
||||
{
|
||||
text += opcode.toString(16) + ":" + count + " ";
|
||||
text += opcode.toString(16) + ":" + (count / total * 100).toFixed(1) + " ";
|
||||
}
|
||||
text += "\n";
|
||||
|
||||
|
|
|
@ -9,6 +9,9 @@
|
|||
*/
|
||||
var DEBUG = true;
|
||||
|
||||
/** @const */
|
||||
var ENABLE_PROFILER = true;
|
||||
|
||||
/** @const */
|
||||
var LOG_TO_FILE = false;
|
||||
|
||||
|
|
16
src/cpu.js
16
src/cpu.js
|
@ -1141,6 +1141,8 @@ CPU.prototype.load_bios = function()
|
|||
|
||||
CPU.prototype.do_run = function()
|
||||
{
|
||||
this.wm.exports["_profiler_stat_increment_do_run"]();
|
||||
|
||||
// Idle time is when no instructions are being executed
|
||||
this.wm.exports["_profiler_end"](P_IDLE);
|
||||
|
||||
|
@ -1170,12 +1172,20 @@ CPU.prototype.do_run = function()
|
|||
this.wm.exports["_profiler_start"](P_IDLE);
|
||||
};
|
||||
|
||||
let do_many_cycles_count = 0;
|
||||
let do_many_cycles_total = 0;
|
||||
|
||||
CPU.prototype.do_many_cycles = function()
|
||||
{
|
||||
// Capture the total time we were executing instructions
|
||||
this.wm.exports["_profiler_start"](P_DO_MANY_CYCLES);
|
||||
this.coverage_logger.log_start();
|
||||
|
||||
if(ENABLE_PROFILER)
|
||||
{
|
||||
var start_time = v86.microtick();
|
||||
}
|
||||
|
||||
try {
|
||||
this.do_many_cycles_unsafe();
|
||||
}
|
||||
|
@ -1184,6 +1194,12 @@ CPU.prototype.do_many_cycles = function()
|
|||
this.exception_cleanup(e);
|
||||
}
|
||||
|
||||
if(ENABLE_PROFILER)
|
||||
{
|
||||
do_many_cycles_total += v86.microtick() - start_time;
|
||||
do_many_cycles_count++;
|
||||
}
|
||||
|
||||
this.coverage_logger.log_end();
|
||||
this.wm.exports["_profiler_end"](P_DO_MANY_CYCLES);
|
||||
this.wm.exports["_profiler_end"](P_GEN_INSTR);
|
||||
|
|
|
@ -1176,6 +1176,7 @@ static void jit_find_basic_blocks()
|
|||
current_block->next_block_addr = 0;
|
||||
current_block->end_addr = *instruction_pointer;
|
||||
current_block->condition_index = -1;
|
||||
profiler_stat_increment(S_COMPILE_CUT_OFF_AT_END_OF_PAGE);
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -1518,6 +1519,8 @@ static void jit_generate(uint32_t phys_addr)
|
|||
{
|
||||
struct basic_block* block = &basic_blocks.blocks[i];
|
||||
|
||||
profiler_stat_increment(S_COMPILE_BASIC_BLOCK);
|
||||
|
||||
if(block->is_entry_block && block->addr != block->end_addr)
|
||||
{
|
||||
uint32_t phys_addr = translate_address_read(block->addr);
|
||||
|
@ -1537,6 +1540,7 @@ static void jit_generate(uint32_t phys_addr)
|
|||
#endif
|
||||
|
||||
entry_point_count++;
|
||||
profiler_stat_increment(S_COMPILE_ENTRY_POINT);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1568,6 +1572,7 @@ void jit_force_generate_unsafe(uint32_t phys_addr)
|
|||
|
||||
void cycle_internal()
|
||||
{
|
||||
profiler_stat_increment(S_CYCLE_INTERNAL);
|
||||
#if ENABLE_JIT
|
||||
|
||||
*previous_ip = *instruction_pointer;
|
||||
|
@ -1624,6 +1629,24 @@ void cycle_internal()
|
|||
}
|
||||
else
|
||||
{
|
||||
if(entry)
|
||||
{
|
||||
assert(entry->pending);
|
||||
profiler_stat_increment(S_RUN_INTERPRETED_PENDING);
|
||||
}
|
||||
else if(is_near_end_of_page(phys_addr))
|
||||
{
|
||||
profiler_stat_increment(S_RUN_INTERPRETED_NEAR_END_OF_PAGE);
|
||||
}
|
||||
else if(!did_block_boundary)
|
||||
{
|
||||
profiler_stat_increment(S_RUN_INTERPRETED_NO_BLOCK_BOUNDARY);
|
||||
}
|
||||
else
|
||||
{
|
||||
profiler_stat_increment(S_RUN_INTERPRETED_NOT_HOT);
|
||||
}
|
||||
|
||||
jit_run_interpreted(phys_addr);
|
||||
}
|
||||
}
|
||||
|
@ -1681,6 +1704,8 @@ jit_instr_flags segment_prefix_op_jit(int32_t seg)
|
|||
|
||||
void do_many_cycles_unsafe()
|
||||
{
|
||||
profiler_stat_increment(S_DO_MANY_CYCLES);
|
||||
|
||||
#if 0
|
||||
for(int32_t k = 0; k < LOOP_COUNTER; k++)
|
||||
#else
|
||||
|
|
|
@ -8,6 +8,7 @@
|
|||
#include "js_imports.h"
|
||||
#include "log.h"
|
||||
#include "memory.h"
|
||||
#include "profiler/profiler.h"
|
||||
|
||||
bool in_mapped_range(uint32_t addr)
|
||||
{
|
||||
|
@ -21,9 +22,11 @@ void jit_dirty_index(uint32_t index)
|
|||
if(cache_array_index != JIT_CACHE_ARRAY_NO_NEXT_ENTRY)
|
||||
{
|
||||
page_first_jit_cache_entry[index] = JIT_CACHE_ARRAY_NO_NEXT_ENTRY;
|
||||
profiler_stat_increment(S_INVALIDATE_PAGE);
|
||||
|
||||
do
|
||||
{
|
||||
profiler_stat_increment(S_INVALIDATE_CACHE_ENTRY);
|
||||
struct code_cache* entry = &jit_cache_arr[cache_array_index];
|
||||
|
||||
assert(same_page(index << DIRTY_ARR_SHIFT, entry->start_addr));
|
||||
|
@ -150,6 +153,12 @@ int32_t jit_get_entry_address(int32_t i)
|
|||
return jit_cache_arr[i].start_addr;
|
||||
}
|
||||
|
||||
int32_t jit_get_entry_pending(int32_t i)
|
||||
{
|
||||
assert(i >= 0 && i < JIT_CACHE_ARRAY_SIZE);
|
||||
return jit_cache_arr[i].pending;
|
||||
}
|
||||
|
||||
int32_t read8(uint32_t addr)
|
||||
{
|
||||
if(USE_A20 && *a20_enabled) addr &= A20_MASK;
|
||||
|
|
|
@ -101,6 +101,12 @@ void profiler_stat_increment(enum stat_name stat)
|
|||
profiler_stat_arr[stat].count++;
|
||||
}
|
||||
|
||||
// to be called from JS
|
||||
void profiler_stat_increment_do_run()
|
||||
{
|
||||
profiler_stat_increment(S_DO_RUN);
|
||||
}
|
||||
|
||||
int32_t profiler_stat_get(enum stat_name stat)
|
||||
{
|
||||
return profiler_stat_arr[stat].count;
|
||||
|
@ -116,6 +122,7 @@ void profiler_print(void) {}
|
|||
int32_t profiler_get_time(enum profile_name name) { UNUSED(name); return 0; }
|
||||
int32_t profiler_get_total(void) { return 0; }
|
||||
void profiler_stat_increment(enum stat_name stat) { UNUSED(stat); }
|
||||
void profiler_stat_increment_do_run() {}
|
||||
int32_t profiler_stat_get(enum stat_name stat) { UNUSED(stat); return 0; }
|
||||
|
||||
|
||||
|
|
|
@ -22,14 +22,29 @@ enum profile_name {
|
|||
};
|
||||
|
||||
|
||||
#define PROFILER_STAT_COUNT 13
|
||||
|
||||
enum stat_name {
|
||||
S_COMPILE,
|
||||
S_COMPILE_SUCCESS,
|
||||
S_COMPILE_CUT_OFF_AT_END_OF_PAGE,
|
||||
S_COMPILE_BASIC_BLOCK,
|
||||
S_COMPILE_ENTRY_POINT,
|
||||
|
||||
S_RUN_INTERPRETED,
|
||||
S_RUN_INTERPRETED_PENDING,
|
||||
S_RUN_INTERPRETED_NEAR_END_OF_PAGE,
|
||||
S_RUN_INTERPRETED_NO_BLOCK_BOUNDARY,
|
||||
S_RUN_INTERPRETED_NOT_HOT,
|
||||
|
||||
S_RUN_FROM_CACHE,
|
||||
S_CACHE_MISMATCH,
|
||||
|
||||
S_DO_RUN,
|
||||
S_DO_MANY_CYCLES,
|
||||
S_CYCLE_INTERNAL,
|
||||
|
||||
S_INVALIDATE_PAGE,
|
||||
S_INVALIDATE_CACHE_ENTRY,
|
||||
|
||||
S_NONFAULTING_OPTIMIZATION,
|
||||
|
||||
S_CLEAR_TLB,
|
||||
|
@ -37,6 +52,7 @@ enum stat_name {
|
|||
S_TLB_FULL,
|
||||
S_TLB_GLOBAL_FULL,
|
||||
};
|
||||
#define PROFILER_STAT_COUNT (S_TLB_GLOBAL_FULL - S_COMPILE + 1)
|
||||
|
||||
struct profiler_stat {
|
||||
int32_t count;
|
||||
|
|
Loading…
Reference in a new issue