Record more stats, improve some existing stats

- How often do_many_cycles is run and how much time it takes per call
- How many entries are currently being compiled ("pending")
- Relative frequencies of opcodes
- Reason for interpreting code (not hot, at end of page, pending or no
  block boundary)
- Number of compiled basic blocks and entry blocks
- How many pages and entries are invalidated
- How often cycle_internal, do_many_cycles and do_run are called
This commit is contained in:
Fabian 2018-05-06 19:13:00 -05:00
parent 617d32f238
commit df6bc4fd28
7 changed files with 113 additions and 3 deletions

View file

@ -23,9 +23,21 @@ const print_stats = {
const stat_names = [
"COMPILE",
"COMPILE_SUCCESS",
"COMPILE_CUT_OFF_AT_END_OF_PAGE",
"COMPILE_BASIC_BLOCK",
"COMPILE_ENTRY_POINT",
"RUN_INTERPRETED",
"RUN_INTERPRETED_PENDING",
"RUN_INTERPRETED_NEAR_END_OF_PAGE",
"RUN_INTERPRETED_NO_BLOCK_BOUNDARY",
"RUN_INTERPRETED_NOT_HOT",
"RUN_FROM_CACHE",
"CACHE_MISMATCH",
"DO_RUN",
"DO_MANY_CYCLES",
"CYCLE_INTERNAL",
"INVALIDATE_PAGE",
"INVALIDATE_CACHE_ENTRY",
"NONFAULTING_OPTIMIZATION",
"CLEAR_TLB",
"FULL_CLEAR_TLB",
@ -48,6 +60,11 @@ const print_stats = {
let stat = cpu.wm.exports["_profiler_stat_get"](i);
stat = stat >= 100e6 ? Math.round(stat / 1e6) + "m" : stat >= 100e3 ? Math.round(stat / 1e3) + "k" : stat;
text += stat_names[i] + "=" + stat + " ";
if(i % Math.floor(stat_names.length / 3) === Math.floor(stat_names.length / 3) - 1)
{
text += "\n";
}
}
text += "\n";
@ -55,6 +72,8 @@ const print_stats = {
text += "CACHE_UNUSED=" + cpu.wm.exports["_jit_unused_cache_stat"]() + "\n";
text += "WASM_TABLE_FREE=" + cpu.wm.exports["_get_wasm_table_index_free_list_count"]() + "\n";
text += "do_many_cycles avg: " + do_many_cycles_total / do_many_cycles_count + "\n";
return text;
},
@ -96,11 +115,13 @@ const print_stats = {
print_wasm_basic_block_count_histogram: function(cpu)
{
let text = "";
let pending_count = 0;
const histogram = Object.create(null);
for(let i = 0; i < 0x10000; i++)
{
const length = cpu.wm.exports["_jit_get_entry_length"](i);
pending_count += cpu.wm.exports["_jit_get_entry_pending"](i);
histogram[length] = (histogram[length] || 0) + 1;
}
@ -122,6 +143,8 @@ const print_stats = {
text += "32+:" + above + "\n";
text += "Pending: " + pending_count + "\n";
return text;
},
@ -146,6 +169,17 @@ const print_stats = {
const pad_length = String(max_count).length;
text += "Instruction counts (in 1000):\n";
let total = 0;
const prefixes = new Set([
0x26, 0x2E, 0x36, 0x3E,
0x64, 0x65, 0x66, 0x67,
0xF0, 0xF2, 0xF3,
]);
for(let [i, count] of counts)
{
total += i < 0x100 && !prefixes.has(i) ? count : 0;
}
text += "Total: " + total + "\n";
for(let [i, count] of counts)
{
@ -181,7 +215,7 @@ const print_stats = {
for(let [opcode, count] of top_counts.slice(0, 100))
{
text += opcode.toString(16) + ":" + count + " ";
text += opcode.toString(16) + ":" + (count / total * 100).toFixed(1) + " ";
}
text += "\n";

View file

@ -9,6 +9,9 @@
*/
var DEBUG = true;
/** @const */
var ENABLE_PROFILER = true;
/** @const */
var LOG_TO_FILE = false;

View file

@ -1141,6 +1141,8 @@ CPU.prototype.load_bios = function()
CPU.prototype.do_run = function()
{
this.wm.exports["_profiler_stat_increment_do_run"]();
// Idle time is when no instructions are being executed
this.wm.exports["_profiler_end"](P_IDLE);
@ -1170,12 +1172,20 @@ CPU.prototype.do_run = function()
this.wm.exports["_profiler_start"](P_IDLE);
};
let do_many_cycles_count = 0;
let do_many_cycles_total = 0;
CPU.prototype.do_many_cycles = function()
{
// Capture the total time we were executing instructions
this.wm.exports["_profiler_start"](P_DO_MANY_CYCLES);
this.coverage_logger.log_start();
if(ENABLE_PROFILER)
{
var start_time = v86.microtick();
}
try {
this.do_many_cycles_unsafe();
}
@ -1184,6 +1194,12 @@ CPU.prototype.do_many_cycles = function()
this.exception_cleanup(e);
}
if(ENABLE_PROFILER)
{
do_many_cycles_total += v86.microtick() - start_time;
do_many_cycles_count++;
}
this.coverage_logger.log_end();
this.wm.exports["_profiler_end"](P_DO_MANY_CYCLES);
this.wm.exports["_profiler_end"](P_GEN_INSTR);

View file

@ -1176,6 +1176,7 @@ static void jit_find_basic_blocks()
current_block->next_block_addr = 0;
current_block->end_addr = *instruction_pointer;
current_block->condition_index = -1;
profiler_stat_increment(S_COMPILE_CUT_OFF_AT_END_OF_PAGE);
break;
}
@ -1518,6 +1519,8 @@ static void jit_generate(uint32_t phys_addr)
{
struct basic_block* block = &basic_blocks.blocks[i];
profiler_stat_increment(S_COMPILE_BASIC_BLOCK);
if(block->is_entry_block && block->addr != block->end_addr)
{
uint32_t phys_addr = translate_address_read(block->addr);
@ -1537,6 +1540,7 @@ static void jit_generate(uint32_t phys_addr)
#endif
entry_point_count++;
profiler_stat_increment(S_COMPILE_ENTRY_POINT);
}
}
@ -1568,6 +1572,7 @@ void jit_force_generate_unsafe(uint32_t phys_addr)
void cycle_internal()
{
profiler_stat_increment(S_CYCLE_INTERNAL);
#if ENABLE_JIT
*previous_ip = *instruction_pointer;
@ -1624,6 +1629,24 @@ void cycle_internal()
}
else
{
if(entry)
{
assert(entry->pending);
profiler_stat_increment(S_RUN_INTERPRETED_PENDING);
}
else if(is_near_end_of_page(phys_addr))
{
profiler_stat_increment(S_RUN_INTERPRETED_NEAR_END_OF_PAGE);
}
else if(!did_block_boundary)
{
profiler_stat_increment(S_RUN_INTERPRETED_NO_BLOCK_BOUNDARY);
}
else
{
profiler_stat_increment(S_RUN_INTERPRETED_NOT_HOT);
}
jit_run_interpreted(phys_addr);
}
}
@ -1681,6 +1704,8 @@ jit_instr_flags segment_prefix_op_jit(int32_t seg)
void do_many_cycles_unsafe()
{
profiler_stat_increment(S_DO_MANY_CYCLES);
#if 0
for(int32_t k = 0; k < LOOP_COUNTER; k++)
#else

View file

@ -8,6 +8,7 @@
#include "js_imports.h"
#include "log.h"
#include "memory.h"
#include "profiler/profiler.h"
bool in_mapped_range(uint32_t addr)
{
@ -21,9 +22,11 @@ void jit_dirty_index(uint32_t index)
if(cache_array_index != JIT_CACHE_ARRAY_NO_NEXT_ENTRY)
{
page_first_jit_cache_entry[index] = JIT_CACHE_ARRAY_NO_NEXT_ENTRY;
profiler_stat_increment(S_INVALIDATE_PAGE);
do
{
profiler_stat_increment(S_INVALIDATE_CACHE_ENTRY);
struct code_cache* entry = &jit_cache_arr[cache_array_index];
assert(same_page(index << DIRTY_ARR_SHIFT, entry->start_addr));
@ -150,6 +153,12 @@ int32_t jit_get_entry_address(int32_t i)
return jit_cache_arr[i].start_addr;
}
int32_t jit_get_entry_pending(int32_t i)
{
assert(i >= 0 && i < JIT_CACHE_ARRAY_SIZE);
return jit_cache_arr[i].pending;
}
int32_t read8(uint32_t addr)
{
if(USE_A20 && *a20_enabled) addr &= A20_MASK;

View file

@ -101,6 +101,12 @@ void profiler_stat_increment(enum stat_name stat)
profiler_stat_arr[stat].count++;
}
// to be called from JS
void profiler_stat_increment_do_run()
{
profiler_stat_increment(S_DO_RUN);
}
int32_t profiler_stat_get(enum stat_name stat)
{
return profiler_stat_arr[stat].count;
@ -116,6 +122,7 @@ void profiler_print(void) {}
int32_t profiler_get_time(enum profile_name name) { UNUSED(name); return 0; }
int32_t profiler_get_total(void) { return 0; }
void profiler_stat_increment(enum stat_name stat) { UNUSED(stat); }
void profiler_stat_increment_do_run() {}
int32_t profiler_stat_get(enum stat_name stat) { UNUSED(stat); return 0; }

View file

@ -22,14 +22,29 @@ enum profile_name {
};
#define PROFILER_STAT_COUNT 13
enum stat_name {
S_COMPILE,
S_COMPILE_SUCCESS,
S_COMPILE_CUT_OFF_AT_END_OF_PAGE,
S_COMPILE_BASIC_BLOCK,
S_COMPILE_ENTRY_POINT,
S_RUN_INTERPRETED,
S_RUN_INTERPRETED_PENDING,
S_RUN_INTERPRETED_NEAR_END_OF_PAGE,
S_RUN_INTERPRETED_NO_BLOCK_BOUNDARY,
S_RUN_INTERPRETED_NOT_HOT,
S_RUN_FROM_CACHE,
S_CACHE_MISMATCH,
S_DO_RUN,
S_DO_MANY_CYCLES,
S_CYCLE_INTERNAL,
S_INVALIDATE_PAGE,
S_INVALIDATE_CACHE_ENTRY,
S_NONFAULTING_OPTIMIZATION,
S_CLEAR_TLB,
@ -37,6 +52,7 @@ enum stat_name {
S_TLB_FULL,
S_TLB_GLOBAL_FULL,
};
#define PROFILER_STAT_COUNT (S_TLB_GLOBAL_FULL - S_COMPILE + 1)
struct profiler_stat {
int32_t count;