diff --git a/src/browser/print_stats.js b/src/browser/print_stats.js index 95050bf8..1356b6f9 100644 --- a/src/browser/print_stats.js +++ b/src/browser/print_stats.js @@ -23,9 +23,21 @@ const print_stats = { const stat_names = [ "COMPILE", "COMPILE_SUCCESS", + "COMPILE_CUT_OFF_AT_END_OF_PAGE", + "COMPILE_BASIC_BLOCK", + "COMPILE_ENTRY_POINT", "RUN_INTERPRETED", + "RUN_INTERPRETED_PENDING", + "RUN_INTERPRETED_NEAR_END_OF_PAGE", + "RUN_INTERPRETED_NO_BLOCK_BOUNDARY", + "RUN_INTERPRETED_NOT_HOT", "RUN_FROM_CACHE", "CACHE_MISMATCH", + "DO_RUN", + "DO_MANY_CYCLES", + "CYCLE_INTERNAL", + "INVALIDATE_PAGE", + "INVALIDATE_CACHE_ENTRY", "NONFAULTING_OPTIMIZATION", "CLEAR_TLB", "FULL_CLEAR_TLB", @@ -48,6 +60,11 @@ const print_stats = { let stat = cpu.wm.exports["_profiler_stat_get"](i); stat = stat >= 100e6 ? Math.round(stat / 1e6) + "m" : stat >= 100e3 ? Math.round(stat / 1e3) + "k" : stat; text += stat_names[i] + "=" + stat + " "; + + if(i % Math.floor(stat_names.length / 3) === Math.floor(stat_names.length / 3) - 1) + { + text += "\n"; + } } text += "\n"; @@ -55,6 +72,8 @@ const print_stats = { text += "CACHE_UNUSED=" + cpu.wm.exports["_jit_unused_cache_stat"]() + "\n"; text += "WASM_TABLE_FREE=" + cpu.wm.exports["_get_wasm_table_index_free_list_count"]() + "\n"; + text += "do_many_cycles avg: " + do_many_cycles_total / do_many_cycles_count + "\n"; + return text; }, @@ -96,11 +115,13 @@ const print_stats = { print_wasm_basic_block_count_histogram: function(cpu) { let text = ""; + let pending_count = 0; const histogram = Object.create(null); for(let i = 0; i < 0x10000; i++) { const length = cpu.wm.exports["_jit_get_entry_length"](i); + pending_count += cpu.wm.exports["_jit_get_entry_pending"](i); histogram[length] = (histogram[length] || 0) + 1; } @@ -122,6 +143,8 @@ const print_stats = { text += "32+:" + above + "\n"; + text += "Pending: " + pending_count + "\n"; + return text; }, @@ -146,6 +169,17 @@ const print_stats = { const pad_length = String(max_count).length; text += "Instruction counts (in 1000):\n"; + let total = 0; + const prefixes = new Set([ + 0x26, 0x2E, 0x36, 0x3E, + 0x64, 0x65, 0x66, 0x67, + 0xF0, 0xF2, 0xF3, + ]); + for(let [i, count] of counts) + { + total += i < 0x100 && !prefixes.has(i) ? count : 0; + } + text += "Total: " + total + "\n"; for(let [i, count] of counts) { @@ -181,7 +215,7 @@ const print_stats = { for(let [opcode, count] of top_counts.slice(0, 100)) { - text += opcode.toString(16) + ":" + count + " "; + text += opcode.toString(16) + ":" + (count / total * 100).toFixed(1) + " "; } text += "\n"; diff --git a/src/config.js b/src/config.js index 6019994c..4a5334d1 100644 --- a/src/config.js +++ b/src/config.js @@ -9,6 +9,9 @@ */ var DEBUG = true; +/** @const */ +var ENABLE_PROFILER = true; + /** @const */ var LOG_TO_FILE = false; diff --git a/src/cpu.js b/src/cpu.js index bdc947bf..7daad487 100644 --- a/src/cpu.js +++ b/src/cpu.js @@ -1141,6 +1141,8 @@ CPU.prototype.load_bios = function() CPU.prototype.do_run = function() { + this.wm.exports["_profiler_stat_increment_do_run"](); + // Idle time is when no instructions are being executed this.wm.exports["_profiler_end"](P_IDLE); @@ -1170,12 +1172,20 @@ CPU.prototype.do_run = function() this.wm.exports["_profiler_start"](P_IDLE); }; +let do_many_cycles_count = 0; +let do_many_cycles_total = 0; + CPU.prototype.do_many_cycles = function() { // Capture the total time we were executing instructions this.wm.exports["_profiler_start"](P_DO_MANY_CYCLES); this.coverage_logger.log_start(); + if(ENABLE_PROFILER) + { + var start_time = v86.microtick(); + } + try { this.do_many_cycles_unsafe(); } @@ -1184,6 +1194,12 @@ CPU.prototype.do_many_cycles = function() this.exception_cleanup(e); } + if(ENABLE_PROFILER) + { + do_many_cycles_total += v86.microtick() - start_time; + do_many_cycles_count++; + } + this.coverage_logger.log_end(); this.wm.exports["_profiler_end"](P_DO_MANY_CYCLES); this.wm.exports["_profiler_end"](P_GEN_INSTR); diff --git a/src/native/cpu.c b/src/native/cpu.c index 319b7885..c242a8c2 100644 --- a/src/native/cpu.c +++ b/src/native/cpu.c @@ -1176,6 +1176,7 @@ static void jit_find_basic_blocks() current_block->next_block_addr = 0; current_block->end_addr = *instruction_pointer; current_block->condition_index = -1; + profiler_stat_increment(S_COMPILE_CUT_OFF_AT_END_OF_PAGE); break; } @@ -1518,6 +1519,8 @@ static void jit_generate(uint32_t phys_addr) { struct basic_block* block = &basic_blocks.blocks[i]; + profiler_stat_increment(S_COMPILE_BASIC_BLOCK); + if(block->is_entry_block && block->addr != block->end_addr) { uint32_t phys_addr = translate_address_read(block->addr); @@ -1537,6 +1540,7 @@ static void jit_generate(uint32_t phys_addr) #endif entry_point_count++; + profiler_stat_increment(S_COMPILE_ENTRY_POINT); } } @@ -1568,6 +1572,7 @@ void jit_force_generate_unsafe(uint32_t phys_addr) void cycle_internal() { + profiler_stat_increment(S_CYCLE_INTERNAL); #if ENABLE_JIT *previous_ip = *instruction_pointer; @@ -1624,6 +1629,24 @@ void cycle_internal() } else { + if(entry) + { + assert(entry->pending); + profiler_stat_increment(S_RUN_INTERPRETED_PENDING); + } + else if(is_near_end_of_page(phys_addr)) + { + profiler_stat_increment(S_RUN_INTERPRETED_NEAR_END_OF_PAGE); + } + else if(!did_block_boundary) + { + profiler_stat_increment(S_RUN_INTERPRETED_NO_BLOCK_BOUNDARY); + } + else + { + profiler_stat_increment(S_RUN_INTERPRETED_NOT_HOT); + } + jit_run_interpreted(phys_addr); } } @@ -1681,6 +1704,8 @@ jit_instr_flags segment_prefix_op_jit(int32_t seg) void do_many_cycles_unsafe() { + profiler_stat_increment(S_DO_MANY_CYCLES); + #if 0 for(int32_t k = 0; k < LOOP_COUNTER; k++) #else diff --git a/src/native/memory.c b/src/native/memory.c index 7b0e789e..a9857594 100644 --- a/src/native/memory.c +++ b/src/native/memory.c @@ -8,6 +8,7 @@ #include "js_imports.h" #include "log.h" #include "memory.h" +#include "profiler/profiler.h" bool in_mapped_range(uint32_t addr) { @@ -21,9 +22,11 @@ void jit_dirty_index(uint32_t index) if(cache_array_index != JIT_CACHE_ARRAY_NO_NEXT_ENTRY) { page_first_jit_cache_entry[index] = JIT_CACHE_ARRAY_NO_NEXT_ENTRY; + profiler_stat_increment(S_INVALIDATE_PAGE); do { + profiler_stat_increment(S_INVALIDATE_CACHE_ENTRY); struct code_cache* entry = &jit_cache_arr[cache_array_index]; assert(same_page(index << DIRTY_ARR_SHIFT, entry->start_addr)); @@ -150,6 +153,12 @@ int32_t jit_get_entry_address(int32_t i) return jit_cache_arr[i].start_addr; } +int32_t jit_get_entry_pending(int32_t i) +{ + assert(i >= 0 && i < JIT_CACHE_ARRAY_SIZE); + return jit_cache_arr[i].pending; +} + int32_t read8(uint32_t addr) { if(USE_A20 && *a20_enabled) addr &= A20_MASK; diff --git a/src/native/profiler/profiler.c b/src/native/profiler/profiler.c index 1fe84cee..6d74518d 100644 --- a/src/native/profiler/profiler.c +++ b/src/native/profiler/profiler.c @@ -101,6 +101,12 @@ void profiler_stat_increment(enum stat_name stat) profiler_stat_arr[stat].count++; } +// to be called from JS +void profiler_stat_increment_do_run() +{ + profiler_stat_increment(S_DO_RUN); +} + int32_t profiler_stat_get(enum stat_name stat) { return profiler_stat_arr[stat].count; @@ -116,6 +122,7 @@ void profiler_print(void) {} int32_t profiler_get_time(enum profile_name name) { UNUSED(name); return 0; } int32_t profiler_get_total(void) { return 0; } void profiler_stat_increment(enum stat_name stat) { UNUSED(stat); } +void profiler_stat_increment_do_run() {} int32_t profiler_stat_get(enum stat_name stat) { UNUSED(stat); return 0; } diff --git a/src/native/profiler/profiler.h b/src/native/profiler/profiler.h index 434c3f3c..4d402625 100644 --- a/src/native/profiler/profiler.h +++ b/src/native/profiler/profiler.h @@ -22,14 +22,29 @@ enum profile_name { }; -#define PROFILER_STAT_COUNT 13 - enum stat_name { S_COMPILE, S_COMPILE_SUCCESS, + S_COMPILE_CUT_OFF_AT_END_OF_PAGE, + S_COMPILE_BASIC_BLOCK, + S_COMPILE_ENTRY_POINT, + S_RUN_INTERPRETED, + S_RUN_INTERPRETED_PENDING, + S_RUN_INTERPRETED_NEAR_END_OF_PAGE, + S_RUN_INTERPRETED_NO_BLOCK_BOUNDARY, + S_RUN_INTERPRETED_NOT_HOT, + S_RUN_FROM_CACHE, S_CACHE_MISMATCH, + + S_DO_RUN, + S_DO_MANY_CYCLES, + S_CYCLE_INTERNAL, + + S_INVALIDATE_PAGE, + S_INVALIDATE_CACHE_ENTRY, + S_NONFAULTING_OPTIMIZATION, S_CLEAR_TLB, @@ -37,6 +52,7 @@ enum stat_name { S_TLB_FULL, S_TLB_GLOBAL_FULL, }; +#define PROFILER_STAT_COUNT (S_TLB_GLOBAL_FULL - S_COMPILE + 1) struct profiler_stat { int32_t count;