Helper functions for logging coverage

Notes: - The coverage_dump_loop doesn't really help much, but it helps keep the memory usage a bit down for longer running tests (eg. booting Linux) - Linux still slows down too much and can't callibrate its time to boot - Dumping the incoming data without structuring by file results in very large files (~150-300 mb for the nasm tests). Structuring by fn_id/fn_name slows execution down, but allows for more manageable coverage data - Buffering data in memory and writing to disk synchronously is faster / about the same speed as async or buffering + async IO.
2018-01-24 11:17:02 +05:30 · 2018-01-24 11:17:02 +05:30 · 1a7172feab
parent 96ec4e0457
commit 1a7172feab
6 changed files with 159 additions and 2 deletions
--- a/src/browser/starter.js
+++ b/src/browser/starter.js
@ -96,6 +96,51 @@ function V86Starter(options)
    var cpu;
    var mem;
    var mem8;
+
+    const is_env_node = v86util.check_env_node();
+
+    function coverage_log(fn_name_offset, num_blocks, visited_block) {
+        // This may be called before cpu is even created, through the Codegen initialization
+        if(DEBUG && is_env_node && cpu && cpu.wm && cpu.should_log_coverage[0])
+        {
+            // fn_name_offset is an offset in the data section to where the string of the function
+            // name is stored, so it varies by memoryBase, whereas cov_* objects' fn_id doesn't
+            const fn_id = fn_name_offset - cpu.wm.imports.env["memoryBase"];
+            if(!(fn_id in cpu.cov_visit_logs))
+            {
+                // Static functions may not be "discovered" in coverage_init - we currently simply
+                // skip them
+                return;
+            }
+            const log_pos = cpu.cov_pos[fn_id];
+            const existing_entry = cpu.cov_visit_logs[fn_id].indexOf(visited_block);
+            if((existing_entry > -1 && existing_entry < log_pos) || num_blocks > 0xFF)
+            {
+                // If we'd like to profile frequency of code visited, we should be using counters
+                // instead. This approach was simply faster to measure coverage.
+                return;
+            }
+
+            cpu.cov_total_blocks[fn_id] = num_blocks;
+            cpu.cov_visit_logs[fn_id][log_pos] = visited_block;
+            cpu.cov_pos[fn_id]++;
+
+            if(log_pos >= cpu.cov_visit_logs[fn_id].length - 1)
+            {
+                cpu.coverage_dump();
+                cpu.cov_pos[fn_id] = 0;
+            }
+        }
+    }
+
+    if(DEBUG)
+    {
+        this.bus.register("emulator-stopped", function()
+        {
+            cpu.coverage_dump();
+        }, this);
+    }
+
    var wasm_shared_funcs = {
        "___assert_fail": (condition, file, line, fun) => {
            const memory = mem8;
@ -221,6 +266,7 @@ function V86Starter(options)
        "_get_time": () => Date.now(),

        "_codegen_finalize": (cache_index, virt_start, start, end) => cpu.codegen_finalize(cache_index, virt_start, start, end),
+        "_coverage_log": coverage_log,

        // see https://github.com/kripken/emscripten/blob/incoming/src/library.js
        "_atan2": Math.atan2,
--- a/src/const.js
+++ b/src/const.js
@ -370,3 +370,8 @@ const INTERNAL_MEM_SIZE = 4096 + 0x100000 * 6;

 /** @const */
 const WASM_PAGE_SIZE = 64 * 1024;
+
+/** @const */
+const COVERAGE_EXPORT_PREFIX = "___profn_";
+/** @const */
+const COVERAGE_FILE_PREFIX = "cov_data";
--- a/src/cpu.js
+++ b/src/cpu.js
@ -17,6 +17,7 @@ function CPU(bus, wm, codegen)
    this.codegen = codegen;
    this.wasm_patch(wm);
    this.create_jit_imports();
+    this.coverage_init();

    this.memory_size = new Uint32Array(wm.memory.buffer, 812, 1);

@ -272,6 +273,90 @@ CPU.prototype.create_jit_imports = function()
    this.jit_imports = imports;
 };

+const is_env_node = v86util.check_env_node();
+const fs = is_env_node && require("fs");
+const path = is_env_node && require("path");
+const Buffer = is_env_node && require("buffer")["Buffer"];
+
+CPU.prototype.coverage_dump = function()
+{
+    if(!DEBUG)
+    {
+        return;
+    }
+
+    for(let fn_id of Object.keys(this.cov_fn_map))
+    {
+        if(this.cov_pos[fn_id])
+        {
+            const fn_name = this.cov_fn_map[fn_id];
+            const total_blocks = this.cov_total_blocks[fn_id];
+            const filename = path.join(
+                __dirname,
+                `${COVERAGE_FILE_PREFIX}_${fn_name}_${total_blocks}`
+            );
+            // XXX: Experiment more with async I/O - preliminarily it seemed to choke the nasm test
+            // even when limiting max files open simultaneously
+            fs["appendFileSync"](
+                filename,
+                Buffer.from(this.cov_visit_logs[fn_id].buffer, 0, this.cov_pos[fn_id])
+            );
+            this.cov_visit_logs[fn_id].fill(0);
+            this.cov_pos[fn_id] = 0;
+        }
+    }
+};
+
+CPU.prototype.coverage_dump_loop = function()
+{
+    if(!DEBUG)
+    {
+        return;
+    }
+
+    const self = this;
+    this.coverage_dump();
+    setTimeout(function()
+    {
+        self.coverage_dump_loop();
+    }, 2000);
+};
+
+CPU.prototype.coverage_init = function()
+{
+    if(!DEBUG)
+    {
+        return;
+    }
+
+    // fn_id -> func_name
+    this.cov_fn_map = {};
+    // fn_id -> block_covered ([0,2,3])
+    this.cov_visit_logs = {};
+    // Position within cov_visit_logs from where to append data
+    this.cov_pos = {};
+    // Total number of conditional blocks in fn_id
+    this.cov_total_blocks = {};
+
+    for(let name of Object.keys(this.wm.exports))
+    {
+        if(name.startsWith(COVERAGE_EXPORT_PREFIX))
+        {
+            const fn_id = this.wm.exports[name];
+            this.cov_fn_map[fn_id] = name.slice(COVERAGE_EXPORT_PREFIX.length);
+            this.cov_visit_logs[fn_id] = new Uint8Array(8);
+            this.cov_pos[fn_id] = 0;
+            this.cov_total_blocks[fn_id] = 0;
+        }
+    }
+
+    this.should_log_coverage = new Int32Array(this.wm.memory.buffer, 1128, 1);
+    this.should_log_coverage[0] = 0;
+
+    // XXX: Periodically dump all logs - this helps a speed longer tests up a bit, but it isn't enough
+    this.coverage_dump_loop();
+};
+
 CPU.prototype.wasm_patch = function(wm)
 {
    this.add = this.wm.exports['_add'];
--- a/src/native/global_pointers.h
+++ b/src/native/global_pointers.h
@ -94,4 +94,6 @@ static double_t* const fpu_st = (double_t* const) 968;
 static uint8_t* const fpu_st8 = (uint8_t* const) 968;
 static int32_t* const fpu_st32 = (int32_t* const) 968;

+static int32_t* const should_log_coverage = (int32_t* const) 1128;
+
 static union reg64* const reg_mmx = (union reg64* const) 1064; // length 64
--- a/src/native/instructions.c
+++ b/src/native/instructions.c
@ -1431,10 +1431,9 @@ void instr32_FF_5_mem(int32_t addr)
 }
 DEFINE_MODRM_INSTR1_READ32(instr32_FF_6, push32(___))

-
-
 void run_instruction(int32_t opcode)
 {
+    should_log_coverage[0] = 1;
    // XXX: This table is generated. Don't modify
 switch(opcode)
 {
@ -6305,6 +6304,8 @@ switch(opcode)
    default:
        assert(false);
 }
+
+    should_log_coverage[0] = 0;
 }

 void jit_instruction(int32_t opcode)
--- a/src/native/llvm-prof.ll
+++ b/src/native/llvm-prof.ll
@ -0,0 +1,18 @@
+target datalayout = "e-p:32:32-i64:64-v128:32:128-n32-S128"
+target triple = "asmjs-unknown-emscripten"
+
+declare void @coverage_log(i8* %arg1, i32 %arg2, i32 %arg3)
+
+; See:
+;   - https://llvm.org/doxygen/classllvm_1_1InstrProfIncrementInst.html#details
+;   - https://llvm.org/doxygen/IntrinsicInst_8h_source.html#l00715
+;   - https://llvm.org/docs/CoverageMappingFormat.html#coverage-mapping-counter
+
+; LLVM wouldn't let us redefine an intrinsic (llvm.instrprof.increment), but we can override it
+; anyway because Emscripten converts it to the following:
+
+define void @llvm_instrprof_increment(i8* %func_name, i64 %func_hash, i32 %num_counters, i32 %index)
+{
+  call void @coverage_log(i8* %func_name, i32 %num_counters, i32 %index)
+  ret void
+}